From 7a6d81929582dff1a0a58933b3579d4dfbe3b110 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 15 Jul 2025 09:35:33 -0700 Subject: [PATCH 01/12] Test the JIT stencils build process --- Lib/test/test_jit_stencils.py | 49 +++++ Tools/jit/_targets.py | 23 ++- Tools/jit/build.py | 8 + Tools/jit/test/test_executor_cases.c.h | 27 +++ .../test_jit_stencils-aarch64-apple-darwin.h | 0 ...est_jit_stencils-aarch64-pc-windows-msvc.h | 0 ...t_jit_stencils-aarch64-unknown-linux-gnu.h | 192 ++++++++++++++++++ .../test_jit_stencils-i686-pc-windows-msvc.h | 0 .../test_jit_stencils-x86_64-apple-darwin.h | 0 ...test_jit_stencils-x86_64-pc-windows-msvc.h | 0 ...st_jit_stencils-x86_64-unknown-linux-gnu.h | 154 ++++++++++++++ 11 files changed, 446 insertions(+), 7 deletions(-) create mode 100644 Lib/test/test_jit_stencils.py create mode 100644 Tools/jit/test/test_executor_cases.c.h create mode 100644 Tools/jit/test/test_jit_stencils-aarch64-apple-darwin.h create mode 100644 Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h create mode 100644 Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h create mode 100644 Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h create mode 100644 Tools/jit/test/test_jit_stencils-x86_64-apple-darwin.h create mode 100644 Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h create mode 100644 Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h diff --git a/Lib/test/test_jit_stencils.py b/Lib/test/test_jit_stencils.py new file mode 100644 index 00000000000000..2a96f24d744d25 --- /dev/null +++ b/Lib/test/test_jit_stencils.py @@ -0,0 +1,49 @@ + +import pathlib +import shlex +import sys +import sysconfig +import tempfile +import test.support +import unittest + +import test.support.script_helper + + +_CPYTHON = pathlib.Path(test.support.REPO_ROOT).resolve() +_TOOLS_JIT = _CPYTHON / "Tools" / "jit" +_TOOLS_JIT_TEST = _TOOLS_JIT / "test" +_TOOLS_JIT_BUILD_PY = _TOOLS_JIT / "build.py" + +@unittest.skipIf(test.support.Py_DEBUG, "XXX") +@unittest.skipUnless(sys._jit.is_available(), "XXX") +@unittest.skipIf(test.support.Py_GIL_DISABLED, "XXX") +@unittest.skipUnless(sysconfig.is_python_build(), "XXX") +class TestJITStencils(unittest.TestCase): + + def test_jit_stencils(self): + self.maxDiff = None + found = False + pyconfig_dir = pathlib.Path(sysconfig.get_config_h_filename()).parent + with tempfile.TemporaryDirectory() as work: + output_dir = pathlib.Path(work).resolve() + for test_jit_stencils_h in sorted(_TOOLS_JIT_TEST.glob("test_jit_stencils-*.h")): + target = test_jit_stencils_h.stem.removeprefix("test_jit_stencils-") + jit_stencils_h = output_dir / f"jit_stencils-{target}.h" + with self.subTest(target): + # relative = jit_stencils_h.relative_to(_CPYTHON) + result, args = test.support.script_helper.run_python_until_end( + _TOOLS_JIT_BUILD_PY, + "--input-file", _TOOLS_JIT_TEST / "test_executor_cases.c.h", + "--output-dir", output_dir, + "--pyconfig-dir", pyconfig_dir, + target, + __isolated=False + ) + if result.rc: + self.skipTest(shlex.join(map(str, args))) + found = True + expected = test_jit_stencils_h.read_text() + actual = "".join(jit_stencils_h.read_text().splitlines(True)[3:]) + self.assertEqual(expected, actual) + self.assertTrue(found, "No JIT stencil tests run!") diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 3883671e92aa39..b14ef6670d76b4 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -51,6 +51,7 @@ class _Target(typing.Generic[_S, _R]): verbose: bool = False cflags: str = "" known_symbols: dict[str, int] = dataclasses.field(default_factory=dict) + input_file: pathlib.Path = PYTHON_EXECUTOR_CASES_C_H pyconfig_dir: pathlib.Path = pathlib.Path.cwd().resolve() def _get_nop(self) -> bytes: @@ -68,7 +69,7 @@ def _compute_digest(self) -> str: hasher.update(self.debug.to_bytes()) hasher.update(self.cflags.encode()) # These dependencies are also reflected in _JITSources in regen.targets: - hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes()) + hasher.update(self.input_file.read_bytes()) hasher.update((self.pyconfig_dir / "pyconfig.h").read_bytes()) for dirpath, _, filenames in sorted(os.walk(TOOLS_JIT)): for filename in filenames: @@ -82,10 +83,16 @@ async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup: if output is not None: # Make sure that full paths don't leak out (for reproducibility): long, short = str(path), str(path.name) - group.code.disassembly.extend( - line.expandtabs().strip().replace(long, short) - for line in output.splitlines() - ) + lines = output.splitlines() + started = False + for line in lines: + if not started: + if "_JIT_ENTRY" not in line: + continue + started = True + cleaned = line.replace(long, short).expandtabs().strip() + if cleaned: + group.code.disassembly.append(cleaned) args = [ "--elf-output-style=JSON", "--expand-relocs", @@ -181,10 +188,12 @@ async def _compile( return await self._parse(o) async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: - generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text() + generated_cases = self.input_file.read_text() cases_and_opnames = sorted( re.findall( - r"\n {8}(case (\w+): \{\n.*?\n {8}\})", generated_cases, flags=re.DOTALL + r"^ {8}(case (\w+): \{\n.*?\n {8}\})", + generated_cases, + flags=re.DOTALL | re.MULTILINE, ) ) tasks = [] diff --git a/Tools/jit/build.py b/Tools/jit/build.py index a0733005929bf2..b0122850e2de3a 100644 --- a/Tools/jit/build.py +++ b/Tools/jit/build.py @@ -22,6 +22,12 @@ parser.add_argument( "-f", "--force", action="store_true", help="force the entire JIT to be rebuilt" ) + parser.add_argument( + "-i", + "--input-file", + help="where to find the generated executor cases", + type=lambda p: pathlib.Path(p).resolve(), + ) parser.add_argument( "-o", "--output-dir", @@ -48,6 +54,8 @@ target.force = args.force target.verbose = args.verbose target.cflags = args.cflags + if args.input_file is not None: + target.input_file = args.input_file target.pyconfig_dir = args.pyconfig_dir target.build( comment=comment, diff --git a/Tools/jit/test/test_executor_cases.c.h b/Tools/jit/test/test_executor_cases.c.h new file mode 100644 index 00000000000000..7ce624682a5ee3 --- /dev/null +++ b/Tools/jit/test/test_executor_cases.c.h @@ -0,0 +1,27 @@ + case 0: { + break; + } + + case 1: { + if (CURRENT_OPARG()) { + JUMP_TO_JUMP_TARGET(); + } + break; + } + + case 2: { + if (CURRENT_OPARG()) { + JUMP_TO_ERROR(); + } + break; + } + + case 3: { + GOTO_TIER_ONE((void *)CURRENT_OPERAND0() + CURRENT_TARGET()); + break; + } + + case 4: { + GOTO_TIER_TWO((void *)CURRENT_OPERAND1()); + break; + } \ No newline at end of file diff --git a/Tools/jit/test/test_jit_stencils-aarch64-apple-darwin.h b/Tools/jit/test/test_jit_stencils-aarch64-apple-darwin.h new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h b/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h new file mode 100644 index 00000000000000..6dd8486e1f0942 --- /dev/null +++ b/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h @@ -0,0 +1,192 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 6db63bef stp d15, d14, [sp, #-0xa0]! + // 4: a90857f6 stp x22, x21, [sp, #0x80] + // 8: aa0103f5 mov x21, x1 + // c: aa0203f6 mov x22, x2 + // 10: a9094ff4 stp x20, x19, [sp, #0x90] + // 14: aa0003f4 mov x20, x0 + // 18: 6d0133ed stp d13, d12, [sp, #0x10] + // 1c: 6d022beb stp d11, d10, [sp, #0x20] + // 20: 6d0323e9 stp d9, d8, [sp, #0x30] + // 24: a9047bfd stp x29, x30, [sp, #0x40] + // 28: 910103fd add x29, sp, #0x40 + // 2c: a9056ffc stp x28, x27, [sp, #0x50] + // 30: a90667fa stp x26, x25, [sp, #0x60] + // 34: a9075ff8 stp x24, x23, [sp, #0x70] + // 38: 9400000c bl 0x68 <_JIT_ENTRY+0x68> + // 3c: a9494ff4 ldp x20, x19, [sp, #0x90] + // 40: a94857f6 ldp x22, x21, [sp, #0x80] + // 44: a9475ff8 ldp x24, x23, [sp, #0x70] + // 48: a94667fa ldp x26, x25, [sp, #0x60] + // 4c: a9456ffc ldp x28, x27, [sp, #0x50] + // 50: a9447bfd ldp x29, x30, [sp, #0x40] + // 54: 6d4323e9 ldp d9, d8, [sp, #0x30] + // 58: 6d422beb ldp d11, d10, [sp, #0x20] + // 5c: 6d4133ed ldp d13, d12, [sp, #0x10] + // 60: 6cca3bef ldp d15, d14, [sp], #0xa0 + // 64: d65f03c0 ret + const unsigned char code_body[104] = { + 0xef, 0x3b, 0xb6, 0x6d, 0xf6, 0x57, 0x08, 0xa9, + 0xf5, 0x03, 0x01, 0xaa, 0xf6, 0x03, 0x02, 0xaa, + 0xf4, 0x4f, 0x09, 0xa9, 0xf4, 0x03, 0x00, 0xaa, + 0xed, 0x33, 0x01, 0x6d, 0xeb, 0x2b, 0x02, 0x6d, + 0xe9, 0x23, 0x03, 0x6d, 0xfd, 0x7b, 0x04, 0xa9, + 0xfd, 0x03, 0x01, 0x91, 0xfc, 0x6f, 0x05, 0xa9, + 0xfa, 0x67, 0x06, 0xa9, 0xf8, 0x5f, 0x07, 0xa9, + 0x0c, 0x00, 0x00, 0x94, 0xf4, 0x4f, 0x49, 0xa9, + 0xf6, 0x57, 0x48, 0xa9, 0xf8, 0x5f, 0x47, 0xa9, + 0xfa, 0x67, 0x46, 0xa9, 0xfc, 0x6f, 0x45, 0xa9, + 0xfd, 0x7b, 0x44, 0xa9, 0xe9, 0x23, 0x43, 0x6d, + 0xeb, 0x2b, 0x42, 0x6d, 0xed, 0x33, 0x41, 0x6d, + 0xef, 0x3b, 0xca, 0x6c, 0xc0, 0x03, 0x5f, 0xd6, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000000: R_AARCH64_ADR_GOT_PAGE _JIT_OPARG + // 4: f9400108 ldr x8, [x8] + // 0000000000000004: R_AARCH64_LD64_GOT_LO12_NC _JIT_OPARG + // 8: 72003d1f tst w8, #0xffff + // c: 54000040 b.eq 0x14 <_JIT_ENTRY+0x14> + // 10: 14000000 b 0x10 <_JIT_ENTRY+0x10> + // 0000000000000010: R_AARCH64_JUMP26 _JIT_JUMP_TARGET + const unsigned char code_body[20] = { + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x1f, 0x3d, 0x00, 0x72, 0x40, 0x00, 0x00, 0x54, + 0x00, 0x00, 0x00, 0x14, + }; + // 0: OPARG + patch_64(data + 0x0, instruction->oparg); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x0, (uintptr_t)data); + patch_aarch64_26r(code + 0x10, state->instruction_starts[instruction->jump_target]); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000000: R_AARCH64_ADR_GOT_PAGE _JIT_OPARG + // 4: f9400108 ldr x8, [x8] + // 0000000000000004: R_AARCH64_LD64_GOT_LO12_NC _JIT_OPARG + // 8: 72003d1f tst w8, #0xffff + // c: 54000040 b.eq 0x14 <_JIT_ENTRY+0x14> + // 10: 14000000 b 0x10 <_JIT_ENTRY+0x10> + // 0000000000000010: R_AARCH64_JUMP26 _JIT_ERROR_TARGET + const unsigned char code_body[20] = { + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x1f, 0x3d, 0x00, 0x72, 0x40, 0x00, 0x00, 0x54, + 0x00, 0x00, 0x00, 0x14, + }; + // 0: OPARG + patch_64(data + 0x0, instruction->oparg); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x0, (uintptr_t)data); + patch_aarch64_26r(code + 0x10, state->instruction_starts[instruction->error_target]); +} + +void +emit_3( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000000: R_AARCH64_ADR_GOT_PAGE _JIT_TARGET + // 4: 90000009 adrp x9, 0x0 <_JIT_ENTRY> + // 0000000000000004: R_AARCH64_ADR_GOT_PAGE _JIT_OPERAND0 + // 8: f9400108 ldr x8, [x8] + // 0000000000000008: R_AARCH64_LD64_GOT_LO12_NC _JIT_TARGET + // c: f9400129 ldr x9, [x9] + // 000000000000000c: R_AARCH64_LD64_GOT_LO12_NC _JIT_OPERAND0 + // 10: f9008adf str xzr, [x22, #0x110] + // 14: f9002295 str x21, [x20, #0x40] + // 18: 8b284120 add x0, x9, w8, uxtw + // 1c: d65f03c0 ret + const unsigned char code_body[32] = { + 0x08, 0x00, 0x00, 0x90, 0x09, 0x00, 0x00, 0x90, + 0x08, 0x01, 0x40, 0xf9, 0x29, 0x01, 0x40, 0xf9, + 0xdf, 0x8a, 0x00, 0xf9, 0x95, 0x22, 0x00, 0xf9, + 0x20, 0x41, 0x28, 0x8b, 0xc0, 0x03, 0x5f, 0xd6, + }; + // 0: TARGET + // 8: OPERAND0 + patch_64(data + 0x0, instruction->target); + patch_64(data + 0x8, instruction->operand0); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_21rx(code + 0x0, (uintptr_t)data); + patch_aarch64_21rx(code + 0x4, (uintptr_t)data + 0x8); + patch_aarch64_12x(code + 0x8, (uintptr_t)data); + patch_aarch64_12x(code + 0xc, (uintptr_t)data + 0x8); +} + +void +emit_4( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000000: R_AARCH64_ADR_GOT_PAGE _JIT_OPERAND1 + // 4: f9400108 ldr x8, [x8] + // 0000000000000004: R_AARCH64_LD64_GOT_LO12_NC _JIT_OPERAND1 + // 8: f9403d00 ldr x0, [x8, #0x78] + // c: f9008ac8 str x8, [x22, #0x110] + // 10: d61f0000 br x0 + const unsigned char code_body[20] = { + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x00, 0x3d, 0x40, 0xf9, 0xc8, 0x8a, 0x00, 0xf9, + 0x00, 0x00, 0x1f, 0xd6, + }; + // 0: OPERAND1 + patch_64(data + 0x0, instruction->operand1); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x0, (uintptr_t)data); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 104, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 0, 0, {0}}, + [1] = {emit_1, 20, 8, {0}}, + [2] = {emit_2, 20, 8, {0}}, + [3] = {emit_3, 32, 16, {0}}, + [4] = {emit_4, 20, 8, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +}; diff --git a/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Tools/jit/test/test_jit_stencils-x86_64-apple-darwin.h b/Tools/jit/test/test_jit_stencils-x86_64-apple-darwin.h new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h b/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h new file mode 100644 index 00000000000000..fb2decfc97497d --- /dev/null +++ b/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h @@ -0,0 +1,154 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 41 57 pushq %r15 + // 2: 41 56 pushq %r14 + // 4: 41 55 pushq %r13 + // 6: 41 54 pushq %r12 + // 8: 53 pushq %rbx + // 9: 49 89 fc movq %rdi, %r12 + // c: 49 89 f5 movq %rsi, %r13 + // f: 49 89 d6 movq %rdx, %r14 + // 12: e8 0a 00 00 00 callq 0x21 <_JIT_ENTRY+0x21> + // 17: 5b popq %rbx + // 18: 41 5c popq %r12 + // 1a: 41 5d popq %r13 + // 1c: 41 5e popq %r14 + // 1e: 41 5f popq %r15 + // 20: c3 retq + const unsigned char code_body[33] = { + 0x41, 0x57, 0x41, 0x56, 0x41, 0x55, 0x41, 0x54, + 0x53, 0x49, 0x89, 0xfc, 0x49, 0x89, 0xf5, 0x49, + 0x89, 0xd6, 0xe8, 0x0a, 0x00, 0x00, 0x00, 0x5b, + 0x41, 0x5c, 0x41, 0x5d, 0x41, 0x5e, 0x41, 0x5f, + 0xc3, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 0000000000000002: R_X86_64_64 _JIT_OPARG + // a: 66 85 c0 testw %ax, %ax + // d: 0f 85 00 00 00 00 jne 0x13 <_JIT_ENTRY+0x13> + // 000000000000000f: R_X86_64_PLT32 _JIT_JUMP_TARGET-0x4 + const unsigned char code_body[19] = { + 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x66, 0x85, 0xc0, 0x0f, 0x85, + }; + memcpy(code, code_body, sizeof(code_body)); + patch_64(code + 0x2, instruction->oparg); + patch_32r(code + 0xf, state->instruction_starts[instruction->jump_target] + -0x4); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 0000000000000002: R_X86_64_64 _JIT_OPARG + // a: 66 85 c0 testw %ax, %ax + // d: 0f 85 00 00 00 00 jne 0x13 <_JIT_ENTRY+0x13> + // 000000000000000f: R_X86_64_PLT32 _JIT_ERROR_TARGET-0x4 + const unsigned char code_body[19] = { + 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x66, 0x85, 0xc0, 0x0f, 0x85, + }; + memcpy(code, code_body, sizeof(code_body)); + patch_64(code + 0x2, instruction->oparg); + patch_32r(code + 0xf, state->instruction_starts[instruction->error_target] + -0x4); +} + +void +emit_3( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 49 c7 86 10 01 00 00 00 00 00 00 movq $0x0, 0x110(%r14) + // b: 4d 89 6c 24 40 movq %r13, 0x40(%r12) + // 10: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 0000000000000012: R_X86_64_64 _JIT_TARGET + // 1a: 89 c1 movl %eax, %ecx + // 1c: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 000000000000001e: R_X86_64_64 _JIT_OPERAND0 + // 26: 48 01 c8 addq %rcx, %rax + // 29: c3 retq + const unsigned char code_body[42] = { + 0x49, 0xc7, 0x86, 0x10, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x4d, 0x89, 0x6c, 0x24, 0x40, + 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x89, 0xc1, 0x48, 0xb8, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x01, + 0xc8, 0xc3, + }; + memcpy(code, code_body, sizeof(code_body)); + patch_64(code + 0x12, instruction->target); + patch_64(code + 0x1e, instruction->operand0); +} + +void +emit_4( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 0000000000000002: R_X86_64_64 _JIT_OPERAND1 + // a: 49 89 86 10 01 00 00 movq %rax, 0x110(%r14) + // 11: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 0000000000000013: R_X86_64_64 _JIT_OPERAND1+0x78 + // 1b: 48 8b 00 movq (%rax), %rax + // 1e: ff e0 jmpq *%rax + const unsigned char code_body[32] = { + 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x49, 0x89, 0x86, 0x10, 0x01, 0x00, + 0x00, 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x48, 0x8b, 0x00, 0xff, 0xe0, + }; + memcpy(code, code_body, sizeof(code_body)); + patch_64(code + 0x2, instruction->operand1); + patch_64(code + 0x13, instruction->operand1 + 0x78); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 33, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 0, 0, {0}}, + [1] = {emit_1, 19, 0, {0}}, + [2] = {emit_2, 19, 0, {0}}, + [3] = {emit_3, 42, 0, {0}}, + [4] = {emit_4, 32, 0, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +}; From a322ad46cdf45e5a94ef92e476a2e47d64e26a1e Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 16 Jul 2025 13:23:08 -0700 Subject: [PATCH 02/12] Cleanup --- Lib/test/test_jit_stencils.py | 80 ++++++---- Tools/jit/_targets.py | 7 + Tools/jit/test/test_executor_cases.c.h | 28 ++-- ...t_jit_stencils-aarch64-unknown-linux-gnu.h | 150 +++++++----------- ...st_jit_stencils-x86_64-unknown-linux-gnu.h | 105 ++++-------- 5 files changed, 166 insertions(+), 204 deletions(-) diff --git a/Lib/test/test_jit_stencils.py b/Lib/test/test_jit_stencils.py index 2a96f24d744d25..94beecc9eed8d3 100644 --- a/Lib/test/test_jit_stencils.py +++ b/Lib/test/test_jit_stencils.py @@ -1,49 +1,67 @@ - import pathlib import shlex import sys import sysconfig import tempfile import test.support -import unittest - import test.support.script_helper - +import unittest _CPYTHON = pathlib.Path(test.support.REPO_ROOT).resolve() _TOOLS_JIT = _CPYTHON / "Tools" / "jit" _TOOLS_JIT_TEST = _TOOLS_JIT / "test" +_TOOLS_JIT_TEST_TEST_EXECUTOR_CASES_C_H = _TOOLS_JIT_TEST / "test_executor_cases.c.h" _TOOLS_JIT_BUILD_PY = _TOOLS_JIT / "build.py" -@unittest.skipIf(test.support.Py_DEBUG, "XXX") -@unittest.skipUnless(sys._jit.is_available(), "XXX") -@unittest.skipIf(test.support.Py_GIL_DISABLED, "XXX") -@unittest.skipUnless(sysconfig.is_python_build(), "XXX") + +@test.support.cpython_only +@unittest.skipIf(test.support.Py_DEBUG, "Debug stencils aren't tested.") +@unittest.skipIf(test.support.Py_GIL_DISABLED, "Free-threaded stencils aren't tested.") +@unittest.skipUnless(sysconfig.is_python_build(), "Requires a local Python build.") class TestJITStencils(unittest.TestCase): + def _build_jit_stencils(self, target: str) -> str: + with tempfile.TemporaryDirectory() as work: + jit_stencils_h = pathlib.Path(work, f"jit_stencils-{target}.h").resolve() + pyconfig_h = pathlib.Path(sysconfig.get_config_h_filename()).resolve() + result, args = test.support.script_helper.run_python_until_end( + _TOOLS_JIT_BUILD_PY, + "--input-file", _TOOLS_JIT_TEST_TEST_EXECUTOR_CASES_C_H, + "--output-dir", jit_stencils_h.parent, + "--pyconfig-dir", pyconfig_h.parent, + target, + __isolated=False, + ) + if result.rc: + self.skipTest(f"Build failed: {shlex.join(map(str, args))}") + body = jit_stencils_h.read_text() + # Strip out two lines of header comments: + _, _, body = body.split("\n", 2) + return body + + def _check_jit_stencils( + self, expected: str, actual: str, test_jit_stencils_h: pathlib.Path + ) -> None: + try: + self.assertEqual(expected.strip("\n"), actual.strip("\n")) + except AssertionError as e: + # Make it easy to re-validate the expected output: + relative = test_jit_stencils_h.relative_to(_CPYTHON) + message = f"If this is expected, replace {relative} with:" + banner = "=" * len(message) + e.add_note("\n".join([banner, message, banner])) + e.add_note(actual) + raise + def test_jit_stencils(self): self.maxDiff = None found = False - pyconfig_dir = pathlib.Path(sysconfig.get_config_h_filename()).parent - with tempfile.TemporaryDirectory() as work: - output_dir = pathlib.Path(work).resolve() - for test_jit_stencils_h in sorted(_TOOLS_JIT_TEST.glob("test_jit_stencils-*.h")): - target = test_jit_stencils_h.stem.removeprefix("test_jit_stencils-") - jit_stencils_h = output_dir / f"jit_stencils-{target}.h" - with self.subTest(target): - # relative = jit_stencils_h.relative_to(_CPYTHON) - result, args = test.support.script_helper.run_python_until_end( - _TOOLS_JIT_BUILD_PY, - "--input-file", _TOOLS_JIT_TEST / "test_executor_cases.c.h", - "--output-dir", output_dir, - "--pyconfig-dir", pyconfig_dir, - target, - __isolated=False - ) - if result.rc: - self.skipTest(shlex.join(map(str, args))) - found = True - expected = test_jit_stencils_h.read_text() - actual = "".join(jit_stencils_h.read_text().splitlines(True)[3:]) - self.assertEqual(expected, actual) - self.assertTrue(found, "No JIT stencil tests run!") + for test_jit_stencils_h in _TOOLS_JIT_TEST.glob("test_jit_stencils-*.h"): + target = test_jit_stencils_h.stem.removeprefix("test_jit_stencils-") + with self.subTest(target): + expected = test_jit_stencils_h.read_text() + actual = self._build_jit_stencils(target) + found = True + self._check_jit_stencils(expected, actual, test_jit_stencils_h) + # This is a local build. If the JIT is available, at least one test should run: + assert found or not sys._jit.is_available(), "No JIT stencils built!" diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index b14ef6670d76b4..60850377d5486e 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -566,36 +566,43 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO: optimizer: type[_optimizers.Optimizer] target: _COFF32 | _COFF64 | _ELF | _MachO if re.fullmatch(r"aarch64-apple-darwin.*", host): + host = "aarch64-apple-darwin" condition = "defined(__aarch64__) && defined(__APPLE__)" optimizer = _optimizers.OptimizerAArch64 target = _MachO(host, condition, optimizer=optimizer) elif re.fullmatch(r"aarch64-pc-windows-msvc", host): + host = "aarch64-pc-windows-msvc" args = ["-fms-runtime-lib=dll", "-fplt"] condition = "defined(_M_ARM64)" optimizer = _optimizers.OptimizerAArch64 target = _COFF64(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"aarch64-.*-linux-gnu", host): + host = "aarch64-unknown-linux-gnu" # -mno-outline-atomics: Keep intrinsics from being emitted. args = ["-fpic", "-mno-outline-atomics"] condition = "defined(__aarch64__) && defined(__linux__)" optimizer = _optimizers.OptimizerAArch64 target = _ELF(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"i686-pc-windows-msvc", host): + host = "i686-pc-windows-msvc" # -Wno-ignored-attributes: __attribute__((preserve_none)) is not supported here. args = ["-DPy_NO_ENABLE_SHARED", "-Wno-ignored-attributes"] optimizer = _optimizers.OptimizerX86 condition = "defined(_M_IX86)" target = _COFF32(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"x86_64-apple-darwin.*", host): + host = "x86_64-apple-darwin" condition = "defined(__x86_64__) && defined(__APPLE__)" optimizer = _optimizers.OptimizerX86 target = _MachO(host, condition, optimizer=optimizer) elif re.fullmatch(r"x86_64-pc-windows-msvc", host): + host = "x86_64-pc-windows-msvc" args = ["-fms-runtime-lib=dll"] condition = "defined(_M_X64)" optimizer = _optimizers.OptimizerX86 target = _COFF64(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"x86_64-.*-linux-gnu", host): + host = "x86_64-unknown-linux-gnu" args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"] condition = "defined(__x86_64__) && defined(__linux__)" optimizer = _optimizers.OptimizerX86 diff --git a/Tools/jit/test/test_executor_cases.c.h b/Tools/jit/test/test_executor_cases.c.h index 7ce624682a5ee3..496380cfc081a5 100644 --- a/Tools/jit/test/test_executor_cases.c.h +++ b/Tools/jit/test/test_executor_cases.c.h @@ -1,27 +1,29 @@ case 0: { + // Zero-length jumps should be removed: break; } case 1: { - if (CURRENT_OPARG()) { - JUMP_TO_JUMP_TARGET(); + // -Os duplicates less code than -O3: + PyAPI_DATA(bool) sausage; + PyAPI_DATA(bool) spammed; + PyAPI_FUNC(void) order_eggs_and_bacon(void); + PyAPI_FUNC(void) order_eggs_sausage_and_bacon(void); + if (!sausage) { + order_eggs_and_bacon(); } + else { + order_eggs_sausage_and_bacon(); + } + spammed = false; break; } case 2: { - if (CURRENT_OPARG()) { + // The assembly optimizer inverts hot branches: + PyAPI_DATA(bool) spam; + if (spam) { JUMP_TO_ERROR(); } break; } - - case 3: { - GOTO_TIER_ONE((void *)CURRENT_OPERAND0() + CURRENT_TARGET()); - break; - } - - case 4: { - GOTO_TIER_TWO((void *)CURRENT_OPERAND1()); - break; - } \ No newline at end of file diff --git a/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h b/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h index 6dd8486e1f0942..42b36c0b8b7d6a 100644 --- a/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h +++ b/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h @@ -61,109 +61,81 @@ emit_1( const _PyUOpInstruction *instruction, jit_state *state) { // 0000000000000000 <_JIT_ENTRY>: - // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> - // 0000000000000000: R_AARCH64_ADR_GOT_PAGE _JIT_OPARG - // 4: f9400108 ldr x8, [x8] - // 0000000000000004: R_AARCH64_LD64_GOT_LO12_NC _JIT_OPARG - // 8: 72003d1f tst w8, #0xffff - // c: 54000040 b.eq 0x14 <_JIT_ENTRY+0x14> - // 10: 14000000 b 0x10 <_JIT_ENTRY+0x10> - // 0000000000000010: R_AARCH64_JUMP26 _JIT_JUMP_TARGET - const unsigned char code_body[20] = { + // 0: a9bf7bfd stp x29, x30, [sp, #-0x10]! + // 4: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000004: R_AARCH64_ADR_GOT_PAGE sausage + // 8: 910003fd mov x29, sp + // c: f9400108 ldr x8, [x8] + // 000000000000000c: R_AARCH64_LD64_GOT_LO12_NC sausage + // 10: 39400108 ldrb w8, [x8] + // 14: 36000088 tbz w8, #0x0, 0x24 <_JIT_ENTRY+0x24> + // 18: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000018: R_AARCH64_ADR_GOT_PAGE order_eggs_sausage_and_bacon + // 1c: f9400108 ldr x8, [x8] + // 000000000000001c: R_AARCH64_LD64_GOT_LO12_NC order_eggs_sausage_and_bacon + // 20: 14000003 b 0x2c <_JIT_ENTRY+0x2c> + // 24: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000024: R_AARCH64_ADR_GOT_PAGE order_eggs_and_bacon + // 28: f9400108 ldr x8, [x8] + // 0000000000000028: R_AARCH64_LD64_GOT_LO12_NC order_eggs_and_bacon + // 2c: d63f0100 blr x8 + // 30: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000030: R_AARCH64_ADR_GOT_PAGE spammed + // 34: f9400108 ldr x8, [x8] + // 0000000000000034: R_AARCH64_LD64_GOT_LO12_NC spammed + // 38: 3900011f strb wzr, [x8] + // 3c: a8c17bfd ldp x29, x30, [sp], #0x10 + const unsigned char code_body[64] = { + 0xfd, 0x7b, 0xbf, 0xa9, 0x08, 0x00, 0x00, 0x90, + 0xfd, 0x03, 0x00, 0x91, 0x08, 0x01, 0x40, 0xf9, + 0x08, 0x01, 0x40, 0x39, 0x88, 0x00, 0x00, 0x36, 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, - 0x1f, 0x3d, 0x00, 0x72, 0x40, 0x00, 0x00, 0x54, - 0x00, 0x00, 0x00, 0x14, - }; - // 0: OPARG - patch_64(data + 0x0, instruction->oparg); - memcpy(code, code_body, sizeof(code_body)); - patch_aarch64_33rx(code + 0x0, (uintptr_t)data); - patch_aarch64_26r(code + 0x10, state->instruction_starts[instruction->jump_target]); -} - -void -emit_2( - unsigned char *code, unsigned char *data, _PyExecutorObject *executor, - const _PyUOpInstruction *instruction, jit_state *state) -{ - // 0000000000000000 <_JIT_ENTRY>: - // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> - // 0000000000000000: R_AARCH64_ADR_GOT_PAGE _JIT_OPARG - // 4: f9400108 ldr x8, [x8] - // 0000000000000004: R_AARCH64_LD64_GOT_LO12_NC _JIT_OPARG - // 8: 72003d1f tst w8, #0xffff - // c: 54000040 b.eq 0x14 <_JIT_ENTRY+0x14> - // 10: 14000000 b 0x10 <_JIT_ENTRY+0x10> - // 0000000000000010: R_AARCH64_JUMP26 _JIT_ERROR_TARGET - const unsigned char code_body[20] = { + 0x03, 0x00, 0x00, 0x14, 0x08, 0x00, 0x00, 0x90, + 0x08, 0x01, 0x40, 0xf9, 0x00, 0x01, 0x3f, 0xd6, 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, - 0x1f, 0x3d, 0x00, 0x72, 0x40, 0x00, 0x00, 0x54, - 0x00, 0x00, 0x00, 0x14, + 0x1f, 0x01, 0x00, 0x39, 0xfd, 0x7b, 0xc1, 0xa8, }; - // 0: OPARG - patch_64(data + 0x0, instruction->oparg); + // 0: &sausage+0x0 + // 8: &order_eggs_sausage_and_bacon+0x0 + // 10: &order_eggs_and_bacon+0x0 + // 18: &spammed+0x0 + patch_64(data + 0x0, (uintptr_t)&sausage); + patch_64(data + 0x8, (uintptr_t)&order_eggs_sausage_and_bacon); + patch_64(data + 0x10, (uintptr_t)&order_eggs_and_bacon); + patch_64(data + 0x18, (uintptr_t)&spammed); memcpy(code, code_body, sizeof(code_body)); - patch_aarch64_33rx(code + 0x0, (uintptr_t)data); - patch_aarch64_26r(code + 0x10, state->instruction_starts[instruction->error_target]); + patch_aarch64_21rx(code + 0x4, (uintptr_t)data); + patch_aarch64_12x(code + 0xc, (uintptr_t)data); + patch_aarch64_33rx(code + 0x18, (uintptr_t)data + 0x8); + patch_aarch64_33rx(code + 0x24, (uintptr_t)data + 0x10); + patch_aarch64_33rx(code + 0x30, (uintptr_t)data + 0x18); } void -emit_3( - unsigned char *code, unsigned char *data, _PyExecutorObject *executor, - const _PyUOpInstruction *instruction, jit_state *state) -{ - // 0000000000000000 <_JIT_ENTRY>: - // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> - // 0000000000000000: R_AARCH64_ADR_GOT_PAGE _JIT_TARGET - // 4: 90000009 adrp x9, 0x0 <_JIT_ENTRY> - // 0000000000000004: R_AARCH64_ADR_GOT_PAGE _JIT_OPERAND0 - // 8: f9400108 ldr x8, [x8] - // 0000000000000008: R_AARCH64_LD64_GOT_LO12_NC _JIT_TARGET - // c: f9400129 ldr x9, [x9] - // 000000000000000c: R_AARCH64_LD64_GOT_LO12_NC _JIT_OPERAND0 - // 10: f9008adf str xzr, [x22, #0x110] - // 14: f9002295 str x21, [x20, #0x40] - // 18: 8b284120 add x0, x9, w8, uxtw - // 1c: d65f03c0 ret - const unsigned char code_body[32] = { - 0x08, 0x00, 0x00, 0x90, 0x09, 0x00, 0x00, 0x90, - 0x08, 0x01, 0x40, 0xf9, 0x29, 0x01, 0x40, 0xf9, - 0xdf, 0x8a, 0x00, 0xf9, 0x95, 0x22, 0x00, 0xf9, - 0x20, 0x41, 0x28, 0x8b, 0xc0, 0x03, 0x5f, 0xd6, - }; - // 0: TARGET - // 8: OPERAND0 - patch_64(data + 0x0, instruction->target); - patch_64(data + 0x8, instruction->operand0); - memcpy(code, code_body, sizeof(code_body)); - patch_aarch64_21rx(code + 0x0, (uintptr_t)data); - patch_aarch64_21rx(code + 0x4, (uintptr_t)data + 0x8); - patch_aarch64_12x(code + 0x8, (uintptr_t)data); - patch_aarch64_12x(code + 0xc, (uintptr_t)data + 0x8); -} - -void -emit_4( +emit_2( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { // 0000000000000000 <_JIT_ENTRY>: // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> - // 0000000000000000: R_AARCH64_ADR_GOT_PAGE _JIT_OPERAND1 + // 0000000000000000: R_AARCH64_ADR_GOT_PAGE spam // 4: f9400108 ldr x8, [x8] - // 0000000000000004: R_AARCH64_LD64_GOT_LO12_NC _JIT_OPERAND1 - // 8: f9403d00 ldr x0, [x8, #0x78] - // c: f9008ac8 str x8, [x22, #0x110] - // 10: d61f0000 br x0 - const unsigned char code_body[20] = { + // 0000000000000004: R_AARCH64_LD64_GOT_LO12_NC spam + // 8: 39400108 ldrb w8, [x8] + // c: 7100051f cmp w8, #0x1 + // 10: 54000041 b.ne 0x18 <_JIT_ENTRY+0x18> + // 14: 14000000 b 0x14 <_JIT_ENTRY+0x14> + // 0000000000000014: R_AARCH64_JUMP26 _JIT_ERROR_TARGET + const unsigned char code_body[24] = { 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, - 0x00, 0x3d, 0x40, 0xf9, 0xc8, 0x8a, 0x00, 0xf9, - 0x00, 0x00, 0x1f, 0xd6, + 0x08, 0x01, 0x40, 0x39, 0x1f, 0x05, 0x00, 0x71, + 0x41, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x14, }; - // 0: OPERAND1 - patch_64(data + 0x0, instruction->operand1); + // 0: &spam+0x0 + patch_64(data + 0x0, (uintptr_t)&spam); memcpy(code, code_body, sizeof(code_body)); patch_aarch64_33rx(code + 0x0, (uintptr_t)data); + patch_aarch64_26r(code + 0x14, state->instruction_starts[instruction->error_target]); } static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); @@ -181,10 +153,8 @@ static const StencilGroup shim = {emit_shim, 104, 0, {0}}; static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { [0] = {emit_0, 0, 0, {0}}, - [1] = {emit_1, 20, 8, {0}}, - [2] = {emit_2, 20, 8, {0}}, - [3] = {emit_3, 32, 16, {0}}, - [4] = {emit_4, 20, 8, {0}}, + [1] = {emit_1, 64, 32, {0}}, + [2] = {emit_2, 24, 8, {0}}, }; static const void * const symbols_map[1] = { diff --git a/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h b/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h index fb2decfc97497d..e4de3a1dfb6b8f 100644 --- a/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h +++ b/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h @@ -42,18 +42,37 @@ emit_1( const _PyUOpInstruction *instruction, jit_state *state) { // 0000000000000000 <_JIT_ENTRY>: - // 0: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax - // 0000000000000002: R_X86_64_64 _JIT_OPARG - // a: 66 85 c0 testw %ax, %ax - // d: 0f 85 00 00 00 00 jne 0x13 <_JIT_ENTRY+0x13> - // 000000000000000f: R_X86_64_PLT32 _JIT_JUMP_TARGET-0x4 - const unsigned char code_body[19] = { - 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x66, 0x85, 0xc0, 0x0f, 0x85, + // 0: 50 pushq %rax + // 1: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 0000000000000003: R_X86_64_64 sausage + // b: 80 38 00 cmpb $0x0, (%rax) + // e: 74 08 je 0x18 <_JIT_ENTRY+0x18> + // 10: ff 15 00 00 00 00 callq *(%rip) # 0x16 <_JIT_ENTRY+0x16> + // 0000000000000012: R_X86_64_GOTPCRELX order_eggs_sausage_and_bacon-0x4 + // 16: eb 06 jmp 0x1e <_JIT_ENTRY+0x1e> + // 18: ff 15 00 00 00 00 callq *(%rip) # 0x1e <_JIT_ENTRY+0x1e> + // 000000000000001a: R_X86_64_GOTPCRELX order_eggs_and_bacon-0x4 + // 1e: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 0000000000000020: R_X86_64_64 spammed + // 28: c6 00 00 movb $0x0, (%rax) + // 2b: 58 popq %rax + const unsigned char code_body[44] = { + 0x50, 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x80, 0x38, 0x00, 0x74, 0x08, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0xeb, 0x06, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0x48, 0xb8, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xc6, 0x00, 0x00, 0x58, }; + // 0: &order_eggs_sausage_and_bacon+0x0 + // 8: &order_eggs_and_bacon+0x0 + patch_64(data + 0x0, (uintptr_t)&order_eggs_sausage_and_bacon); + patch_64(data + 0x8, (uintptr_t)&order_eggs_and_bacon); memcpy(code, code_body, sizeof(code_body)); - patch_64(code + 0x2, instruction->oparg); - patch_32r(code + 0xf, state->instruction_starts[instruction->jump_target] + -0x4); + patch_64(code + 0x3, (uintptr_t)&sausage); + patch_x86_64_32rx(code + 0x12, (uintptr_t)data + -0x4); + patch_x86_64_32rx(code + 0x1a, (uintptr_t)data + 0x4); + patch_64(code + 0x20, (uintptr_t)&spammed); } void @@ -63,71 +82,19 @@ emit_2( { // 0000000000000000 <_JIT_ENTRY>: // 0: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax - // 0000000000000002: R_X86_64_64 _JIT_OPARG - // a: 66 85 c0 testw %ax, %ax - // d: 0f 85 00 00 00 00 jne 0x13 <_JIT_ENTRY+0x13> + // 0000000000000002: R_X86_64_64 spam + // a: 80 38 01 cmpb $0x1, (%rax) + // d: 0f 84 00 00 00 00 je 0x13 <_JIT_ENTRY+0x13> // 000000000000000f: R_X86_64_PLT32 _JIT_ERROR_TARGET-0x4 const unsigned char code_body[19] = { 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x66, 0x85, 0xc0, 0x0f, 0x85, + 0x00, 0x00, 0x80, 0x38, 0x01, 0x0f, 0x84, }; memcpy(code, code_body, sizeof(code_body)); - patch_64(code + 0x2, instruction->oparg); + patch_64(code + 0x2, (uintptr_t)&spam); patch_32r(code + 0xf, state->instruction_starts[instruction->error_target] + -0x4); } -void -emit_3( - unsigned char *code, unsigned char *data, _PyExecutorObject *executor, - const _PyUOpInstruction *instruction, jit_state *state) -{ - // 0000000000000000 <_JIT_ENTRY>: - // 0: 49 c7 86 10 01 00 00 00 00 00 00 movq $0x0, 0x110(%r14) - // b: 4d 89 6c 24 40 movq %r13, 0x40(%r12) - // 10: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax - // 0000000000000012: R_X86_64_64 _JIT_TARGET - // 1a: 89 c1 movl %eax, %ecx - // 1c: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax - // 000000000000001e: R_X86_64_64 _JIT_OPERAND0 - // 26: 48 01 c8 addq %rcx, %rax - // 29: c3 retq - const unsigned char code_body[42] = { - 0x49, 0xc7, 0x86, 0x10, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x4d, 0x89, 0x6c, 0x24, 0x40, - 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x89, 0xc1, 0x48, 0xb8, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x01, - 0xc8, 0xc3, - }; - memcpy(code, code_body, sizeof(code_body)); - patch_64(code + 0x12, instruction->target); - patch_64(code + 0x1e, instruction->operand0); -} - -void -emit_4( - unsigned char *code, unsigned char *data, _PyExecutorObject *executor, - const _PyUOpInstruction *instruction, jit_state *state) -{ - // 0000000000000000 <_JIT_ENTRY>: - // 0: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax - // 0000000000000002: R_X86_64_64 _JIT_OPERAND1 - // a: 49 89 86 10 01 00 00 movq %rax, 0x110(%r14) - // 11: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax - // 0000000000000013: R_X86_64_64 _JIT_OPERAND1+0x78 - // 1b: 48 8b 00 movq (%rax), %rax - // 1e: ff e0 jmpq *%rax - const unsigned char code_body[32] = { - 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x49, 0x89, 0x86, 0x10, 0x01, 0x00, - 0x00, 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x48, 0x8b, 0x00, 0xff, 0xe0, - }; - memcpy(code, code_body, sizeof(code_body)); - patch_64(code + 0x2, instruction->operand1); - patch_64(code + 0x13, instruction->operand1 + 0x78); -} - static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); typedef struct { @@ -143,10 +110,8 @@ static const StencilGroup shim = {emit_shim, 33, 0, {0}}; static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { [0] = {emit_0, 0, 0, {0}}, - [1] = {emit_1, 19, 0, {0}}, + [1] = {emit_1, 44, 16, {0}}, [2] = {emit_2, 19, 0, {0}}, - [3] = {emit_3, 42, 0, {0}}, - [4] = {emit_4, 32, 0, {0}}, }; static const void * const symbols_map[1] = { From e1eb85d459b9f207f8b44a1778f2e7cbcf768694 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 16 Jul 2025 13:37:33 -0700 Subject: [PATCH 03/12] Add expected output for Windows --- ...est_jit_stencils-aarch64-pc-windows-msvc.h | 159 ++++++++++++++++ .../test_jit_stencils-i686-pc-windows-msvc.h | 128 +++++++++++++ ...test_jit_stencils-x86_64-pc-windows-msvc.h | 169 ++++++++++++++++++ 3 files changed, 456 insertions(+) diff --git a/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h index e69de29bb2d1d6..a9e71cc52d7136 100644 --- a/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h +++ b/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h @@ -0,0 +1,159 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 6db63bef stp d15, d14, [sp, #-0xa0]! + // 4: a90857f6 stp x22, x21, [sp, #0x80] + // 8: aa0103f5 mov x21, x1 + // c: aa0203f6 mov x22, x2 + // 10: a9094ff4 stp x20, x19, [sp, #0x90] + // 14: aa0003f4 mov x20, x0 + // 18: 6d0133ed stp d13, d12, [sp, #0x10] + // 1c: 6d022beb stp d11, d10, [sp, #0x20] + // 20: 6d0323e9 stp d9, d8, [sp, #0x30] + // 24: f90023fe str x30, [sp, #0x40] + // 28: a9056ffc stp x28, x27, [sp, #0x50] + // 2c: a90667fa stp x26, x25, [sp, #0x60] + // 30: a9075ff8 stp x24, x23, [sp, #0x70] + // 34: 9400000c bl 0x64 <_JIT_ENTRY+0x64> + // 38: a9494ff4 ldp x20, x19, [sp, #0x90] + // 3c: f94023fe ldr x30, [sp, #0x40] + // 40: a94857f6 ldp x22, x21, [sp, #0x80] + // 44: a9475ff8 ldp x24, x23, [sp, #0x70] + // 48: a94667fa ldp x26, x25, [sp, #0x60] + // 4c: a9456ffc ldp x28, x27, [sp, #0x50] + // 50: 6d4323e9 ldp d9, d8, [sp, #0x30] + // 54: 6d422beb ldp d11, d10, [sp, #0x20] + // 58: 6d4133ed ldp d13, d12, [sp, #0x10] + // 5c: 6cca3bef ldp d15, d14, [sp], #0xa0 + // 60: d65f03c0 ret + const unsigned char code_body[100] = { + 0xef, 0x3b, 0xb6, 0x6d, 0xf6, 0x57, 0x08, 0xa9, + 0xf5, 0x03, 0x01, 0xaa, 0xf6, 0x03, 0x02, 0xaa, + 0xf4, 0x4f, 0x09, 0xa9, 0xf4, 0x03, 0x00, 0xaa, + 0xed, 0x33, 0x01, 0x6d, 0xeb, 0x2b, 0x02, 0x6d, + 0xe9, 0x23, 0x03, 0x6d, 0xfe, 0x23, 0x00, 0xf9, + 0xfc, 0x6f, 0x05, 0xa9, 0xfa, 0x67, 0x06, 0xa9, + 0xf8, 0x5f, 0x07, 0xa9, 0x0c, 0x00, 0x00, 0x94, + 0xf4, 0x4f, 0x49, 0xa9, 0xfe, 0x23, 0x40, 0xf9, + 0xf6, 0x57, 0x48, 0xa9, 0xf8, 0x5f, 0x47, 0xa9, + 0xfa, 0x67, 0x46, 0xa9, 0xfc, 0x6f, 0x45, 0xa9, + 0xe9, 0x23, 0x43, 0x6d, 0xeb, 0x2b, 0x42, 0x6d, + 0xed, 0x33, 0x41, 0x6d, 0xef, 0x3b, 0xca, 0x6c, + 0xc0, 0x03, 0x5f, 0xd6, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: f81f0ffe str x30, [sp, #-0x10]! + // 4: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000004: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_sausage + // 8: f9400108 ldr x8, [x8] + // 0000000000000008: IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_sausage + // c: 39400108 ldrb w8, [x8] + // 10: 36000088 tbz w8, #0x0, 0x20 <_JIT_ENTRY+0x20> + // 14: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000014: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_order_eggs_sausage_and_bacon + // 18: f9400108 ldr x8, [x8] + // 0000000000000018: IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_order_eggs_sausage_and_bacon + // 1c: 14000003 b 0x28 <_JIT_ENTRY+0x28> + // 20: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000020: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_order_eggs_and_bacon + // 24: f9400108 ldr x8, [x8] + // 0000000000000024: IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_order_eggs_and_bacon + // 28: d63f0100 blr x8 + // 2c: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 000000000000002c: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_spammed + // 30: f9400108 ldr x8, [x8] + // 0000000000000030: IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_spammed + // 34: 3900011f strb wzr, [x8] + // 38: f84107fe ldr x30, [sp], #0x10 + const unsigned char code_body[60] = { + 0xfe, 0x0f, 0x1f, 0xf8, 0x08, 0x00, 0x00, 0x90, + 0x08, 0x01, 0x40, 0xf9, 0x08, 0x01, 0x40, 0x39, + 0x88, 0x00, 0x00, 0x36, 0x08, 0x00, 0x00, 0x90, + 0x08, 0x01, 0x40, 0xf9, 0x03, 0x00, 0x00, 0x14, + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x00, 0x01, 0x3f, 0xd6, 0x08, 0x00, 0x00, 0x90, + 0x08, 0x01, 0x40, 0xf9, 0x1f, 0x01, 0x00, 0x39, + 0xfe, 0x07, 0x41, 0xf8, + }; + // 0: &sausage+0x0 + // 8: &order_eggs_sausage_and_bacon+0x0 + // 10: &order_eggs_and_bacon+0x0 + // 18: &spammed+0x0 + patch_64(data + 0x0, (uintptr_t)&sausage); + patch_64(data + 0x8, (uintptr_t)&order_eggs_sausage_and_bacon); + patch_64(data + 0x10, (uintptr_t)&order_eggs_and_bacon); + patch_64(data + 0x18, (uintptr_t)&spammed); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x4, (uintptr_t)data); + patch_aarch64_33rx(code + 0x14, (uintptr_t)data + 0x8); + patch_aarch64_33rx(code + 0x20, (uintptr_t)data + 0x10); + patch_aarch64_33rx(code + 0x2c, (uintptr_t)data + 0x18); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000000: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_spam + // 4: f9400108 ldr x8, [x8] + // 0000000000000004: IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_spam + // 8: 39400108 ldrb w8, [x8] + // c: 7100051f cmp w8, #0x1 + // 10: 54000041 b.ne 0x18 <_JIT_ENTRY+0x18> + // 14: 14000000 b 0x14 <_JIT_ENTRY+0x14> + // 0000000000000014: IMAGE_REL_ARM64_BRANCH26 _JIT_ERROR_TARGET + const unsigned char code_body[24] = { + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x08, 0x01, 0x40, 0x39, 0x1f, 0x05, 0x00, 0x71, + 0x41, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x14, + }; + // 0: &spam+0x0 + patch_64(data + 0x0, (uintptr_t)&spam); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x0, (uintptr_t)data); + patch_aarch64_26r(code + 0x14, state->instruction_starts[instruction->error_target]); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 100, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 0, 0, {0}}, + [1] = {emit_1, 60, 32, {0}}, + [2] = {emit_2, 24, 8, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +}; diff --git a/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h index e69de29bb2d1d6..2ea27265604e4c 100644 --- a/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h +++ b/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h @@ -0,0 +1,128 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 00000000 <__JIT_ENTRY>: + // 0: 8b 44 24 0c movl 0xc(%esp), %eax + // 4: 8b 4c 24 08 movl 0x8(%esp), %ecx + // 8: 8b 54 24 04 movl 0x4(%esp), %edx + // c: 89 54 24 04 movl %edx, 0x4(%esp) + // 10: 89 4c 24 08 movl %ecx, 0x8(%esp) + // 14: 89 44 24 0c movl %eax, 0xc(%esp) + const unsigned char code_body[24] = { + 0x8b, 0x44, 0x24, 0x0c, 0x8b, 0x4c, 0x24, 0x08, + 0x8b, 0x54, 0x24, 0x04, 0x89, 0x54, 0x24, 0x04, + 0x89, 0x4c, 0x24, 0x08, 0x89, 0x44, 0x24, 0x0c, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 00000000 <__JIT_ENTRY>: + // 0: 53 pushl %ebx + // 1: 57 pushl %edi + // 2: 56 pushl %esi + // 3: 8b 74 24 18 movl 0x18(%esp), %esi + // 7: 8b 7c 24 14 movl 0x14(%esp), %edi + // b: 8b 5c 24 10 movl 0x10(%esp), %ebx + // f: 80 3d 00 00 00 00 00 cmpb $0x0, 0x0 + // 00000011: IMAGE_REL_I386_DIR32 _sausage + // 16: 74 07 je 0x1f <__JIT_ENTRY+0x1f> + // 18: e8 00 00 00 00 calll 0x1d <__JIT_ENTRY+0x1d> + // 00000019: IMAGE_REL_I386_REL32 _order_eggs_sausage_and_bacon + // 1d: eb 05 jmp 0x24 <__JIT_ENTRY+0x24> + // 1f: e8 00 00 00 00 calll 0x24 <__JIT_ENTRY+0x24> + // 00000020: IMAGE_REL_I386_REL32 _order_eggs_and_bacon + // 24: c6 05 00 00 00 00 00 movb $0x0, 0x0 + // 00000026: IMAGE_REL_I386_DIR32 _spammed + // 2b: 89 5c 24 10 movl %ebx, 0x10(%esp) + // 2f: 89 7c 24 14 movl %edi, 0x14(%esp) + // 33: 89 74 24 18 movl %esi, 0x18(%esp) + // 37: 5e popl %esi + // 38: 5f popl %edi + // 39: 5b popl %ebx + const unsigned char code_body[58] = { + 0x53, 0x57, 0x56, 0x8b, 0x74, 0x24, 0x18, 0x8b, + 0x7c, 0x24, 0x14, 0x8b, 0x5c, 0x24, 0x10, 0x80, + 0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74, 0x07, + 0xe8, 0x00, 0x00, 0x00, 0x00, 0xeb, 0x05, 0xe8, + 0x00, 0x00, 0x00, 0x00, 0xc6, 0x05, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x89, 0x5c, 0x24, 0x10, 0x89, + 0x7c, 0x24, 0x14, 0x89, 0x74, 0x24, 0x18, 0x5e, + 0x5f, 0x5b, + }; + memcpy(code, code_body, sizeof(code_body)); + patch_32(code + 0x11, (uintptr_t)&sausage); + patch_x86_64_32rx(code + 0x19, (uintptr_t)&order_eggs_sausage_and_bacon + -0x4); + patch_x86_64_32rx(code + 0x20, (uintptr_t)&order_eggs_and_bacon + -0x4); + patch_32(code + 0x26, (uintptr_t)&spammed); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 00000000 <__JIT_ENTRY>: + // 0: 8b 54 24 0c movl 0xc(%esp), %edx + // 4: 8b 4c 24 08 movl 0x8(%esp), %ecx + // 8: 8b 44 24 04 movl 0x4(%esp), %eax + // c: 80 3d 00 00 00 00 01 cmpb $0x1, 0x0 + // 0000000e: IMAGE_REL_I386_DIR32 _spam + // 13: 75 11 jne 0x26 <__JIT_ENTRY+0x26> + // 15: 89 54 24 0c movl %edx, 0xc(%esp) + // 19: 89 4c 24 08 movl %ecx, 0x8(%esp) + // 1d: 89 44 24 04 movl %eax, 0x4(%esp) + // 21: e9 00 00 00 00 jmp 0x26 <__JIT_ENTRY+0x26> + // 00000022: IMAGE_REL_I386_REL32 __JIT_ERROR_TARGET + // 26: 89 54 24 0c movl %edx, 0xc(%esp) + // 2a: 89 4c 24 08 movl %ecx, 0x8(%esp) + // 2e: 89 44 24 04 movl %eax, 0x4(%esp) + const unsigned char code_body[50] = { + 0x8b, 0x54, 0x24, 0x0c, 0x8b, 0x4c, 0x24, 0x08, + 0x8b, 0x44, 0x24, 0x04, 0x80, 0x3d, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x75, 0x11, 0x89, 0x54, 0x24, + 0x0c, 0x89, 0x4c, 0x24, 0x08, 0x89, 0x44, 0x24, + 0x04, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x89, 0x54, + 0x24, 0x0c, 0x89, 0x4c, 0x24, 0x08, 0x89, 0x44, + 0x24, 0x04, + }; + memcpy(code, code_body, sizeof(code_body)); + patch_32(code + 0xe, (uintptr_t)&spam); + patch_x86_64_32rx(code + 0x22, state->instruction_starts[instruction->error_target] + -0x4); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 0, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 24, 0, {0}}, + [1] = {emit_1, 58, 0, {0}}, + [2] = {emit_2, 50, 0, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +}; diff --git a/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h index e69de29bb2d1d6..8a4f58d9e397ac 100644 --- a/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h +++ b/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h @@ -0,0 +1,169 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 41 57 pushq %r15 + // 2: 41 56 pushq %r14 + // 4: 41 55 pushq %r13 + // 6: 41 54 pushq %r12 + // 8: 56 pushq %rsi + // 9: 57 pushq %rdi + // a: 53 pushq %rbx + // b: 48 81 ec a0 00 00 00 subq $0xa0, %rsp + // 12: 44 0f 29 bc 24 90 00 00 00 movaps %xmm15, 0x90(%rsp) + // 1b: 44 0f 29 b4 24 80 00 00 00 movaps %xmm14, 0x80(%rsp) + // 24: 44 0f 29 6c 24 70 movaps %xmm13, 0x70(%rsp) + // 2a: 44 0f 29 64 24 60 movaps %xmm12, 0x60(%rsp) + // 30: 44 0f 29 5c 24 50 movaps %xmm11, 0x50(%rsp) + // 36: 44 0f 29 54 24 40 movaps %xmm10, 0x40(%rsp) + // 3c: 44 0f 29 4c 24 30 movaps %xmm9, 0x30(%rsp) + // 42: 44 0f 29 44 24 20 movaps %xmm8, 0x20(%rsp) + // 48: 0f 29 7c 24 10 movaps %xmm7, 0x10(%rsp) + // 4d: 0f 29 34 24 movaps %xmm6, (%rsp) + // 51: 49 89 cc movq %rcx, %r12 + // 54: 49 89 d5 movq %rdx, %r13 + // 57: 4d 89 c6 movq %r8, %r14 + // 5a: e8 52 00 00 00 callq 0xb1 <_JIT_ENTRY+0xb1> + // 5f: 0f 28 34 24 movaps (%rsp), %xmm6 + // 63: 0f 28 7c 24 10 movaps 0x10(%rsp), %xmm7 + // 68: 44 0f 28 44 24 20 movaps 0x20(%rsp), %xmm8 + // 6e: 44 0f 28 4c 24 30 movaps 0x30(%rsp), %xmm9 + // 74: 44 0f 28 54 24 40 movaps 0x40(%rsp), %xmm10 + // 7a: 44 0f 28 5c 24 50 movaps 0x50(%rsp), %xmm11 + // 80: 44 0f 28 64 24 60 movaps 0x60(%rsp), %xmm12 + // 86: 44 0f 28 6c 24 70 movaps 0x70(%rsp), %xmm13 + // 8c: 44 0f 28 b4 24 80 00 00 00 movaps 0x80(%rsp), %xmm14 + // 95: 44 0f 28 bc 24 90 00 00 00 movaps 0x90(%rsp), %xmm15 + // 9e: 48 81 c4 a0 00 00 00 addq $0xa0, %rsp + // a5: 5b popq %rbx + // a6: 5f popq %rdi + // a7: 5e popq %rsi + // a8: 41 5c popq %r12 + // aa: 41 5d popq %r13 + // ac: 41 5e popq %r14 + // ae: 41 5f popq %r15 + // b0: c3 retq + const unsigned char code_body[177] = { + 0x41, 0x57, 0x41, 0x56, 0x41, 0x55, 0x41, 0x54, + 0x56, 0x57, 0x53, 0x48, 0x81, 0xec, 0xa0, 0x00, + 0x00, 0x00, 0x44, 0x0f, 0x29, 0xbc, 0x24, 0x90, + 0x00, 0x00, 0x00, 0x44, 0x0f, 0x29, 0xb4, 0x24, + 0x80, 0x00, 0x00, 0x00, 0x44, 0x0f, 0x29, 0x6c, + 0x24, 0x70, 0x44, 0x0f, 0x29, 0x64, 0x24, 0x60, + 0x44, 0x0f, 0x29, 0x5c, 0x24, 0x50, 0x44, 0x0f, + 0x29, 0x54, 0x24, 0x40, 0x44, 0x0f, 0x29, 0x4c, + 0x24, 0x30, 0x44, 0x0f, 0x29, 0x44, 0x24, 0x20, + 0x0f, 0x29, 0x7c, 0x24, 0x10, 0x0f, 0x29, 0x34, + 0x24, 0x49, 0x89, 0xcc, 0x49, 0x89, 0xd5, 0x4d, + 0x89, 0xc6, 0xe8, 0x52, 0x00, 0x00, 0x00, 0x0f, + 0x28, 0x34, 0x24, 0x0f, 0x28, 0x7c, 0x24, 0x10, + 0x44, 0x0f, 0x28, 0x44, 0x24, 0x20, 0x44, 0x0f, + 0x28, 0x4c, 0x24, 0x30, 0x44, 0x0f, 0x28, 0x54, + 0x24, 0x40, 0x44, 0x0f, 0x28, 0x5c, 0x24, 0x50, + 0x44, 0x0f, 0x28, 0x64, 0x24, 0x60, 0x44, 0x0f, + 0x28, 0x6c, 0x24, 0x70, 0x44, 0x0f, 0x28, 0xb4, + 0x24, 0x80, 0x00, 0x00, 0x00, 0x44, 0x0f, 0x28, + 0xbc, 0x24, 0x90, 0x00, 0x00, 0x00, 0x48, 0x81, + 0xc4, 0xa0, 0x00, 0x00, 0x00, 0x5b, 0x5f, 0x5e, + 0x41, 0x5c, 0x41, 0x5d, 0x41, 0x5e, 0x41, 0x5f, + 0xc3, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 48 83 ec 28 subq $0x28, %rsp + // 4: 48 8b 05 00 00 00 00 movq (%rip), %rax # 0xb <_JIT_ENTRY+0xb> + // 0000000000000007: IMAGE_REL_AMD64_REL32 __imp_sausage + // b: 80 38 00 cmpb $0x0, (%rax) + // e: 74 08 je 0x18 <_JIT_ENTRY+0x18> + // 10: ff 15 00 00 00 00 callq *(%rip) # 0x16 <_JIT_ENTRY+0x16> + // 0000000000000012: IMAGE_REL_AMD64_REL32 __imp_order_eggs_sausage_and_bacon + // 16: eb 06 jmp 0x1e <_JIT_ENTRY+0x1e> + // 18: ff 15 00 00 00 00 callq *(%rip) # 0x1e <_JIT_ENTRY+0x1e> + // 000000000000001a: IMAGE_REL_AMD64_REL32 __imp_order_eggs_and_bacon + // 1e: 48 8b 05 00 00 00 00 movq (%rip), %rax # 0x25 <_JIT_ENTRY+0x25> + // 0000000000000021: IMAGE_REL_AMD64_REL32 __imp_spammed + // 25: c6 00 00 movb $0x0, (%rax) + // 28: 48 83 c4 28 addq $0x28, %rsp + const unsigned char code_body[44] = { + 0x48, 0x83, 0xec, 0x28, 0x48, 0x8b, 0x05, 0x00, + 0x00, 0x00, 0x00, 0x80, 0x38, 0x00, 0x74, 0x08, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0xeb, 0x06, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8b, + 0x05, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, + 0x48, 0x83, 0xc4, 0x28, + }; + // 0: &sausage+0x0 + // 8: &order_eggs_sausage_and_bacon+0x0 + // 10: &order_eggs_and_bacon+0x0 + // 18: &spammed+0x0 + patch_64(data + 0x0, (uintptr_t)&sausage); + patch_64(data + 0x8, (uintptr_t)&order_eggs_sausage_and_bacon); + patch_64(data + 0x10, (uintptr_t)&order_eggs_and_bacon); + patch_64(data + 0x18, (uintptr_t)&spammed); + memcpy(code, code_body, sizeof(code_body)); + patch_x86_64_32rx(code + 0x7, (uintptr_t)data + -0x4); + patch_x86_64_32rx(code + 0x12, (uintptr_t)data + 0x4); + patch_x86_64_32rx(code + 0x1a, (uintptr_t)data + 0xc); + patch_x86_64_32rx(code + 0x21, (uintptr_t)data + 0x14); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0000000000000000 <_JIT_ENTRY>: + // 0: 48 8b 05 00 00 00 00 movq (%rip), %rax # 0x7 <_JIT_ENTRY+0x7> + // 0000000000000003: IMAGE_REL_AMD64_REL32 __imp_spam + // 7: 80 38 01 cmpb $0x1, (%rax) + // a: 0f 84 00 00 00 00 je 0x10 <_JIT_ENTRY+0x10> + // 000000000000000c: IMAGE_REL_AMD64_REL32 _JIT_ERROR_TARGET + const unsigned char code_body[16] = { + 0x48, 0x8b, 0x05, 0x00, 0x00, 0x00, 0x00, 0x80, + 0x38, 0x01, 0x0f, 0x84, + }; + // 0: &spam+0x0 + patch_64(data + 0x0, (uintptr_t)&spam); + memcpy(code, code_body, sizeof(code_body)); + patch_x86_64_32rx(code + 0x3, (uintptr_t)data + -0x4); + patch_x86_64_32rx(code + 0xc, state->instruction_starts[instruction->error_target] + -0x4); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 177, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 0, 0, {0}}, + [1] = {emit_1, 44, 32, {0}}, + [2] = {emit_2, 16, 8, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +}; From f4c05b3d8346b701e82aeba634d35078fa492efd Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 16 Jul 2025 13:56:03 -0700 Subject: [PATCH 04/12] Add expected output for macOS --- Lib/test/test_jit_stencils.py | 4 + Tools/jit/_targets.py | 11 +- .../test_jit_stencils-aarch64-apple-darwin.h | 149 ++++++++++++++++++ ...est_jit_stencils-aarch64-pc-windows-msvc.h | 3 - ...t_jit_stencils-aarch64-unknown-linux-gnu.h | 3 - .../test_jit_stencils-i686-pc-windows-msvc.h | 3 - .../test_jit_stencils-x86_64-apple-darwin.h | 142 +++++++++++++++++ ...test_jit_stencils-x86_64-pc-windows-msvc.h | 3 - ...st_jit_stencils-x86_64-unknown-linux-gnu.h | 3 - 9 files changed, 300 insertions(+), 21 deletions(-) diff --git a/Lib/test/test_jit_stencils.py b/Lib/test/test_jit_stencils.py index 94beecc9eed8d3..72dee974f8c6c6 100644 --- a/Lib/test/test_jit_stencils.py +++ b/Lib/test/test_jit_stencils.py @@ -65,3 +65,7 @@ def test_jit_stencils(self): self._check_jit_stencils(expected, actual, test_jit_stencils_h) # This is a local build. If the JIT is available, at least one test should run: assert found or not sys._jit.is_available(), "No JIT stencils built!" + + +if __name__ == "__main__": + unittest.main() diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 60850377d5486e..2666d31665c37b 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -86,13 +86,12 @@ async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup: lines = output.splitlines() started = False for line in lines: - if not started: - if "_JIT_ENTRY" not in line: - continue + if line.lstrip().startswith("0:"): started = True - cleaned = line.replace(long, short).expandtabs().strip() - if cleaned: - group.code.disassembly.append(cleaned) + if started: + cleaned = line.replace(long, short).expandtabs().strip() + if cleaned: + group.code.disassembly.append(cleaned) args = [ "--elf-output-style=JSON", "--expand-relocs", diff --git a/Tools/jit/test/test_jit_stencils-aarch64-apple-darwin.h b/Tools/jit/test/test_jit_stencils-aarch64-apple-darwin.h index e69de29bb2d1d6..c26310b9ab4cc4 100644 --- a/Tools/jit/test/test_jit_stencils-aarch64-apple-darwin.h +++ b/Tools/jit/test/test_jit_stencils-aarch64-apple-darwin.h @@ -0,0 +1,149 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 6db63bef stp d15, d14, [sp, #-0xa0]! + // 4: 6d0133ed stp d13, d12, [sp, #0x10] + // 8: 6d022beb stp d11, d10, [sp, #0x20] + // c: 6d0323e9 stp d9, d8, [sp, #0x30] + // 10: a9046ffc stp x28, x27, [sp, #0x40] + // 14: a90567fa stp x26, x25, [sp, #0x50] + // 18: a9065ff8 stp x24, x23, [sp, #0x60] + // 1c: a90757f6 stp x22, x21, [sp, #0x70] + // 20: a9084ff4 stp x20, x19, [sp, #0x80] + // 24: a9097bfd stp x29, x30, [sp, #0x90] + // 28: 910243fd add x29, sp, #0x90 + // 2c: aa0003f4 mov x20, x0 + // 30: aa0103f5 mov x21, x1 + // 34: aa0203f6 mov x22, x2 + // 38: 9400000c bl 0x68 + // 3c: a9497bfd ldp x29, x30, [sp, #0x90] + // 40: a9484ff4 ldp x20, x19, [sp, #0x80] + // 44: a94757f6 ldp x22, x21, [sp, #0x70] + // 48: a9465ff8 ldp x24, x23, [sp, #0x60] + // 4c: a94567fa ldp x26, x25, [sp, #0x50] + // 50: a9446ffc ldp x28, x27, [sp, #0x40] + // 54: 6d4323e9 ldp d9, d8, [sp, #0x30] + // 58: 6d422beb ldp d11, d10, [sp, #0x20] + // 5c: 6d4133ed ldp d13, d12, [sp, #0x10] + // 60: 6cca3bef ldp d15, d14, [sp], #0xa0 + // 64: d65f03c0 ret + const unsigned char code_body[104] = { + 0xef, 0x3b, 0xb6, 0x6d, 0xed, 0x33, 0x01, 0x6d, + 0xeb, 0x2b, 0x02, 0x6d, 0xe9, 0x23, 0x03, 0x6d, + 0xfc, 0x6f, 0x04, 0xa9, 0xfa, 0x67, 0x05, 0xa9, + 0xf8, 0x5f, 0x06, 0xa9, 0xf6, 0x57, 0x07, 0xa9, + 0xf4, 0x4f, 0x08, 0xa9, 0xfd, 0x7b, 0x09, 0xa9, + 0xfd, 0x43, 0x02, 0x91, 0xf4, 0x03, 0x00, 0xaa, + 0xf5, 0x03, 0x01, 0xaa, 0xf6, 0x03, 0x02, 0xaa, + 0x0c, 0x00, 0x00, 0x94, 0xfd, 0x7b, 0x49, 0xa9, + 0xf4, 0x4f, 0x48, 0xa9, 0xf6, 0x57, 0x47, 0xa9, + 0xf8, 0x5f, 0x46, 0xa9, 0xfa, 0x67, 0x45, 0xa9, + 0xfc, 0x6f, 0x44, 0xa9, 0xe9, 0x23, 0x43, 0x6d, + 0xeb, 0x2b, 0x42, 0x6d, 0xed, 0x33, 0x41, 0x6d, + 0xef, 0x3b, 0xca, 0x6c, 0xc0, 0x03, 0x5f, 0xd6, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: a9bf7bfd stp x29, x30, [sp, #-0x10]! + // 4: 910003fd mov x29, sp + // 8: 90000008 adrp x8, 0x0 + // 0000000000000008: ARM64_RELOC_GOT_LOAD_PAGE21 _sausage + // c: f9400108 ldr x8, [x8] + // 000000000000000c: ARM64_RELOC_GOT_LOAD_PAGEOFF12 _sausage + // 10: 39400108 ldrb w8, [x8] + // 14: 36000068 tbz w8, #0x0, 0x20 + // 18: 94000000 bl 0x18 + // 0000000000000018: ARM64_RELOC_BRANCH26 _order_eggs_sausage_and_bacon + // 1c: 14000002 b 0x24 + // 20: 94000000 bl 0x20 + // 0000000000000020: ARM64_RELOC_BRANCH26 _order_eggs_and_bacon + // 24: 90000008 adrp x8, 0x0 + // 0000000000000024: ARM64_RELOC_GOT_LOAD_PAGE21 _spammed + // 28: f9400108 ldr x8, [x8] + // 0000000000000028: ARM64_RELOC_GOT_LOAD_PAGEOFF12 _spammed + // 2c: 3900011f strb wzr, [x8] + // 30: a8c17bfd ldp x29, x30, [sp], #0x10 + const unsigned char code_body[52] = { + 0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x08, 0x01, 0x40, 0x39, 0x68, 0x00, 0x00, 0x36, + 0x00, 0x00, 0x00, 0x94, 0x02, 0x00, 0x00, 0x14, + 0x00, 0x00, 0x00, 0x94, 0x08, 0x00, 0x00, 0x90, + 0x08, 0x01, 0x40, 0xf9, 0x1f, 0x01, 0x00, 0x39, + 0xfd, 0x7b, 0xc1, 0xa8, + }; + // 0: &spammed+0x0 + // 8: &sausage+0x0 + patch_64(data + 0x0, (uintptr_t)&spammed); + patch_64(data + 0x8, (uintptr_t)&sausage); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x8, (uintptr_t)data + 0x8); + patch_aarch64_trampoline(code + 0x18, 0x1, state); + patch_aarch64_trampoline(code + 0x20, 0x0, state); + patch_aarch64_33rx(code + 0x24, (uintptr_t)data); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 90000008 adrp x8, 0x0 + // 0000000000000000: ARM64_RELOC_GOT_LOAD_PAGE21 _spam + // 4: f9400108 ldr x8, [x8] + // 0000000000000004: ARM64_RELOC_GOT_LOAD_PAGEOFF12 _spam + // 8: 39400108 ldrb w8, [x8] + // c: 7100051f cmp w8, #0x1 + // 10: 54000041 b.ne 0x18 + // 14: 14000000 b 0x14 + // 0000000000000014: ARM64_RELOC_BRANCH26 __JIT_ERROR_TARGET + const unsigned char code_body[24] = { + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x08, 0x01, 0x40, 0x39, 0x1f, 0x05, 0x00, 0x71, + 0x41, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x14, + }; + // 0: &spam+0x0 + patch_64(data + 0x0, (uintptr_t)&spam); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x0, (uintptr_t)data); + patch_aarch64_26r(code + 0x14, state->instruction_starts[instruction->error_target]); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 104, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 0, 0, {0}}, + [1] = {emit_1, 52, 16, {0x03}}, + [2] = {emit_2, 24, 8, {0}}, +}; + +static const void * const symbols_map[2] = { + [0] = &order_eggs_and_bacon, + [1] = &order_eggs_sausage_and_bacon, +}; diff --git a/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h index a9e71cc52d7136..b8a3afbaee133e 100644 --- a/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h +++ b/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h @@ -3,7 +3,6 @@ emit_shim( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 6db63bef stp d15, d14, [sp, #-0xa0]! // 4: a90857f6 stp x22, x21, [sp, #0x80] // 8: aa0103f5 mov x21, x1 @@ -59,7 +58,6 @@ emit_1( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: f81f0ffe str x30, [sp, #-0x10]! // 4: 90000008 adrp x8, 0x0 <_JIT_ENTRY> // 0000000000000004: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_sausage @@ -113,7 +111,6 @@ emit_2( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> // 0000000000000000: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_spam // 4: f9400108 ldr x8, [x8] diff --git a/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h b/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h index 42b36c0b8b7d6a..5e2ed0db8cc2d5 100644 --- a/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h +++ b/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h @@ -3,7 +3,6 @@ emit_shim( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 6db63bef stp d15, d14, [sp, #-0xa0]! // 4: a90857f6 stp x22, x21, [sp, #0x80] // 8: aa0103f5 mov x21, x1 @@ -60,7 +59,6 @@ emit_1( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: a9bf7bfd stp x29, x30, [sp, #-0x10]! // 4: 90000008 adrp x8, 0x0 <_JIT_ENTRY> // 0000000000000004: R_AARCH64_ADR_GOT_PAGE sausage @@ -116,7 +114,6 @@ emit_2( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> // 0000000000000000: R_AARCH64_ADR_GOT_PAGE spam // 4: f9400108 ldr x8, [x8] diff --git a/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h index 2ea27265604e4c..2f8e7d768cd78f 100644 --- a/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h +++ b/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h @@ -10,7 +10,6 @@ emit_0( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 00000000 <__JIT_ENTRY>: // 0: 8b 44 24 0c movl 0xc(%esp), %eax // 4: 8b 4c 24 08 movl 0x8(%esp), %ecx // 8: 8b 54 24 04 movl 0x4(%esp), %edx @@ -30,7 +29,6 @@ emit_1( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 00000000 <__JIT_ENTRY>: // 0: 53 pushl %ebx // 1: 57 pushl %edi // 2: 56 pushl %esi @@ -75,7 +73,6 @@ emit_2( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 00000000 <__JIT_ENTRY>: // 0: 8b 54 24 0c movl 0xc(%esp), %edx // 4: 8b 4c 24 08 movl 0x8(%esp), %ecx // 8: 8b 44 24 04 movl 0x4(%esp), %eax diff --git a/Tools/jit/test/test_jit_stencils-x86_64-apple-darwin.h b/Tools/jit/test/test_jit_stencils-x86_64-apple-darwin.h index e69de29bb2d1d6..4d6ee50c4c1f41 100644 --- a/Tools/jit/test/test_jit_stencils-x86_64-apple-darwin.h +++ b/Tools/jit/test/test_jit_stencils-x86_64-apple-darwin.h @@ -0,0 +1,142 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 55 pushq %rbp + // 1: 48 89 e5 movq %rsp, %rbp + // 4: 41 57 pushq %r15 + // 6: 41 56 pushq %r14 + // 8: 41 55 pushq %r13 + // a: 41 54 pushq %r12 + // c: 53 pushq %rbx + // d: 50 pushq %rax + // e: 49 89 fc movq %rdi, %r12 + // 11: 49 89 f5 movq %rsi, %r13 + // 14: 49 89 d6 movq %rdx, %r14 + // 17: e8 0f 00 00 00 callq 0x2b <__JIT_ENTRY+0x2b> + // 1c: 48 83 c4 08 addq $0x8, %rsp + // 20: 5b popq %rbx + // 21: 41 5c popq %r12 + // 23: 41 5d popq %r13 + // 25: 41 5e popq %r14 + // 27: 41 5f popq %r15 + // 29: 5d popq %rbp + // 2a: c3 retq + const unsigned char code_body[43] = { + 0x55, 0x48, 0x89, 0xe5, 0x41, 0x57, 0x41, 0x56, + 0x41, 0x55, 0x41, 0x54, 0x53, 0x50, 0x49, 0x89, + 0xfc, 0x49, 0x89, 0xf5, 0x49, 0x89, 0xd6, 0xe8, + 0x0f, 0x00, 0x00, 0x00, 0x48, 0x83, 0xc4, 0x08, + 0x5b, 0x41, 0x5c, 0x41, 0x5d, 0x41, 0x5e, 0x41, + 0x5f, 0x5d, 0xc3, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 55 pushq %rbp + // 1: 48 89 e5 movq %rsp, %rbp + // 4: 5d popq %rbp + const unsigned char code_body[5] = { + 0x55, 0x48, 0x89, 0xe5, 0x5d, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 55 pushq %rbp + // 1: 48 89 e5 movq %rsp, %rbp + // 4: 48 8b 05 00 00 00 00 movq (%rip), %rax ## 0xb <__JIT_ENTRY+0xb> + // 0000000000000007: X86_64_RELOC_GOT_LOAD _sausage@GOTPCREL + // b: 80 38 00 cmpb $0x0, (%rax) + // e: 74 08 je 0x18 <__JIT_ENTRY+0x18> + // 10: ff 15 00 00 00 00 callq *(%rip) ## 0x16 <__JIT_ENTRY+0x16> + // 0000000000000012: X86_64_RELOC_GOT _order_eggs_sausage_and_bacon@GOTPCREL + // 16: eb 06 jmp 0x1e <__JIT_ENTRY+0x1e> + // 18: ff 15 00 00 00 00 callq *(%rip) ## 0x1e <__JIT_ENTRY+0x1e> + // 000000000000001a: X86_64_RELOC_GOT _order_eggs_and_bacon@GOTPCREL + // 1e: 48 8b 05 00 00 00 00 movq (%rip), %rax ## 0x25 <__JIT_ENTRY+0x25> + // 0000000000000021: X86_64_RELOC_GOT_LOAD _spammed@GOTPCREL + // 25: c6 00 00 movb $0x0, (%rax) + // 28: 5d popq %rbp + const unsigned char code_body[41] = { + 0x55, 0x48, 0x89, 0xe5, 0x48, 0x8b, 0x05, 0x00, + 0x00, 0x00, 0x00, 0x80, 0x38, 0x00, 0x74, 0x08, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0xeb, 0x06, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8b, + 0x05, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, + 0x5d, + }; + // 0: &spammed+0x0 + // 8: &order_eggs_and_bacon+0x0 + // 10: &order_eggs_sausage_and_bacon+0x0 + // 18: &sausage+0x0 + patch_64(data + 0x0, (uintptr_t)&spammed); + patch_64(data + 0x8, (uintptr_t)&order_eggs_and_bacon); + patch_64(data + 0x10, (uintptr_t)&order_eggs_sausage_and_bacon); + patch_64(data + 0x18, (uintptr_t)&sausage); + memcpy(code, code_body, sizeof(code_body)); + patch_x86_64_32rx(code + 0x7, (uintptr_t)data + 0x14); + patch_x86_64_32rx(code + 0x12, (uintptr_t)data + 0xc); + patch_x86_64_32rx(code + 0x1a, (uintptr_t)data + 0x4); + patch_x86_64_32rx(code + 0x21, (uintptr_t)data + -0x4); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 55 pushq %rbp + // 1: 48 89 e5 movq %rsp, %rbp + // 4: 48 8b 05 00 00 00 00 movq (%rip), %rax ## 0xb <__JIT_ENTRY+0xb> + // 0000000000000007: X86_64_RELOC_GOT_LOAD _spam@GOTPCREL + // b: 80 38 01 cmpb $0x1, (%rax) + // e: 75 06 jne 0x16 <__JIT_ENTRY+0x16> + // 10: 5d popq %rbp + // 11: e9 00 00 00 00 jmp 0x16 <__JIT_ENTRY+0x16> + // 0000000000000012: X86_64_RELOC_BRANCH __JIT_ERROR_TARGET + // 16: 5d popq %rbp + const unsigned char code_body[23] = { + 0x55, 0x48, 0x89, 0xe5, 0x48, 0x8b, 0x05, 0x00, + 0x00, 0x00, 0x00, 0x80, 0x38, 0x01, 0x75, 0x06, + 0x5d, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x5d, + }; + // 0: &spam+0x0 + patch_64(data + 0x0, (uintptr_t)&spam); + memcpy(code, code_body, sizeof(code_body)); + patch_x86_64_32rx(code + 0x7, (uintptr_t)data + -0x4); + patch_32r(code + 0x12, state->instruction_starts[instruction->error_target] + -0x4); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 43, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 5, 0, {0}}, + [1] = {emit_1, 41, 32, {0}}, + [2] = {emit_2, 23, 8, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +}; diff --git a/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h index 8a4f58d9e397ac..356055584e4d61 100644 --- a/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h +++ b/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h @@ -3,7 +3,6 @@ emit_shim( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 41 57 pushq %r15 // 2: 41 56 pushq %r14 // 4: 41 55 pushq %r13 @@ -85,7 +84,6 @@ emit_1( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 48 83 ec 28 subq $0x28, %rsp // 4: 48 8b 05 00 00 00 00 movq (%rip), %rax # 0xb <_JIT_ENTRY+0xb> // 0000000000000007: IMAGE_REL_AMD64_REL32 __imp_sausage @@ -128,7 +126,6 @@ emit_2( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 48 8b 05 00 00 00 00 movq (%rip), %rax # 0x7 <_JIT_ENTRY+0x7> // 0000000000000003: IMAGE_REL_AMD64_REL32 __imp_spam // 7: 80 38 01 cmpb $0x1, (%rax) diff --git a/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h b/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h index e4de3a1dfb6b8f..075ecac8be499b 100644 --- a/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h +++ b/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h @@ -3,7 +3,6 @@ emit_shim( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 41 57 pushq %r15 // 2: 41 56 pushq %r14 // 4: 41 55 pushq %r13 @@ -41,7 +40,6 @@ emit_1( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 50 pushq %rax // 1: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax // 0000000000000003: R_X86_64_64 sausage @@ -80,7 +78,6 @@ emit_2( unsigned char *code, unsigned char *data, _PyExecutorObject *executor, const _PyUOpInstruction *instruction, jit_state *state) { - // 0000000000000000 <_JIT_ENTRY>: // 0: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax // 0000000000000002: R_X86_64_64 spam // a: 80 38 01 cmpb $0x1, (%rax) From 00cd7e389ac9be4cbdd12328970f0a35f587b262 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 16 Jul 2025 15:38:05 -0700 Subject: [PATCH 05/12] Is it BSS? --- Tools/jit/_targets.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 2666d31665c37b..1e4415ec1f4692 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -270,6 +270,8 @@ def _handle_section( else: # Zeroed BSS data, seen with printf debugging calls: section_data_bytes = [0] * section["RawDataSize"] + # XXX + assert section["RawDataSize"] == 0, section["RawDataSize"] if "IMAGE_SCN_MEM_EXECUTE" in flags: value = _stencils.HoleValue.CODE stencil = group.code From 202fb6f9e102f782d3c960424b5d7dc53ac755bd Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 16 Jul 2025 18:41:22 -0700 Subject: [PATCH 06/12] It *is* BSS! --- Tools/jit/_targets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 1e4415ec1f4692..96e4de5934ec1c 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -271,7 +271,7 @@ def _handle_section( # Zeroed BSS data, seen with printf debugging calls: section_data_bytes = [0] * section["RawDataSize"] # XXX - assert section["RawDataSize"] == 0, section["RawDataSize"] + assert section["RawDataSize"] == 0, (group.symbols, section["Symbols"]) if "IMAGE_SCN_MEM_EXECUTE" in flags: value = _stencils.HoleValue.CODE stencil = group.code From 4e5554cf3c504ff41ef28e35335dd4e63cf40e0d Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 17 Jul 2025 12:25:56 -0700 Subject: [PATCH 07/12] Optimistically strip writable data --- Tools/jit/_stencils.py | 23 +++++++++++++++++------ Tools/jit/_targets.py | 17 ++++++++++++----- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 1d82f5366f6ce0..840bf312383aa7 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -19,12 +19,16 @@ class HoleValue(enum.Enum): CODE = enum.auto() # The base address of the read-only data for this uop: DATA = enum.auto() + # The base address of the machine code for the error jump target (exposed as _JIT_ERROR_TARGET): + ERROR_TARGET = enum.auto() # The address of the current executor (exposed as _JIT_EXECUTOR): EXECUTOR = enum.auto() # The base address of the "global" offset table located in the read-only data. # Shouldn't be present in the final stencils, since these are all replaced with # equivalent DATA values: GOT = enum.auto() + # The base address of the machine code for the jump target (exposed as _JIT_JUMP_TARGET): + JUMP_TARGET = enum.auto() # The current uop's oparg (exposed as _JIT_OPARG): OPARG = enum.auto() # The current uop's operand0 on 64-bit platforms (exposed as _JIT_OPERAND0): @@ -39,10 +43,9 @@ class HoleValue(enum.Enum): OPERAND1_LO = enum.auto() # The current uop's target (exposed as _JIT_TARGET): TARGET = enum.auto() - # The base address of the machine code for the jump target (exposed as _JIT_JUMP_TARGET): - JUMP_TARGET = enum.auto() - # The base address of the machine code for the error jump target (exposed as _JIT_ERROR_TARGET): - ERROR_TARGET = enum.auto() + # Writable data, which we don't support! Optimistically remove their data + # from the stencil, and raise later if they're actually used: + WRITABLE = enum.auto() # A hardcoded value of zero (used for symbol lookups): ZERO = enum.auto() @@ -96,9 +99,11 @@ class HoleValue(enum.Enum): _HOLE_EXPRS = { HoleValue.CODE: "(uintptr_t)code", HoleValue.DATA: "(uintptr_t)data", + HoleValue.ERROR_TARGET: "state->instruction_starts[instruction->error_target]", HoleValue.EXECUTOR: "(uintptr_t)executor", # These should all have been turned into DATA values by process_relocations: # HoleValue.GOT: "", + HoleValue.JUMP_TARGET: "state->instruction_starts[instruction->jump_target]", HoleValue.OPARG: "instruction->oparg", HoleValue.OPERAND0: "instruction->operand0", HoleValue.OPERAND0_HI: "(instruction->operand0 >> 32)", @@ -107,8 +112,8 @@ class HoleValue(enum.Enum): HoleValue.OPERAND1_HI: "(instruction->operand1 >> 32)", HoleValue.OPERAND1_LO: "(instruction->operand1 & UINT32_MAX)", HoleValue.TARGET: "instruction->target", - HoleValue.JUMP_TARGET: "state->instruction_starts[instruction->jump_target]", - HoleValue.ERROR_TARGET: "state->instruction_starts[instruction->error_target]", + # These should all have raised an error if they were actually used: + # HoleValue.WRITABLE: "", HoleValue.ZERO: "", } @@ -246,6 +251,12 @@ def process_relocations(self, known_symbols: dict[str, int]) -> None: self.data.pad(8) for stencil in [self.code, self.data]: for hole in stencil.holes: + if hole.symbol in self.symbols: + value, _ = self.symbols[hole.symbol] + if value is HoleValue.WRITABLE: + raise ValueError( + f"Writable data ({hole.symbol}) is not supported!" + ) if hole.value is HoleValue.GOT: assert hole.symbol is not None hole.value = HoleValue.DATA diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 96e4de5934ec1c..d598419823f8ac 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -268,10 +268,8 @@ def _handle_section( if "SectionData" in section: section_data_bytes = section["SectionData"]["Bytes"] else: - # Zeroed BSS data, seen with printf debugging calls: + # Zeroed BSS data: section_data_bytes = [0] * section["RawDataSize"] - # XXX - assert section["RawDataSize"] == 0, (group.symbols, section["Symbols"]) if "IMAGE_SCN_MEM_EXECUTE" in flags: value = _stencils.HoleValue.CODE stencil = group.code @@ -280,6 +278,10 @@ def _handle_section( stencil = group.data else: return + if "IMAGE_SCN_MEM_WRITE" in flags: + assert value is _stencils.HoleValue.DATA + value = _stencils.HoleValue.WRITABLE + section_data_bytes = [] base = len(stencil.body) group.symbols[section["Number"]] = value, base stencil.body.extend(section_data_bytes) @@ -382,7 +384,7 @@ def _handle_section( if value is _stencils.HoleValue.CODE: stencil = group.code else: - assert value is _stencils.HoleValue.DATA + assert value in (_stencils.HoleValue.DATA, _stencils.HoleValue.WRITABLE) stencil = group.data for wrapped_relocation in section["Relocations"]: relocation = wrapped_relocation["Relocation"] @@ -397,6 +399,11 @@ def _handle_section( else: value = _stencils.HoleValue.DATA stencil = group.data + section_data_bytes = section["SectionData"]["Bytes"] + if "SHF_WRITE" in flags: + assert value is _stencils.HoleValue.DATA + value = _stencils.HoleValue.WRITABLE + section_data_bytes = [] group.symbols[section["Index"]] = value, len(stencil.body) for wrapped_symbol in section["Symbols"]: symbol = wrapped_symbol["Symbol"] @@ -404,7 +411,7 @@ def _handle_section( name = symbol["Name"]["Name"] name = name.removeprefix(self.symbol_prefix) group.symbols[name] = value, offset - stencil.body.extend(section["SectionData"]["Bytes"]) + stencil.body.extend(section_data_bytes) assert not section["Relocations"] else: assert section_type in { From 9db056350efd7cad9ab24f083690acf06362bf2c Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 17 Jul 2025 12:37:21 -0700 Subject: [PATCH 08/12] Ditto for ELF --- Tools/jit/_schema.py | 1 + Tools/jit/_targets.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Tools/jit/_schema.py b/Tools/jit/_schema.py index 228fc389584dd7..674c09027f11b1 100644 --- a/Tools/jit/_schema.py +++ b/Tools/jit/_schema.py @@ -102,6 +102,7 @@ class ELFSection(typing.TypedDict): Info: int Relocations: list[dict[typing.Literal["Relocation"], ELFRelocation]] SectionData: dict[typing.Literal["Bytes"], list[int]] + Size: int Symbols: list[dict[typing.Literal["Symbol"], _ELFSymbol]] Type: dict[typing.Literal["Name"], str] diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index d598419823f8ac..9ac37c2dfbd7fe 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -390,7 +390,7 @@ def _handle_section( relocation = wrapped_relocation["Relocation"] hole = self._handle_relocation(base, relocation, stencil.body) stencil.holes.append(hole) - elif section_type == "SHT_PROGBITS": + elif section_type in {"SHT_PROGBITS", "SHT_NOBITS"}: if "SHF_ALLOC" not in flags: return if "SHF_EXECINSTR" in flags: @@ -399,7 +399,11 @@ def _handle_section( else: value = _stencils.HoleValue.DATA stencil = group.data - section_data_bytes = section["SectionData"]["Bytes"] + if section_type == "SHT_PROGBITS": + section_data_bytes = section["SectionData"]["Bytes"] + else: + # Zeroed BSS data: + section_data_bytes = [0] * section["Size"] if "SHF_WRITE" in flags: assert value is _stencils.HoleValue.DATA value = _stencils.HoleValue.WRITABLE From c4c3cccfa1df8672df442fff16d8f865c2a25a57 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 17 Jul 2025 15:28:24 -0700 Subject: [PATCH 09/12] Ditto for Mach-O, and *way* simplify the parsing! --- Tools/jit/_schema.py | 4 +++- Tools/jit/_targets.py | 27 ++++++++++++++++----------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/Tools/jit/_schema.py b/Tools/jit/_schema.py index 674c09027f11b1..8f96305dbfa979 100644 --- a/Tools/jit/_schema.py +++ b/Tools/jit/_schema.py @@ -101,7 +101,7 @@ class ELFSection(typing.TypedDict): Index: int Info: int Relocations: list[dict[typing.Literal["Relocation"], ELFRelocation]] - SectionData: dict[typing.Literal["Bytes"], list[int]] + SectionData: typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]] Size: int Symbols: list[dict[typing.Literal["Symbol"], _ELFSymbol]] Type: dict[typing.Literal["Name"], str] @@ -118,4 +118,6 @@ class MachOSection(typing.TypedDict): list[dict[typing.Literal["Relocation"], MachORelocation]] ] SectionData: typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]] + Segment: dict[typing.Literal["Value"], str] + Size: int Symbols: typing.NotRequired[list[dict[typing.Literal["Symbol"], _MachOSymbol]]] diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 9ac37c2dfbd7fe..bc57c623b0acd8 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -400,6 +400,7 @@ def _handle_section( value = _stencils.HoleValue.DATA stencil = group.data if section_type == "SHT_PROGBITS": + assert "SectionData" in section section_data_bytes = section["SectionData"]["Bytes"] else: # Zeroed BSS data: @@ -474,7 +475,11 @@ def _handle_section( self, section: _schema.MachOSection, group: _stencils.StencilGroup ) -> None: assert section["Address"] >= len(group.code.body) - assert "SectionData" in section + if "SectionData" in section: + section_data_bytes = section["SectionData"]["Bytes"] + else: + # Zeroed BSS data: + section_data_bytes = [0] * section["Size"] flags = {flag["Name"] for flag in section["Attributes"]["Flags"]} name = section["Name"]["Value"] name = name.removeprefix(self.symbol_prefix) @@ -483,23 +488,23 @@ def _handle_section( if "PureInstructions" in flags: value = _stencils.HoleValue.CODE stencil = group.code - start_address = 0 - group.symbols[name] = value, section["Address"] - start_address else: value = _stencils.HoleValue.DATA stencil = group.data - start_address = len(group.code.body) - group.symbols[name] = value, len(group.code.body) - base = section["Address"] - start_address + segment = section["Segment"]["Value"] + if segment == "__DATA": + value = _stencils.HoleValue.WRITABLE + section_data_bytes = [] + else: + assert segment == "__TEXT", segment + base = len(stencil.body) + group.symbols[name] = value, base group.symbols[section["Index"]] = value, base - stencil.body.extend( - [0] * (section["Address"] - len(group.code.body) - len(group.data.body)) - ) - stencil.body.extend(section["SectionData"]["Bytes"]) + stencil.body.extend(section_data_bytes) assert "Symbols" in section for wrapped_symbol in section["Symbols"]: symbol = wrapped_symbol["Symbol"] - offset = symbol["Value"] - start_address + offset = symbol["Value"] - section["Address"] + base name = symbol["Name"]["Name"] name = name.removeprefix(self.symbol_prefix) group.symbols[name] = value, offset From c6fc7bd8a2d5c1982ad5bb290ec1356e9e90ded8 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 17 Jul 2025 16:26:21 -0700 Subject: [PATCH 10/12] Rework tests --- Lib/test/test_jit_stencils.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_jit_stencils.py b/Lib/test/test_jit_stencils.py index 72dee974f8c6c6..2a7a19a057d90f 100644 --- a/Lib/test/test_jit_stencils.py +++ b/Lib/test/test_jit_stencils.py @@ -1,9 +1,10 @@ +import asyncio import pathlib import shlex -import sys import sysconfig import tempfile import test.support +import test.test_tools import test.support.script_helper import unittest @@ -13,11 +14,13 @@ _TOOLS_JIT_TEST_TEST_EXECUTOR_CASES_C_H = _TOOLS_JIT_TEST / "test_executor_cases.c.h" _TOOLS_JIT_BUILD_PY = _TOOLS_JIT / "build.py" +test.test_tools.skip_if_missing("jit") +with test.test_tools.imports_under_tool("jit"): + import _llvm @test.support.cpython_only @unittest.skipIf(test.support.Py_DEBUG, "Debug stencils aren't tested.") @unittest.skipIf(test.support.Py_GIL_DISABLED, "Free-threaded stencils aren't tested.") -@unittest.skipUnless(sysconfig.is_python_build(), "Requires a local Python build.") class TestJITStencils(unittest.TestCase): def _build_jit_stencils(self, target: str) -> str: @@ -31,6 +34,9 @@ def _build_jit_stencils(self, target: str) -> str: "--pyconfig-dir", pyconfig_h.parent, target, __isolated=False, + # Windows leaks temporary files on failure because the JIT build + # process is async. This forces it to be "sync" for this test: + PYTHON_CPU_COUNT="1", ) if result.rc: self.skipTest(f"Build failed: {shlex.join(map(str, args))}") @@ -54,6 +60,8 @@ def _check_jit_stencils( raise def test_jit_stencils(self): + if not asyncio.run(_llvm._find_tool("clang")): + self.skipTest(f"LLVM {_llvm._LLVM_VERSION} isn't installed.") self.maxDiff = None found = False for test_jit_stencils_h in _TOOLS_JIT_TEST.glob("test_jit_stencils-*.h"): @@ -64,7 +72,7 @@ def test_jit_stencils(self): found = True self._check_jit_stencils(expected, actual, test_jit_stencils_h) # This is a local build. If the JIT is available, at least one test should run: - assert found or not sys._jit.is_available(), "No JIT stencils built!" + assert found, "No JIT stencils built!" if __name__ == "__main__": From 867a686038cdf7ffe85f7d0e39cf64e689c8a3d2 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Fri, 18 Jul 2025 12:06:33 -0700 Subject: [PATCH 11/12] Cleanup for PR --- Lib/test/test_jit_stencils.py | 3 +-- Tools/jit/_targets.py | 40 +++++++++++++++++------------------ 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/Lib/test/test_jit_stencils.py b/Lib/test/test_jit_stencils.py index 2a7a19a057d90f..5a8c4ecf28ca98 100644 --- a/Lib/test/test_jit_stencils.py +++ b/Lib/test/test_jit_stencils.py @@ -71,8 +71,7 @@ def test_jit_stencils(self): actual = self._build_jit_stencils(target) found = True self._check_jit_stencils(expected, actual, test_jit_stencils_h) - # This is a local build. If the JIT is available, at least one test should run: - assert found, "No JIT stencils built!" + self.assertTrue(found, "No JIT stencils built!") if __name__ == "__main__": diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index bc57c623b0acd8..29c5cad9e7eedc 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -393,30 +393,30 @@ def _handle_section( elif section_type in {"SHT_PROGBITS", "SHT_NOBITS"}: if "SHF_ALLOC" not in flags: return + if "SectionData" in section: + section_data_bytes = section["SectionData"]["Bytes"] + else: + # Zeroed BSS data: + section_data_bytes = [0] * section["Size"] if "SHF_EXECINSTR" in flags: value = _stencils.HoleValue.CODE stencil = group.code else: value = _stencils.HoleValue.DATA stencil = group.data - if section_type == "SHT_PROGBITS": - assert "SectionData" in section - section_data_bytes = section["SectionData"]["Bytes"] - else: - # Zeroed BSS data: - section_data_bytes = [0] * section["Size"] if "SHF_WRITE" in flags: assert value is _stencils.HoleValue.DATA value = _stencils.HoleValue.WRITABLE section_data_bytes = [] - group.symbols[section["Index"]] = value, len(stencil.body) + base = len(stencil.body) + group.symbols[section["Index"]] = value, base + stencil.body.extend(section_data_bytes) for wrapped_symbol in section["Symbols"]: symbol = wrapped_symbol["Symbol"] - offset = len(stencil.body) + symbol["Value"] + offset = base + symbol["Value"] name = symbol["Name"]["Name"] name = name.removeprefix(self.symbol_prefix) group.symbols[name] = value, offset - stencil.body.extend(section_data_bytes) assert not section["Relocations"] else: assert section_type in { @@ -474,15 +474,12 @@ class _MachO( def _handle_section( self, section: _schema.MachOSection, group: _stencils.StencilGroup ) -> None: - assert section["Address"] >= len(group.code.body) if "SectionData" in section: section_data_bytes = section["SectionData"]["Bytes"] else: # Zeroed BSS data: section_data_bytes = [0] * section["Size"] flags = {flag["Name"] for flag in section["Attributes"]["Flags"]} - name = section["Name"]["Value"] - name = name.removeprefix(self.symbol_prefix) if "Debug" in flags: return if "PureInstructions" in flags: @@ -492,19 +489,20 @@ def _handle_section( value = _stencils.HoleValue.DATA stencil = group.data segment = section["Segment"]["Value"] + assert segment in {"__DATA", "__TEXT"}, segment if segment == "__DATA": value = _stencils.HoleValue.WRITABLE section_data_bytes = [] - else: - assert segment == "__TEXT", segment base = len(stencil.body) - group.symbols[name] = value, base group.symbols[section["Index"]] = value, base stencil.body.extend(section_data_bytes) + name = section["Name"]["Value"] + name = name.removeprefix(self.symbol_prefix) + group.symbols[name] = value, base assert "Symbols" in section for wrapped_symbol in section["Symbols"]: symbol = wrapped_symbol["Symbol"] - offset = symbol["Value"] - section["Address"] + base + offset = base + symbol["Value"] - section["Address"] name = symbol["Name"]["Name"] name = name.removeprefix(self.symbol_prefix) group.symbols[name] = value, offset @@ -589,23 +587,23 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO: target = _MachO(host, condition, optimizer=optimizer) elif re.fullmatch(r"aarch64-pc-windows-msvc", host): host = "aarch64-pc-windows-msvc" - args = ["-fms-runtime-lib=dll", "-fplt"] condition = "defined(_M_ARM64)" + args = ["-fms-runtime-lib=dll", "-fplt"] optimizer = _optimizers.OptimizerAArch64 target = _COFF64(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"aarch64-.*-linux-gnu", host): host = "aarch64-unknown-linux-gnu" + condition = "defined(__aarch64__) && defined(__linux__)" # -mno-outline-atomics: Keep intrinsics from being emitted. args = ["-fpic", "-mno-outline-atomics"] - condition = "defined(__aarch64__) && defined(__linux__)" optimizer = _optimizers.OptimizerAArch64 target = _ELF(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"i686-pc-windows-msvc", host): host = "i686-pc-windows-msvc" + condition = "defined(_M_IX86)" # -Wno-ignored-attributes: __attribute__((preserve_none)) is not supported here. args = ["-DPy_NO_ENABLE_SHARED", "-Wno-ignored-attributes"] optimizer = _optimizers.OptimizerX86 - condition = "defined(_M_IX86)" target = _COFF32(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"x86_64-apple-darwin.*", host): host = "x86_64-apple-darwin" @@ -614,14 +612,14 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO: target = _MachO(host, condition, optimizer=optimizer) elif re.fullmatch(r"x86_64-pc-windows-msvc", host): host = "x86_64-pc-windows-msvc" - args = ["-fms-runtime-lib=dll"] condition = "defined(_M_X64)" + args = ["-fms-runtime-lib=dll"] optimizer = _optimizers.OptimizerX86 target = _COFF64(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"x86_64-.*-linux-gnu", host): host = "x86_64-unknown-linux-gnu" - args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"] condition = "defined(__x86_64__) && defined(__linux__)" + args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"] optimizer = _optimizers.OptimizerX86 target = _ELF(host, condition, args=args, optimizer=optimizer) else: From 7c9c3ff450334e856ff6092ef0d2b1ab34960777 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Mon, 21 Jul 2025 12:22:42 -0700 Subject: [PATCH 12/12] Check for all of the tools --- Lib/test/test_jit_stencils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_jit_stencils.py b/Lib/test/test_jit_stencils.py index 5a8c4ecf28ca98..db836272353bb6 100644 --- a/Lib/test/test_jit_stencils.py +++ b/Lib/test/test_jit_stencils.py @@ -14,9 +14,14 @@ _TOOLS_JIT_TEST_TEST_EXECUTOR_CASES_C_H = _TOOLS_JIT_TEST / "test_executor_cases.c.h" _TOOLS_JIT_BUILD_PY = _TOOLS_JIT / "build.py" +# Skip this test if either the JIT build scripts or the needed LLVM utilities +# are missing: test.test_tools.skip_if_missing("jit") with test.test_tools.imports_under_tool("jit"): import _llvm +for tool in ["clang", "llvm-objdump", "llvm-readobj"]: + if not asyncio.run(_llvm._find_tool(tool)): + raise unittest.SkipTest(f"{tool} {_llvm._LLVM_VERSION} isn't installed.") @test.support.cpython_only @unittest.skipIf(test.support.Py_DEBUG, "Debug stencils aren't tested.") @@ -60,8 +65,6 @@ def _check_jit_stencils( raise def test_jit_stencils(self): - if not asyncio.run(_llvm._find_tool("clang")): - self.skipTest(f"LLVM {_llvm._LLVM_VERSION} isn't installed.") self.maxDiff = None found = False for test_jit_stencils_h in _TOOLS_JIT_TEST.glob("test_jit_stencils-*.h"): pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy