From bdf7dd8a4f3d07b6864373a9a6d6b4132da8ccfb Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sat, 30 Mar 2024 19:44:09 -0700 Subject: [PATCH 1/9] Use GHC calling convention on i686 and x86_64 --- Include/cpython/optimizer.h | 1 + Lib/test/test_perf_profiler.py | 2 +- Python/jit.c | 47 +++++++++++++++++++++++----------- Python/optimizer.c | 2 ++ Tools/jit/_targets.py | 40 ++++++++++++++++++++++------- Tools/jit/_writer.py | 4 +++ Tools/jit/template.c | 4 +-- Tools/jit/trampoline.c | 17 ++++++++++++ 8 files changed, 90 insertions(+), 27 deletions(-) create mode 100644 Tools/jit/trampoline.c diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 819251a25bb242..fa10a0ee6974ed 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -101,6 +101,7 @@ typedef struct _PyExecutorObject { uint32_t code_size; size_t jit_size; void *jit_code; + void *jit_code_ghccc; _PyExitData exits[1]; } _PyExecutorObject; diff --git a/Lib/test/test_perf_profiler.py b/Lib/test/test_perf_profiler.py index 040be63da11447..e7c03b99086013 100644 --- a/Lib/test/test_perf_profiler.py +++ b/Lib/test/test_perf_profiler.py @@ -216,7 +216,7 @@ def is_unwinding_reliable(): cflags = sysconfig.get_config_var("PY_CORE_CFLAGS") if not cflags: return False - return "no-omit-frame-pointer" in cflags + return "no-omit-frame-pointer" in cflags and "_Py_JIT" not in cflags def perf_command_works(): diff --git a/Python/jit.c b/Python/jit.c index 8782adb847cfd6..c754054c3bc80b 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -382,8 +382,8 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size { // Loop once to find the total compiled size: size_t instruction_starts[UOP_MAX_TRACE_LENGTH]; - size_t code_size = 0; - size_t data_size = 0; + size_t code_size = trampoline.code.body_size; + size_t data_size = trampoline.data.body_size; for (size_t i = 0; i < length; i++) { _PyUOpInstruction *instruction = (_PyUOpInstruction *)&trace[i]; const StencilGroup *group = &stencil_groups[instruction->opcode]; @@ -405,11 +405,24 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size // Loop again to emit the code: unsigned char *code = memory; unsigned char *data = memory + code_size; + { + const StencilGroup *group = &trampoline; + // Think of patches as a dictionary mapping HoleValue to uint64_t: + uint64_t patches[] = GET_PATCHES(); + patches[HoleValue_CODE] = (uintptr_t)code; + patches[HoleValue_CONTINUE] = (uintptr_t)code + group->code.body_size; + patches[HoleValue_DATA] = (uintptr_t)data; + patches[HoleValue_EXECUTOR] = (uintptr_t)executor; + patches[HoleValue_TOP] = (uintptr_t)memory + trampoline.code.body_size; + patches[HoleValue_ZERO] = 0; + emit(group, patches); + code += group->code.body_size; + data += group->data.body_size; + } assert(trace[0].opcode == _START_EXECUTOR || trace[0].opcode == _COLD_EXIT); for (size_t i = 0; i < length; i++) { _PyUOpInstruction *instruction = (_PyUOpInstruction *)&trace[i]; const StencilGroup *group = &stencil_groups[instruction->opcode]; - // Think of patches as a dictionary mapping HoleValue to uintptr_t: uintptr_t patches[] = GET_PATCHES(); patches[HoleValue_CODE] = (uintptr_t)code; patches[HoleValue_CONTINUE] = (uintptr_t)code + group->code.body_size; @@ -451,18 +464,20 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size code += group->code.body_size; data += group->data.body_size; } - // Protect against accidental buffer overrun into data: - const StencilGroup *group = &stencil_groups[_FATAL_ERROR]; - uintptr_t patches[] = GET_PATCHES(); - patches[HoleValue_CODE] = (uintptr_t)code; - patches[HoleValue_CONTINUE] = (uintptr_t)code; - patches[HoleValue_DATA] = (uintptr_t)data; - patches[HoleValue_EXECUTOR] = (uintptr_t)executor; - patches[HoleValue_TOP] = (uintptr_t)code; - patches[HoleValue_ZERO] = 0; - emit(group, patches); - code += group->code.body_size; - data += group->data.body_size; + { + // Protect against accidental buffer overrun into data: + const StencilGroup *group = &stencil_groups[_FATAL_ERROR]; + uintptr_t patches[] = GET_PATCHES(); + patches[HoleValue_CODE] = (uintptr_t)code; + patches[HoleValue_CONTINUE] = (uintptr_t)code; + patches[HoleValue_DATA] = (uintptr_t)data; + patches[HoleValue_EXECUTOR] = (uintptr_t)executor; + patches[HoleValue_TOP] = (uintptr_t)code; + patches[HoleValue_ZERO] = 0; + emit(group, patches); + code += group->code.body_size; + data += group->data.body_size; + } assert(code == memory + code_size); assert(data == memory + code_size + data_size); if (mark_executable(memory, total_size)) { @@ -470,6 +485,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size return -1; } executor->jit_code = memory; + executor->jit_code_ghccc = memory + trampoline.code.body_size; executor->jit_size = total_size; return 0; } @@ -481,6 +497,7 @@ _PyJIT_Free(_PyExecutorObject *executor) size_t size = executor->jit_size; if (memory) { executor->jit_code = NULL; + executor->jit_code_ghccc = NULL; executor->jit_size = 0; if (jit_free(memory, size)) { PyErr_WriteUnraisable(NULL); diff --git a/Python/optimizer.c b/Python/optimizer.c index 5863336c0d9ecf..c9d597dc9f678a 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1138,6 +1138,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil #endif #ifdef _Py_JIT executor->jit_code = NULL; + executor->jit_code_ghccc = NULL; executor->jit_size = 0; if (_PyJIT_Compile(executor, executor->trace, length)) { Py_DECREF(executor); @@ -1168,6 +1169,7 @@ init_cold_exit_executor(_PyExecutorObject *executor, int oparg) #endif #ifdef _Py_JIT executor->jit_code = NULL; + executor->jit_code_ghccc = NULL; executor->jit_size = 0; if (_PyJIT_Compile(executor, executor->trace, 1)) { return -1; diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 66db358679239e..faad42ca78838d 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -38,6 +38,7 @@ class _Target(typing.Generic[_S, _R]): _: dataclasses.KW_ONLY alignment: int = 1 args: typing.Sequence[str] = () + ghccc: bool = False prefix: str = "" debug: bool = False force: bool = False @@ -85,7 +86,8 @@ async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup: sections: list[dict[typing.Literal["Section"], _S]] = json.loads(output) for wrapped_section in sections: self._handle_section(wrapped_section["Section"], group) - assert group.symbols["_JIT_ENTRY"] == (_stencils.HoleValue.CODE, 0) + entry_symbol = "_JIT_ENTRY" if "_JIT_ENTRY" in group.symbols else "_ENTRY" + assert group.symbols[entry_symbol] == (_stencils.HoleValue.CODE, 0) if group.data.body: line = f"0: {str(bytes(group.data.body)).removeprefix('b')}" group.data.disassembly.append(line) @@ -103,6 +105,8 @@ def _handle_relocation( async def _compile( self, opname: str, c: pathlib.Path, tempdir: pathlib.Path ) -> _stencils.StencilGroup: + if opname == "trampoline" and not self.ghccc: + return _stencils.StencilGroup() o = tempdir / f"{opname}.o" args = [ f"--target={self.triple}", @@ -130,13 +134,28 @@ async def _compile( "-fno-plt", # Don't call stack-smashing canaries that we can't find or patch: "-fno-stack-protector", - "-o", - f"{o}", "-std=c11", - f"{c}", *self.args, ] - await _llvm.run("clang", args, echo=self.verbose) + if self.ghccc: + ll = tempdir / f"{opname}.ll" + args_ll = args + ["-S", "-emit-llvm", "-o", f"{ll}", f"{c}"] + await _llvm.run("clang", args_ll, echo=self.verbose) + ir = ll.read_text() + # Hack! Use __attribute__((preserve_none)) once we have Clang 19: + ir = re.sub(r"((noalias |nonnull )*ptr @_JIT_\w+\()", r"ghccc \1", ir) + for line in ir.splitlines(): + if re.match(r"ptr @_JIT_\w+\(", line): + assert re.match(r"ghccc (\w+ )*ptr @_JIT_\w+\(", line) + ir = re.sub(r"musttail call ([^g])", r"musttail call ghccc \1", ir) + for line in ir.splitlines(): + if re.match(r"musttail", line): + assert re.match(r"musttail call ghccc", line) + ll.write_text(ir) + args_o = args + ["-Wno-unused-command-line-argument", "-o", f"{o}", f"{ll}"] + else: + args_o = args + ["-o", f"{o}", f"{c}"] + await _llvm.run("clang", args_o, echo=self.verbose) return await self._parse(o) async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: @@ -146,6 +165,8 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: with tempfile.TemporaryDirectory() as tempdir: work = pathlib.Path(tempdir).resolve() async with asyncio.TaskGroup() as group: + coro = self._compile("trampoline", TOOLS_JIT / "trampoline.c", work) + tasks.append(group.create_task(coro, name="trampoline")) for opname in opnames: coro = self._compile(opname, TOOLS_JIT_TEMPLATE_C, work) tasks.append(group.create_task(coro, name=opname)) @@ -456,12 +477,13 @@ def get_target(host: str) -> _COFF | _ELF | _MachO: return _ELF(host, alignment=8, args=args) if re.fullmatch(r"i686-pc-windows-msvc", host): args = ["-DPy_NO_ENABLE_SHARED"] - return _COFF(host, args=args, prefix="_") + return _COFF(host, args=args, ghccc=True, prefix="_") if re.fullmatch(r"x86_64-apple-darwin.*", host): - return _MachO(host, prefix="_") + args = ["-fomit-frame-pointer"] + return _MachO(host, args=args, ghccc=True, prefix="_") if re.fullmatch(r"x86_64-pc-windows-msvc", host): args = ["-fms-runtime-lib=dll"] - return _COFF(host, args=args) + return _COFF(host, ghccc=True, args=args) if re.fullmatch(r"x86_64-.*-linux-gnu", host): - return _ELF(host) + return _ELF(host, ghccc=True) raise ValueError(host) diff --git a/Tools/jit/_writer.py b/Tools/jit/_writer.py index cbc1ed2fa6543a..6b36d8a9c66a3f 100644 --- a/Tools/jit/_writer.py +++ b/Tools/jit/_writer.py @@ -53,9 +53,13 @@ def _dump_footer(opnames: typing.Iterable[str]) -> typing.Iterator[str]: yield "" yield "static const StencilGroup stencil_groups[512] = {" for opname in opnames: + if opname == "trampoline": + continue yield f" [{opname}] = INIT_STENCIL_GROUP({opname})," yield "};" yield "" + yield "static const StencilGroup trampoline = INIT_STENCIL_GROUP(trampoline);" + yield "" yield "#define GET_PATCHES() { \\" for value in _stencils.HoleValue: yield f" [HoleValue_{value.name}] = (uintptr_t)0xBADBADBADBADBADB, \\" diff --git a/Tools/jit/template.c b/Tools/jit/template.c index 228dc83254d678..af42737961c133 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -48,7 +48,7 @@ do { \ OPT_STAT_INC(traces_executed); \ __attribute__((musttail)) \ - return ((jit_func)((EXECUTOR)->jit_code))(frame, stack_pointer, tstate); \ + return ((jit_func)((EXECUTOR)->jit_code_ghccc))(frame, stack_pointer, tstate); \ } while (0) #undef GOTO_TIER_ONE @@ -65,7 +65,7 @@ do { \ #define PATCH_VALUE(TYPE, NAME, ALIAS) \ PyAPI_DATA(void) ALIAS; \ - TYPE NAME = (TYPE)(uint64_t)&ALIAS; + TYPE NAME = (TYPE)(uintptr_t)&ALIAS; #define PATCH_JUMP(ALIAS) \ do { \ diff --git a/Tools/jit/trampoline.c b/Tools/jit/trampoline.c new file mode 100644 index 00000000000000..97d862f1f6f696 --- /dev/null +++ b/Tools/jit/trampoline.c @@ -0,0 +1,17 @@ +#include "Python.h" + +#include "pycore_ceval.h" +#include "pycore_frame.h" +#include "pycore_jit.h" + +_Py_CODEUNIT * +_ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate) +{ + PyAPI_DATA(void) _JIT_EXECUTOR; + PyObject *executor = (PyObject *)(uintptr_t)&_JIT_EXECUTOR; + Py_INCREF(executor); + PyAPI_DATA(void) _JIT_CONTINUE; + _Py_CODEUNIT *target = ((jit_func)&_JIT_CONTINUE)(frame, stack_pointer, tstate); + Py_SETREF(tstate->previous_executor, executor); + return target; +} From 07e95c4f5011943fe66da5edcee113cfb9a1eb66 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 23 Apr 2024 11:29:57 -0700 Subject: [PATCH 2/9] Fix call to emit on 32-bit platforms --- Python/jit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/jit.c b/Python/jit.c index c754054c3bc80b..00c330a34a7ff6 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -407,8 +407,8 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size unsigned char *data = memory + code_size; { const StencilGroup *group = &trampoline; - // Think of patches as a dictionary mapping HoleValue to uint64_t: - uint64_t patches[] = GET_PATCHES(); + // Think of patches as a dictionary mapping HoleValue to uintptr_t: + uintptr_t patches[] = GET_PATCHES(); patches[HoleValue_CODE] = (uintptr_t)code; patches[HoleValue_CONTINUE] = (uintptr_t)code + group->code.body_size; patches[HoleValue_DATA] = (uintptr_t)data; From e3a5f8a96233715912a277a7ea4181ddf7275256 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 25 Apr 2024 08:43:40 -0700 Subject: [PATCH 3/9] Comments and cleanup --- Python/jit.c | 2 ++ Tools/jit/_targets.py | 28 +++++++++++++++++++--------- Tools/jit/trampoline.c | 8 ++++++++ 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/Python/jit.c b/Python/jit.c index 00c330a34a7ff6..470c9960b38c25 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -406,6 +406,8 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size unsigned char *code = memory; unsigned char *data = memory + code_size; { + // Compile the trampoline, which handles changing the calling convention + // (on platforms where it's not necessary, the trampoline is empty): const StencilGroup *group = &trampoline; // Think of patches as a dictionary mapping HoleValue to uintptr_t: uintptr_t patches[] = GET_PATCHES(); diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index faad42ca78838d..fe2eeb0190ec66 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -86,6 +86,9 @@ async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup: sections: list[dict[typing.Literal["Section"], _S]] = json.loads(output) for wrapped_section in sections: self._handle_section(wrapped_section["Section"], group) + # The trampoline's entry point is just named "_ENTRY", since on some + # platforms we later assume that any function starting with "_JIT_" uses + # the GHC calling convention: entry_symbol = "_JIT_ENTRY" if "_JIT_ENTRY" in group.symbols else "_ENTRY" assert group.symbols[entry_symbol] == (_stencils.HoleValue.CODE, 0) if group.data.body: @@ -105,6 +108,7 @@ def _handle_relocation( async def _compile( self, opname: str, c: pathlib.Path, tempdir: pathlib.Path ) -> _stencils.StencilGroup: + # "Compile" the trampoline to an empty stencil group if it's not needed: if opname == "trampoline" and not self.ghccc: return _stencils.StencilGroup() o = tempdir / f"{opname}.o" @@ -137,20 +141,26 @@ async def _compile( "-std=c11", *self.args, ] - if self.ghccc: + if self.ghccc:t + # So, this is mostly a giant hack (but it makes the code much + # smaller and faster, so it's worth it). We need to use the GHC + # calling convention, but Clang doesn't support it. So, we *first* + # compile the code to LLVM IR, perform some text replacements on the + # IR to change the calling convention(!), and then compile *that*. + # Once we have access to Clang 19, we can get rid of this and use + # __attribute__((preserve_none)) directly in the C code instead: ll = tempdir / f"{opname}.ll" args_ll = args + ["-S", "-emit-llvm", "-o", f"{ll}", f"{c}"] await _llvm.run("clang", args_ll, echo=self.verbose) ir = ll.read_text() - # Hack! Use __attribute__((preserve_none)) once we have Clang 19: + # This handles declarations, definitions, and calls to named symbols + # starting with "_JIT_": ir = re.sub(r"((noalias |nonnull )*ptr @_JIT_\w+\()", r"ghccc \1", ir) - for line in ir.splitlines(): - if re.match(r"ptr @_JIT_\w+\(", line): - assert re.match(r"ghccc (\w+ )*ptr @_JIT_\w+\(", line) - ir = re.sub(r"musttail call ([^g])", r"musttail call ghccc \1", ir) - for line in ir.splitlines(): - if re.match(r"musttail", line): - assert re.match(r"musttail call ghccc", line) + # This handles calls to anonymous callees, since anything with + # "musttail" needs to use the same calling convention: + ir = ir.replace("musttail call", "musttail call ghccc") + # Sometimes *both* replacements happen at the same site, so fix it: + ir = ir.replace("ghccc ghccc", "ghccc") ll.write_text(ir) args_o = args + ["-Wno-unused-command-line-argument", "-o", f"{o}", f"{ll}"] else: diff --git a/Tools/jit/trampoline.c b/Tools/jit/trampoline.c index 97d862f1f6f696..f3fd5624d0f518 100644 --- a/Tools/jit/trampoline.c +++ b/Tools/jit/trampoline.c @@ -4,12 +4,20 @@ #include "pycore_frame.h" #include "pycore_jit.h" +// This is where the calling convention changes, on platforms that require it. +// The actual change is hacked in while the JIT compiler is being built, in +// Tools/jit/_targets.py. On other platforms, this function compiles to nothing. _Py_CODEUNIT * _ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate) { + // This is subtle. The actual trace will return to us once it exits, so we + // need to make sure that we stay alive until then. If our trace side-exits + // into another trace, and this trace is then invalidated, the code for + // *this function* will be freed and we'll crash upon return: PyAPI_DATA(void) _JIT_EXECUTOR; PyObject *executor = (PyObject *)(uintptr_t)&_JIT_EXECUTOR; Py_INCREF(executor); + // Note that this is *not* a tail call: PyAPI_DATA(void) _JIT_CONTINUE; _Py_CODEUNIT *target = ((jit_func)&_JIT_CONTINUE)(frame, stack_pointer, tstate); Py_SETREF(tstate->previous_executor, executor); From 0311f2fa65c03c3406b12c5de3c7d144e25325d1 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 25 Apr 2024 08:45:01 -0700 Subject: [PATCH 4/9] Is this really needed? --- Tools/jit/_targets.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index fe2eeb0190ec66..7f5b93f37bd2c8 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -489,11 +489,10 @@ def get_target(host: str) -> _COFF | _ELF | _MachO: args = ["-DPy_NO_ENABLE_SHARED"] return _COFF(host, args=args, ghccc=True, prefix="_") if re.fullmatch(r"x86_64-apple-darwin.*", host): - args = ["-fomit-frame-pointer"] - return _MachO(host, args=args, ghccc=True, prefix="_") + return _MachO(host, ghccc=True, prefix="_") if re.fullmatch(r"x86_64-pc-windows-msvc", host): args = ["-fms-runtime-lib=dll"] - return _COFF(host, ghccc=True, args=args) + return _COFF(host, args=args, ghccc=True) if re.fullmatch(r"x86_64-.*-linux-gnu", host): return _ELF(host, ghccc=True) raise ValueError(host) From 89764fd88ebc1862f89e0b7f14278cc2b9637686 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 25 Apr 2024 08:48:12 -0700 Subject: [PATCH 5/9] t --- Tools/jit/_targets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 7f5b93f37bd2c8..ce8d9200224b47 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -141,7 +141,7 @@ async def _compile( "-std=c11", *self.args, ] - if self.ghccc:t + if self.ghccc: # So, this is mostly a giant hack (but it makes the code much # smaller and faster, so it's worth it). We need to use the GHC # calling convention, but Clang doesn't support it. So, we *first* From 76ec119b75562d048c64755f937f62d07a66c4a4 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 25 Apr 2024 08:55:32 -0700 Subject: [PATCH 6/9] ...guess so. --- Tools/jit/_targets.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index ce8d9200224b47..8cc5ce92b62d5d 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -150,7 +150,11 @@ async def _compile( # Once we have access to Clang 19, we can get rid of this and use # __attribute__((preserve_none)) directly in the C code instead: ll = tempdir / f"{opname}.ll" - args_ll = args + ["-S", "-emit-llvm", "-o", f"{ll}", f"{c}"] + args_ll = args + [ + # -fomit-frame-pointer is necessary because the GHC calling + # convention uses RBP to pass arguments: + "-S", "-emit-llvm", "-fomit-frame-pointer", "-o", f"{ll}", f"{c}" + ] await _llvm.run("clang", args_ll, echo=self.verbose) ir = ll.read_text() # This handles declarations, definitions, and calls to named symbols From 159d318eb834ebc38b46e834f70eb8d6992fdbe1 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 25 Apr 2024 11:56:49 -0700 Subject: [PATCH 7/9] Wording --- Tools/jit/_targets.py | 4 ++-- Tools/jit/trampoline.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 8cc5ce92b62d5d..bba9826f23e472 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -142,8 +142,8 @@ async def _compile( *self.args, ] if self.ghccc: - # So, this is mostly a giant hack (but it makes the code much - # smaller and faster, so it's worth it). We need to use the GHC + # This is a bit of an ugly workaround, but it makes the code much + # smaller and faster, so it's worth it. We want to use the GHC # calling convention, but Clang doesn't support it. So, we *first* # compile the code to LLVM IR, perform some text replacements on the # IR to change the calling convention(!), and then compile *that*. diff --git a/Tools/jit/trampoline.c b/Tools/jit/trampoline.c index f3fd5624d0f518..01b3d63a6790ba 100644 --- a/Tools/jit/trampoline.c +++ b/Tools/jit/trampoline.c @@ -5,7 +5,7 @@ #include "pycore_jit.h" // This is where the calling convention changes, on platforms that require it. -// The actual change is hacked in while the JIT compiler is being built, in +// The actual change is patched in while the JIT compiler is being built, in // Tools/jit/_targets.py. On other platforms, this function compiles to nothing. _Py_CODEUNIT * _ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate) From 66db030baa77cd9bc439dbd6547930a9c4bebfe6 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 30 Apr 2024 22:30:08 -0700 Subject: [PATCH 8/9] jit_code_ghccc -> jit_side_entry --- Include/cpython/optimizer.h | 2 +- Python/jit.c | 4 ++-- Python/optimizer.c | 4 ++-- Tools/jit/template.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index fa10a0ee6974ed..a31314b70536c9 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -101,7 +101,7 @@ typedef struct _PyExecutorObject { uint32_t code_size; size_t jit_size; void *jit_code; - void *jit_code_ghccc; + void *jit_side_entry; _PyExitData exits[1]; } _PyExecutorObject; diff --git a/Python/jit.c b/Python/jit.c index e563db250c78ed..8432162adb8c53 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -490,7 +490,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size return -1; } executor->jit_code = memory; - executor->jit_code_ghccc = memory + trampoline.code.body_size; + executor->jit_side_entry = memory + trampoline.code.body_size; executor->jit_size = total_size; return 0; } @@ -502,7 +502,7 @@ _PyJIT_Free(_PyExecutorObject *executor) size_t size = executor->jit_size; if (memory) { executor->jit_code = NULL; - executor->jit_code_ghccc = NULL; + executor->jit_side_entry = NULL; executor->jit_size = 0; if (jit_free(memory, size)) { PyErr_WriteUnraisable(NULL); diff --git a/Python/optimizer.c b/Python/optimizer.c index 884d5403deb00e..d1b23a89837f70 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1166,7 +1166,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil #endif #ifdef _Py_JIT executor->jit_code = NULL; - executor->jit_code_ghccc = NULL; + executor->jit_side_entry = NULL; executor->jit_size = 0; if (_PyJIT_Compile(executor, executor->trace, length)) { Py_DECREF(executor); @@ -1197,7 +1197,7 @@ init_cold_exit_executor(_PyExecutorObject *executor, int oparg) #endif #ifdef _Py_JIT executor->jit_code = NULL; - executor->jit_code_ghccc = NULL; + executor->jit_side_entry = NULL; executor->jit_size = 0; if (_PyJIT_Compile(executor, executor->trace, 1)) { return -1; diff --git a/Tools/jit/template.c b/Tools/jit/template.c index fc303b16db8f65..0dd0744f7aec9c 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -48,7 +48,7 @@ do { \ OPT_STAT_INC(traces_executed); \ __attribute__((musttail)) \ - return ((jit_func)((EXECUTOR)->jit_code_ghccc))(frame, stack_pointer, tstate); \ + return ((jit_func)((EXECUTOR)->jit_side_entry))(frame, stack_pointer, tstate); \ } while (0) #undef GOTO_TIER_ONE From d03f22741981e4c1ffc054490a2a98a522164236 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 30 Apr 2024 22:43:59 -0700 Subject: [PATCH 9/9] Clarify a few comments --- Python/jit.c | 7 +++++-- Tools/jit/_targets.py | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Python/jit.c b/Python/jit.c index 8432162adb8c53..df14e48c564447 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -409,8 +409,11 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size unsigned char *code = memory; unsigned char *data = memory + code_size; { - // Compile the trampoline, which handles changing the calling convention - // (on platforms where it's not necessary, the trampoline is empty): + // Compile the trampoline, which handles converting between the native + // calling convention and the calling convention used by jitted code + // (which may be different for efficiency reasons). On platforms where + // we don't change calling conventions, the trampoline is empty and + // nothing is emitted here: const StencilGroup *group = &trampoline; // Think of patches as a dictionary mapping HoleValue to uintptr_t: uintptr_t patches[] = GET_PATCHES(); diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 24f46e513f0a30..274d17bcf38deb 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -480,6 +480,7 @@ def _handle_relocation( def get_target(host: str) -> _COFF | _ELF | _MachO: """Build a _Target for the given host "triple" and options.""" + # ghccc currently crashes Clang when combined with musttail on aarch64. :( if re.fullmatch(r"aarch64-apple-darwin.*", host): return _MachO(host, alignment=8, prefix="_") if re.fullmatch(r"aarch64-pc-windows-msvc", host): pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy