From 52bb3da654eaac496a3f7cf0e5fe96b37ab4af20 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 21 Feb 2024 23:15:23 -0800 Subject: [PATCH 1/6] Implement the small code model for x86_64 and aarch64 macOS and Linux --- Python/jit.c | 143 +++++++++++++++++++++++++++++++++++++----- Tools/jit/_schema.py | 10 +++ Tools/jit/_targets.py | 68 ++++++++++++++++---- 3 files changed, 191 insertions(+), 30 deletions(-) diff --git a/Python/jit.c b/Python/jit.c index 839414bd810677..8a1196139ac379 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -47,18 +47,18 @@ jit_error(const char *message) PyErr_Format(PyExc_RuntimeWarning, "JIT %s (%d)", message, hint); } -static char * +static unsigned char * jit_alloc(size_t size) { assert(size); assert(size % get_page_size() == 0); #ifdef MS_WINDOWS int flags = MEM_COMMIT | MEM_RESERVE; - char *memory = VirtualAlloc(NULL, size, flags, PAGE_READWRITE); + unsigned char *memory = VirtualAlloc(NULL, size, flags, PAGE_READWRITE); int failed = memory == NULL; #else int flags = MAP_ANONYMOUS | MAP_PRIVATE; - char *memory = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); + unsigned char *memory = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); int failed = memory == MAP_FAILED; #endif if (failed) { @@ -69,7 +69,7 @@ jit_alloc(size_t size) } static int -jit_free(char *memory, size_t size) +jit_free(unsigned char *memory, size_t size) { assert(size); assert(size % get_page_size() == 0); @@ -86,7 +86,7 @@ jit_free(char *memory, size_t size) } static int -mark_executable(char *memory, size_t size) +mark_executable(unsigned char *memory, size_t size) { if (size == 0) { return 0; @@ -113,7 +113,7 @@ mark_executable(char *memory, size_t size) } static int -mark_readable(char *memory, size_t size) +mark_readable(unsigned char *memory, size_t size) { if (size == 0) { return 0; @@ -169,18 +169,20 @@ set_bits(uint32_t *loc, uint8_t loc_start, uint64_t value, uint8_t value_start, // Fill all of stencil's holes in the memory pointed to by base, using the // values in patches. static void -patch(char *base, const Stencil *stencil, uint64_t *patches) +patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) { for (uint64_t i = 0; i < stencil->holes_size; i++) { const Hole *hole = &stencil->holes[i]; - void *location = base + hole->offset; + unsigned char *location = base + hole->offset; uint64_t value = patches[hole->value] + (uint64_t)hole->symbol + hole->addend; + uint8_t *loc8 = (uint8_t *)location; uint32_t *loc32 = (uint32_t *)location; uint64_t *loc64 = (uint64_t *)location; // LLD is a great reference for performing relocations... just keep in // mind that Tools/jit/build.py does filtering and preprocessing for us! // Here's a good place to start for each platform: // - aarch64-apple-darwin: + // - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/ARM64.cpp // - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/ARM64Common.cpp // - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/ARM64Common.h // - aarch64-unknown-linux-gnu: @@ -208,6 +210,60 @@ patch(char *base, const Stencil *stencil, uint64_t *patches) // 64-bit absolute address. *loc64 = value; continue; + case HoleKind_R_X86_64_GOTPCRELX: + case HoleKind_R_X86_64_REX_GOTPCRELX: + case HoleKind_X86_64_RELOC_GOT: + case HoleKind_X86_64_RELOC_GOT_LOAD: { + // 32-bit relative address. + // Try to relax the GOT load into an immediate value: + uint64_t relaxed = *(uint64_t *)(value + 4) - 4; + if ((int64_t)relaxed - (int64_t)location >= -(1LL << 31) && + (int64_t)relaxed - (int64_t)location + 1 < (1LL << 31)) + { + if (loc8[-2] == 0x8B) { + // Before: mov eax, dword ptr [rip + AAA] + // After: lea eax, [rip + XXX] + assert(hole->kind == HoleKind_IMAGE_REL_AMD64_REL32 || + hole->kind == HoleKind_R_X86_64_GOTPCRELX || + hole->kind == HoleKind_R_X86_64_REX_GOTPCRELX || + hole->kind == HoleKind_X86_64_RELOC_GOT_LOAD); + loc8[-2] = 0x8D; + value = relaxed; + } + else if (loc8[-2] == 0xFF && loc8[-1] == 0x15) { + // Before: call qword ptr [rip + AAA] + // After: nop + // call XXX + assert(hole->kind == HoleKind_R_X86_64_GOTPCRELX || + hole->kind == HoleKind_X86_64_RELOC_GOT); + loc8[-2] = 0x90; + loc8[-1] = 0xE8; + value = relaxed; + } + else if (loc8[-2] == 0xFF && loc8[-1] == 0x25) { + // Before: jmp qword ptr [rip + AAA] + // After: nop + // jmp XXX + assert(hole->kind == HoleKind_IMAGE_REL_AMD64_REL32 || + hole->kind == HoleKind_R_X86_64_GOTPCRELX); + loc8[-2] = 0x90; + loc8[-1] = 0xE9; + value = relaxed; + } + } + } + // Fall through... + case HoleKind_R_X86_64_GOTPCREL: + case HoleKind_R_X86_64_PC32: + case HoleKind_X86_64_RELOC_SIGNED: + case HoleKind_X86_64_RELOC_BRANCH: + // 32-bit relative address. + value -= (uint64_t)location; + // Check that we're not out of range of 32 signed bits: + assert((int64_t)value >= -(1LL << 31)); + assert((int64_t)value < (1LL << 31)); + loc32[0] = (uint32_t)value; + continue; case HoleKind_R_AARCH64_CALL26: case HoleKind_R_AARCH64_JUMP26: // 28-bit relative branch. @@ -249,10 +305,61 @@ patch(char *base, const Stencil *stencil, uint64_t *patches) set_bits(loc32, 5, value, 48, 16); continue; case HoleKind_ARM64_RELOC_GOT_LOAD_PAGE21: + case HoleKind_R_AARCH64_ADR_GOT_PAGE: { // 21-bit count of pages between this page and an absolute address's // page... I know, I know, it's weird. Pairs nicely with // ARM64_RELOC_GOT_LOAD_PAGEOFF12 (below). - assert(IS_AARCH64_ADRP(*loc32)); + const Hole *next_hole = &stencil->holes[i + 1]; + if (i + 1 < stencil->holes_size && + (next_hole->kind == HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12 || + next_hole->kind == HoleKind_R_AARCH64_LD64_GOT_LO12_NC) && + next_hole->offset == hole->offset + 4 && + next_hole->symbol == hole->symbol && + next_hole->addend == hole->addend && + next_hole->value == hole->value) + { + assert(IS_AARCH64_ADRP(*loc32)); + unsigned char rd = get_bits(loc32[0], 0, 5); + assert(IS_AARCH64_LDR_OR_STR(loc32[1])); + unsigned char rt = get_bits(loc32[1], 0, 5); + unsigned char rn = get_bits(loc32[1], 5, 5); + assert(rd == rn && rn == rt); + uint64_t relaxed = *(uint64_t *)value; + if (relaxed < (1UL << 16)) { + // Before: adrp x0, AAA + // ldr x0, [x0 + BBB] + // After: movz x0, XXX + // nop + loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | rd; + loc32[1] = 0xD503201F; + i++; + continue; + } + if (relaxed < (1ULL << 32)) { + // Before: adrp x0, AAA + // ldr x0, [x0 + BBB] + // After: movz x0, XXX + // movk x0, YYY + loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | rd; + loc32[1] = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | rd; + i++; + continue; + } + relaxed = (uint64_t)value - (uint64_t)location; + if ((relaxed & 0x3) == 0 && + (int64_t)relaxed >= -(1L << 19) && + (int64_t)relaxed < (1L << 19)) + { + // Before: adrp x0, AAA + // ldr x0, [x0 + BBB] + // After: ldr x0, XXX + // nop + loc32[0] = 0x58000000 | (get_bits(relaxed, 2, 19) << 5) | rd; + loc32[1] = 0xD503201F; + i++; + continue; + } + } // Number of pages between this page and the value's page: value = (value >> 12) - ((uint64_t)location >> 12); // Check that we're not out of range of 21 signed bits: @@ -263,7 +370,9 @@ patch(char *base, const Stencil *stencil, uint64_t *patches) // value[2:21] goes in loc[5:26]: set_bits(loc32, 5, value, 2, 19); continue; + } case HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12: + case HoleKind_R_AARCH64_LD64_GOT_LO12_NC: // 12-bit low part of an absolute address. Pairs nicely with // ARM64_RELOC_GOT_LOAD_PAGE21 (above). assert(IS_AARCH64_LDR_OR_STR(*loc32) || IS_AARCH64_ADD_OR_SUB(*loc32)); @@ -285,7 +394,7 @@ patch(char *base, const Stencil *stencil, uint64_t *patches) } static void -copy_and_patch(char *base, const Stencil *stencil, uint64_t *patches) +copy_and_patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) { memcpy(base, stencil->body, stencil->body_size); patch(base, stencil, patches); @@ -294,8 +403,8 @@ copy_and_patch(char *base, const Stencil *stencil, uint64_t *patches) static void emit(const StencilGroup *group, uint64_t patches[]) { - copy_and_patch((char *)patches[HoleValue_CODE], &group->code, patches); - copy_and_patch((char *)patches[HoleValue_DATA], &group->data, patches); + copy_and_patch((unsigned char *)patches[HoleValue_DATA], &group->data, patches); + copy_and_patch((unsigned char *)patches[HoleValue_CODE], &group->code, patches); } // Compiles executor in-place. Don't forget to call _PyJIT_Free later! @@ -316,14 +425,14 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size assert((page_size & (page_size - 1)) == 0); code_size += page_size - (code_size & (page_size - 1)); data_size += page_size - (data_size & (page_size - 1)); - char *memory = jit_alloc(code_size + data_size); + unsigned char *memory = jit_alloc(code_size + data_size); if (memory == NULL) { return -1; } // Loop again to emit the code: - char *code = memory; - char *data = memory + code_size; - char *top = code; + unsigned char *code = memory; + unsigned char *data = memory + code_size; + unsigned char *top = code; if (trace[0].opcode == _START_EXECUTOR) { // Don't want to execute this more than once: top += stencil_groups[_START_EXECUTOR].code.body_size; @@ -360,7 +469,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size void _PyJIT_Free(_PyExecutorObject *executor) { - char *memory = (char *)executor->jit_code; + unsigned char *memory = (unsigned char *)executor->jit_code; size_t size = executor->jit_size; if (memory) { executor->jit_code = NULL; diff --git a/Tools/jit/_schema.py b/Tools/jit/_schema.py index 8eeb78e6cd69ee..975ca650a13c1a 100644 --- a/Tools/jit/_schema.py +++ b/Tools/jit/_schema.py @@ -8,13 +8,23 @@ "IMAGE_REL_AMD64_ADDR64", "IMAGE_REL_I386_DIR32", "R_AARCH64_ABS64", + "R_AARCH64_ADR_GOT_PAGE", "R_AARCH64_CALL26", "R_AARCH64_JUMP26", + "R_AARCH64_LD64_GOT_LO12_NC", "R_AARCH64_MOVW_UABS_G0_NC", "R_AARCH64_MOVW_UABS_G1_NC", "R_AARCH64_MOVW_UABS_G2_NC", "R_AARCH64_MOVW_UABS_G3", "R_X86_64_64", + "R_X86_64_GOTPCREL", + "R_X86_64_GOTPCRELX", + "R_X86_64_PC32", + "R_X86_64_REX_GOTPCRELX", + "X86_64_RELOC_BRANCH", + "X86_64_RELOC_GOT", + "X86_64_RELOC_GOT_LOAD", + "X86_64_RELOC_SIGNED", "X86_64_RELOC_UNSIGNED", ] diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 51b091eb246413..50f98df8a553b7 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -37,6 +37,7 @@ class _Target(typing.Generic[_S, _R]): triple: str _: dataclasses.KW_ONLY alignment: int = 1 + args: typing.Sequence[str] = () prefix: str = "" debug: bool = False force: bool = False @@ -121,21 +122,14 @@ async def _compile( "-fno-asynchronous-unwind-tables", # SET_FUNCTION_ATTRIBUTE on 32-bit Windows debug builds: "-fno-jump-tables", - # Position-independent code adds indirection to every load and jump: - "-fno-pic", + "-fno-plt", # Don't make calls to weird stack-smashing canaries: "-fno-stack-protector", - # We have three options for code model: - # - "small": the default, assumes that code and data reside in the - # lowest 2GB of memory (128MB on aarch64) - # - "medium": assumes that code resides in the lowest 2GB of memory, - # and makes no assumptions about data (not available on aarch64) - # - "large": makes no assumptions about either code or data - "-mcmodel=large", "-o", f"{o}", "-std=c11", f"{c}", + *self.args, ] await _llvm.run("clang", args, echo=self.verbose) return await self._parse(o) @@ -284,7 +278,23 @@ def _handle_section( def _handle_relocation( self, base: int, relocation: _schema.ELFRelocation, raw: bytes ) -> _stencils.Hole: + symbol: str | None match relocation: + case { + "Addend": addend, + "Offset": offset, + "Symbol": {"Value": s}, + "Type": { + "Value": "R_AARCH64_ADR_GOT_PAGE" + | "R_AARCH64_LD64_GOT_LO12_NC" + | "R_X86_64_GOTPCREL" + | "R_X86_64_GOTPCRELX" + | "R_X86_64_REX_GOTPCRELX" as kind + }, + }: + offset += base + s = s.removeprefix(self.prefix) + value, symbol = _stencils.HoleValue.GOT, s case { "Addend": addend, "Offset": offset, @@ -356,6 +366,34 @@ def _handle_relocation( s = s.removeprefix(self.prefix) value, symbol = _stencils.HoleValue.GOT, s addend = 0 + case { + "Offset": offset, + "Symbol": {"Value": s}, + "Type": { + "Value": "X86_64_RELOC_GOT" | "X86_64_RELOC_GOT_LOAD" as kind + }, + }: + offset += base + s = s.removeprefix(self.prefix) + value, symbol = _stencils.HoleValue.GOT, s + addend = int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 + case { + "Offset": offset, + "Section": {"Value": s}, + "Type": { + "Value": "X86_64_RELOC_SIGNED" as kind + }, + } | { + "Offset": offset, + "Symbol": {"Value": s}, + "Type": { + "Value": "X86_64_RELOC_BRANCH" | "X86_64_RELOC_SIGNED" as kind + }, + }: + offset += base + s = s.removeprefix(self.prefix) + value, symbol = _stencils.symbol_to_value(s) + addend = int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 case { "Offset": offset, "Section": {"Value": s}, @@ -380,15 +418,19 @@ def _handle_relocation( def get_target(host: str) -> _COFF | _ELF | _MachO: """Build a _Target for the given host "triple" and options.""" if re.fullmatch(r"aarch64-apple-darwin.*", host): - return _MachO(host, alignment=8, prefix="_") + args = ["-mcmodel=large"] + return _MachO(host, alignment=8, args=args, prefix="_") if re.fullmatch(r"aarch64-.*-linux-gnu", host): - return _ELF(host, alignment=8) + args = ["-mcmodel=large"] + return _ELF(host, alignment=8, args=args) if re.fullmatch(r"i686-pc-windows-msvc", host): - return _COFF(host, prefix="_") + args = ["-mcmodel=large"] + return _COFF(host, args=args, prefix="_") if re.fullmatch(r"x86_64-apple-darwin.*", host): return _MachO(host, prefix="_") if re.fullmatch(r"x86_64-pc-windows-msvc", host): - return _COFF(host) + args = ["-mcmodel=large"] + return _COFF(host, args=args) if re.fullmatch(r"x86_64-.*-linux-gnu", host): return _ELF(host) raise ValueError(host) From 81fe5edbd01eba7af51b7db456e84a020d6aa292 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 21 Feb 2024 23:16:32 -0800 Subject: [PATCH 2/6] blacken --- Tools/jit/_targets.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 50f98df8a553b7..6f7b58a9b1dc2a 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -369,20 +369,18 @@ def _handle_relocation( case { "Offset": offset, "Symbol": {"Value": s}, - "Type": { - "Value": "X86_64_RELOC_GOT" | "X86_64_RELOC_GOT_LOAD" as kind - }, + "Type": {"Value": "X86_64_RELOC_GOT" | "X86_64_RELOC_GOT_LOAD" as kind}, }: offset += base s = s.removeprefix(self.prefix) value, symbol = _stencils.HoleValue.GOT, s - addend = int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 + addend = ( + int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 + ) case { "Offset": offset, "Section": {"Value": s}, - "Type": { - "Value": "X86_64_RELOC_SIGNED" as kind - }, + "Type": {"Value": "X86_64_RELOC_SIGNED" as kind}, } | { "Offset": offset, "Symbol": {"Value": s}, @@ -393,7 +391,9 @@ def _handle_relocation( offset += base s = s.removeprefix(self.prefix) value, symbol = _stencils.symbol_to_value(s) - addend = int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 + addend = ( + int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 + ) case { "Offset": offset, "Section": {"Value": s}, From 36de1cd4a879d9d6401008c5e52b7d75ac8c30f3 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 22 Feb 2024 00:11:32 -0800 Subject: [PATCH 3/6] Remove references to IMAGE_REL_AMD64_REL32 and clean up comments --- Python/jit.c | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/Python/jit.c b/Python/jit.c index 8a1196139ac379..07e5877cb1ce36 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -221,31 +221,18 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) (int64_t)relaxed - (int64_t)location + 1 < (1LL << 31)) { if (loc8[-2] == 0x8B) { - // Before: mov eax, dword ptr [rip + AAA] - // After: lea eax, [rip + XXX] - assert(hole->kind == HoleKind_IMAGE_REL_AMD64_REL32 || - hole->kind == HoleKind_R_X86_64_GOTPCRELX || - hole->kind == HoleKind_R_X86_64_REX_GOTPCRELX || - hole->kind == HoleKind_X86_64_RELOC_GOT_LOAD); + // mov reg, dword ptr [rip + AAA] -> lea reg, [rip + XXX] loc8[-2] = 0x8D; value = relaxed; } else if (loc8[-2] == 0xFF && loc8[-1] == 0x15) { - // Before: call qword ptr [rip + AAA] - // After: nop - // call XXX - assert(hole->kind == HoleKind_R_X86_64_GOTPCRELX || - hole->kind == HoleKind_X86_64_RELOC_GOT); + // call qword ptr [rip + AAA] -> nop; call XXX loc8[-2] = 0x90; loc8[-1] = 0xE8; value = relaxed; } else if (loc8[-2] == 0xFF && loc8[-1] == 0x25) { - // Before: jmp qword ptr [rip + AAA] - // After: nop - // jmp XXX - assert(hole->kind == HoleKind_IMAGE_REL_AMD64_REL32 || - hole->kind == HoleKind_R_X86_64_GOTPCRELX); + // jmp qword ptr [rip + AAA] -> nop; jmp XXX loc8[-2] = 0x90; loc8[-1] = 0xE9; value = relaxed; @@ -326,20 +313,14 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) assert(rd == rn && rn == rt); uint64_t relaxed = *(uint64_t *)value; if (relaxed < (1UL << 16)) { - // Before: adrp x0, AAA - // ldr x0, [x0 + BBB] - // After: movz x0, XXX - // nop + // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; nop loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | rd; loc32[1] = 0xD503201F; i++; continue; } if (relaxed < (1ULL << 32)) { - // Before: adrp x0, AAA - // ldr x0, [x0 + BBB] - // After: movz x0, XXX - // movk x0, YYY + // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; movk reg, YYY loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | rd; loc32[1] = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | rd; i++; @@ -350,10 +331,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) (int64_t)relaxed >= -(1L << 19) && (int64_t)relaxed < (1L << 19)) { - // Before: adrp x0, AAA - // ldr x0, [x0 + BBB] - // After: ldr x0, XXX - // nop + // adrp reg, AAA; ldr reg, [reg + BBB] -> ldr x0, XXX; nop loc32[0] = 0x58000000 | (get_bits(relaxed, 2, 19) << 5) | rd; loc32[1] = 0xD503201F; i++; From 74860c12134a587d6b481e4112261fa4ba837685 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 22 Feb 2024 00:13:53 -0800 Subject: [PATCH 4/6] Add comment --- Python/jit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/jit.c b/Python/jit.c index 07e5877cb1ce36..8cc9bae6c79e4a 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -296,6 +296,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) // 21-bit count of pages between this page and an absolute address's // page... I know, I know, it's weird. Pairs nicely with // ARM64_RELOC_GOT_LOAD_PAGEOFF12 (below). + // Try to relax the pair of GOT loads into an immediate value: const Hole *next_hole = &stencil->holes[i + 1]; if (i + 1 < stencil->holes_size && (next_hole->kind == HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12 || From aa53fab0ccc6ac98a704a3cecdfa0bbd6b4efdf9 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 22 Feb 2024 00:15:21 -0800 Subject: [PATCH 5/6] Move assert back --- Python/jit.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Python/jit.c b/Python/jit.c index 8cc9bae6c79e4a..552fb874690e24 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -292,11 +292,12 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) set_bits(loc32, 5, value, 48, 16); continue; case HoleKind_ARM64_RELOC_GOT_LOAD_PAGE21: - case HoleKind_R_AARCH64_ADR_GOT_PAGE: { + case HoleKind_R_AARCH64_ADR_GOT_PAGE: // 21-bit count of pages between this page and an absolute address's // page... I know, I know, it's weird. Pairs nicely with // ARM64_RELOC_GOT_LOAD_PAGEOFF12 (below). // Try to relax the pair of GOT loads into an immediate value: + assert(IS_AARCH64_ADRP(*loc32)); const Hole *next_hole = &stencil->holes[i + 1]; if (i + 1 < stencil->holes_size && (next_hole->kind == HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12 || @@ -306,7 +307,6 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) next_hole->addend == hole->addend && next_hole->value == hole->value) { - assert(IS_AARCH64_ADRP(*loc32)); unsigned char rd = get_bits(loc32[0], 0, 5); assert(IS_AARCH64_LDR_OR_STR(loc32[1])); unsigned char rt = get_bits(loc32[1], 0, 5); @@ -349,7 +349,6 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) // value[2:21] goes in loc[5:26]: set_bits(loc32, 5, value, 2, 19); continue; - } case HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12: case HoleKind_R_AARCH64_LD64_GOT_LO12_NC: // 12-bit low part of an absolute address. Pairs nicely with From 462095c45048125e75953be2e2fbb2fa2db9fc9a Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 22 Feb 2024 00:16:05 -0800 Subject: [PATCH 6/6] fixup --- Python/jit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/jit.c b/Python/jit.c index 552fb874690e24..ac2c60ed925a26 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -296,8 +296,8 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) // 21-bit count of pages between this page and an absolute address's // page... I know, I know, it's weird. Pairs nicely with // ARM64_RELOC_GOT_LOAD_PAGEOFF12 (below). - // Try to relax the pair of GOT loads into an immediate value: assert(IS_AARCH64_ADRP(*loc32)); + // Try to relax the pair of GOT loads into an immediate value: const Hole *next_hole = &stencil->holes[i + 1]; if (i + 1 < stencil->holes_size && (next_hole->kind == HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12 || pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy