inlineasm: Add inline assembler support for RV32. #15714

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged (6 commits, Jan 2, 2025)
1 change: 1 addition & 0 deletions mpy-cross/mpconfigport.h
@@ -47,6 +47,7 @@
#define MICROPY_EMIT_INLINE_XTENSA (1)
#define MICROPY_EMIT_XTENSAWIN (1)
#define MICROPY_EMIT_RV32 (1)
#define MICROPY_EMIT_INLINE_RV32 (1)
#define MICROPY_EMIT_NATIVE_DEBUG (1)
#define MICROPY_EMIT_NATIVE_DEBUG_PRINTER (&mp_stdout_print)

8 changes: 6 additions & 2 deletions ports/qemu/Makefile
@@ -19,16 +19,20 @@ QSTR_DEFS = qstrdefsport.h
MICROPY_ROM_TEXT_COMPRESSION ?= 1

ifeq ($(QEMU_ARCH),arm)
FROZEN_MANIFEST ?= "require('unittest'); freeze('test-frzmpy')"
MICROPY_HEAP_SIZE ?= 122880
FROZEN_MANIFEST ?= "require('unittest'); freeze('test-frzmpy', ('frozen_asm_thumb.py', 'frozen_const.py', 'frozen_viper.py', 'native_frozen_align.py'))"
endif
ifeq ($(QEMU_ARCH),riscv32)
FROZEN_MANIFEST ?= "require('unittest'); freeze('test-frzmpy', ('frozen_const.py', 'frozen_viper.py', 'native_frozen_align.py'))"
MICROPY_HEAP_SIZE ?= 122880
FROZEN_MANIFEST ?= "require('unittest'); freeze('test-frzmpy', ('frozen_asm_rv32.py', 'frozen_const.py', 'frozen_viper.py', 'native_frozen_align.py'))"
endif

# include py core make definitions
include $(TOP)/py/py.mk
include $(TOP)/extmod/extmod.mk

CFLAGS += -DMICROPY_HEAP_SIZE=$(MICROPY_HEAP_SIZE)

################################################################################
# ARM specific settings

2 changes: 2 additions & 0 deletions ports/qemu/README.md
@@ -122,3 +122,5 @@ The following options can be specified on the `make` command line:
- `QEMU_DEBUG_ARGS`: defaults to `-s` (gdb on TCP port 1234), but can be overridden
with different qemu gdb arguments.
- `QEMU_DEBUG_EXTRA`: extra options to pass to qemu when `QEMU_DEBUG=1` is used.
- `MICROPY_HEAP_SIZE`: pass in an optional value (in bytes) for overriding the GC
heap size used by the port.
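  For example (an illustrative invocation with made-up values, assuming the standard qemu port build flow): `make -C ports/qemu QEMU_ARCH=riscv32 MICROPY_HEAP_SIZE=262144` would build with a 256 KiB GC heap.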
2 changes: 1 addition & 1 deletion ports/qemu/boards/SABRELITE.mk
@@ -16,4 +16,4 @@ SRC_BOARD_O = shared/runtime/gchelper_generic.o
MPY_CROSS_FLAGS += -march=armv6

# These tests don't work on Cortex-A9, so exclude them.
RUN_TESTS_ARGS = --exclude '(asmdiv|asmspecialregs).py'
RUN_TESTS_ARGS = --exclude 'inlineasm/thumb/(asmdiv|asmspecialregs).py'
3 changes: 0 additions & 3 deletions ports/qemu/boards/VIRT_RV32.mk
@@ -10,7 +10,4 @@ SRC_BOARD_O += shared/runtime/gchelper_native.o shared/runtime/gchelper_rv32i.o

MPY_CROSS_FLAGS += -march=rv32imc

# These Thumb tests don't run on RV32, so exclude them.
RUN_TESTS_ARGS = --exclude 'inlineasm|qemu/asm_test'

RUN_NATMODTESTS_ARGS = --arch rv32imc
8 changes: 5 additions & 3 deletions ports/qemu/main.c
@@ -34,14 +34,16 @@
#include "shared/runtime/gchelper.h"
#include "shared/runtime/pyexec.h"

#define HEAP_SIZE (100 * 1024)
#if MICROPY_HEAP_SIZE <= 0
#error MICROPY_HEAP_SIZE must be a positive integer.
#endif

static uint32_t gc_heap[HEAP_SIZE / sizeof(uint32_t)];
static uint32_t gc_heap[MICROPY_HEAP_SIZE / sizeof(uint32_t)];

int main(int argc, char **argv) {
mp_stack_ctrl_init();
mp_stack_set_limit(10240);
gc_init(gc_heap, (char *)gc_heap + HEAP_SIZE);
gc_init(gc_heap, (char *)gc_heap + MICROPY_HEAP_SIZE);

for (;;) {
mp_init();
1 change: 1 addition & 0 deletions ports/qemu/mpconfigport.h
@@ -39,6 +39,7 @@
#define MICROPY_MAKE_POINTER_CALLABLE(p) ((void *)((mp_uint_t)(p) | 1))
#elif defined(__riscv)
#define MICROPY_EMIT_RV32 (1)
#define MICROPY_EMIT_INLINE_RV32 (1)
#endif

#define MICROPY_MALLOC_USES_ALLOCATED_SIZE (1)
31 changes: 31 additions & 0 deletions ports/qemu/test-frzmpy/frozen_asm_rv32.py
@@ -0,0 +1,31 @@
# Test freezing inline-asm code.

# ruff: noqa: F821 - @asm_rv32 decorator adds names to function scope

import micropython


@micropython.asm_rv32
def asm_add(a0, a1):
add(a0, a0, a1)


@micropython.asm_rv32
def asm_add1(a0) -> object:
slli(a0, a0, 1)
addi(a0, a0, 3)


@micropython.asm_rv32
def asm_cast_bool(a0) -> bool:
pass


@micropython.asm_rv32
def asm_shift_int(a0) -> int:
slli(a0, a0, 29)


@micropython.asm_rv32
def asm_shift_uint(a0) -> uint:
slli(a0, a0, 29)
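
For reference, a minimal usage sketch of these frozen inline-assembler functions (an illustration, not part of the PR's test suite; it assumes the module is frozen under its file name and that the usual inline-asm argument/return conversions and small-int object encoding apply):

import frozen_asm_rv32 as asm

print(asm.asm_add(1, 2))      # 3: plain register add
print(asm.asm_add1(1))        # 2: (x << 1) + 3 is the small-int object encoding of x + 1
print(asm.asm_cast_bool(0))   # False: return value converted to bool
print(asm.asm_cast_bool(3))   # True
print(asm.asm_shift_int(4))   # -2147483648: 4 << 29 read back as a signed 32-bit value
print(asm.asm_shift_uint(4))  # 2147483648: same bit pattern, returned as unsigned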
1 change: 1 addition & 0 deletions ports/rp2/mpconfigport.h
@@ -87,6 +87,7 @@
#endif
#elif PICO_RISCV
#define MICROPY_EMIT_RV32 (1)
#define MICROPY_EMIT_INLINE_RV32 (1)
#endif

// Optimisations
58 changes: 16 additions & 42 deletions py/asmrv32.c
@@ -29,6 +29,7 @@
#include <string.h>

#include "py/emit.h"
#include "py/misc.h"
#include "py/mpconfig.h"

// wrapper around everything in this file
@@ -43,34 +44,7 @@
#define DEBUG_printf(...) (void)0
#endif

#ifndef MP_POPCOUNT
#ifdef _MSC_VER
#include <intrin.h>
#define MP_POPCOUNT __popcnt
#else
#if defined __has_builtin
#if __has_builtin(__builtin_popcount)
#define MP_POPCOUNT __builtin_popcount
#endif
#else
static uint32_t fallback_popcount(uint32_t value) {
value = value - ((value >> 1) & 0x55555555);
value = (value & 0x33333333) + ((value >> 2) & 0x33333333);
value = (value + (value >> 4)) & 0x0F0F0F0F;
return value * 0x01010101 >> 24;
}
#define MP_POPCOUNT fallback_popcount
#endif
#endif
#endif

#define INTERNAL_TEMPORARY ASM_RV32_REG_S0
#define AVAILABLE_REGISTERS_COUNT 32

#define IS_IN_C_REGISTER_WINDOW(register_number) \
(((register_number) >= ASM_RV32_REG_X8) && ((register_number) <= ASM_RV32_REG_X15))
#define MAP_IN_C_REGISTER_WINDOW(register_number) \
((register_number) - ASM_RV32_REG_X8)

#define FIT_UNSIGNED(value, bits) (((value) & ~((1U << (bits)) - 1)) == 0)
#define FIT_SIGNED(value, bits) \
@@ -126,7 +100,6 @@ static void split_immediate(mp_int_t immediate, mp_uint_t *upper, mp_uint_t *low
// Turn the lower half from unsigned to signed.
if ((*lower & 0x800) != 0) {
*upper += 0x1000;
*lower -= 0x1000;
}
}
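
The 0x1000 adjustment above reflects the standard lui/addi materialisation pattern. A small Python sketch of the reasoning (an illustration under the assumption that the split feeds a lui/addi pair; it is not a copy of the emitter's code):

def sign_extend_12(value):
    # addi sign-extends its 12-bit immediate at execution time.
    return value - 0x1000 if value & 0x800 else value

def materialise(immediate):
    upper = immediate & 0xFFFFF000
    lower = immediate & 0x00000FFF
    if lower & 0x800:
        # Without this bump, lui+addi would land 0x1000 short, because the
        # CPU treats the 12-bit addi immediate as negative.
        upper = (upper + 0x1000) & 0xFFFFFFFF
    # lui rd, upper >> 12  followed by  addi rd, rd, lower
    return (upper + sign_extend_12(lower)) & 0xFFFFFFFF

assert materialise(0x12345FFF) == 0x12345FFF
assert materialise(0x00000800) == 0x00000800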

Expand Down Expand Up @@ -200,7 +173,7 @@ void asm_rv32_emit_optimised_load_immediate(asm_rv32_t *state, mp_uint_t rd, mp_

static void emit_registers_store(asm_rv32_t *state, mp_uint_t registers_mask) {
mp_uint_t offset = 0;
for (mp_uint_t register_index = 0; register_index < AVAILABLE_REGISTERS_COUNT; register_index++) {
for (mp_uint_t register_index = 0; register_index < RV32_AVAILABLE_REGISTERS_COUNT; register_index++) {
if (registers_mask & (1U << register_index)) {
assert(FIT_UNSIGNED(offset >> 2, 6) && "Registers save stack offset out of range.");
// c.swsp register, offset
@@ -212,7 +185,7 @@ static void emit_registers_store(asm_rv32_t *state, mp_uint_t registers_mask) {

static void emit_registers_load(asm_rv32_t *state, mp_uint_t registers_mask) {
mp_uint_t offset = 0;
for (mp_uint_t register_index = 0; register_index < AVAILABLE_REGISTERS_COUNT; register_index++) {
for (mp_uint_t register_index = 0; register_index < RV32_AVAILABLE_REGISTERS_COUNT; register_index++) {
if (registers_mask & (1U << register_index)) {
assert(FIT_UNSIGNED(offset >> 2, 6) && "Registers load stack offset out of range.");
// c.lwsp register, offset
@@ -249,7 +222,7 @@ static void adjust_stack(asm_rv32_t *state, mp_int_t stack_size) {
// stack to hold all the tainted registers and an arbitrary amount of space
// for locals.
static void emit_function_prologue(asm_rv32_t *state, mp_uint_t registers) {
mp_uint_t registers_count = MP_POPCOUNT(registers);
mp_uint_t registers_count = mp_popcount(registers);
state->stack_size = (registers_count + state->locals_count) * sizeof(uint32_t);
mp_uint_t old_saved_registers_mask = state->saved_registers_mask;
// Move stack pointer up.
@@ -282,7 +255,7 @@ static bool calculate_displacement_for_label(asm_rv32_t *state, mp_uint_t label,

void asm_rv32_entry(asm_rv32_t *state, mp_uint_t locals) {
state->saved_registers_mask |= (1U << REG_FUN_TABLE) | (1U << REG_LOCAL_1) | \
(1U << REG_LOCAL_2) | (1U << REG_LOCAL_3) | (1U << INTERNAL_TEMPORARY);
(1U << REG_LOCAL_2) | (1U << REG_LOCAL_3);
state->locals_count = locals;
emit_function_prologue(state, state->saved_registers_mask);
}
@@ -301,10 +274,11 @@ void asm_rv32_emit_call_ind(asm_rv32_t *state, mp_uint_t index) {
mp_uint_t offset = index * ASM_WORD_SIZE;
state->saved_registers_mask |= (1U << ASM_RV32_REG_RA);

if (IS_IN_C_REGISTER_WINDOW(REG_FUN_TABLE) && IS_IN_C_REGISTER_WINDOW(INTERNAL_TEMPORARY) && FIT_UNSIGNED(offset, 6)) {
if (RV32_IS_IN_C_REGISTER_WINDOW(REG_FUN_TABLE) && RV32_IS_IN_C_REGISTER_WINDOW(INTERNAL_TEMPORARY) && FIT_UNSIGNED(offset, 6)) {
state->saved_registers_mask |= (1U << INTERNAL_TEMPORARY);
// c.lw temporary, offset(fun_table)
// c.jalr temporary
asm_rv32_opcode_clw(state, MAP_IN_C_REGISTER_WINDOW(INTERNAL_TEMPORARY), MAP_IN_C_REGISTER_WINDOW(REG_FUN_TABLE), offset);
asm_rv32_opcode_clw(state, RV32_MAP_IN_C_REGISTER_WINDOW(INTERNAL_TEMPORARY), RV32_MAP_IN_C_REGISTER_WINDOW(REG_FUN_TABLE), offset);
asm_rv32_opcode_cjalr(state, INTERNAL_TEMPORARY);
return;
}
@@ -361,9 +335,9 @@ void asm_rv32_emit_jump_if_reg_nonzero(asm_rv32_t *state, mp_uint_t rs, mp_uint_
ptrdiff_t displacement = 0;
bool can_emit_short_jump = calculate_displacement_for_label(state, label, &displacement);

if (can_emit_short_jump && FIT_SIGNED(displacement, 8) && IS_IN_C_REGISTER_WINDOW(rs)) {
if (can_emit_short_jump && FIT_SIGNED(displacement, 8) && RV32_IS_IN_C_REGISTER_WINDOW(rs)) {
// c.bnez rs', displacement
asm_rv32_opcode_cbnez(state, MAP_IN_C_REGISTER_WINDOW(rs), displacement);
asm_rv32_opcode_cbnez(state, RV32_MAP_IN_C_REGISTER_WINDOW(rs), displacement);
return;
}

@@ -384,8 +358,8 @@ void asm_rv32_emit_jump_if_reg_nonzero(asm_rv32_t *state, mp_uint_t rs, mp_uint_
// jalr zero, temporary, LO(displacement) ; PC + 8
// ... ; PC + 12

if (can_emit_short_jump && IS_IN_C_REGISTER_WINDOW(rs)) {
asm_rv32_opcode_cbeqz(state, MAP_IN_C_REGISTER_WINDOW(rs), 10);
if (can_emit_short_jump && RV32_IS_IN_C_REGISTER_WINDOW(rs)) {
asm_rv32_opcode_cbeqz(state, RV32_MAP_IN_C_REGISTER_WINDOW(rs), 10);
// Compensate for the C.BEQZ opcode.
displacement -= ASM_HALFWORD_SIZE;
} else {
@@ -458,9 +432,9 @@ void asm_rv32_emit_mov_reg_local(asm_rv32_t *state, mp_uint_t rd, mp_uint_t loca
void asm_rv32_emit_mov_reg_local_addr(asm_rv32_t *state, mp_uint_t rd, mp_uint_t local) {
mp_uint_t offset = state->locals_stack_offset + (local * ASM_WORD_SIZE);

if (FIT_UNSIGNED(offset, 10) && offset != 0 && IS_IN_C_REGISTER_WINDOW(rd)) {
if (FIT_UNSIGNED(offset, 10) && offset != 0 && RV32_IS_IN_C_REGISTER_WINDOW(rd)) {
// c.addi4spn rd', offset
asm_rv32_opcode_caddi4spn(state, MAP_IN_C_REGISTER_WINDOW(rd), offset);
asm_rv32_opcode_caddi4spn(state, RV32_MAP_IN_C_REGISTER_WINDOW(rd), offset);
return;
}

@@ -479,9 +453,9 @@ void asm_rv32_emit_mov_reg_local_addr(asm_rv32_t *state, mp_uint_t rd, mp_uint_t
void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) {
mp_int_t scaled_offset = offset * sizeof(ASM_WORD_SIZE);

if (scaled_offset >= 0 && IS_IN_C_REGISTER_WINDOW(rd) && IS_IN_C_REGISTER_WINDOW(rs) && FIT_UNSIGNED(scaled_offset, 6)) {
if (scaled_offset >= 0 && RV32_IS_IN_C_REGISTER_WINDOW(rd) && RV32_IS_IN_C_REGISTER_WINDOW(rs) && FIT_UNSIGNED(scaled_offset, 6)) {
// c.lw rd', offset(rs')
asm_rv32_opcode_clw(state, MAP_IN_C_REGISTER_WINDOW(rd), MAP_IN_C_REGISTER_WINDOW(rs), scaled_offset);
asm_rv32_opcode_clw(state, RV32_MAP_IN_C_REGISTER_WINDOW(rd), RV32_MAP_IN_C_REGISTER_WINDOW(rs), scaled_offset);
return;
}
