py/asmrv32: Implement the full set of Viper load/store operations.

This commit expands the implementation of Viper load/store operations
that are optimised for the RV32 platform.

Given the way opcodes are encoded, all value sizes are handled by just
two functions - one for loads and one for stores.  This reduces
duplication across the existing operations and should, in theory, save
space, as the size-specific helpers are removed.  Both emitters
generate the shortest possible sequence (as long as the stack pointer
is not involved), using compressed opcodes where possible.

Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
Author: Alessandro Gatti
Date: 2025-06-10 17:58:34 +02:00
Committed by: Damien George
Parent: 703d5acbad
Commit: a8e0369826

3 changed files with 58 additions and 74 deletions
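For context on the "only two functions" claim in the message above: in RV32I the three load opcodes (lbu, lhu, lw) share major opcode 0x03 and differ only in their funct3 field, while the three store opcodes (sb, sh, sw) share major opcode 0x23 with funct3 equal to log2 of the access size. A standalone illustration of the idea (not code from this commit):

    #include <stdint.h>
    #include <stdio.h>

    // funct3 values for lbu, lhu, lw; for stores, funct3 is the size
    // exponent itself (0 = sb, 1 = sh, 2 = sw).
    static const uint8_t load_funct3[3] = { 0x04, 0x05, 0x02 };

    int main(void) {
        // lhu x10, 8(x11): size exponent 1 selects funct3 = 0x05.
        uint32_t word = (8u << 20) | (11u << 15) | ((uint32_t)load_funct3[1] << 12)
            | (10u << 7) | 0x03;
        printf("%08x\n", word); // prints 0085d503
        return 0;
    }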

py/asmrv32.c

@@ -450,18 +450,24 @@ void asm_rv32_emit_mov_reg_local_addr(asm_rv32_t *state, mp_uint_t rd, mp_uint_t
     asm_rv32_opcode_cadd(state, rd, ASM_RV32_REG_SP);
 }
 
-void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) {
-    mp_int_t scaled_offset = offset * sizeof(ASM_WORD_SIZE);
-    if (scaled_offset >= 0 && RV32_IS_IN_C_REGISTER_WINDOW(rd) && RV32_IS_IN_C_REGISTER_WINDOW(rs) && FIT_UNSIGNED(scaled_offset, 6)) {
+static const uint8_t RV32_LOAD_OPCODE_TABLE[3] = {
+    0x04, 0x05, 0x02
+};
+
+void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, int32_t offset, mp_uint_t operation_size) {
+    assert(operation_size <= 2 && "Operation size value out of range.");
+    int32_t scaled_offset = offset << operation_size;
+    if (scaled_offset >= 0 && operation_size == 2 && RV32_IS_IN_C_REGISTER_WINDOW(rd) && RV32_IS_IN_C_REGISTER_WINDOW(rs) && MP_FIT_UNSIGNED(6, scaled_offset)) {
         // c.lw rd', offset(rs')
         asm_rv32_opcode_clw(state, RV32_MAP_IN_C_REGISTER_WINDOW(rd), RV32_MAP_IN_C_REGISTER_WINDOW(rs), scaled_offset);
         return;
     }
 
-    if (FIT_SIGNED(scaled_offset, 12)) {
-        // lw rd, offset(rs)
-        asm_rv32_opcode_lw(state, rd, rs, scaled_offset);
+    if (MP_FIT_SIGNED(12, scaled_offset)) {
+        // lbu|lhu|lw rd, offset(rs)
+        asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_I(0x03, RV32_LOAD_OPCODE_TABLE[operation_size], rd, rs, scaled_offset));
         return;
     }
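A note on the compressed fast path above: RV32_IS_IN_C_REGISTER_WINDOW and RV32_MAP_IN_C_REGISTER_WINDOW are not part of this diff. RVC instructions such as c.lw and c.sw encode their register operands in three bits, so they can only name x8 through x15; hypothetical equivalents of those helpers (names assumed, the real definitions live alongside the RV32 emitter) would be:

    // c.lw/c.sw encode rd'/rs' in 3 bits, mapping to x8..x15 (rd' = rd - 8).
    #define RV32_IS_IN_C_REGISTER_WINDOW(reg)  ((reg) >= 8 && (reg) <= 15)
    #define RV32_MAP_IN_C_REGISTER_WINDOW(reg) ((reg) - 8)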
@@ -469,12 +475,12 @@ void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_
     mp_uint_t upper = 0;
     mp_uint_t lower = 0;
     split_immediate(scaled_offset, &upper, &lower);
-    // lui rd, HI(offset) ; Or c.lui if possible
-    // c.add rd, rs
-    // lw rd, LO(offset)(rd)
+    // lui rd, HI(offset) ; Or c.lui if possible
+    // c.add rd, rs
+    // lbu|lhu|lw rd, LO(offset)(rd)
     load_upper_immediate(state, rd, upper);
     asm_rv32_opcode_cadd(state, rd, rs);
-    asm_rv32_opcode_lw(state, rd, rd, lower);
+    asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_I(0x03, RV32_LOAD_OPCODE_TABLE[operation_size], rd, rd, lower));
 }
 
 void asm_rv32_emit_jump(asm_rv32_t *state, mp_uint_t label) {
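Neither RV32_ENCODE_TYPE_I nor asm_rv32_emit_word_opcode appears in this diff. Going by the RISC-V I-type layout, a sketch of what the encoder macro plausibly expands to, assuming these exact field positions:

    #include <stdint.h>

    // I-type word: imm[11:0] | rs1 | funct3 | rd | opcode.  With opcode 0x03,
    // funct3 from RV32_LOAD_OPCODE_TABLE selects lbu (0x04), lhu (0x05) or lw (0x02).
    static inline uint32_t rv32_encode_type_i(uint32_t opcode, uint32_t funct3,
        uint32_t rd, uint32_t rs1, int32_t imm) {
        return (((uint32_t)imm & 0xFFF) << 20) | (rs1 << 15) | (funct3 << 12)
            | (rd << 7) | opcode;
    }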
@@ -497,12 +503,20 @@ void asm_rv32_emit_jump(asm_rv32_t *state, mp_uint_t label) {
     asm_rv32_opcode_jalr(state, ASM_RV32_REG_ZERO, REG_TEMP2, lower);
 }
 
-void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) {
-    mp_int_t scaled_offset = offset * ASM_WORD_SIZE;
-    if (FIT_SIGNED(scaled_offset, 12)) {
-        // sw rd, offset(rs)
-        asm_rv32_opcode_sw(state, rd, rs, scaled_offset);
+void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, int32_t offset, mp_uint_t operation_size) {
+    assert(operation_size <= 2 && "Operation size value out of range.");
+    int32_t scaled_offset = offset << operation_size;
+    if (scaled_offset >= 0 && operation_size == 2 && RV32_IS_IN_C_REGISTER_WINDOW(rd) && RV32_IS_IN_C_REGISTER_WINDOW(rs) && MP_FIT_UNSIGNED(6, scaled_offset)) {
+        // c.sw rd', offset(rs')
+        asm_rv32_opcode_csw(state, RV32_MAP_IN_C_REGISTER_WINDOW(rd), RV32_MAP_IN_C_REGISTER_WINDOW(rs), scaled_offset);
+        return;
+    }
+
+    if (MP_FIT_SIGNED(12, scaled_offset)) {
+        // sb|sh|sw rd, offset(rs)
+        asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_S(0x23, operation_size, rs, rd, scaled_offset));
         return;
     }
@@ -510,12 +524,12 @@ void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint
     mp_uint_t upper = 0;
     mp_uint_t lower = 0;
     split_immediate(scaled_offset, &upper, &lower);
-    // lui temporary, HI(offset) ; Or c.lui if possible
-    // c.add temporary, rs
-    // sw rd, LO(offset)(temporary)
+    // lui temporary, HI(offset) ; Or c.lui if possible
+    // c.add temporary, rs
+    // sb|sh|sw rd, LO(offset)(temporary)
     load_upper_immediate(state, REG_TEMP2, upper);
     asm_rv32_opcode_cadd(state, REG_TEMP2, rs);
-    asm_rv32_opcode_sw(state, rd, REG_TEMP2, lower);
+    asm_rv32_emit_word_opcode(state, RV32_ENCODE_TYPE_S(0x23, operation_size, REG_TEMP2, rd, lower));
 }
 
 void asm_rv32_emit_mov_reg_pcrel(asm_rv32_t *state, mp_uint_t rd, mp_uint_t label) {
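The store path needs no opcode table: sb, sh and sw have funct3 values 0, 1 and 2, exactly matching operation_size, so it is passed straight through to RV32_ENCODE_TYPE_S. That macro is also not shown here; per the RISC-V S-type layout, which splits the immediate across two fields, a sketch under the same assumptions as the I-type one above:

    #include <stdint.h>

    // S-type word: imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode.
    // rs1 is the base register, rs2 the value being stored.
    static inline uint32_t rv32_encode_type_s(uint32_t opcode, uint32_t funct3,
        uint32_t rs1, uint32_t rs2, int32_t imm) {
        uint32_t uimm = (uint32_t)imm & 0xFFF;
        return ((uimm >> 5) << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12)
            | ((uimm & 0x1F) << 7) | opcode;
    }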
@@ -530,27 +544,6 @@ void asm_rv32_emit_mov_reg_pcrel(asm_rv32_t *state, mp_uint_t rd, mp_uint_t labe
     asm_rv32_opcode_addi(state, rd, rd, lower);
 }
 
-void asm_rv32_emit_load16_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) {
-    mp_int_t scaled_offset = offset * sizeof(uint16_t);
-    if (FIT_SIGNED(scaled_offset, 12)) {
-        // lhu rd, offset(rs)
-        asm_rv32_opcode_lhu(state, rd, rs, scaled_offset);
-        return;
-    }
-
-    mp_uint_t upper = 0;
-    mp_uint_t lower = 0;
-    split_immediate(scaled_offset, &upper, &lower);
-    // lui rd, HI(offset) ; Or c.lui if possible
-    // c.add rd, rs
-    // lhu rd, LO(offset)(rd)
-    load_upper_immediate(state, rd, upper);
-    asm_rv32_opcode_cadd(state, rd, rs);
-    asm_rv32_opcode_lhu(state, rd, rd, lower);
-}
-
 void asm_rv32_emit_optimised_xor(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs) {
     if (rs == rd) {
         // c.li rd, 0
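With the unified emitter in place, the deleted 16-bit helper is simply the operation_size == 1 case of the general function; in effect, former call sites become:

    // Formerly: asm_rv32_emit_load16_reg_reg_offset(state, rd, rs, offset);
    // Same output: lhu when the scaled offset fits, else lui + c.add + lhu.
    asm_rv32_emit_load_reg_reg_offset(state, rd, rs, offset, 1);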

py/asmrv32.h

@@ -709,14 +709,13 @@ void asm_rv32_emit_call_ind(asm_rv32_t *state, mp_uint_t index);
 void asm_rv32_emit_jump(asm_rv32_t *state, mp_uint_t label);
 void asm_rv32_emit_jump_if_reg_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t label);
 void asm_rv32_emit_jump_if_reg_nonzero(asm_rv32_t *state, mp_uint_t rs, mp_uint_t label);
-void asm_rv32_emit_load16_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset);
-void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset);
+void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, int32_t offset, mp_uint_t operation_size);
 void asm_rv32_emit_mov_local_reg(asm_rv32_t *state, mp_uint_t local, mp_uint_t rs);
 void asm_rv32_emit_mov_reg_local_addr(asm_rv32_t *state, mp_uint_t rd, mp_uint_t local);
 void asm_rv32_emit_mov_reg_local(asm_rv32_t *state, mp_uint_t rd, mp_uint_t local);
 void asm_rv32_emit_mov_reg_pcrel(asm_rv32_t *state, mp_uint_t rd, mp_uint_t label);
 void asm_rv32_emit_optimised_xor(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs);
-void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_uint_t base, mp_int_t offset);
+void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_uint_t base, int32_t offset, mp_uint_t operation_size);
 
 #define ASM_T asm_rv32_t
 #define ASM_ENTRY(state, labels) asm_rv32_entry(state, labels)
@@ -733,11 +732,12 @@ void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_
 #define ASM_JUMP_IF_REG_ZERO(state, rs, label, bool_test) asm_rv32_emit_jump_if_reg_eq(state, rs, ASM_RV32_REG_ZERO, label)
 #define ASM_JUMP_REG(state, rs) asm_rv32_opcode_cjr(state, rs)
 #define ASM_LOAD_REG_REG_OFFSET(state, rd, rs, offset) ASM_LOAD32_REG_REG_OFFSET(state, rd, rs, offset)
-#define ASM_LOAD16_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_load16_reg_reg_offset(state, rd, rs, offset)
-#define ASM_LOAD16_REG_REG(state, rd, rs) asm_rv32_opcode_lhu(state, rd, rs, 0)
+#define ASM_LOAD8_REG_REG(state, rd, rs) ASM_LOAD8_REG_REG_OFFSET(state, rd, rs, 0)
+#define ASM_LOAD16_REG_REG(state, rd, rs) ASM_LOAD16_REG_REG_OFFSET(state, rd, rs, 0)
 #define ASM_LOAD32_REG_REG(state, rd, rs) ASM_LOAD32_REG_REG_OFFSET(state, rd, rs, 0)
-#define ASM_LOAD32_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_load_reg_reg_offset(state, rd, rs, offset)
-#define ASM_LOAD8_REG_REG(state, rd, rs) asm_rv32_opcode_lbu(state, rd, rs, 0)
+#define ASM_LOAD8_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_load_reg_reg_offset(state, rd, rs, offset, 0)
+#define ASM_LOAD16_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_load_reg_reg_offset(state, rd, rs, offset, 1)
+#define ASM_LOAD32_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_load_reg_reg_offset(state, rd, rs, offset, 2)
 #define ASM_LSL_REG_REG(state, rd, rs) asm_rv32_opcode_sll(state, rd, rd, rs)
 #define ASM_LSR_REG_REG(state, rd, rs) asm_rv32_opcode_srl(state, rd, rd, rs)
 #define ASM_MOV_LOCAL_REG(state, local, rs) asm_rv32_emit_mov_local_reg(state, local, rs)
@@ -751,13 +751,20 @@ void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_
 #define ASM_NOT_REG(state, rd) asm_rv32_opcode_xori(state, rd, rd, -1)
 #define ASM_OR_REG_REG(state, rd, rs) asm_rv32_opcode_or(state, rd, rd, rs)
 #define ASM_STORE_REG_REG_OFFSET(state, rd, rs, offset) ASM_STORE32_REG_REG_OFFSET(state, rd, rs, offset)
-#define ASM_STORE16_REG_REG(state, rs1, rs2) asm_rv32_opcode_sh(state, rs1, rs2, 0)
+#define ASM_STORE8_REG_REG(state, rs1, rs2) ASM_STORE8_REG_REG_OFFSET(state, rs1, rs2, 0)
+#define ASM_STORE16_REG_REG(state, rs1, rs2) ASM_STORE16_REG_REG_OFFSET(state, rs1, rs2, 0)
 #define ASM_STORE32_REG_REG(state, rs1, rs2) ASM_STORE32_REG_REG_OFFSET(state, rs1, rs2, 0)
-#define ASM_STORE32_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_store_reg_reg_offset(state, rd, rs, offset)
-#define ASM_STORE8_REG_REG(state, rs1, rs2) asm_rv32_opcode_sb(state, rs1, rs2, 0)
+#define ASM_STORE8_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_store_reg_reg_offset(state, rd, rs, offset, 0)
+#define ASM_STORE16_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_store_reg_reg_offset(state, rd, rs, offset, 1)
+#define ASM_STORE32_REG_REG_OFFSET(state, rd, rs, offset) asm_rv32_emit_store_reg_reg_offset(state, rd, rs, offset, 2)
 #define ASM_SUB_REG_REG(state, rd, rs) asm_rv32_opcode_sub(state, rd, rd, rs)
 #define ASM_XOR_REG_REG(state, rd, rs) asm_rv32_emit_optimised_xor(state, rd, rs)
 #define ASM_CLR_REG(state, rd)
+#define ASM_LOAD8_REG_REG_REG(state, rd, rs1, rs2) \
+    do { \
+        asm_rv32_opcode_cadd(state, rs1, rs2); \
+        asm_rv32_opcode_lbu(state, rd, rs1, 0); \
+    } while (0)
 #define ASM_LOAD16_REG_REG_REG(state, rd, rs1, rs2) \
     do { \
         asm_rv32_opcode_slli(state, rs2, rs2, 1); \
@@ -770,6 +777,11 @@ void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t source, mp_
         asm_rv32_opcode_cadd(state, rs1, rs2); \
         asm_rv32_opcode_lw(state, rd, rs1, 0); \
     } while (0)
+#define ASM_STORE8_REG_REG_REG(state, rd, rs1, rs2) \
+    do { \
+        asm_rv32_opcode_cadd(state, rs1, rs2); \
+        asm_rv32_opcode_sb(state, rd, rs1, 0); \
+    } while (0)
 #define ASM_STORE16_REG_REG_REG(state, rd, rs1, rs2) \
     do { \
         asm_rv32_opcode_slli(state, rs2, rs2, 1); \
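To trace the size argument through the macro layer: a 16-bit Viper store such as ASM_STORE16_REG_REG_OFFSET(emit->as, reg_value, reg_base, index_value) (an illustrative call, not a line from this commit) now expands to:

    // operation_size 1 makes the emitter scale the element index by 2
    // (offset << 1) and pick the sh encoding; out-of-range offsets fall
    // back to the lui + c.add + sh sequence.
    asm_rv32_emit_store_reg_reg_offset(emit->as, reg_value, reg_base, index_value, 1);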

py/emitnative.c

@@ -180,12 +180,6 @@ static const uint8_t reg_local_table[MAX_REGS_FOR_LOCAL_VARS] = {REG_LOCAL_1, RE
         *emit->error_slot = mp_obj_new_exception_msg_varg(&mp_type_ViperTypeError, __VA_ARGS__); \
     } while (0)
 
-#if N_RV32
-#define FIT_SIGNED(value, bits) \
-    ((((value) & ~((1U << ((bits) - 1)) - 1)) == 0) || \
-    (((value) & ~((1U << ((bits) - 1)) - 1)) == ~((1U << ((bits) - 1)) - 1)))
-#endif
-
 typedef enum {
     STACK_VALUE,
     STACK_REG,
@@ -1540,12 +1534,7 @@ static void emit_native_load_subscr(emit_t *emit) {
                 #ifdef ASM_LOAD8_REG_REG_OFFSET
                 ASM_LOAD8_REG_REG_OFFSET(emit->as, REG_RET, reg_base, index_value);
                 #else
-                #if N_RV32
-                if (FIT_SIGNED(index_value, 12)) {
-                    asm_rv32_opcode_lbu(emit->as, REG_RET, reg_base, index_value);
-                    break;
-                }
-                #elif N_XTENSA || N_XTENSAWIN
+                #if N_XTENSA || N_XTENSAWIN
                 if (index_value >= 0 && index_value < 256) {
                     asm_xtensa_op_l8ui(emit->as, REG_RET, reg_base, index_value);
                     break;
@@ -1787,12 +1776,7 @@ static void emit_native_store_subscr(emit_t *emit) {
                 #ifdef ASM_STORE8_REG_REG_OFFSET
                 ASM_STORE8_REG_REG_OFFSET(emit->as, reg_value, reg_base, index_value);
                 #else
-                #if N_RV32
-                if (FIT_SIGNED(index_value, 12)) {
-                    asm_rv32_opcode_sb(emit->as, reg_value, reg_base, index_value);
-                    break;
-                }
-                #elif N_XTENSA || N_XTENSAWIN
+                #if N_XTENSA || N_XTENSAWIN
                 if (index_value >= 0 && index_value < 256) {
                     asm_xtensa_op_s8i(emit->as, reg_value, reg_base, index_value);
                     break;
@@ -1817,12 +1801,7 @@ static void emit_native_store_subscr(emit_t *emit) {
                 #ifdef ASM_STORE16_REG_REG_OFFSET
                 ASM_STORE16_REG_REG_OFFSET(emit->as, reg_value, reg_base, index_value);
                 #else
-                #if N_RV32
-                if (FIT_SIGNED(index_value, 11)) {
-                    asm_rv32_opcode_sh(emit->as, reg_value, reg_base, index_value << 1);
-                    break;
-                }
-                #elif N_XTENSA || N_XTENSAWIN
+                #if N_XTENSA || N_XTENSAWIN
                 if (index_value >= 0 && index_value < 256) {
                     asm_xtensa_op_s16i(emit->as, reg_value, reg_base, index_value);
                     break;
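The port-local FIT_SIGNED macro can go because the range checks now happen inside the RV32 emitters themselves, via the shared MP_FIT_SIGNED/MP_FIT_UNSIGNED helpers seen in the first file. Functionally, the removed macro tested whether a value fits an N-bit two's-complement immediate; an equivalent standalone predicate, as a sketch rather than MicroPython's actual helper:

    #include <stdbool.h>
    #include <stdint.h>

    // True when value fits a `bits`-wide signed immediate:
    // -(2^(bits-1)) <= value <= 2^(bits-1) - 1.
    static bool fits_signed(int32_t value, unsigned bits) {
        int32_t limit = (int32_t)1 << (bits - 1);
        return value >= -limit && value < limit;
    }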