py/asmarm: Implement the full set of Viper load/store operations.

This commit expands the implementation of Viper load/store operations
that are optimised for the Arm platform.

Now both load and store emitters should generate the shortest possible
sequence in all cases.  Redundant specialised operation emitters have
been folded into the general case implementation - this was the case of
integer-indexed load/store operations with a fixed offset of zero.

Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
This commit is contained in:
Alessandro Gatti
2025-06-10 19:24:10 +02:00
committed by Damien George
parent a8e0369826
commit cd1b921bf2
3 changed files with 46 additions and 34 deletions

View File

@@ -38,8 +38,6 @@
#define REG_TEMP ASM_ARM_REG_R8
#define SIGNED_FIT24(x) (((x) & 0xff800000) == 0) || (((x) & 0xff000000) == 0xff000000)
// Insert word into instruction flow
static void emit(asm_arm_t *as, uint op) {
uint8_t *c = mp_asm_base_get_cur_to_write_bytes(&as->base, 4);
@@ -347,11 +345,6 @@ void asm_arm_ldr_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offse
}
}
void asm_arm_ldrh_reg_reg(asm_arm_t *as, uint rd, uint rn) {
// ldrh rd, [rn]
emit_al(as, 0x1d000b0 | (rn << 16) | (rd << 12));
}
void asm_arm_ldrh_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) {
// ldrh doesn't support scaled register index
emit_al(as, 0x1a00080 | (REG_TEMP << 12) | rn); // mov temp, rn, lsl #1
@@ -370,16 +363,23 @@ void asm_arm_ldrh_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offs
}
}
void asm_arm_ldrb_reg_reg(asm_arm_t *as, uint rd, uint rn) {
// ldrb rd, [rn]
emit_al(as, 0x5d00000 | (rn << 16) | (rd << 12));
}
void asm_arm_ldrb_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) {
// ldrb rd, [rm, rn]
emit_al(as, 0x7d00000 | (rm << 16) | (rd << 12) | rn);
}
void asm_arm_ldrb_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset) {
if (byte_offset < 0x1000) {
// ldrb rd, [rn, #off]
emit_al(as, 0x5d00000 | (rn << 16) | (rd << 12) | byte_offset);
} else {
// mov temp, #off
// ldrb rd, [rn, temp]
asm_arm_mov_reg_i32_optimised(as, REG_TEMP, byte_offset);
emit_al(as, 0x7d00000 | (rn << 16) | (rd << 12) | REG_TEMP);
}
}
void asm_arm_ldr_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) {
// ldr rd, [rm, rn, lsl #2]
emit_al(as, 0x7900100 | (rm << 16) | (rd << 12) | rn);
@@ -397,14 +397,28 @@ void asm_arm_str_reg_reg_offset(asm_arm_t *as, uint rd, uint rm, uint byte_offse
}
}
void asm_arm_strh_reg_reg(asm_arm_t *as, uint rd, uint rm) {
// strh rd, [rm]
emit_al(as, 0x1c000b0 | (rm << 16) | (rd << 12));
void asm_arm_strh_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset) {
if (byte_offset < 0x100) {
// strh rd, [rn, #off]
emit_al(as, 0x1c000b0 | (rn << 16) | (rd << 12) | ((byte_offset & 0xf0) << 4) | (byte_offset & 0xf));
} else {
// mov temp, #off
// strh rd, [rn, temp]
asm_arm_mov_reg_i32_optimised(as, REG_TEMP, byte_offset);
emit_al(as, 0x18000b0 | (rn << 16) | (rd << 12) | REG_TEMP);
}
}
void asm_arm_strb_reg_reg(asm_arm_t *as, uint rd, uint rm) {
// strb rd, [rm]
emit_al(as, 0x5c00000 | (rm << 16) | (rd << 12));
void asm_arm_strb_reg_reg_offset(asm_arm_t *as, uint rd, uint rm, uint byte_offset) {
if (byte_offset < 0x1000) {
// strb rd, [rm, #off]
emit_al(as, 0x5c00000 | (rm << 16) | (rd << 12) | byte_offset);
} else {
// mov temp, #off
// strb rd, [rm, temp]
asm_arm_mov_reg_i32_optimised(as, REG_TEMP, byte_offset);
emit_al(as, 0x7c00000 | (rm << 16) | (rd << 12) | REG_TEMP);
}
}
void asm_arm_str_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn) {
@@ -430,7 +444,7 @@ void asm_arm_bcc_label(asm_arm_t *as, int cond, uint label) {
rel -= 8; // account for instruction prefetch, PC is 8 bytes ahead of this instruction
rel >>= 2; // in ARM mode the branch target is 32-bit aligned, so the 2 LSB are omitted
if (SIGNED_FIT24(rel)) {
if (MP_FIT_SIGNED(24, rel)) {
emit(as, cond | 0xa000000 | (rel & 0xffffff));
} else {
printf("asm_arm_bcc: branch does not fit in 24 bits\n");

View File

@@ -110,12 +110,11 @@ void asm_arm_asr_reg_reg(asm_arm_t *as, uint rd, uint rs);
// memory
void asm_arm_ldr_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset);
void asm_arm_ldrh_reg_reg(asm_arm_t *as, uint rd, uint rn);
void asm_arm_ldrh_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset);
void asm_arm_ldrb_reg_reg(asm_arm_t *as, uint rd, uint rn);
void asm_arm_ldrb_reg_reg_offset(asm_arm_t *as, uint rd, uint rn, uint byte_offset);
void asm_arm_str_reg_reg_offset(asm_arm_t *as, uint rd, uint rm, uint byte_offset);
void asm_arm_strh_reg_reg(asm_arm_t *as, uint rd, uint rm);
void asm_arm_strb_reg_reg(asm_arm_t *as, uint rd, uint rm);
void asm_arm_strh_reg_reg_offset(asm_arm_t *as, uint rd, uint rm, uint byte_offset);
void asm_arm_strb_reg_reg_offset(asm_arm_t *as, uint rd, uint rm, uint byte_offset);
// load from array
void asm_arm_ldr_reg_reg_reg(asm_arm_t *as, uint rd, uint rm, uint rn);
@@ -209,16 +208,19 @@ void asm_arm_bx_reg(asm_arm_t *as, uint reg_src);
#define ASM_MUL_REG_REG(as, reg_dest, reg_src) asm_arm_mul_reg_reg_reg((as), (reg_dest), (reg_dest), (reg_src))
#define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) ASM_LOAD32_REG_REG_OFFSET((as), (reg_dest), (reg_base), (word_offset))
#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) asm_arm_ldrb_reg_reg((as), (reg_dest), (reg_base))
#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) asm_arm_ldrh_reg_reg((as), (reg_dest), (reg_base))
#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, uint16_offset) asm_arm_ldrh_reg_reg_offset((as), (reg_dest), (reg_base), 2 * (uint16_offset))
#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) asm_arm_ldr_reg_reg_offset((as), (reg_dest), (reg_base), 0)
#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) ASM_LOAD8_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
#define ASM_LOAD8_REG_REG_OFFSET(as, reg_dest, reg_base, byte_offset) asm_arm_ldrb_reg_reg_offset((as), (reg_dest), (reg_base), (byte_offset))
#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) ASM_LOAD16_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, halfword_offset) asm_arm_ldrh_reg_reg_offset((as), (reg_dest), (reg_base), 2 * (halfword_offset))
#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) ASM_LOAD32_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
#define ASM_LOAD32_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_arm_ldr_reg_reg_offset((as), (reg_dest), (reg_base), 4 * (word_offset))
#define ASM_STORE_REG_REG_OFFSET(as, reg_value, reg_base, word_offset) ASM_STORE32_REG_REG_OFFSET((as), (reg_value), (reg_base), (word_offset))
#define ASM_STORE8_REG_REG(as, reg_value, reg_base) asm_arm_strb_reg_reg((as), (reg_value), (reg_base))
#define ASM_STORE16_REG_REG(as, reg_value, reg_base) asm_arm_strh_reg_reg((as), (reg_value), (reg_base))
#define ASM_STORE32_REG_REG(as, reg_value, reg_base) asm_arm_str_reg_reg_offset((as), (reg_value), (reg_base), 0)
#define ASM_STORE8_REG_REG(as, reg_value, reg_base) ASM_STORE8_REG_REG_OFFSET((as), (reg_value), (reg_base), 0)
#define ASM_STORE8_REG_REG_OFFSET(as, reg_value, reg_base, byte_offset) asm_arm_strb_reg_reg_offset((as), (reg_value), (reg_base), (byte_offset))
#define ASM_STORE16_REG_REG(as, reg_value, reg_base) ASM_STORE16_REG_REG_OFFSET((as), (reg_value), (reg_base), 0)
#define ASM_STORE16_REG_REG_OFFSET(as, reg_value, reg_base, halfword_offset) asm_arm_strh_reg_reg_offset((as), (reg_value), (reg_base), 2 * (halfword_offset))
#define ASM_STORE32_REG_REG(as, reg_value, reg_base) ASM_STORE32_REG_REG_OFFSET((as), (reg_value), (reg_base), 0)
#define ASM_STORE32_REG_REG_OFFSET(as, reg_value, reg_base, word_offset) asm_arm_str_reg_reg_offset((as), (reg_value), (reg_base), 4 * (word_offset))
#define ASM_LOAD8_REG_REG_REG(as, reg_dest, reg_base, reg_index) asm_arm_ldrb_reg_reg_reg((as), (reg_dest), (reg_base), (reg_index))

View File

@@ -1785,10 +1785,6 @@ static void emit_native_store_subscr(emit_t *emit) {
if (index_value != 0) {
// index is non-zero
ASM_MOV_REG_IMM(emit->as, reg_index, index_value);
#if N_ARM
asm_arm_strb_reg_reg_reg(emit->as, reg_value, reg_base, reg_index);
break;
#endif
ASM_ADD_REG_REG(emit->as, reg_index, reg_base); // add index to base
reg_base = reg_index;
}