Files
micropython/py/asmrv32.c
Alessandro Gatti 0600e4f273 py/asmrv32: Make some code sequences smaller.
This commit changes a few code sequences to use more compressed opcodes
where possible.  The sequences in question are the ones that show up the
most in the test suite and require the least amount of code changes, namely
short offset loads from memory to RET/ARG registers, indirect calls through
the function table, register-based jumps, locals' offset calculation,
reg-is-null jumps, and register comparisons.

There are no speed losses or gains from these changes, but there is an
average 15-20% generated code size reduction.

Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
2024-07-01 11:36:03 +10:00

637 lines
22 KiB
C

/*
* This file is part of the MicroPython project, https://micropython.org/
*
* The MIT License (MIT)
*
* Copyright (c) 2024 Alessandro Gatti
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "py/emit.h"
#include "py/mpconfig.h"
// wrapper around everything in this file
#if MICROPY_EMIT_RV32
#include "py/asmrv32.h"
#if MICROPY_DEBUG_VERBOSE
#define DEBUG_PRINT (1)
#define DEBUG_printf DEBUG_printf
#else
#define DEBUG_printf(...) (void)0
#endif
#ifndef MP_POPCOUNT
#ifdef _MSC_VER
#include <intrin.h>
#define MP_POPCOUNT __popcnt
#else
#if defined __has_builtin
#if __has_builtin(__builtin_popcount)
#define MP_POPCOUNT __builtin_popcount
#endif
#else
static uint32_t fallback_popcount(uint32_t value) {
value = value - ((value >> 1) & 0x55555555);
value = (value & 0x33333333) + ((value >> 2) & 0x33333333);
value = (value + (value >> 4)) & 0x0F0F0F0F;
return value * 0x01010101;
}
#define MP_POPCOUNT fallback_popcount
#endif
#endif
#endif
#define INTERNAL_TEMPORARY ASM_RV32_REG_S0
#define AVAILABLE_REGISTERS_COUNT 32
#define IS_IN_C_REGISTER_WINDOW(register_number) \
(((register_number) >= ASM_RV32_REG_X8) && ((register_number) <= ASM_RV32_REG_X15))
#define MAP_IN_C_REGISTER_WINDOW(register_number) \
((register_number) - ASM_RV32_REG_X8)
#define FIT_UNSIGNED(value, bits) (((value) & ~((1U << (bits)) - 1)) == 0)
#define FIT_SIGNED(value, bits) \
((((value) & ~((1U << ((bits) - 1)) - 1)) == 0) || \
(((value) & ~((1U << ((bits) - 1)) - 1)) == ~((1U << ((bits) - 1)) - 1)))
///////////////////////////////////////////////////////////////////////////////
void asm_rv32_emit_word_opcode(asm_rv32_t *state, mp_uint_t word) {
uint8_t *cursor = mp_asm_base_get_cur_to_write_bytes(&state->base, sizeof(uint32_t));
if (cursor == NULL) {
return;
}
#if MP_ENDIANNESS_LITTLE
cursor[0] = word & 0xFF;
cursor[1] = (word >> 8) & 0xFF;
cursor[2] = (word >> 16) & 0xFF;
cursor[3] = (word >> 24) & 0xFF;
#else
cursor[0] = (word >> 24) & 0xFF;
cursor[1] = (word >> 16) & 0xFF;
cursor[2] = (word >> 8) & 0xFF;
cursor[3] = word & 0xFF;
#endif
}
void asm_rv32_emit_halfword_opcode(asm_rv32_t *state, mp_uint_t word) {
uint8_t *cursor = mp_asm_base_get_cur_to_write_bytes(&state->base, sizeof(uint16_t));
if (cursor == NULL) {
return;
}
#if MP_ENDIANNESS_LITTLE
cursor[0] = word & 0xFF;
cursor[1] = (word >> 8) & 0xFF;
#else
cursor[0] = (word >> 8) & 0xFF;
cursor[1] = word & 0xFF;
#endif
}
///////////////////////////////////////////////////////////////////////////////
static void split_immediate(mp_int_t immediate, mp_uint_t *upper, mp_uint_t *lower) {
assert(upper != NULL && "Upper pointer is NULL.");
assert(lower != NULL && "Lower pointer is NULL.");
mp_uint_t unsigned_immediate = *((mp_uint_t *)&immediate);
*upper = unsigned_immediate & 0xFFFFF000;
*lower = unsigned_immediate & 0x00000FFF;
// Turn the lower half from unsigned to signed.
if ((*lower & 0x800) != 0) {
*upper += 0x1000;
*lower -= 0x1000;
}
}
static void load_upper_immediate(asm_rv32_t *state, mp_uint_t rd, mp_uint_t immediate) {
// if immediate fits in 17 bits and is ≠ 0:
// c.lui rd, HI(immediate)
// else:
// lui rd, HI(immediate)
if (FIT_SIGNED(immediate, 17) && ((immediate >> 12) != 0)) {
asm_rv32_opcode_clui(state, rd, immediate);
} else {
asm_rv32_opcode_lui(state, rd, immediate);
}
}
static void load_lower_immediate(asm_rv32_t *state, mp_uint_t rd, mp_uint_t immediate) {
// WARNING: This must be executed on a register that has either been
// previously cleared or was the target of a LUI/C.LUI or
// AUIPC opcode.
if (immediate == 0) {
return;
}
// if LO(immediate) fits in 6 bits:
// c.addi rd, LO(immediate)
// else:
// addi rd, rd, LO(immediate)
if (FIT_SIGNED(immediate, 6)) {
asm_rv32_opcode_caddi(state, rd, immediate);
} else {
asm_rv32_opcode_addi(state, rd, rd, immediate);
}
}
static void load_full_immediate(asm_rv32_t *state, mp_uint_t rd, mp_int_t immediate) {
mp_uint_t upper = 0;
mp_uint_t lower = 0;
split_immediate(immediate, &upper, &lower);
// if immediate fits in 17 bits:
// c.lui rd, HI(immediate)
// else:
// lui rd, HI(immediate)
// if LO(immediate) fits in 6 bits && LO(immediate) != 0:
// c.addi rd, LO(immediate)
// else:
// addi rd, rd, LO(immediate)
load_upper_immediate(state, rd, upper);
load_lower_immediate(state, rd, lower);
}
void asm_rv32_emit_optimised_load_immediate(asm_rv32_t *state, mp_uint_t rd, mp_int_t immediate) {
if (FIT_SIGNED(immediate, 6)) {
// c.li rd, immediate
asm_rv32_opcode_cli(state, rd, immediate);
return;
}
if (FIT_SIGNED(immediate, 12)) {
// addi rd, zero, immediate
asm_rv32_opcode_addi(state, rd, ASM_RV32_REG_ZERO, immediate);
return;
}
load_full_immediate(state, rd, immediate);
}
// RV32 does not have dedicated push/pop opcodes, so series of loads and
// stores are generated in their place.
static void emit_registers_store(asm_rv32_t *state, mp_uint_t registers_mask) {
mp_uint_t offset = 0;
for (mp_uint_t register_index = 0; register_index < AVAILABLE_REGISTERS_COUNT; register_index++) {
if (registers_mask & (1U << register_index)) {
assert(FIT_UNSIGNED(offset >> 2, 6) && "Registers save stack offset out of range.");
// c.swsp register, offset
asm_rv32_opcode_cswsp(state, register_index, offset);
offset += sizeof(uint32_t);
}
}
}
static void emit_registers_load(asm_rv32_t *state, mp_uint_t registers_mask) {
mp_uint_t offset = 0;
for (mp_uint_t register_index = 0; register_index < AVAILABLE_REGISTERS_COUNT; register_index++) {
if (registers_mask & (1U << register_index)) {
assert(FIT_UNSIGNED(offset >> 2, 6) && "Registers load stack offset out of range.");
// c.lwsp register, offset
asm_rv32_opcode_clwsp(state, register_index, offset);
offset += sizeof(uint32_t);
}
}
}
static void adjust_stack(asm_rv32_t *state, mp_int_t stack_size) {
if (stack_size == 0) {
return;
}
if (FIT_SIGNED(stack_size, 6)) {
// c.addi sp, stack_size
asm_rv32_opcode_caddi(state, ASM_RV32_REG_SP, stack_size);
return;
}
if (FIT_SIGNED(stack_size, 12)) {
// addi sp, sp, stack_size
asm_rv32_opcode_addi(state, ASM_RV32_REG_SP, ASM_RV32_REG_SP, stack_size);
return;
}
// li temporary, stack_size
// c.add sp, temporary
load_full_immediate(state, REG_TEMP0, stack_size);
asm_rv32_opcode_cadd(state, ASM_RV32_REG_SP, REG_TEMP0);
}
// Generate a generic function entry prologue code sequence, setting up the
// stack to hold all the tainted registers and an arbitrary amount of space
// for locals.
static void emit_function_prologue(asm_rv32_t *state, mp_uint_t registers) {
mp_uint_t registers_count = MP_POPCOUNT(registers);
state->stack_size = (registers_count + state->locals_count) * sizeof(uint32_t);
mp_uint_t old_saved_registers_mask = state->saved_registers_mask;
// Move stack pointer up.
adjust_stack(state, -state->stack_size);
// Store registers at the top of the saved stack area.
emit_registers_store(state, registers);
state->locals_stack_offset = registers_count * sizeof(uint32_t);
state->saved_registers_mask = old_saved_registers_mask;
}
// Restore registers and reset the stack pointer to its initial value.
static void emit_function_epilogue(asm_rv32_t *state, mp_uint_t registers) {
mp_uint_t old_saved_registers_mask = state->saved_registers_mask;
// Restore registers from the top of the stack area.
emit_registers_load(state, registers);
// Move stack pointer down.
adjust_stack(state, state->stack_size);
state->saved_registers_mask = old_saved_registers_mask;
}
///////////////////////////////////////////////////////////////////////////////
void asm_rv32_entry(asm_rv32_t *state, mp_uint_t locals) {
state->saved_registers_mask |= (1U << REG_FUN_TABLE) | (1U << REG_LOCAL_1) | \
(1U << REG_LOCAL_2) | (1U << REG_LOCAL_3) | (1U << INTERNAL_TEMPORARY);
state->locals_count = locals;
emit_function_prologue(state, state->saved_registers_mask);
}
void asm_rv32_exit(asm_rv32_t *state) {
emit_function_epilogue(state, state->saved_registers_mask);
// c.jr ra
asm_rv32_opcode_cjr(state, ASM_RV32_REG_RA);
}
void asm_rv32_end_pass(asm_rv32_t *state) {
(void)state;
}
void asm_rv32_emit_call_ind(asm_rv32_t *state, mp_uint_t index) {
mp_uint_t offset = index * ASM_WORD_SIZE;
state->saved_registers_mask |= (1U << ASM_RV32_REG_RA);
if (IS_IN_C_REGISTER_WINDOW(REG_FUN_TABLE) && IS_IN_C_REGISTER_WINDOW(INTERNAL_TEMPORARY) && FIT_SIGNED(offset, 7)) {
// c.lw temporary, offset(fun_table)
// c.jalr temporary
asm_rv32_opcode_clw(state, MAP_IN_C_REGISTER_WINDOW(INTERNAL_TEMPORARY), MAP_IN_C_REGISTER_WINDOW(REG_FUN_TABLE), offset);
asm_rv32_opcode_cjalr(state, INTERNAL_TEMPORARY);
return;
}
if (FIT_UNSIGNED(offset, 11)) {
// lw temporary, offset(fun_table)
// c.jalr temporary
asm_rv32_opcode_lw(state, INTERNAL_TEMPORARY, REG_FUN_TABLE, offset);
asm_rv32_opcode_cjalr(state, INTERNAL_TEMPORARY);
return;
}
mp_uint_t upper = 0;
mp_uint_t lower = 0;
split_immediate(offset, &upper, &lower);
// TODO: Can this clobber REG_TEMP[0:2]?
// lui temporary, HI(index) ; Or c.lui if possible
// c.add temporary, fun_table
// lw temporary, LO(index)(temporary)
// c.jalr temporary
load_upper_immediate(state, INTERNAL_TEMPORARY, upper);
asm_rv32_opcode_cadd(state, INTERNAL_TEMPORARY, REG_FUN_TABLE);
asm_rv32_opcode_lw(state, INTERNAL_TEMPORARY, INTERNAL_TEMPORARY, lower);
asm_rv32_opcode_cjalr(state, INTERNAL_TEMPORARY);
}
void asm_rv32_emit_jump_if_reg_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t label) {
ptrdiff_t displacement = (ptrdiff_t)(state->base.label_offsets[label] - state->base.code_offset);
// The least significant bit is ignored anyway.
if (FIT_SIGNED(displacement, 13)) {
// beq rs1, rs2, displacement
asm_rv32_opcode_beq(state, rs1, rs2, displacement);
return;
}
// Compensate for the initial BNE opcode.
displacement -= ASM_WORD_SIZE;
mp_uint_t upper = 0;
mp_uint_t lower = 0;
split_immediate(displacement, &upper, &lower);
// TODO: Can this clobber REG_TEMP[0:2]?
// bne rs1, rs2, 12 ; PC + 0
// auipc temporary, HI(displacement) ; PC + 4
// jalr zero, temporary, LO(displacement) ; PC + 8
// ... ; PC + 12
asm_rv32_opcode_bne(state, rs1, rs2, 12);
asm_rv32_opcode_auipc(state, INTERNAL_TEMPORARY, upper);
asm_rv32_opcode_jalr(state, ASM_RV32_REG_ZERO, INTERNAL_TEMPORARY, lower);
}
void asm_rv32_emit_jump_if_reg_nonzero(asm_rv32_t *state, mp_uint_t rs, mp_uint_t label) {
ptrdiff_t displacement = (ptrdiff_t)(state->base.label_offsets[label] - state->base.code_offset);
if (FIT_SIGNED(displacement, 9) && IS_IN_C_REGISTER_WINDOW(rs)) {
// c.bnez rs', displacement
asm_rv32_opcode_cbnez(state, MAP_IN_C_REGISTER_WINDOW(rs), displacement);
return;
}
// The least significant bit is ignored anyway.
if (FIT_SIGNED(displacement, 13)) {
// bne rs, zero, displacement
asm_rv32_opcode_bne(state, rs, ASM_RV32_REG_ZERO, displacement);
return;
}
// Compensate for the initial C.BEQZ/BEQ opcode.
displacement -= IS_IN_C_REGISTER_WINDOW(rs) ? ASM_HALFWORD_SIZE : ASM_WORD_SIZE;
mp_uint_t upper = 0;
mp_uint_t lower = 0;
split_immediate(displacement, &upper, &lower);
// TODO: Can this clobber REG_TEMP[0:2]?
// if rs1 in C window (the offset always fits):
// c.beqz rs', 10 ; PC + 0
// auipc temporary, HI(displacement) ; PC + 2
// jalr zero, temporary, LO(displacement) ; PC + 6
// ... ; PC + 10
// else:
// beq rs, zero, 12 ; PC + 0
// auipc temporary, HI(displacement) ; PC + 4
// jalr zero, temporary, LO(displacement) ; PC + 8
// ... ; PC + 12
if (IS_IN_C_REGISTER_WINDOW(rs)) {
asm_rv32_opcode_cbeqz(state, MAP_IN_C_REGISTER_WINDOW(rs), 10);
} else {
asm_rv32_opcode_beq(state, rs, ASM_RV32_REG_ZERO, 12);
}
asm_rv32_opcode_auipc(state, INTERNAL_TEMPORARY, upper);
asm_rv32_opcode_jalr(state, ASM_RV32_REG_ZERO, INTERNAL_TEMPORARY, lower);
}
void asm_rv32_emit_mov_local_reg(asm_rv32_t *state, mp_uint_t local, mp_uint_t rs) {
mp_uint_t offset = state->locals_stack_offset + (local * ASM_WORD_SIZE);
if (FIT_UNSIGNED(offset >> 2, 6)) {
// c.swsp rs, offset
asm_rv32_opcode_cswsp(state, rs, offset);
return;
}
if (FIT_UNSIGNED(offset, 11)) {
// sw rs, offset(sp)
asm_rv32_opcode_sw(state, rs, ASM_RV32_REG_SP, offset);
return;
}
mp_uint_t upper = 0;
mp_uint_t lower = 0;
split_immediate(offset, &upper, &lower);
// TODO: Can this clobber REG_TEMP[0:2]?
// lui temporary, HI(offset) ; Or c.lui if possible
// c.add temporary, sp
// sw rs, LO(offset)(temporary)
load_upper_immediate(state, INTERNAL_TEMPORARY, upper);
asm_rv32_opcode_cadd(state, INTERNAL_TEMPORARY, ASM_RV32_REG_SP);
asm_rv32_opcode_sw(state, rs, INTERNAL_TEMPORARY, lower);
}
void asm_rv32_emit_mov_reg_local(asm_rv32_t *state, mp_uint_t rd, mp_uint_t local) {
mp_uint_t offset = state->locals_stack_offset + (local * ASM_WORD_SIZE);
if (FIT_UNSIGNED(offset >> 2, 6)) {
// c.lwsp rd, offset
asm_rv32_opcode_clwsp(state, rd, offset);
return;
}
if (FIT_UNSIGNED(offset, 11)) {
// lw rd, offset(sp)
asm_rv32_opcode_lw(state, rd, ASM_RV32_REG_SP, offset);
return;
}
mp_uint_t upper = 0;
mp_uint_t lower = 0;
split_immediate(offset, &upper, &lower);
// lui rd, HI(offset) ; Or c.lui if possible
// c.add rd, sp
// lw rd, LO(offset)(rd)
load_upper_immediate(state, rd, upper);
asm_rv32_opcode_cadd(state, rd, ASM_RV32_REG_SP);
asm_rv32_opcode_lw(state, rd, rd, lower);
}
void asm_rv32_emit_mov_reg_local_addr(asm_rv32_t *state, mp_uint_t rd, mp_uint_t local) {
mp_uint_t offset = state->locals_stack_offset + (local * ASM_WORD_SIZE);
if (FIT_UNSIGNED(offset, 10) && offset != 0 && IS_IN_C_REGISTER_WINDOW(rd)) {
// c.addi4spn rd', offset
asm_rv32_opcode_caddi4spn(state, MAP_IN_C_REGISTER_WINDOW(rd), offset);
return;
}
if (FIT_UNSIGNED(offset, 11)) {
// addi rd, sp, offset
asm_rv32_opcode_addi(state, rd, ASM_RV32_REG_SP, offset);
return;
}
// li rd, offset
// c.add rd, sp
load_full_immediate(state, rd, offset);
asm_rv32_opcode_cadd(state, rd, ASM_RV32_REG_SP);
}
void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) {
mp_int_t scaled_offset = offset * sizeof(ASM_WORD_SIZE);
if (IS_IN_C_REGISTER_WINDOW(rd) && IS_IN_C_REGISTER_WINDOW(rs) && FIT_SIGNED(offset, 7)) {
// c.lw rd', offset(rs')
asm_rv32_opcode_clw(state, MAP_IN_C_REGISTER_WINDOW(rd), MAP_IN_C_REGISTER_WINDOW(rs), scaled_offset);
return;
}
if (FIT_SIGNED(scaled_offset, 12)) {
// lw rd, offset(rs)
asm_rv32_opcode_lw(state, rd, rs, scaled_offset);
return;
}
mp_uint_t upper = 0;
mp_uint_t lower = 0;
split_immediate(scaled_offset, &upper, &lower);
// lui rd, HI(offset) ; Or c.lui if possible
// c.add rd, rs
// lw rd, LO(offset)(rd)
load_upper_immediate(state, rd, upper);
asm_rv32_opcode_cadd(state, rd, rs);
asm_rv32_opcode_lw(state, rd, rd, lower);
}
void asm_rv32_emit_jump(asm_rv32_t *state, mp_uint_t label) {
ptrdiff_t displacement = (ptrdiff_t)(state->base.label_offsets[label] - state->base.code_offset);
// The least significant bit is ignored anyway.
if (FIT_SIGNED(displacement, 13)) {
// c.j displacement
asm_rv32_opcode_cj(state, displacement);
return;
}
mp_uint_t upper = 0;
mp_uint_t lower = 0;
split_immediate(displacement, &upper, &lower);
// TODO: Can this clobber REG_TEMP[0:2]?
// auipc temporary, HI(displacement)
// jalr zero, temporary, LO(displacement)
asm_rv32_opcode_auipc(state, INTERNAL_TEMPORARY, upper);
asm_rv32_opcode_jalr(state, ASM_RV32_REG_ZERO, INTERNAL_TEMPORARY, lower);
}
void asm_rv32_emit_store_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) {
mp_int_t scaled_offset = offset * ASM_WORD_SIZE;
if (FIT_SIGNED(scaled_offset, 12)) {
// sw rd, offset(rs)
asm_rv32_opcode_sw(state, rd, rs, scaled_offset);
return;
}
mp_uint_t upper = 0;
mp_uint_t lower = 0;
split_immediate(scaled_offset, &upper, &lower);
// lui rd, HI(offset) ; Or c.lui if possible
// c.add rd, rs
// sw rd, LO(offset)(rd)
load_upper_immediate(state, rd, upper);
asm_rv32_opcode_cadd(state, rd, rs);
asm_rv32_opcode_sw(state, rd, rd, lower);
}
void asm_rv32_emit_mov_reg_pcrel(asm_rv32_t *state, mp_uint_t rd, mp_uint_t label) {
ptrdiff_t displacement = (ptrdiff_t)(state->base.label_offsets[label] - state->base.code_offset);
mp_uint_t upper = 0;
mp_uint_t lower = 0;
split_immediate(displacement, &upper, &lower);
// Compressed instructions are not used even if they may allow for code size
// savings as the code changes size between compute and emit passes
// otherwise. If that happens then the assertion at asmbase.c:93 triggers
// when built in debug mode.
// auipc rd, HI(relative)
// addi rd, rd, LO(relative)
asm_rv32_opcode_auipc(state, rd, upper);
asm_rv32_opcode_addi(state, rd, rd, lower);
}
void asm_rv32_emit_load16_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) {
mp_int_t scaled_offset = offset * sizeof(uint16_t);
if (FIT_SIGNED(scaled_offset, 12)) {
// lhu rd, offset(rs)
asm_rv32_opcode_lhu(state, rd, rs, scaled_offset);
return;
}
mp_uint_t upper = 0;
mp_uint_t lower = 0;
split_immediate(scaled_offset, &upper, &lower);
// lui rd, HI(offset) ; Or c.lui if possible
// c.add rd, rs
// lhu rd, LO(offset)(rd)
load_upper_immediate(state, rd, upper);
asm_rv32_opcode_cadd(state, rd, rs);
asm_rv32_opcode_lhu(state, rd, rd, lower);
}
void asm_rv32_emit_optimised_xor(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs) {
if (rs == rd) {
// c.li rd, 0
asm_rv32_opcode_cli(state, rd, 0);
return;
}
// xor rd, rd, rs
asm_rv32_opcode_xor(state, rd, rd, rs);
}
void asm_rv32_meta_comparison_eq(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd) {
// c.li rd, 1 ;
// beq rs1, rs2, 6 ; PC + 0
// c.li rd, 0 ; PC + 4
// ... ; PC + 6
asm_rv32_opcode_cli(state, rd, 1);
asm_rv32_opcode_beq(state, rs1, rs2, 6);
asm_rv32_opcode_cli(state, rd, 0);
}
void asm_rv32_meta_comparison_ne(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd) {
// sub rd, rs1, rs2
// sltu rd, zero, rd
asm_rv32_opcode_sub(state, rd, rs1, rs2);
asm_rv32_opcode_sltu(state, rd, ASM_RV32_REG_ZERO, rd);
}
void asm_rv32_meta_comparison_lt(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd, bool unsigned_comparison) {
// slt(u) rd, rs1, rs2
if (unsigned_comparison) {
asm_rv32_opcode_sltu(state, rd, rs1, rs2);
} else {
asm_rv32_opcode_slt(state, rd, rs1, rs2);
}
}
void asm_rv32_meta_comparison_le(asm_rv32_t *state, mp_uint_t rs1, mp_uint_t rs2, mp_uint_t rd, bool unsigned_comparison) {
// c.li rd, 1 ;
// beq rs1, rs2, 8 ; PC + 0
// slt(u) rd, rs1, rs2 ; PC + 4
// ... ; PC + 8
asm_rv32_opcode_cli(state, rd, 1);
asm_rv32_opcode_beq(state, rs1, rs2, 8);
if (unsigned_comparison) {
asm_rv32_opcode_sltu(state, rd, rs1, rs2);
} else {
asm_rv32_opcode_slt(state, rd, rs1, rs2);
}
}
#endif // MICROPY_EMIT_RV32