mirror of
https://github.com/RRZE-HPC/ibench.git
synced 2025-07-21 04:41:09 +02:00
91 lines
2.5 KiB
ArmAsm
91 lines
2.5 KiB
ArmAsm
// Benchmark parameters (expanded by the C preprocessor before assembly).
//   INSTR  mnemonic placed in the unrolled loop of the latency routine
//   NINST  value stored in the exported word ninst; it equals the number
//          of INSTR lines in one loop iteration
//   N      register alias: x0, which the latency routine copies into its
//          loop counter
#define INSTR   add
#define NINST   8
#define N       x0

// ninst: exported 32-bit word holding NINST.
// Presumably read by the benchmark driver to normalise its results per
// instruction -- confirm against the caller.
        .data
        .global ninst
ninst:  .long   NINST
|
|
//-----------------------------------------------------------------------
// latency: unrolled micro-benchmark loop (AArch64, GNU as through cpp)
//
// In:     x0 = iteration count (referenced through the N macro)
// Out:    x0 is never written here, so it still holds the count on return
// Clobb:  x1-x18, v0-v2, NZCV
// Stack:  480 bytes (6 x 64 for v8-v31, 96 for x19-x30); sp keeps its
//         16-byte alignment provided it was aligned on entry
//
// One loop iteration issues 8 INSTR instructions, each followed by two
// adds instructions. Inside the loop x1-x3 are only read and x5-x28 are
// only written.
//
// A zero count returns immediately. Without the guard in front of the
// loop, the first subs would wrap the counter and the loop would run
// 2^64 times.
//
// NOTE(review): x18 is written in the loop and is not saved or restored.
// x18 is the platform register on some AArch64 ABIs (Apple, Windows);
// check the target ABI before porting.
// NOTE(review): the entry point is called latency although no loop
// instruction consumes the result of another; presumably the driver
// expects this fixed symbol name -- confirm against the caller.
// NOTE(review): the count is consumed as a full 64-bit value; this
// assumes the caller passes it that way in x0 -- TODO confirm.
//-----------------------------------------------------------------------
        .text
        .globl  latency
        .type   latency, @function
        .p2align 2                      // 4-byte instruction alignment
latency:

        // Save v8-v31 (full 128 bits each) and x19-x30 on the stack.
        // The routine itself never writes v8-v31, x29 or x30; x19-x28
        // are used as loop destinations further down.
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        sub     sp, sp, #64
        st1     {v16.2d, v17.2d, v18.2d, v19.2d}, [sp]
        sub     sp, sp, #64
        st1     {v20.2d, v21.2d, v22.2d, v23.2d}, [sp]
        sub     sp, sp, #64
        st1     {v24.2d, v25.2d, v26.2d, v27.2d}, [sp]
        sub     sp, sp, #64
        st1     {v28.2d, v29.2d, v30.2d, v31.2d}, [sp]
        stp     x19, x20, [sp, #-96]!
        stp     x21, x22, [sp, #16]
        stp     x23, x24, [sp, #32]
        stp     x25, x26, [sp, #48]
        stp     x27, x28, [sp, #64]
        stp     x29, x30, [sp, #80]

        mov     x4, N                   // x4 = iterations remaining

        // v0-v2 are initialised here but not read by the loop below.
        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000

        // Loop-invariant source operands.
        mov     x1, #1
        mov     x2, #1
        mov     x3, #1

        cbz     x4, done                // zero iterations requested: skip the loop

loop:
        INSTR   x5, x1, x1
        adds    x6, x2, x2
        adds    x7, x3, x3
        INSTR   x8, x1, x1
        adds    x9, x2, x2
        adds    x10, x3, x3
        INSTR   x11, x1, x1
        adds    x12, x2, x2
        adds    x13, x3, x3
        INSTR   x14, x1, x1
        adds    x15, x2, x2
        adds    x16, x3, x3
        INSTR   x17, x1, x1
        adds    x18, x2, x2             // writes x18, see NOTE(review) in the header
        adds    x19, x3, x3
        INSTR   x20, x1, x1
        adds    x21, x2, x2
        adds    x22, x3, x3
        INSTR   x23, x1, x1
        adds    x24, x2, x2
        adds    x25, x3, x3
        INSTR   x26, x1, x1
        adds    x27, x2, x2
        adds    x28, x3, x3

        subs    x4, x4, #1              // sets Z when the counter reaches zero
        bne     loop

done:
        // Restore in the reverse order of the saves above.
        ldp     x19, x20, [sp]
        ldp     x21, x22, [sp, #16]
        ldp     x23, x24, [sp, #32]
        ldp     x25, x26, [sp, #48]
        ldp     x27, x28, [sp, #64]
        ldp     x29, x30, [sp, #80]
        add     sp, sp, #96
        ld1     {v28.2d, v29.2d, v30.2d, v31.2d}, [sp], #64
        ld1     {v24.2d, v25.2d, v26.2d, v27.2d}, [sp], #64
        ld1     {v20.2d, v21.2d, v22.2d, v23.2d}, [sp], #64
        ld1     {v16.2d, v17.2d, v18.2d, v19.2d}, [sp], #64
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp], #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp], #64

        ret

        .size   latency, .-latency
|