mirror of
https://github.com/RRZE-HPC/ibench.git
synced 2025-07-21 12:51:10 +02:00
more instrs
This commit is contained in:
108
src/NEON/str-q_mb-TP.S
Normal file
108
src/NEON/str-q_mb-TP.S
Normal file
@@ -0,0 +1,108 @@
|
||||
#define INSTR str
|
||||
#define NINST 64
|
||||
#define N x0
|
||||
|
||||
.globl ninst
|
||||
.data
|
||||
ninst:
|
||||
.long NINST
|
||||
.text
|
||||
.globl latency
|
||||
.type latency, @function
|
||||
.align 2
|
||||
latency:
|
||||
|
||||
# push callee-save registers onto stack
|
||||
sub sp, sp, #64
|
||||
st1 {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
|
||||
sub sp, sp, #64
|
||||
st1 {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
|
||||
|
||||
mov x4, N
|
||||
|
||||
fmov v0.2d, #1.00000000
|
||||
fmov v1.2d, #1.00000000
|
||||
fmov v2.2d, #1.00000000
|
||||
mov x24, #-64
|
||||
mov x25, #-128
|
||||
mov x28, #-192
|
||||
mov x27, #-256
|
||||
|
||||
loop:
|
||||
subs x4, x4, #1
|
||||
INSTR q2, [sp]
|
||||
INSTR q6, [sp]
|
||||
INSTR q8, [sp]
|
||||
INSTR q10, [sp]
|
||||
INSTR q12, [sp]
|
||||
INSTR q14, [sp]
|
||||
INSTR q16, [sp]
|
||||
INSTR q18, [sp]
|
||||
INSTR q2, [sp]
|
||||
INSTR q6, [sp]
|
||||
INSTR q8, [sp]
|
||||
INSTR q10, [sp]
|
||||
INSTR q12, [sp]
|
||||
INSTR q14, [sp]
|
||||
INSTR q16, [sp]
|
||||
INSTR q18, [sp]
|
||||
INSTR q2, [sp]
|
||||
INSTR q6, [sp]
|
||||
INSTR q8, [sp]
|
||||
INSTR q10, [sp]
|
||||
INSTR q12, [sp]
|
||||
INSTR q14, [sp]
|
||||
INSTR q16, [sp]
|
||||
INSTR q18, [sp]
|
||||
INSTR q2, [sp]
|
||||
INSTR q6, [sp]
|
||||
INSTR q8, [sp]
|
||||
INSTR q10, [sp]
|
||||
INSTR q12, [sp]
|
||||
INSTR q14, [sp]
|
||||
INSTR q16, [sp]
|
||||
INSTR q18, [sp]
|
||||
INSTR q2, [sp]
|
||||
INSTR q6, [sp]
|
||||
INSTR q8, [sp]
|
||||
INSTR q10, [sp]
|
||||
INSTR q12, [sp]
|
||||
INSTR q14, [sp]
|
||||
INSTR q16, [sp]
|
||||
INSTR q18, [sp]
|
||||
INSTR q2, [sp]
|
||||
INSTR q6, [sp]
|
||||
INSTR q8, [sp]
|
||||
INSTR q10, [sp]
|
||||
INSTR q12, [sp]
|
||||
INSTR q14, [sp]
|
||||
INSTR q16, [sp]
|
||||
INSTR q18, [sp]
|
||||
INSTR q2, [sp]
|
||||
INSTR q6, [sp]
|
||||
INSTR q8, [sp]
|
||||
INSTR q10, [sp]
|
||||
INSTR q12, [sp]
|
||||
INSTR q14, [sp]
|
||||
INSTR q16, [sp]
|
||||
INSTR q18, [sp]
|
||||
INSTR q2, [sp]
|
||||
INSTR q6, [sp]
|
||||
INSTR q8, [sp]
|
||||
INSTR q10, [sp]
|
||||
INSTR q12, [sp]
|
||||
INSTR q14, [sp]
|
||||
INSTR q16, [sp]
|
||||
INSTR q18, [sp]
|
||||
bne loop
|
||||
done:
|
||||
|
||||
# pop callee-save registers from stack
|
||||
ld1 {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
|
||||
add sp, sp, #64
|
||||
ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
|
||||
add sp, sp, #64
|
||||
|
||||
ret
|
||||
|
||||
.size latency, .-latency
|
81
src/NEON/str-q_mbi-il_1_1_1-mul-x_x_x-add-x_x_x-TP.S
Normal file
81
src/NEON/str-q_mbi-il_1_1_1-mul-x_x_x-add-x_x_x-TP.S
Normal file
@@ -0,0 +1,81 @@
|
||||
#define INSTR str
|
||||
#define NINST 6
|
||||
#define N x0
|
||||
|
||||
.globl ninst
|
||||
.data
|
||||
ninst:
|
||||
.long NINST
|
||||
.text
|
||||
.globl latency
|
||||
.type latency, @function
|
||||
.align 2
|
||||
latency:
|
||||
|
||||
# push callee-save registers onto stack
|
||||
sub sp, sp, #64
|
||||
st1 {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
|
||||
sub sp, sp, #64
|
||||
st1 {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
|
||||
|
||||
mov x4, N
|
||||
|
||||
fmov v0.2d, #1.00000000
|
||||
fmov v1.2d, #1.00000000
|
||||
fmov v2.2d, #1.00000000
|
||||
fmov v3.2d, #1.00000000
|
||||
fmov v4.2d, #1.00000000
|
||||
fmov v5.2d, #1.00000000
|
||||
fmov v6.2d, #1.00000000
|
||||
fmov v7.2d, #1.00000000
|
||||
mov x9, #-64
|
||||
mov x10, #-128
|
||||
mov x11, #-192
|
||||
mov x12, #-256
|
||||
mov x13, #-320
|
||||
mov x14, #-384
|
||||
mov x0, #1
|
||||
mov x1, #1
|
||||
mov x2, #1
|
||||
mov x3, #1
|
||||
mov x5, #1
|
||||
mov x6, #1
|
||||
mov x7, #1
|
||||
mov x8, #1
|
||||
mov x15, #1
|
||||
mov x16, #1
|
||||
mov x17, #1
|
||||
mov x18, #1
|
||||
|
||||
loop:
|
||||
subs x4, x4, #1
|
||||
INSTR q0, [sp, x9]
|
||||
mul x0, x0, x0
|
||||
add x1, x1, x1
|
||||
INSTR q1, [sp, x10]
|
||||
mul x2, x2, x2
|
||||
add x3, x3, x3
|
||||
INSTR q2, [sp, x11]
|
||||
mul x5, x5, x5
|
||||
add x6, x6, x6
|
||||
INSTR q3, [sp, x12]
|
||||
mul x7, x7, x7
|
||||
add x8, x8, x8
|
||||
INSTR q4, [sp, x13]
|
||||
mul x15, x15, x15
|
||||
add x16, x16, x16
|
||||
INSTR q5, [sp, x14]
|
||||
mul x17, x17, x17
|
||||
add x18, x18, x18
|
||||
bne loop
|
||||
done:
|
||||
|
||||
# pop callee-save registers from stack
|
||||
ld1 {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
|
||||
add sp, sp, #64
|
||||
ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
|
||||
add sp, sp, #64
|
||||
|
||||
ret
|
||||
|
||||
.size latency, .-latency
|
89
src/NEON/str-q_mbp-il_1_1_1-mul-x_x_x-add-x_x_x-TP.S
Normal file
89
src/NEON/str-q_mbp-il_1_1_1-mul-x_x_x-add-x_x_x-TP.S
Normal file
@@ -0,0 +1,89 @@
|
||||
#define INSTR str
|
||||
#define NINST 8
|
||||
#define N x0
|
||||
|
||||
.globl ninst
|
||||
.data
|
||||
ninst:
|
||||
.long NINST
|
||||
.text
|
||||
.globl latency
|
||||
.type latency, @function
|
||||
.align 2
|
||||
latency:
|
||||
|
||||
# push callee-save registers onto stack
|
||||
# sub sp, sp, #64
|
||||
# st1 {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
|
||||
# sub sp, sp, #64
|
||||
# st1 {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
|
||||
|
||||
mov x4, N
|
||||
|
||||
fmov v0.2d, #1.00000000
|
||||
fmov v1.2d, #1.00000000
|
||||
fmov v2.2d, #1.00000000
|
||||
mov x24, sp
|
||||
|
||||
mov x16, sp
|
||||
add x16, x16, #128
|
||||
mov x25, sp
|
||||
add x25, x25, #192
|
||||
mov x27, sp
|
||||
sub x27, x27, #256
|
||||
mov x28, sp
|
||||
sub x28, x28, #320
|
||||
mov x1, #1
|
||||
mov x2, #1
|
||||
mov x3, #1
|
||||
mov x5, #1
|
||||
mov x6, #1
|
||||
mov x7, #1
|
||||
mov x8, #1
|
||||
mov x9, #1
|
||||
mov x10, #1
|
||||
mov x11, #1
|
||||
mov x12, #1
|
||||
mov x13, #1
|
||||
mov x14, #1
|
||||
mov x15, #1
|
||||
mov x17, #1
|
||||
mov x18, #1
|
||||
loop:
|
||||
subs x4, x4, #1
|
||||
INSTR q1, [x25], #64
|
||||
mul x1, x1, x1
|
||||
add x2, x2, x2
|
||||
INSTR q2, [x27], #64
|
||||
mul x3, x3, x3
|
||||
add x5, x5, x5
|
||||
INSTR q3, [x28], #64
|
||||
mul x6, x6, x6
|
||||
add x7, x7, x7
|
||||
INSTR q4, [x16], #64
|
||||
mul x8, x8, x8
|
||||
add x9, x9, x9
|
||||
INSTR q6, [x25], #-64
|
||||
mul x10, x10, x10
|
||||
add x11, x11, x11
|
||||
INSTR q7, [x27], #-64
|
||||
mul x12, x8, x12
|
||||
add x13, x13, x13
|
||||
INSTR q8, [x28], #-64
|
||||
mul x14, x14, x14
|
||||
add x15, x15, x15
|
||||
INSTR q9, [x16], #-64
|
||||
mul x17, x17, x17
|
||||
add x18, x18, x18
|
||||
bne loop
|
||||
done:
|
||||
mov sp, x24
|
||||
# pop callee-save registers from stack
|
||||
# ld1 {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
|
||||
# add sp, sp, #64
|
||||
# ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
|
||||
# add sp, sp, #64
|
||||
|
||||
ret
|
||||
|
||||
.size latency, .-latency
|
Reference in New Issue
Block a user