mirror of
https://github.com/RRZE-HPC/ibench.git
synced 2025-07-21 12:51:10 +02:00
more benchmarks
This commit is contained in:
63
src/NEON/ldp-d_d_mbo-il_1_1-ldr-x_mb-TP.S
Normal file
63
src/NEON/ldp-d_d_mbo-il_1_1-ldr-x_mb-TP.S
Normal file
@@ -0,0 +1,63 @@
|
||||
// AArch64 throughput kernel: twelve "ldp dA, dB, [sp, #-256]" pair loads,
// each followed by one "ldr xN, [sp]".
//
// Exported symbols:
//   ninst   - 32-bit word holding NINST
//   latency - kernel entry point; the iteration count is passed in N (x0)
#define INSTR ldp
#define NINST 12
#define N x0

        .data
        .globl  ninst
ninst:
        .long   NINST

        .text
        .globl  latency
        .type   latency, @function
        .p2align 2
latency:
        // The pair loads below write d8-d15, so v8-v15 are spilled first
        // into two 64-byte stack areas.
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]

        mov     x4, N                           // x4 = iterations left

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000

loop:
        subs    x4, x4, #1                      // sets the flags tested by bne
        // x4 is skipped as a load destination because it is the counter.
        INSTR   d24, d1,  [sp, #-256]
        ldr     x0,  [sp]
        INSTR   d2,  d3,  [sp, #-256]
        ldr     x1,  [sp]
        INSTR   d25, d5,  [sp, #-256]
        ldr     x2,  [sp]
        INSTR   d6,  d7,  [sp, #-256]
        ldr     x3,  [sp]
        INSTR   d8,  d9,  [sp, #-256]
        ldr     x5,  [sp]
        INSTR   d10, d11, [sp, #-256]
        ldr     x6,  [sp]
        INSTR   d12, d13, [sp, #-256]
        ldr     x7,  [sp]
        INSTR   d14, d15, [sp, #-256]
        ldr     x8,  [sp]
        INSTR   d16, d17, [sp, #-256]
        ldr     x9,  [sp]
        INSTR   d18, d19, [sp, #-256]
        ldr     x10, [sp]
        INSTR   d20, d21, [sp, #-256]
        ldr     x11, [sp]
        INSTR   d22, d23, [sp, #-256]
        ldr     x12, [sp]
        bne     loop

done:
        // Reload v8-v15 in reverse order of the spills and release the frame.
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

        .size   latency, .-latency
|
60
src/NEON/ldr-q_mb-TP.S
Normal file
60
src/NEON/ldr-q_mb-TP.S
Normal file
@@ -0,0 +1,60 @@
|
||||
// AArch64 throughput kernel: ten independent 128-bit "ldr qN, [base]" loads
// per iteration, cycling over five base addresses around sp
// (sp, sp+64, sp-32, sp-64, sp+32).
//
// Exported symbols:
//   ninst   - 32-bit word holding NINST
//   latency - kernel entry point; the iteration count is passed in N (x0)
//
// Change from the previous version: the address registers were x16, x25,
// x27 and x28, with a copy of sp in x24. x24-x28 are callee-saved under
// AAPCS64 and were never restored. The bases now live in caller-saved
// x9-x12, and the "mov sp, x24" that re-wrote an unchanged sp is gone.
#define INSTR ldr
#define NINST 10
#define N x0

        .data
        .globl  ninst
ninst:
        .long   NINST

        .text
        .globl  latency
        .type   latency, @function
        .p2align 2
latency:
        // q8 and q9 are load destinations below, so v8-v15 are spilled.
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]

        mov     x4, N                           // x4 = iterations left

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000

        // Load base addresses, all held in caller-saved registers.
        add     x9,  sp, #32
        add     x10, sp, #64
        sub     x11, sp, #32
        sub     x12, sp, #64

loop:
        subs    x4, x4, #1                      // sets the flags tested by bne
        INSTR   q0, [sp]
        INSTR   q1, [x10]
        INSTR   q2, [x11]
        INSTR   q3, [x12]
        INSTR   q4, [x9]
        INSTR   q5, [sp]
        INSTR   q6, [x10]
        INSTR   q7, [x11]
        INSTR   q8, [x12]
        INSTR   q9, [x9]
        bne     loop

done:
        // sp was not modified after the prologue, so the spill areas are
        // still at [sp] and [sp + 64].
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

        .size   latency, .-latency
|
58
src/NEON/ldr-x_mb-il_1_1-str-x_mb-TP.S
Normal file
58
src/NEON/ldr-x_mb-il_1_1-str-x_mb-TP.S
Normal file
@@ -0,0 +1,58 @@
|
||||
// AArch64 throughput kernel: eight "ldr xN, [sp]" loads interleaved 1:1
// with eight "str xM, [base]" stores, where base = sp - 192.
//
// Exported symbols:
//   ninst   - 32-bit word holding NINST
//   latency - kernel entry point; the iteration count is passed in N (x0)
//
// Change from the previous version: the store base lived in x24, which is
// callee-saved under AAPCS64 and was never restored. x1-x17 are all used
// by the loop body, so the base now lives in x0, which is free once the
// iteration count has been copied into x4.
#define INSTR ldr
#define NINST 8
#define N x0

        .data
        .globl  ninst
ninst:
        .long   NINST

        .text
        .globl  latency
        .type   latency, @function
        .p2align 2
latency:
        // Spill v8-v15 into two 64-byte stack areas.
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]

        mov     x4, N                           // x4 = iterations left
        sub     x0, sp, #192                    // x0 = store address (N already copied)

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000

loop:
        subs    x4, x4, #1                      // sets the flags tested by bne
        // Loads write x1, x3, x6, ...; stores only read x2, x5, x7, ...
        INSTR   x1,  [sp]
        str     x2,  [x0]
        INSTR   x3,  [sp]
        str     x5,  [x0]
        INSTR   x6,  [sp]
        str     x7,  [x0]
        INSTR   x8,  [sp]
        str     x9,  [x0]
        INSTR   x10, [sp]
        str     x11, [x0]
        INSTR   x12, [sp]
        str     x13, [x0]
        INSTR   x14, [sp]
        str     x15, [x0]
        INSTR   x16, [sp]
        str     x17, [x0]
        bne     loop

done:
        // Reload v8-v15 and release the frame.
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

        .size   latency, .-latency
|
60
src/NEON/ldr-x_mb-il_2_1-str-x_mb-TP.S
Normal file
60
src/NEON/ldr-x_mb-il_2_1-str-x_mb-TP.S
Normal file
@@ -0,0 +1,60 @@
|
||||
// AArch64 throughput kernel: twelve "ldr xN, [sp]" loads interleaved 2:1
// with six "str xM, [x24]" stores, where x24 = sp - 192.
//
// Exported symbols:
//   ninst   - 32-bit word holding NINST
//   latency - kernel entry point; the iteration count is passed in N (x0)
//
// Changes from the previous version:
//   * x24 (callee-saved under AAPCS64) was overwritten and never restored.
//     It is now pushed before use and popped before returning.
//   * x18, the AAPCS64 platform register, was a load destination. That
//     load now targets x0, which is free once N has been copied into x4.
// x28 is only read (as store data), so it needs no save.
#define INSTR ldr
#define NINST 6
#define N x0

        .data
        .globl  ninst
ninst:
        .long   NINST

        .text
        .globl  latency
        .type   latency, @function
        .p2align 2
latency:
        // Spill v8-v15 into two 64-byte stack areas.
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        str     x24, [sp, #-16]!                // push x24; 16 bytes keeps sp aligned

        mov     x4, N                           // x4 = iterations left
        sub     x24, sp, #192                   // x24 = store address

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000

loop:
        subs    x4, x4, #1                      // sets the flags tested by bne
        INSTR   x1,  [sp]
        INSTR   x2,  [sp]
        str     x3,  [x24]
        INSTR   x5,  [sp]
        INSTR   x6,  [sp]
        str     x7,  [x24]
        INSTR   x8,  [sp]
        INSTR   x9,  [sp]
        str     x10, [x24]
        INSTR   x11, [sp]
        INSTR   x12, [sp]
        str     x13, [x24]
        INSTR   x14, [sp]
        INSTR   x15, [sp]
        str     x16, [x24]
        INSTR   x17, [sp]
        INSTR   x0,  [sp]                       // was x18 (platform register)
        str     x28, [x24]
        bne     loop

done:
        ldr     x24, [sp], #16                  // pop x24
        // Reload v8-v15 and release the frame.
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

        .size   latency, .-latency
|
63
src/NEON/stp-d_d_mbo-il_1_1-ldr-d_mb-TP.S
Normal file
63
src/NEON/stp-d_d_mbo-il_1_1-ldr-d_mb-TP.S
Normal file
@@ -0,0 +1,63 @@
|
||||
// AArch64 throughput kernel: twelve "stp dA, dB, [sp, #-off]" pair stores
// to twelve different negative offsets from sp, each followed by one
// "ldr xN, [sp]".
//
// Exported symbols:
//   ninst   - 32-bit word holding NINST
//   latency - kernel entry point; the iteration count is passed in N (x0)
#define INSTR stp
#define NINST 12
#define N x0

        .data
        .globl  ninst
ninst:
        .long   NINST

        .text
        .globl  latency
        .type   latency, @function
        .p2align 2
latency:
        // Spill v8-v15 into two 64-byte stack areas.
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]

        mov     x4, N                           // x4 = iterations left

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000

loop:
        subs    x4, x4, #1                      // sets the flags tested by bne
        // x4 is skipped as a load destination because it is the counter.
        INSTR   d0,  d1,  [sp, #-64]
        ldr     x0,  [sp]
        INSTR   d2,  d3,  [sp, #-128]
        ldr     x1,  [sp]
        INSTR   d4,  d5,  [sp, #-192]
        ldr     x2,  [sp]
        INSTR   d6,  d7,  [sp, #-256]
        ldr     x3,  [sp]
        INSTR   d8,  d9,  [sp, #-320]
        ldr     x5,  [sp]
        INSTR   d10, d11, [sp, #-384]
        ldr     x6,  [sp]
        INSTR   d12, d13, [sp, #-448]
        ldr     x7,  [sp]
        INSTR   d14, d15, [sp, #-32]
        ldr     x8,  [sp]
        INSTR   d16, d17, [sp, #-96]
        ldr     x9,  [sp]
        INSTR   d18, d19, [sp, #-160]
        ldr     x10, [sp]
        INSTR   d20, d21, [sp, #-224]
        ldr     x11, [sp]
        INSTR   d22, d23, [sp, #-288]
        ldr     x12, [sp]
        bne     loop

done:
        // Reload v8-v15 and release the frame.
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

        .size   latency, .-latency
|
108
src/NEON/str-d_mb-TP.S
Normal file
108
src/NEON/str-d_mb-TP.S
Normal file
@@ -0,0 +1,108 @@
|
||||
// AArch64 throughput kernel: 64 "str dN, [sp]" stores per iteration
// (eight repetitions of an eight-register group), all to the same address.
//
// Exported symbols:
//   ninst   - 32-bit word holding NINST
//   latency - kernel entry point; the iteration count is passed in N (x0)
//
// Changes from the previous version:
//   * The stores targeted [sp] directly, which is where the prologue had
//     just spilled v12. The epilogue therefore reloaded store data into the
//     low 64 bits of v12, which AAPCS64 requires a callee to preserve. A
//     64-byte scratch area is now allocated below the spill areas so [sp]
//     no longer aliases saved state.
//   * x24, x25, x27 and x28 (callee-saved) were loaded with constants that
//     nothing read; those writes are removed.
#define INSTR str
#define NINST 64
#define N x0

        .data
        .globl  ninst
ninst:
        .long   NINST

        .text
        .globl  latency
        .type   latency, @function
        .p2align 2
latency:
        // Spill v8-v15 into two 64-byte stack areas.
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        sub     sp, sp, #64                     // scratch area: store target at [sp]

        mov     x4, N                           // x4 = iterations left

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000

loop:
        subs    x4, x4, #1                      // sets the flags tested by bne
        .rept   8                               // 8 x 8 = NINST stores
        INSTR   d2,  [sp]
        INSTR   d6,  [sp]
        INSTR   d8,  [sp]
        INSTR   d10, [sp]
        INSTR   d12, [sp]
        INSTR   d14, [sp]
        INSTR   d16, [sp]
        INSTR   d18, [sp]
        .endr
        bne     loop

done:
        add     sp, sp, #64                     // drop the scratch area
        // Reload v8-v15 and release the frame.
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

        .size   latency, .-latency
|
59
src/NEON/str-d_mbp-TP.S
Normal file
59
src/NEON/str-d_mbp-TP.S
Normal file
@@ -0,0 +1,59 @@
|
||||
// AArch64 throughput kernel: eight post-indexed "str dN, [base], #+/-64"
// stores per iteration over four address streams. Each stream stores at
// its base, advances by 64, stores again and steps back, so every base
// register returns to its starting value at the end of an iteration.
//
// Exported symbols:
//   ninst   - 32-bit word holding NINST
//   latency - kernel entry point; the iteration count is passed in N (x0)
//
// Changes from the previous version:
//   * Two streams started at sp+128 and sp+192, so they wrote into memory
//     above this function's stack pointer, which belongs to the caller.
//     All four streams now live in a 640-byte scratch frame allocated
//     here. Their spacing is unchanged (+64 within each pair, 384 bytes
//     between the lower and upper pair), and 640 is a multiple of 64.
//   * The bases were kept in x16, x25, x27 and x28, with sp copies in x24
//     and x10. x24-x28 are callee-saved under AAPCS64 and were never
//     restored. The bases now use caller-saved x9-x12, and the no-op
//     "mov sp, x10" is gone.
// v8-v15 are only read (d8 and d9 as store data), never written, so this
// kernel does not spill them.
#define INSTR str
#define NINST 8
#define N x0

        .data
        .globl  ninst
ninst:
        .long   NINST

        .text
        .globl  latency
        .type   latency, @function
        .p2align 2
latency:
        sub     sp, sp, #640                    // private scratch frame for all stores

        mov     x4, N                           // x4 = iterations left

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000

        // Stream bases; each stream touches [base, base + 72).
        mov     x12, sp                         // stream D: sp + 0   (was x28)
        add     x11, sp, #64                    // stream C: sp + 64  (was x27)
        add     x9,  sp, #448                   // stream A: sp + 448 (was x16)
        add     x10, sp, #512                   // stream B: sp + 512 (was x25)

loop:
        subs    x4, x4, #1                      // sets the flags tested by bne
        INSTR   d1, [x10], #64
        INSTR   d2, [x11], #64
        INSTR   d3, [x12], #64
        INSTR   d4, [x9],  #64
        INSTR   d6, [x10], #-64
        INSTR   d7, [x11], #-64
        INSTR   d8, [x12], #-64
        INSTR   d9, [x9],  #-64
        bne     loop

done:
        add     sp, sp, #640                    // release the scratch frame

        ret

        .size   latency, .-latency
|
60
src/NEON/str-x_mb-il_2_1-ldr-x_mb-TP.S
Normal file
60
src/NEON/str-x_mb-il_2_1-ldr-x_mb-TP.S
Normal file
@@ -0,0 +1,60 @@
|
||||
// AArch64 throughput kernel: twelve "str xN, [sp]" stores interleaved 2:1
// with six "ldr xM, [x24]" loads, where x24 = sp - 192.
//
// Exported symbols:
//   ninst   - 32-bit word holding NINST
//   latency - kernel entry point; the iteration count is passed in N (x0)
//
// Changes from the previous version:
//   * The stores targeted [sp], which is where the prologue had just
//     spilled v12. The epilogue therefore reloaded store data into the low
//     64 bits of v12, which AAPCS64 requires a callee to preserve. A
//     16-byte slot is now allocated below the spill areas: bytes 0-7 are
//     the store target and bytes 8-15 hold the saved x24.
//   * x24 (callee-saved) was overwritten and never restored. It is now
//     saved and restored.
//   * x28 (callee-saved) was a load destination. That load now targets x0,
//     which is free once N has been copied into x4.
// x18 is only read (as store data) and is left as it was.
#define INSTR str
#define NINST 6
#define N x0

        .data
        .globl  ninst
ninst:
        .long   NINST

        .text
        .globl  latency
        .type   latency, @function
        .p2align 2
latency:
        // Spill v8-v15 into two 64-byte stack areas.
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        sub     sp, sp, #16                     // [sp] = store target, [sp, #8] = saved x24
        str     x24, [sp, #8]

        mov     x4, N                           // x4 = iterations left
        sub     x24, sp, #192                   // x24 = load address

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000

loop:
        subs    x4, x4, #1                      // sets the flags tested by bne
        INSTR   x1,  [sp]
        INSTR   x2,  [sp]
        ldr     x3,  [x24]
        INSTR   x5,  [sp]
        INSTR   x6,  [sp]
        ldr     x7,  [x24]
        INSTR   x8,  [sp]
        INSTR   x9,  [sp]
        ldr     x10, [x24]
        INSTR   x11, [sp]
        INSTR   x12, [sp]
        ldr     x13, [x24]
        INSTR   x14, [sp]
        INSTR   x15, [sp]
        ldr     x16, [x24]
        INSTR   x17, [sp]
        INSTR   x18, [sp]
        ldr     x0,  [x24]                      // was x28 (callee-saved)
        bne     loop

done:
        ldr     x24, [sp, #8]                   // restore x24
        add     sp, sp, #16
        // Reload v8-v15 and release the frame.
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

        .size   latency, .-latency
|
108
src/NEON/stur-d_mb-TP.S
Normal file
108
src/NEON/stur-d_mb-TP.S
Normal file
@@ -0,0 +1,108 @@
|
||||
// AArch64 throughput kernel: 64 "stur dN, [sp]" stores per iteration
// (eight repetitions of an eight-register group), all to the same address.
//
// Exported symbols:
//   ninst   - 32-bit word holding NINST
//   latency - kernel entry point; the iteration count is passed in N (x0)
//
// Changes from the previous version:
//   * The stores targeted [sp] directly, which is where the prologue had
//     just spilled v12. The epilogue therefore reloaded store data into the
//     low 64 bits of v12, which AAPCS64 requires a callee to preserve. A
//     64-byte scratch area is now allocated below the spill areas so [sp]
//     no longer aliases saved state.
//   * x24, x25, x27 and x28 (callee-saved) were loaded with constants that
//     nothing read; those writes are removed.
#define INSTR stur
#define NINST 64
#define N x0

        .data
        .globl  ninst
ninst:
        .long   NINST

        .text
        .globl  latency
        .type   latency, @function
        .p2align 2
latency:
        // Spill v8-v15 into two 64-byte stack areas.
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        sub     sp, sp, #64                     // scratch area: store target at [sp]

        mov     x4, N                           // x4 = iterations left

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000

loop:
        subs    x4, x4, #1                      // sets the flags tested by bne
        .rept   8                               // 8 x 8 = NINST stores
        INSTR   d2,  [sp]
        INSTR   d6,  [sp]
        INSTR   d8,  [sp]
        INSTR   d10, [sp]
        INSTR   d12, [sp]
        INSTR   d14, [sp]
        INSTR   d16, [sp]
        INSTR   d18, [sp]
        .endr
        bne     loop

done:
        add     sp, sp, #64                     // drop the scratch area
        // Reload v8-v15 and release the frame.
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

        .size   latency, .-latency
|
60
src/NEON/stur-q_mb-TP.S
Normal file
60
src/NEON/stur-q_mb-TP.S
Normal file
@@ -0,0 +1,60 @@
|
||||
// AArch64 throughput kernel: sixteen 128-bit "stur qN, [sp]" stores per
// iteration, all to the same address.
//
// Exported symbols:
//   ninst   - 32-bit word holding NINST
//   latency - kernel entry point; the iteration count is passed in N (x0)
//
// Changes from the previous version:
//   * The stores targeted [sp] directly, which is where the prologue had
//     just spilled v12. Each 16-byte store overwrote that slot, so the
//     epilogue reloaded store data into v12, whose low 64 bits AAPCS64
//     requires a callee to preserve. A 64-byte scratch area is now
//     allocated below the spill areas so [sp] no longer aliases saved
//     state.
//   * x24, x25, x27 and x28 (callee-saved) were loaded with constants that
//     nothing read; those writes are removed.
#define INSTR stur
#define NINST 16
#define N x0

        .data
        .globl  ninst
ninst:
        .long   NINST

        .text
        .globl  latency
        .type   latency, @function
        .p2align 2
latency:
        // Spill v8-v15 into two 64-byte stack areas.
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        sub     sp, sp, #64                     // scratch area: store target at [sp]

        mov     x4, N                           // x4 = iterations left

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000

loop:
        subs    x4, x4, #1                      // sets the flags tested by bne
        INSTR   q1,  [sp]
        INSTR   q2,  [sp]
        INSTR   q3,  [sp]
        INSTR   q4,  [sp]
        INSTR   q5,  [sp]
        INSTR   q6,  [sp]
        INSTR   q7,  [sp]
        INSTR   q8,  [sp]
        INSTR   q9,  [sp]
        INSTR   q10, [sp]
        INSTR   q11, [sp]
        INSTR   q12, [sp]
        INSTR   q13, [sp]
        INSTR   q14, [sp]
        INSTR   q16, [sp]
        INSTR   q18, [sp]
        bne     loop

done:
        add     sp, sp, #64                     // drop the scratch area
        // Reload v8-v15 and release the frame.
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

        .size   latency, .-latency
|
76
src/NEON/stur-q_mb-il_1_1-ldr-x_mb-TP.S
Normal file
76
src/NEON/stur-q_mb-il_1_1-ldr-x_mb-TP.S
Normal file
@@ -0,0 +1,76 @@
|
||||
// AArch64 throughput kernel: sixteen 128-bit "stur qN, [sp]" stores, each
// followed by one "ldr xM, [sp]" from the same address.
//
// Exported symbols:
//   ninst   - 32-bit word holding NINST
//   latency - kernel entry point; the iteration count is passed in N (x0)
//
// Changes from the previous version:
//   * The stores targeted [sp] directly, which is where the prologue had
//     just spilled v12. Each 16-byte store overwrote that slot, so the
//     epilogue reloaded store data into v12, whose low 64 bits AAPCS64
//     requires a callee to preserve. A 64-byte scratch area is now
//     allocated below the spill areas. Stores and loads still share one
//     address, [sp].
//   * x24, x25, x27 and x28 (callee-saved) were loaded with constants that
//     nothing read; those writes are removed.
#define INSTR stur
#define NINST 16
#define N x0

        .data
        .globl  ninst
ninst:
        .long   NINST

        .text
        .globl  latency
        .type   latency, @function
        .p2align 2
latency:
        // Spill v8-v15 into two 64-byte stack areas.
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        sub     sp, sp, #64                     // scratch area: store/load target at [sp]

        mov     x4, N                           // x4 = iterations left

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000

loop:
        subs    x4, x4, #1                      // sets the flags tested by bne
        // x4 is skipped as a load destination because it is the counter.
        INSTR   q1,  [sp]
        ldr     x0,  [sp]
        INSTR   q2,  [sp]
        ldr     x1,  [sp]
        INSTR   q3,  [sp]
        ldr     x2,  [sp]
        INSTR   q4,  [sp]
        ldr     x3,  [sp]
        INSTR   q5,  [sp]
        ldr     x5,  [sp]
        INSTR   q6,  [sp]
        ldr     x6,  [sp]
        INSTR   q7,  [sp]
        ldr     x7,  [sp]
        INSTR   q8,  [sp]
        ldr     x8,  [sp]
        INSTR   q9,  [sp]
        ldr     x9,  [sp]
        INSTR   q10, [sp]
        ldr     x10, [sp]
        INSTR   q11, [sp]
        ldr     x11, [sp]
        INSTR   q12, [sp]
        ldr     x12, [sp]
        INSTR   q13, [sp]
        ldr     x13, [sp]
        INSTR   q14, [sp]
        ldr     x14, [sp]
        INSTR   q16, [sp]
        ldr     x15, [sp]
        INSTR   q18, [sp]
        ldr     x16, [sp]
        bne     loop

done:
        add     sp, sp, #64                     // drop the scratch area
        // Reload v8-v15 and release the frame.
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

        .size   latency, .-latency
|
45
src/NEON/sub-w_w_i-LAT.S
Normal file
45
src/NEON/sub-w_w_i-LAT.S
Normal file
@@ -0,0 +1,45 @@
|
||||
// AArch64 latency kernel: a chain of six "sub w0, w0, #2" instructions per
// iteration. Each one reads the result of the one before it.
//
// Exported symbols:
//   ninst   - 32-bit word holding NINST
//   latency - kernel entry point; the iteration count is passed in N (x0)
#define INSTR sub
#define NINST 6
#define N x0

        .data
        .globl  ninst
ninst:
        .long   NINST

        .text
        .globl  latency
        .type   latency, @function
        .p2align 2
latency:
        // Spill v8-v15 into two 64-byte stack areas.
        sub     sp, sp, #64
        st1     {v8.4s, v9.4s, v10.4s, v11.4s}, [sp]
        sub     sp, sp, #64
        st1     {v12.4s, v13.4s, v14.4s, v15.4s}, [sp]

        mov     x4, N                           // x4 = iterations left (copied before x0 is reused)

        fmov    v0.4s, #1.00000000
        fmov    v1.4s, #1.00000000
        mov     x0, #1                          // initial value of the dependency chain

loop:
        subs    x4, x4, #1                      // sets the flags tested by bne
        .rept   6                               // NINST chained subtractions
        INSTR   w0, w0, #2
        .endr
        bne     loop

done:
        // Reload v8-v15 and release the frame.
        ld1     {v12.4s, v13.4s, v14.4s, v15.4s}, [sp]
        add     sp, sp, #64
        ld1     {v8.4s, v9.4s, v10.4s, v11.4s}, [sp]
        add     sp, sp, #64

        ret

        .size   latency, .-latency
|
90
src/NEON/sub-w_w_i-TP.S
Normal file
90
src/NEON/sub-w_w_i-TP.S
Normal file
@@ -0,0 +1,90 @@
|
||||
// AArch64 throughput kernel: 48 independent "sub wD, wS, #imm" instructions
// per iteration (four repetitions of a twelve-instruction group). The
// sources w1-w3 are never written inside the loop, so no instruction
// depends on another.
//
// Exported symbols:
//   ninst   - 32-bit word holding NINST
//   latency - kernel entry point; the iteration count is passed in N (x0)
#define INSTR sub
#define NINST 48
#define N x0

        .data
        .globl  ninst
ninst:
        .long   NINST

        .text
        .globl  latency
        .type   latency, @function
        .p2align 2
latency:
        // Spill v8-v15 into two 64-byte stack areas.
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]

        mov     x4, N                           // x4 = iterations left

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000

        // Source operands, read only from here on.
        mov     x1, #1
        mov     x2, #1
        mov     x3, #1

loop:
        subs    x4, x4, #1                      // sets the flags tested by bne
        .rept   4                               // 4 x 12 = NINST subtractions
        INSTR   w5,  w1, #64
        INSTR   w6,  w2, #128
        INSTR   w7,  w3, #192
        INSTR   w8,  w1, #256
        INSTR   w9,  w2, #320
        INSTR   w10, w3, #384
        INSTR   w11, w1, #448
        INSTR   w12, w2, #512
        INSTR   w13, w3, #576
        INSTR   w14, w1, #640
        INSTR   w15, w2, #704
        INSTR   w16, w3, #764                   // NOTE(review): breaks the 64-step pattern; kept as found
        .endr
        bne     loop

done:
        // Reload v8-v15 and release the frame.
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

        .size   latency, .-latency
|
Reference in New Issue
Block a user