mirror of
https://github.com/RRZE-HPC/ibench.git
synced 2025-07-21 21:01:10 +02:00
removed duplicates
This commit is contained in:
@@ -1,58 +0,0 @@
|
||||
#define INSTR ldp
#define NINST 12
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (12) independent INSTR loads of D-register
// pairs from [sp, #-256]. `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        mov     x4, N                   // x4 = remaining iterations

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000
loop:
        subs    x4, x4, #1              // sets the flags tested by bne below
        INSTR   d24, d1, [sp, #-256]
        INSTR   d2, d3, [sp, #-256]
        INSTR   d25, d5, [sp, #-256]
        INSTR   d6, d7, [sp, #-256]
        INSTR   d8, d9, [sp, #-256]
        INSTR   d10, d11, [sp, #-256]
        INSTR   d12, d13, [sp, #-256]
        INSTR   d14, d15, [sp, #-256]
        INSTR   d16, d17, [sp, #-256]
        INSTR   d18, d19, [sp, #-256]
        INSTR   d20, d21, [sp, #-256]
        INSTR   d22, d23, [sp, #-256]
        bne     loop
done:

        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
@@ -1,70 +0,0 @@
|
||||
#define INSTR ldp
#define NINST 12
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (12) INSTR loads of D-register pairs from
// [sp, #-256], each followed by a 64-bit integer load from [sp].
// `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        mov     x4, N                   // x4 = remaining iterations (x0 is reused below)

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000
loop:
        subs    x4, x4, #1              // sets the flags tested by bne below
        INSTR   d24, d1, [sp, #-256]
        ldr     x0, [sp]
        INSTR   d2, d3, [sp, #-256]
        ldr     x1, [sp]
        INSTR   d25, d5, [sp, #-256]
        ldr     x2, [sp]
        INSTR   d6, d7, [sp, #-256]
        ldr     x3, [sp]
        INSTR   d8, d9, [sp, #-256]
        ldr     x5, [sp]
        INSTR   d10, d11, [sp, #-256]
        ldr     x6, [sp]
        INSTR   d12, d13, [sp, #-256]
        ldr     x7, [sp]
        INSTR   d14, d15, [sp, #-256]
        ldr     x8, [sp]
        INSTR   d16, d17, [sp, #-256]
        ldr     x9, [sp]
        INSTR   d18, d19, [sp, #-256]
        ldr     x10, [sp]
        INSTR   d20, d21, [sp, #-256]
        ldr     x11, [sp]
        INSTR   d22, d23, [sp, #-256]
        ldr     x12, [sp]
        bne     loop
done:

        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
@@ -1,58 +0,0 @@
|
||||
#define INSTR ldp
#define NINST 12
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (12) independent INSTR loads of Q-register
// pairs from [sp, #-256]. `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        mov     x4, N                   // x4 = remaining iterations

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000
loop:
        subs    x4, x4, #1              // sets the flags tested by bne below
        INSTR   q0, q1, [sp, #-256]
        INSTR   q2, q3, [sp, #-256]
        INSTR   q4, q5, [sp, #-256]
        INSTR   q6, q7, [sp, #-256]
        INSTR   q8, q9, [sp, #-256]
        INSTR   q10, q11, [sp, #-256]
        INSTR   q12, q13, [sp, #-256]
        INSTR   q14, q15, [sp, #-256]
        INSTR   q16, q17, [sp, #-256]
        INSTR   q18, q19, [sp, #-256]
        INSTR   q20, q21, [sp, #-256]
        INSTR   q22, q23, [sp, #-256]
        bne     loop
done:

        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
@@ -1,62 +0,0 @@
|
||||
#define INSTR ldp
#define NINST 14
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (14) post-indexed INSTR loads of Q-register
// pairs through sp: seven steps of +64 followed by seven steps of -64, so
// sp is back at its starting value at the end of every iteration.
// `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        mov     x4, N                   // x4 = remaining iterations

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000
        mov     x24, sp                 // x24 = frame base, used to reset sp at done
        // NOTE(review): while sp is advanced inside the loop the register save
        // area lies below sp; an asynchronous signal handler could overwrite
        // it. Consider walking a scratch base register instead of sp.
loop:
        subs    x4, x4, #1              // sets the flags tested by bne below
        INSTR   q0, q1, [sp], #64
        INSTR   q2, q3, [sp], #64
        INSTR   q4, q5, [sp], #64
        INSTR   q6, q7, [sp], #64
        INSTR   q8, q9, [sp], #64
        INSTR   q10, q11, [sp], #64
        INSTR   q12, q13, [sp], #64
        INSTR   q14, q15, [sp], #-64
        INSTR   q16, q17, [sp], #-64
        INSTR   q18, q19, [sp], #-64
        INSTR   q20, q21, [sp], #-64
        INSTR   q22, q23, [sp], #-64
        INSTR   q25, q26, [sp], #-64
        INSTR   q27, q28, [sp], #-64

        bne     loop
done:
        mov     sp, x24
        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below;
        // this also restores the caller value of x24.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
@@ -1,67 +0,0 @@
|
||||
#define INSTR ldp
#define NINST 10
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (10) post-indexed INSTR loads of Q-register
// pairs through five base registers (sp, x25, x27, x28, x16); every base is
// stepped by +64 and then by -64, so all bases are unchanged after an
// iteration. `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        mov     x4, N                   // x4 = remaining iterations

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000
        mov     x24, sp                 // x24 = frame base, used to reset sp at done

        // load base pointers at sp+32, sp+64, sp-32 and sp-64
        mov     x16, sp
        add     x16, x16, #32
        mov     x25, sp
        add     x25, x25, #64
        mov     x27, sp
        sub     x27, x27, #32
        mov     x28, sp
        sub     x28, x28, #64
loop:
        subs    x4, x4, #1              // sets the flags tested by bne below
        INSTR   q0, q1, [sp], #64
        INSTR   q2, q3, [x25], #64
        INSTR   q4, q5, [x27], #64
        INSTR   q6, q7, [x28], #64
        INSTR   q18, q19, [x16], #64
        INSTR   q8, q9, [sp], #-64
        INSTR   q10, q11, [x25], #-64
        INSTR   q12, q13, [x27], #-64
        INSTR   q14, q15, [x28], #-64
        INSTR   q20, q21, [x16], #-64

        bne     loop
done:
        mov     sp, x24
        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below;
        // this also restores the caller values of x24, x25, x27 and x28.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
@@ -1,56 +0,0 @@
|
||||
#define INSTR ldp
#define NINST 10
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (10) independent INSTR loads of X-register
// pairs from [sp, #-256]. The destinations include the callee-saved x24,
// x25, x28 and the frame pointer x29, which the epilogue restores.
// `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        mov     x4, N                   // x4 = remaining iterations (never a load target)

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000
loop:
        subs    x4, x4, #1              // sets the flags tested by bne below
        INSTR   x24, x1, [sp, #-256]
        INSTR   x2, x3, [sp, #-256]
        INSTR   x25, x5, [sp, #-256]
        INSTR   x6, x7, [sp, #-256]
        INSTR   x8, x9, [sp, #-256]
        INSTR   x10, x11, [sp, #-256]
        INSTR   x12, x13, [sp, #-256]
        INSTR   x14, x15, [sp, #-256]
        INSTR   x16, x17, [sp, #-256]
        INSTR   x28, x29, [sp, #-256]
        bne     loop
done:

        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
@@ -1,67 +0,0 @@
|
||||
#define INSTR ldr
#define NINST 10
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (10) INSTR loads of single Q registers, two
// from each of five fixed base registers (sp, x25, x27, x28, x16).
// `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        mov     x4, N                   // x4 = remaining iterations

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000
        mov     x24, sp                 // x24 = frame base, copied back to sp at done

        // load base pointers at sp+32, sp+64, sp-32 and sp-64
        mov     x16, sp
        add     x16, x16, #32
        mov     x25, sp
        add     x25, x25, #64
        mov     x27, sp
        sub     x27, x27, #32
        mov     x28, sp
        sub     x28, x28, #64
loop:
        subs    x4, x4, #1              // sets the flags tested by bne below
        INSTR   q0, [sp]
        INSTR   q1, [x25]
        INSTR   q2, [x27]
        INSTR   q3, [x28]
        INSTR   q4, [x16]
        INSTR   q5, [sp]
        INSTR   q6, [x25]
        INSTR   q7, [x27]
        INSTR   q8, [x28]
        INSTR   q9, [x16]

        bne     loop
done:
        mov     sp, x24
        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below;
        // this also restores the caller values of x24, x25, x27 and x28.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
@@ -1,65 +0,0 @@
|
||||
#define INSTR ldr
#define NINST 8
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (8) INSTR loads of a Q register from [sp],
// each paired with a Q-register store to [x24] (x24 = sp - 192).
// `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        mov     x4, N                   // x4 = remaining iterations
        mov     x24, sp
        sub     x24, x24, #192          // x24 = store target, 192 bytes below sp

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000

loop:
        subs    x4, x4, #1              // sets the flags tested by bne below
        INSTR   q1, [sp]
        str     q2, [x24]
        INSTR   q3, [sp]
        str     q5, [x24]
        INSTR   q6, [sp]
        str     q7, [x24]
        INSTR   q8, [sp]
        str     q9, [x24]
        INSTR   q10, [sp]
        str     q11, [x24]
        INSTR   q12, [sp]
        str     q13, [x24]
        INSTR   q14, [sp]
        str     q15, [x24]
        INSTR   q16, [sp]
        str     q17, [x24]
        bne     loop
done:

        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below;
        // this also restores the caller value of x24.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
@@ -1,73 +0,0 @@
|
||||
#define INSTR ldr
#define NINST 8
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (8) groups of: an INSTR load of a Q register
// from [sp], a Q-register store to [x24] (x24 = sp - 192), and an integer
// add on a scratch X register. `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        mov     x4, N                   // x4 = remaining iterations (x0 is reused below)
        mov     x24, sp
        sub     x24, x24, #192          // x24 = store target, 192 bytes below sp

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000

loop:
        subs    x4, x4, #1              // add (without s) leaves these flags intact for bne
        INSTR   q1, [sp]
        str     q2, [x24]
        add     x0, x0, x0
        INSTR   q3, [sp]
        str     q5, [x24]
        add     x1, x1, x1
        INSTR   q6, [sp]
        str     q7, [x24]
        add     x2, x2, x2
        INSTR   q8, [sp]
        str     q9, [x24]
        add     x3, x3, x3
        INSTR   q10, [sp]
        str     q11, [x24]
        add     x5, x5, x5
        INSTR   q12, [sp]
        str     q13, [x24]
        add     x6, x6, x6
        INSTR   q14, [sp]
        str     q15, [x24]
        add     x7, x7, x7
        INSTR   q16, [sp]
        str     q17, [x24]
        add     x8, x8, x8
        bne     loop
done:

        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below;
        // this also restores the caller value of x24.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
@@ -1,115 +0,0 @@
|
||||
#define INSTR ldr
#define NINST 64
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (64) INSTR loads of a Q register using
// register-offset addressing [sp, xM], with offsets -64, -128, -192 and
// -256 held in x24, x25, x28 and x27. The loop body is the same group of
// eight loads repeated eight times.
// `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        mov     x4, N                   // x4 = remaining iterations

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000
        mov     x24, #-64               // byte offsets relative to sp
        mov     x25, #-128
        mov     x28, #-192
        mov     x27, #-256

loop:
        subs    x4, x4, #1              // sets the flags tested by bne below
        .rept   8                       // 8 x 8 loads = NINST
        INSTR   q2, [sp, x24]
        INSTR   q6, [sp, x25]
        INSTR   q8, [sp, x28]
        INSTR   q10, [sp, x27]
        INSTR   q12, [sp, x24]
        INSTR   q14, [sp, x25]
        INSTR   q16, [sp, x28]
        INSTR   q18, [sp, x27]
        .endr
        bne     loop
done:

        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below;
        // this also restores the caller values of x24, x25, x27 and x28.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
@@ -1,60 +0,0 @@
|
||||
#define INSTR ldr
#define NINST 12
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (12) post-indexed INSTR loads of a Q register
// through sp: six steps of +64 followed by six steps of -64, so sp is back
// at its starting value at the end of every iteration.
// `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        mov     x4, N                   // x4 = remaining iterations

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000
        mov     x24, sp                 // x24 = frame base, used to reset sp at done
        // NOTE(review): while sp is advanced inside the loop the register save
        // area lies below sp; an asynchronous signal handler could overwrite
        // it. Consider walking a scratch base register instead of sp.
loop:
        subs    x4, x4, #1              // sets the flags tested by bne below
        INSTR   q0, [sp], #64
        INSTR   q1, [sp], #64
        INSTR   q2, [sp], #64
        INSTR   q3, [sp], #64
        INSTR   q4, [sp], #64
        INSTR   q5, [sp], #64
        INSTR   q6, [sp], #-64
        INSTR   q7, [sp], #-64
        INSTR   q8, [sp], #-64
        INSTR   q9, [sp], #-64
        INSTR   q10, [sp], #-64
        INSTR   q11, [sp], #-64

        bne     loop
done:
        mov     sp, x24
        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below;
        // this also restores the caller value of x24.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
@@ -1,67 +0,0 @@
|
||||
#define INSTR ldr
#define NINST 10
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (10) post-indexed INSTR loads of a Q register
// through five base registers (sp, x25, x27, x28, x16); every base is
// stepped by +64 and then by -64, so all bases are unchanged after an
// iteration. `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        mov     x4, N                   // x4 = remaining iterations

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000
        mov     x24, sp                 // x24 = frame base, used to reset sp at done

        // load base pointers at sp+32, sp+64, sp-32 and sp-64
        mov     x16, sp
        add     x16, x16, #32
        mov     x25, sp
        add     x25, x25, #64
        mov     x27, sp
        sub     x27, x27, #32
        mov     x28, sp
        sub     x28, x28, #64
loop:
        subs    x4, x4, #1              // sets the flags tested by bne below
        INSTR   q0, [sp], #64
        INSTR   q1, [x25], #64
        INSTR   q2, [x27], #64
        INSTR   q3, [x28], #64
        INSTR   q4, [x16], #64
        INSTR   q5, [sp], #-64
        INSTR   q6, [x25], #-64
        INSTR   q7, [x27], #-64
        INSTR   q8, [x28], #-64
        INSTR   q9, [x16], #-64

        bne     loop
done:
        mov     sp, x24
        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below;
        // this also restores the caller values of x24, x25, x27 and x28.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
@@ -1,55 +0,0 @@
|
||||
#define INSTR ldr
#define NINST 8
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (8) independent INSTR loads of an X register
// from [sp]. `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        mov     x4, N                   // x4 = remaining iterations (never a load target)

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000

loop:
        subs    x4, x4, #1              // sets the flags tested by bne below
        INSTR   x2, [sp]
        INSTR   x6, [sp]
        INSTR   x8, [sp]
        INSTR   x10, [sp]
        INSTR   x12, [sp]
        INSTR   x14, [sp]
        INSTR   x16, [sp]
        // NOTE(review): x18 is the platform register under AAPCS64 and is
        // reserved on some platforms - confirm the intended targets.
        INSTR   x18, [sp]
        bne     loop
done:

        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
@@ -1,58 +0,0 @@
|
||||
#define INSTR stp
#define NINST 12
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (12) INSTR stores of D-register pairs to
// twelve distinct negative offsets from sp (all targets lie below sp).
// `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        mov     x4, N                   // x4 = remaining iterations

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000
loop:
        subs    x4, x4, #1              // sets the flags tested by bne below
        INSTR   d0, d1, [sp, #-64]
        INSTR   d2, d3, [sp, #-128]
        INSTR   d4, d5, [sp, #-192]
        INSTR   d6, d7, [sp, #-256]
        INSTR   d8, d9, [sp, #-320]
        INSTR   d10, d11, [sp, #-384]
        INSTR   d12, d13, [sp, #-448]
        INSTR   d14, d15, [sp, #-32]
        INSTR   d16, d17, [sp, #-96]
        INSTR   d18, d19, [sp, #-160]
        INSTR   d20, d21, [sp, #-224]
        INSTR   d22, d23, [sp, #-288]
        bne     loop
done:

        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
@@ -1,70 +0,0 @@
|
||||
#define INSTR stp
#define NINST 12
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (12) INSTR stores of D-register pairs to
// distinct negative offsets from sp, each followed by a 64-bit integer
// load from [sp]. `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        mov     x4, N                   // x4 = remaining iterations (x0 is reused below)

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000
loop:
        subs    x4, x4, #1              // sets the flags tested by bne below
        INSTR   d0, d1, [sp, #-64]
        ldr     x0, [sp]
        INSTR   d2, d3, [sp, #-128]
        ldr     x1, [sp]
        INSTR   d4, d5, [sp, #-192]
        ldr     x2, [sp]
        INSTR   d6, d7, [sp, #-256]
        ldr     x3, [sp]
        INSTR   d8, d9, [sp, #-320]
        ldr     x5, [sp]
        INSTR   d10, d11, [sp, #-384]
        ldr     x6, [sp]
        INSTR   d12, d13, [sp, #-448]
        ldr     x7, [sp]
        INSTR   d14, d15, [sp, #-32]
        ldr     x8, [sp]
        INSTR   d16, d17, [sp, #-96]
        ldr     x9, [sp]
        INSTR   d18, d19, [sp, #-160]
        ldr     x10, [sp]
        INSTR   d20, d21, [sp, #-224]
        ldr     x11, [sp]
        INSTR   d22, d23, [sp, #-288]
        ldr     x12, [sp]
        bne     loop
done:

        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
@@ -1,58 +0,0 @@
|
||||
#define INSTR stp
#define NINST 12
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (12) INSTR stores of Q-register pairs, all to
// the same 32-byte location at [sp]. `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        // Reserve a 32-byte scratch slot for the stores below. Without it
        // [sp] is the saved x29/x30/x19/x20 and the kernel would overwrite
        // them before the epilogue reloads them.
        sub     sp, sp, #32

        mov     x4, N                   // x4 = remaining iterations

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000
loop:
        subs    x4, x4, #1              // sets the flags tested by bne below
        INSTR   q0, q1, [sp]
        INSTR   q2, q3, [sp]
        INSTR   q4, q5, [sp]
        INSTR   q6, q7, [sp]
        INSTR   q8, q9, [sp]
        INSTR   q10, q11, [sp]
        INSTR   q12, q13, [sp]
        INSTR   q14, q15, [sp]
        INSTR   q16, q17, [sp]
        INSTR   q18, q19, [sp]
        INSTR   q20, q21, [sp]
        INSTR   q22, q23, [sp]
        bne     loop
done:
        add     sp, sp, #32             // release the scratch slot

        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
@@ -1,58 +0,0 @@
|
||||
#define INSTR stp
#define NINST 12
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (12) INSTR stores of Q-register pairs to
// twelve distinct locations, 64 bytes apart, from sp-64 down to sp-768.
// `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        mov     x4, N                   // x4 = remaining iterations

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000
loop:
        subs    x4, x4, #1              // sets the flags tested by bne below
        INSTR   q0, q1, [sp, #-64]
        INSTR   q2, q3, [sp, #-128]
        INSTR   q4, q5, [sp, #-192]
        INSTR   q6, q7, [sp, #-256]
        INSTR   q8, q9, [sp, #-320]
        INSTR   q10, q11, [sp, #-384]
        INSTR   q12, q13, [sp, #-448]
        INSTR   q14, q15, [sp, #-512]
        INSTR   q16, q17, [sp, #-576]
        INSTR   q18, q19, [sp, #-640]
        INSTR   q20, q21, [sp, #-704]
        INSTR   q22, q23, [sp, #-768]
        bne     loop
done:

        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
@@ -1,58 +0,0 @@
|
||||
#define INSTR stp
#define NINST 12
#define N x0

// void latency(N): runs the loop below N times (AAPCS64, N in x0).
// Each iteration issues NINST (12) INSTR stores of X-register pairs, all to
// the same location [sp, #-256]. The registers are only read, never written.
// `ninst` exports NINST as a 32-bit word.
// N must be nonzero: the subs/bne loop would wrap around for N == 0.

.globl ninst
.data
ninst:
.long NINST
.text
.globl latency
.type latency, @function
.align 2
latency:

        // push callee-saved registers: v8-v15 (128 bytes), then a 96-byte GPR frame
        sub     sp, sp, #64
        st1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        sub     sp, sp, #64
        st1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        stp     x29, x30, [sp, -96]!
        stp     x19, x20, [sp, 16]
        stp     x21, x22, [sp, 32]
        stp     x24, x25, [sp, 48]
        stp     x26, x27, [sp, 64]
        str     x28, [sp, 80]

        mov     x4, N                   // x4 = remaining iterations

        fmov    v0.2d, #1.00000000
        fmov    v1.2d, #1.00000000
        fmov    v2.2d, #1.00000000
loop:
        subs    x4, x4, #1              // sets the flags tested by bne below
        INSTR   x24, x1, [sp, #-256]
        INSTR   x2, x3, [sp, #-256]
        INSTR   x25, x5, [sp, #-256]
        INSTR   x6, x7, [sp, #-256]
        INSTR   x8, x9, [sp, #-256]
        INSTR   x10, x11, [sp, #-256]
        INSTR   x12, x13, [sp, #-256]
        INSTR   x14, x15, [sp, #-256]
        INSTR   x16, x17, [sp, #-256]
        INSTR   x18, x19, [sp, #-256]
        INSTR   x20, x21, [sp, #-256]
        INSTR   x22, x23, [sp, #-256]
        bne     loop
done:

        // pop callee-saved registers. The 96-byte GPR frame must be released
        // first so that sp points at the v12-v15 save area for the ld1 below.
        ldp     x19, x20, [sp, 16]
        ldp     x21, x22, [sp, 32]
        ldp     x24, x25, [sp, 48]
        ldp     x26, x27, [sp, 64]
        ldr     x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ld1     {v12.2d, v13.2d, v14.2d, v15.2d}, [sp]
        add     sp, sp, #64
        ld1     {v8.2d, v9.2d, v10.2d, v11.2d}, [sp]
        add     sp, sp, #64

        ret

.size latency, .-latency
|
Reference in New Issue
Block a user