mirror of
https://github.com/RRZE-HPC/ibench.git
synced 2025-07-21 04:41:09 +02:00
add SSE load/store instructions
This commit is contained in:
39
src/AVX/vmovapd-load-sse-TP.S
Normal file
39
src/AVX/vmovapd-load-sse-TP.S
Normal file
@@ -0,0 +1,39 @@
|
||||
/*
 * Throughput kernel: 128-bit (xmm) vmovapd loads from memory.
 *
 * Syntax : GNU as, Intel operand order (.intel_syntax noprefix), run
 *          through the C preprocessor (the #define lines below).
 * Target : x86-64 ELF.
 *
 * Exports
 *   ninst   - 32-bit value holding NINST, the number of INSTR executed
 *             per loop iteration (presumably read by the benchmark
 *             driver to normalise its timings - confirm against caller).
 *   latency - the kernel entry point. The name is kept as-is because it
 *             is the exported symbol, even though this file is a
 *             throughput (TP) variant.
 *
 * latency
 *   In    : edi = N, the iteration count (first integer argument under
 *           the System V AMD64 ABI). N <= 0 returns immediately.
 *   Out   : nothing; rax is not written.
 *   Clobb : r8d, xmm0-xmm5, flags.
 */
#define INSTR vmovapd
#define NINST 6
#define N edi
#define i r8d

.intel_syntax noprefix

.globl ninst

.data
ninst:
        .long   NINST

/* 32 bytes of source data: four copies of the 64-bit pattern
 * 0x400921f9f01b866e (low dword first; as a double roughly 3.14159).
 * The 32-byte alignment satisfies the 16-byte alignment that the
 * aligned move requires for an xmm memory operand. */
        .align  32
PI:
        .long   0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9

.text
.globl latency
.type latency, @function
        .align  32
latency:
        push    rbp
        mov     rbp, rsp
        xor     i, i                    /* iteration counter = 0 */
        test    N, N
        jle     .Ldone                  /* nothing to do for N <= 0 (signed) */
.Lloop:
        inc     i
        /* Six loads into six different registers, all from the same
         * address: no load depends on the result of another. */
        INSTR   xmm0, [rip+PI]
        INSTR   xmm1, [rip+PI]
        INSTR   xmm2, [rip+PI]
        /* NOTE(review): the compare sits mid-loop instead of next to the
         * branch; presumably deliberate - keep the order unchanged. */
        cmp     i, N
        INSTR   xmm3, [rip+PI]
        INSTR   xmm4, [rip+PI]
        INSTR   xmm5, [rip+PI]
        jl      .Lloop                  /* flags from cmp survive: the moves do not write them */
.Ldone:
        mov     rsp, rbp
        pop     rbp
        ret
.size latency, .-latency

/* Mark the object as not needing an executable stack. Without this
 * note GNU ld assumes an executable stack is required and warns. */
.section .note.GNU-stack,"",@progbits
|
39
src/AVX/vmovapd-store-sse-TP.S
Normal file
39
src/AVX/vmovapd-store-sse-TP.S
Normal file
@@ -0,0 +1,39 @@
|
||||
/*
 * Throughput kernel: 128-bit (xmm) vmovapd stores to memory.
 *
 * Syntax : GNU as, Intel operand order (.intel_syntax noprefix), run
 *          through the C preprocessor (the #define lines below).
 * Target : x86-64 ELF.
 *
 * Exports
 *   ninst   - 32-bit value holding NINST, the number of INSTR executed
 *             per loop iteration (presumably read by the benchmark
 *             driver to normalise its timings - confirm against caller).
 *   latency - the kernel entry point. The name is kept as-is because it
 *             is the exported symbol, even though this file is a
 *             throughput (TP) variant.
 *
 * latency
 *   In    : edi = N, the iteration count (first integer argument under
 *           the System V AMD64 ABI). N <= 0 returns immediately.
 *   Out   : nothing; rax is not written.
 *   Clobb : r8d, flags, and the first 16 bytes at PI.
 */
#define INSTR vmovapd
#define NINST 6
#define N edi
#define i r8d

.intel_syntax noprefix

.globl ninst

.data
ninst:
        .long   NINST

/* 32-byte buffer used as the store target. It lives in .data because
 * the kernel writes to it; the initial contents (four copies of
 * 0x400921f9f01b866e, low dword first) are overwritten on the first
 * iteration. The 32-byte alignment satisfies the 16-byte alignment
 * that the aligned move requires for an xmm memory operand. */
        .align  32
PI:
        .long   0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9

.text
.globl latency
.type latency, @function
        .align  32
latency:
        push    rbp
        mov     rbp, rsp
        xor     i, i                    /* iteration counter = 0 */
        test    N, N
        jle     .Ldone                  /* nothing to do for N <= 0 (signed) */
.Lloop:
        inc     i
        /* Six stores from six different registers to the same address.
         * xmm0-xmm5 are never initialised here, so whatever they hold
         * on entry is what gets written. */
        INSTR   [rip+PI], xmm0
        INSTR   [rip+PI], xmm1
        INSTR   [rip+PI], xmm2
        /* NOTE(review): the compare sits mid-loop instead of next to the
         * branch; presumably deliberate - keep the order unchanged. */
        cmp     i, N
        INSTR   [rip+PI], xmm3
        INSTR   [rip+PI], xmm4
        INSTR   [rip+PI], xmm5
        jl      .Lloop                  /* flags from cmp survive: the moves do not write them */
.Ldone:
        mov     rsp, rbp
        pop     rbp
        ret
.size latency, .-latency

/* Mark the object as not needing an executable stack. Without this
 * note GNU ld assumes an executable stack is required and warns. */
.section .note.GNU-stack,"",@progbits
|
39
src/AVX/vmovupd-load-sse-TP.S
Normal file
39
src/AVX/vmovupd-load-sse-TP.S
Normal file
@@ -0,0 +1,39 @@
|
||||
/*
 * Throughput kernel: 128-bit (xmm) vmovupd loads from memory.
 *
 * Syntax : GNU as, Intel operand order (.intel_syntax noprefix), run
 *          through the C preprocessor (the #define lines below).
 * Target : x86-64 ELF.
 *
 * Exports
 *   ninst   - 32-bit value holding NINST, the number of INSTR executed
 *             per loop iteration (presumably read by the benchmark
 *             driver to normalise its timings - confirm against caller).
 *   latency - the kernel entry point. The name is kept as-is because it
 *             is the exported symbol, even though this file is a
 *             throughput (TP) variant.
 *
 * latency
 *   In    : edi = N, the iteration count (first integer argument under
 *           the System V AMD64 ABI). N <= 0 returns immediately.
 *   Out   : nothing; rax is not written.
 *   Clobb : r8d, xmm0-xmm5, flags.
 */
#define INSTR vmovupd
#define NINST 6
#define N edi
#define i r8d

.intel_syntax noprefix

.globl ninst

.data
ninst:
        .long   NINST

/* 32 bytes of source data: four copies of the 64-bit pattern
 * 0x400921f9f01b866e (low dword first; as a double roughly 3.14159).
 * The buffer is 32-byte aligned, so the unaligned-capable move is
 * exercised here on an aligned address. */
        .align  32
PI:
        .long   0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9

.text
.globl latency
.type latency, @function
        .align  32
latency:
        push    rbp
        mov     rbp, rsp
        xor     i, i                    /* iteration counter = 0 */
        test    N, N
        jle     .Ldone                  /* nothing to do for N <= 0 (signed) */
.Lloop:
        inc     i
        /* Six loads into six different registers, all from the same
         * address: no load depends on the result of another. */
        INSTR   xmm0, [rip+PI]
        INSTR   xmm1, [rip+PI]
        INSTR   xmm2, [rip+PI]
        /* NOTE(review): the compare sits mid-loop instead of next to the
         * branch; presumably deliberate - keep the order unchanged. */
        cmp     i, N
        INSTR   xmm3, [rip+PI]
        INSTR   xmm4, [rip+PI]
        INSTR   xmm5, [rip+PI]
        jl      .Lloop                  /* flags from cmp survive: the moves do not write them */
.Ldone:
        mov     rsp, rbp
        pop     rbp
        ret
.size latency, .-latency

/* Mark the object as not needing an executable stack. Without this
 * note GNU ld assumes an executable stack is required and warns. */
.section .note.GNU-stack,"",@progbits
|
39
src/AVX/vmovupd-store-sse-TP.S
Normal file
39
src/AVX/vmovupd-store-sse-TP.S
Normal file
@@ -0,0 +1,39 @@
|
||||
/*
 * Throughput kernel: 128-bit (xmm) vmovupd stores to memory.
 *
 * Syntax : GNU as, Intel operand order (.intel_syntax noprefix), run
 *          through the C preprocessor (the #define lines below).
 * Target : x86-64 ELF.
 *
 * Exports
 *   ninst   - 32-bit value holding NINST, the number of INSTR executed
 *             per loop iteration (presumably read by the benchmark
 *             driver to normalise its timings - confirm against caller).
 *   latency - the kernel entry point. The name is kept as-is because it
 *             is the exported symbol, even though this file is a
 *             throughput (TP) variant.
 *
 * latency
 *   In    : edi = N, the iteration count (first integer argument under
 *           the System V AMD64 ABI). N <= 0 returns immediately.
 *   Out   : nothing; rax is not written.
 *   Clobb : r8d, flags, and the first 16 bytes at PI.
 */
#define INSTR vmovupd
#define NINST 6
#define N edi
#define i r8d

.intel_syntax noprefix

.globl ninst

.data
ninst:
        .long   NINST

/* 32-byte buffer used as the store target. It lives in .data because
 * the kernel writes to it; the initial contents (four copies of
 * 0x400921f9f01b866e, low dword first) are overwritten on the first
 * iteration. The buffer is 32-byte aligned, so the unaligned-capable
 * move is exercised here on an aligned address. */
        .align  32
PI:
        .long   0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9

.text
.globl latency
.type latency, @function
        .align  32
latency:
        push    rbp
        mov     rbp, rsp
        xor     i, i                    /* iteration counter = 0 */
        test    N, N
        jle     .Ldone                  /* nothing to do for N <= 0 (signed) */
.Lloop:
        inc     i
        /* Six stores from six different registers to the same address.
         * xmm0-xmm5 are never initialised here, so whatever they hold
         * on entry is what gets written. */
        INSTR   [rip+PI], xmm0
        INSTR   [rip+PI], xmm1
        INSTR   [rip+PI], xmm2
        /* NOTE(review): the compare sits mid-loop instead of next to the
         * branch; presumably deliberate - keep the order unchanged. */
        cmp     i, N
        INSTR   [rip+PI], xmm3
        INSTR   [rip+PI], xmm4
        INSTR   [rip+PI], xmm5
        jl      .Lloop                  /* flags from cmp survive: the moves do not write them */
.Ldone:
        mov     rsp, rbp
        pop     rbp
        ret
.size latency, .-latency

/* Mark the object as not needing an executable stack. Without this
 * note GNU ld assumes an executable stack is required and warns. */
.section .note.GNU-stack,"",@progbits
|
Reference in New Issue
Block a user