add SSE load/store instructions

This commit is contained in:
Johannes Hofmann
2018-01-08 12:46:51 +01:00
parent af07f9cf6f
commit b8a664920a
4 changed files with 156 additions and 0 deletions

View File

@@ -0,0 +1,39 @@
/*
 * Benchmark parameters. These are C preprocessor macros, so this file has
 * to be run through cpp before it reaches the assembler.
 *
 *   INSTR  instruction under test (aligned 128-bit move, VEX encoded)
 *   NINST  value published through the exported symbol ninst; it equals
 *          the number of INSTR lines in the loop of latency
 *   N      register holding the iteration count
 *   i      register used as the loop counter
 */
#define INSTR vmovapd
#define NINST 6
#define N edi
#define i r8d

.intel_syntax noprefix

.data

/* Exported 32-bit constant; presumably read by the benchmark driver to
   scale its timings -- confirm against the caller. */
.globl ninst
ninst:
        .long   NINST

/* 32 bytes of load source, 32-byte aligned: four copies of the 64-bit
   pattern 0x400921f9f01b866e (as an IEEE-754 double, about 3.14159). */
.balign 32
PI:
        .rept   4
        .quad   0x400921f9f01b866e
        .endr
/*
 * latency -- counted loop around six INSTR loads from PI.
 *
 * C view (inferred from register use, confirm against the caller):
 *     void latency(int N);
 * In:    N (edi)  signed 32-bit iteration count; N <= 0 skips the loop.
 * Out:   nothing; rax is never written.
 * Clobb: r8 (loop counter), xmm0-xmm5, flags.
 *
 * The loop body runs exactly N times. Every INSTR reads the same 16 bytes
 * at PI into a different destination register, so no load consumes the
 * result of another one. INSTR is the alignment-checking move; PI is
 * 32-byte aligned, which satisfies its 16-byte requirement.
 */
.text
.globl latency
.type latency, @function
.align 32
latency:
        push    rbp
        mov     rbp, rsp
        xor     i, i                    /* counter = 0 */
        test    N, N
        jle     .Ldone                  /* nothing to do for N <= 0 */
.Lloop:
        inc     i
        INSTR   xmm0, [rip+PI]
        INSTR   xmm1, [rip+PI]
        INSTR   xmm2, [rip+PI]
        cmp     i, N                    /* INSTR leaves flags alone, so   */
        INSTR   xmm3, [rip+PI]          /* this result survives to the jl */
        INSTR   xmm4, [rip+PI]
        INSTR   xmm5, [rip+PI]
        jl      .Lloop                  /* signed: repeat while counter < N */
.Ldone:
        mov     rsp, rbp
        pop     rbp
        ret
.size latency, .-latency

/* Mark the object as not needing an executable stack. Without this note
   GNU ld assumes an executable stack is required for the whole link. */
.section .note.GNU-stack,"",@progbits

View File

@@ -0,0 +1,39 @@
/*
 * Benchmark parameters. These are C preprocessor macros, so this file has
 * to be run through cpp before it reaches the assembler.
 *
 *   INSTR  instruction under test (aligned 128-bit move, VEX encoded)
 *   NINST  value published through the exported symbol ninst; it equals
 *          the number of INSTR lines in the loop of latency
 *   N      register holding the iteration count
 *   i      register used as the loop counter
 */
#define INSTR vmovapd
#define NINST 6
#define N edi
#define i r8d

.intel_syntax noprefix

.data

/* Exported 32-bit constant; presumably read by the benchmark driver to
   scale its timings -- confirm against the caller. */
.globl ninst
ninst:
        .long   NINST

/* 32-byte, 32-byte-aligned buffer that the loop of latency stores into,
   which is why it lives in writable .data. Initial contents: four copies
   of the 64-bit pattern 0x400921f9f01b866e (as a double, about 3.14159). */
.balign 32
PI:
        .rept   4
        .quad   0x400921f9f01b866e
        .endr
/*
 * latency -- counted loop around six INSTR stores to PI.
 *
 * C view (inferred from register use, confirm against the caller):
 *     void latency(int N);
 * In:    N (edi)  signed 32-bit iteration count; N <= 0 skips the loop.
 * Out:   nothing; rax is never written.
 * Clobb: r8 (loop counter), flags, and the first 16 bytes of PI.
 *
 * The loop body runs exactly N times. Each INSTR writes one of
 * xmm0-xmm5 to the same 16 bytes at PI. The registers are not
 * initialised here, so the stored values are whatever the caller left
 * in them. INSTR is the alignment-checking move; PI is 32-byte aligned,
 * which satisfies its 16-byte requirement.
 */
.text
.globl latency
.type latency, @function
.align 32
latency:
        push    rbp
        mov     rbp, rsp
        xor     i, i                    /* counter = 0 */
        test    N, N
        jle     .Ldone                  /* nothing to do for N <= 0 */
.Lloop:
        inc     i
        INSTR   [rip+PI], xmm0
        INSTR   [rip+PI], xmm1
        INSTR   [rip+PI], xmm2
        cmp     i, N                    /* INSTR leaves flags alone, so   */
        INSTR   [rip+PI], xmm3          /* this result survives to the jl */
        INSTR   [rip+PI], xmm4
        INSTR   [rip+PI], xmm5
        jl      .Lloop                  /* signed: repeat while counter < N */
.Ldone:
        mov     rsp, rbp
        pop     rbp
        ret
.size latency, .-latency

/* Mark the object as not needing an executable stack. Without this note
   GNU ld assumes an executable stack is required for the whole link. */
.section .note.GNU-stack,"",@progbits

View File

@@ -0,0 +1,39 @@
/*
 * Benchmark parameters. These are C preprocessor macros, so this file has
 * to be run through cpp before it reaches the assembler.
 *
 *   INSTR  instruction under test (unaligned-capable 128-bit move,
 *          VEX encoded)
 *   NINST  value published through the exported symbol ninst; it equals
 *          the number of INSTR lines in the loop of latency
 *   N      register holding the iteration count
 *   i      register used as the loop counter
 */
#define INSTR vmovupd
#define NINST 6
#define N edi
#define i r8d

.intel_syntax noprefix

.data

/* Exported 32-bit constant; presumably read by the benchmark driver to
   scale its timings -- confirm against the caller. */
.globl ninst
ninst:
        .long   NINST

/* 32 bytes of load source, 32-byte aligned: four copies of the 64-bit
   pattern 0x400921f9f01b866e (as an IEEE-754 double, about 3.14159). */
.balign 32
PI:
        .rept   4
        .quad   0x400921f9f01b866e
        .endr
/*
 * latency -- counted loop around six INSTR loads from PI.
 *
 * C view (inferred from register use, confirm against the caller):
 *     void latency(int N);
 * In:    N (edi)  signed 32-bit iteration count; N <= 0 skips the loop.
 * Out:   nothing; rax is never written.
 * Clobb: r8 (loop counter), xmm0-xmm5, flags.
 *
 * The loop body runs exactly N times. Every INSTR reads the same 16 bytes
 * at PI into a different destination register, so no load consumes the
 * result of another one. INSTR is the move without an alignment
 * requirement, but PI is 32-byte aligned, so only the aligned-address
 * case is exercised here.
 */
.text
.globl latency
.type latency, @function
.align 32
latency:
        push    rbp
        mov     rbp, rsp
        xor     i, i                    /* counter = 0 */
        test    N, N
        jle     .Ldone                  /* nothing to do for N <= 0 */
.Lloop:
        inc     i
        INSTR   xmm0, [rip+PI]
        INSTR   xmm1, [rip+PI]
        INSTR   xmm2, [rip+PI]
        cmp     i, N                    /* INSTR leaves flags alone, so   */
        INSTR   xmm3, [rip+PI]          /* this result survives to the jl */
        INSTR   xmm4, [rip+PI]
        INSTR   xmm5, [rip+PI]
        jl      .Lloop                  /* signed: repeat while counter < N */
.Ldone:
        mov     rsp, rbp
        pop     rbp
        ret
.size latency, .-latency

/* Mark the object as not needing an executable stack. Without this note
   GNU ld assumes an executable stack is required for the whole link. */
.section .note.GNU-stack,"",@progbits

View File

@@ -0,0 +1,39 @@
/*
 * Benchmark parameters. These are C preprocessor macros, so this file has
 * to be run through cpp before it reaches the assembler.
 *
 *   INSTR  instruction under test (unaligned-capable 128-bit move,
 *          VEX encoded)
 *   NINST  value published through the exported symbol ninst; it equals
 *          the number of INSTR lines in the loop of latency
 *   N      register holding the iteration count
 *   i      register used as the loop counter
 */
#define INSTR vmovupd
#define NINST 6
#define N edi
#define i r8d

.intel_syntax noprefix

.data

/* Exported 32-bit constant; presumably read by the benchmark driver to
   scale its timings -- confirm against the caller. */
.globl ninst
ninst:
        .long   NINST

/* 32-byte, 32-byte-aligned buffer that the loop of latency stores into,
   which is why it lives in writable .data. Initial contents: four copies
   of the 64-bit pattern 0x400921f9f01b866e (as a double, about 3.14159). */
.balign 32
PI:
        .rept   4
        .quad   0x400921f9f01b866e
        .endr
/*
 * latency -- counted loop around six INSTR stores to PI.
 *
 * C view (inferred from register use, confirm against the caller):
 *     void latency(int N);
 * In:    N (edi)  signed 32-bit iteration count; N <= 0 skips the loop.
 * Out:   nothing; rax is never written.
 * Clobb: r8 (loop counter), flags, and the first 16 bytes of PI.
 *
 * The loop body runs exactly N times. Each INSTR writes one of
 * xmm0-xmm5 to the same 16 bytes at PI. The registers are not
 * initialised here, so the stored values are whatever the caller left
 * in them. INSTR is the move without an alignment requirement, but PI
 * is 32-byte aligned, so only the aligned-address case is exercised.
 */
.text
.globl latency
.type latency, @function
.align 32
latency:
        push    rbp
        mov     rbp, rsp
        xor     i, i                    /* counter = 0 */
        test    N, N
        jle     .Ldone                  /* nothing to do for N <= 0 */
.Lloop:
        inc     i
        INSTR   [rip+PI], xmm0
        INSTR   [rip+PI], xmm1
        INSTR   [rip+PI], xmm2
        cmp     i, N                    /* INSTR leaves flags alone, so   */
        INSTR   [rip+PI], xmm3          /* this result survives to the jl */
        INSTR   [rip+PI], xmm4
        INSTR   [rip+PI], xmm5
        jl      .Lloop                  /* signed: repeat while counter < N */
.Ldone:
        mov     rsp, rbp
        pop     rbp
        ret
.size latency, .-latency

/* Mark the object as not needing an executable stack. Without this note
   GNU ld assumes an executable stack is required for the whole link. */
.section .note.GNU-stack,"",@progbits