diff --git a/src/AVX/vmovapd-load-sse-TP.S b/src/AVX/vmovapd-load-sse-TP.S
new file mode 100644
index 0000000..cfc9f3a
--- /dev/null
+++ b/src/AVX/vmovapd-load-sse-TP.S
@@ -0,0 +1,63 @@
+/*
+ * Throughput kernel: six independent 128-bit vmovapd loads per iteration.
+ *
+ * Syntax: GNU as, Intel syntax without register prefixes, run through cpp.
+ * Target: x86-64 ELF.
+ *
+ * Exported symbols:
+ *   ninst   - 32-bit word holding NINST, the INSTR count per iteration
+ *   latency - entry point; edi holds the signed 32-bit iteration count
+ *             (first integer argument register under System V AMD64,
+ *             presumably how the harness passes it - confirm)
+ *
+ * NOTE(review): the entry point is named latency although the file name
+ * says TP; presumably the harness looks up this fixed name - confirm.
+ *
+ * Clobbers: r8d, xmm0-xmm5, flags.
+ */
+#define INSTR vmovapd
+/* Keep in sync with the number of INSTR lines in the loop body. */
+#define NINST 6
+/* Iteration count supplied by the caller. */
+#define N edi
+/* Loop counter. */
+#define i r8d
+
+
+.intel_syntax noprefix
+.globl ninst
+.data
+ninst:
+.long NINST
+/* 32-byte alignment covers the 16-byte requirement of aligned xmm access. */
+.align 32
+/* Four copies of the double 3.14159, each as low word then high word. */
+PI:
+.long 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9
+.text
+.globl latency
+.type latency, @function
+.align 32
+latency:
+        push      rbp
+        mov       rbp, rsp
+        xor       i, i
+        test      N, N
+        jle       done            /* N <= 0: run zero iterations */
+loop:
+        inc       i
+        INSTR     xmm0, [rip+PI]
+        INSTR     xmm1, [rip+PI]
+        INSTR     xmm2, [rip+PI]
+        cmp       i, N            /* flags survive the moves below until jl */
+        INSTR     xmm3, [rip+PI]
+        INSTR     xmm4, [rip+PI]
+        INSTR     xmm5, [rip+PI]
+        jl        loop            /* signed compare: repeat while i < N */
+done:
+        mov       rsp, rbp
+        pop       rbp
+        ret
+.size latency, .-latency
+/* Declare that this object does not need an executable stack. */
+.section .note.GNU-stack,"",@progbits
diff --git a/src/AVX/vmovapd-store-sse-TP.S b/src/AVX/vmovapd-store-sse-TP.S
new file mode 100644
index 0000000..3f586af
--- /dev/null
+++ b/src/AVX/vmovapd-store-sse-TP.S
@@ -0,0 +1,64 @@
+/*
+ * Throughput kernel: six 128-bit vmovapd stores per iteration, all to PI.
+ *
+ * Syntax: GNU as, Intel syntax without register prefixes, run through cpp.
+ * Target: x86-64 ELF.
+ *
+ * Exported symbols:
+ *   ninst   - 32-bit word holding NINST, the INSTR count per iteration
+ *   latency - entry point; edi holds the signed 32-bit iteration count
+ *             (first integer argument register under System V AMD64,
+ *             presumably how the harness passes it - confirm)
+ *
+ * NOTE(review): the entry point is named latency although the file name
+ * says TP; presumably the harness looks up this fixed name - confirm.
+ *
+ * Clobbers: r8d, flags, and the first 16 bytes at PI (xmm0-xmm5 are stored
+ * without being initialized here, so the stored values are unspecified).
+ */
+#define INSTR vmovapd
+/* Keep in sync with the number of INSTR lines in the loop body. */
+#define NINST 6
+/* Iteration count supplied by the caller. */
+#define N edi
+/* Loop counter. */
+#define i r8d
+
+
+.intel_syntax noprefix
+.globl ninst
+.data
+ninst:
+.long NINST
+/* 32-byte alignment covers the 16-byte requirement of aligned xmm access. */
+.align 32
+/* Initial contents: four copies of the double 3.14159 (low word, high word). */
+PI:
+.long 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9
+.text
+.globl latency
+.type latency, @function
+.align 32
+latency:
+        push      rbp
+        mov       rbp, rsp
+        xor       i, i
+        test      N, N
+        jle       done            /* N <= 0: run zero iterations */
+loop:
+        inc       i
+        INSTR     [rip+PI], xmm0
+        INSTR     [rip+PI], xmm1
+        INSTR     [rip+PI], xmm2
+        cmp       i, N            /* flags survive the stores below until jl */
+        INSTR     [rip+PI], xmm3
+        INSTR     [rip+PI], xmm4
+        INSTR     [rip+PI], xmm5
+        jl        loop            /* signed compare: repeat while i < N */
+done:
+        mov       rsp, rbp
+        pop       rbp
+        ret
+.size latency, .-latency
+/* Declare that this object does not need an executable stack. */
+.section .note.GNU-stack,"",@progbits
diff --git a/src/AVX/vmovupd-load-sse-TP.S b/src/AVX/vmovupd-load-sse-TP.S
new file mode 100644
index 0000000..5082b52
--- /dev/null
+++ b/src/AVX/vmovupd-load-sse-TP.S
@@ -0,0 +1,63 @@
+/*
+ * Throughput kernel: six independent 128-bit vmovupd loads per iteration.
+ *
+ * Syntax: GNU as, Intel syntax without register prefixes, run through cpp.
+ * Target: x86-64 ELF.
+ *
+ * Exported symbols:
+ *   ninst   - 32-bit word holding NINST, the INSTR count per iteration
+ *   latency - entry point; edi holds the signed 32-bit iteration count
+ *             (first integer argument register under System V AMD64,
+ *             presumably how the harness passes it - confirm)
+ *
+ * NOTE(review): the entry point is named latency although the file name
+ * says TP; presumably the harness looks up this fixed name - confirm.
+ *
+ * Clobbers: r8d, xmm0-xmm5, flags.
+ */
+#define INSTR vmovupd
+/* Keep in sync with the number of INSTR lines in the loop body. */
+#define NINST 6
+/* Iteration count supplied by the caller. */
+#define N edi
+/* Loop counter. */
+#define i r8d
+
+
+.intel_syntax noprefix
+.globl ninst
+.data
+ninst:
+.long NINST
+/* PI is 32-byte aligned, so INSTR is measured on aligned data. */
+.align 32
+/* Four copies of the double 3.14159, each as low word then high word. */
+PI:
+.long 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9
+.text
+.globl latency
+.type latency, @function
+.align 32
+latency:
+        push      rbp
+        mov       rbp, rsp
+        xor       i, i
+        test      N, N
+        jle       done            /* N <= 0: run zero iterations */
+loop:
+        inc       i
+        INSTR     xmm0, [rip+PI]
+        INSTR     xmm1, [rip+PI]
+        INSTR     xmm2, [rip+PI]
+        cmp       i, N            /* flags survive the moves below until jl */
+        INSTR     xmm3, [rip+PI]
+        INSTR     xmm4, [rip+PI]
+        INSTR     xmm5, [rip+PI]
+        jl        loop            /* signed compare: repeat while i < N */
+done:
+        mov       rsp, rbp
+        pop       rbp
+        ret
+.size latency, .-latency
+/* Declare that this object does not need an executable stack. */
+.section .note.GNU-stack,"",@progbits
diff --git a/src/AVX/vmovupd-store-sse-TP.S b/src/AVX/vmovupd-store-sse-TP.S
new file mode 100644
index 0000000..1c3bfb0
--- /dev/null
+++ b/src/AVX/vmovupd-store-sse-TP.S
@@ -0,0 +1,64 @@
+/*
+ * Throughput kernel: six 128-bit vmovupd stores per iteration, all to PI.
+ *
+ * Syntax: GNU as, Intel syntax without register prefixes, run through cpp.
+ * Target: x86-64 ELF.
+ *
+ * Exported symbols:
+ *   ninst   - 32-bit word holding NINST, the INSTR count per iteration
+ *   latency - entry point; edi holds the signed 32-bit iteration count
+ *             (first integer argument register under System V AMD64,
+ *             presumably how the harness passes it - confirm)
+ *
+ * NOTE(review): the entry point is named latency although the file name
+ * says TP; presumably the harness looks up this fixed name - confirm.
+ *
+ * Clobbers: r8d, flags, and the first 16 bytes at PI (xmm0-xmm5 are stored
+ * without being initialized here, so the stored values are unspecified).
+ */
+#define INSTR vmovupd
+/* Keep in sync with the number of INSTR lines in the loop body. */
+#define NINST 6
+/* Iteration count supplied by the caller. */
+#define N edi
+/* Loop counter. */
+#define i r8d
+
+
+.intel_syntax noprefix
+.globl ninst
+.data
+ninst:
+.long NINST
+/* PI is 32-byte aligned, so INSTR is measured on aligned data. */
+.align 32
+/* Initial contents: four copies of the double 3.14159 (low word, high word). */
+PI:
+.long 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9
+.text
+.globl latency
+.type latency, @function
+.align 32
+latency:
+        push      rbp
+        mov       rbp, rsp
+        xor       i, i
+        test      N, N
+        jle       done            /* N <= 0: run zero iterations */
+loop:
+        inc       i
+        INSTR     [rip+PI], xmm0
+        INSTR     [rip+PI], xmm1
+        INSTR     [rip+PI], xmm2
+        cmp       i, N            /* flags survive the stores below until jl */
+        INSTR     [rip+PI], xmm3
+        INSTR     [rip+PI], xmm4
+        INSTR     [rip+PI], xmm5
+        jl        loop            /* signed compare: repeat while i < N */
+done:
+        mov       rsp, rbp
+        pop       rbp
+        ret
+.size latency, .-latency
+/* Declare that this object does not need an executable stack. */
+.section .note.GNU-stack,"",@progbits