remove test benchmarks

This commit is contained in:
Johannes Hofmann
2019-03-29 17:13:24 +01:00
parent a8b004c9e2
commit 0d23cce999
4 changed files with 28 additions and 135 deletions

View File

@@ -1,40 +0,0 @@
#define INSTR vfmadd213sd
#define NINST 1
#define N edi
#define i r8d
.intel_syntax noprefix

# Benchmark kernel: a serial chain of two scalar-double FMAs and one scalar
# multiply, all accumulating into xmm0, repeated once per loop iteration.
#
# Register roles (names come from the macros above):
#   edi (N)  - requested iteration count; presumably the first integer
#              argument under the System V AMD64 ABI -- TODO confirm caller
#   r8d (i)  - iteration counter, starts at zero
#   xmm0     - accumulator carried from one instruction to the next
#   xmm1/2   - constant operands, both hold DP 1.0
#
# NOTE(review): INSTR is not referenced in this file and names vfmadd213sd,
# whereas the loop issues vfmadd231sd; NINST is 1 although the loop body has
# three floating-point instructions. Confirm what the harness expects.

        .data
        .globl  ninst
ninst:
        .long   NINST                   # value of the exported symbol ninst

        .text
        .globl  latency
        .type   latency, @function
        .p2align 5                      # 32-byte alignment of the entry point
latency:
        push    rbp
        mov     rbp, rsp
        xor     i, i                    # iteration counter starts at zero
        test    N, N
        jle     .Ldone                  # signed check: nothing to run for a count <= 0

        # Build DP 1.0 (0x3FF0000000000000) in both 64-bit lanes of xmm0
        # using register-only integer operations.
        vpcmpeqw xmm0, xmm0, xmm0       # every bit set
        vpsllq  xmm0, xmm0, 54          # keep only the top 10 bits: 0xFFC0...0
        vpsrlq  xmm0, xmm0, 2           # zero sign bit and zero top exponent bit
        # Replicate the constant into the two operand registers.
        vmovapd xmm2, xmm0
        vmovapd xmm1, xmm0

.Lloop:
        inc     i
        vfmadd231sd xmm0, xmm1, xmm2    # xmm0 += xmm1 * xmm2 (low double)
        vfmadd231sd xmm0, xmm2, xmm1    # xmm0 += xmm2 * xmm1 (low double)
        vmulsd  xmm0, xmm0, xmm1        # xmm0 *= xmm1 (low double)
        cmp     i, N
        jl      .Lloop                  # signed compare: repeat while counter < count

.Ldone:
        leave                           # same as mov rsp, rbp followed by pop rbp
        ret
        .size   latency, .-latency

View File

@@ -1,44 +0,0 @@
#define INSTR vfmadd213sd
#define NINST 1
#define N edi
#define i r8d
.intel_syntax noprefix

# Benchmark kernel: a serial chain of two scalar-double FMAs and one scalar
# multiply, all accumulating into xmm0, repeated once per loop iteration.
# Before the loop the operand registers are filled at full 512-bit width,
# while the loop itself only issues scalar (xmm) instructions.
#
# Register roles (names come from the macros above):
#   edi (N)  - requested iteration count; presumably the first integer
#              argument under the System V AMD64 ABI -- TODO confirm caller
#   r8d (i)  - iteration counter, starts at zero
#   zmm0     - accumulator; only the low double changes inside the loop
#   zmm1/2   - constant operands, every 64-bit lane holds DP 1.0
#
# Requires AVX-512F (vinsertf64x4 and zmm register moves).
#
# NOTE(review): INSTR is not referenced in this file and names vfmadd213sd,
# whereas the loop issues vfmadd231sd; NINST is 1 although the loop body has
# three floating-point instructions. Confirm what the harness expects.

.globl ninst
.data
ninst:
        .long   NINST                   # value of the exported symbol ninst
.text
.globl latency
.type latency, @function
.align 32
latency:
        push    rbp
        mov     rbp, rsp
        xor     i, i                    # iteration counter starts at zero
        test    N, N
        jle     done                    # signed check: nothing to run for a count <= 0
        # create SSE DP 1.0 (0x3FF0000000000000 in both 64-bit lanes)
        vpcmpeqw xmm0, xmm0, xmm0       # all ones
        vpsllq  xmm0, xmm0, 54          # logical left shift: 11111110..0 (54 = 64 - (11 - 1))
        vpsrlq  xmm0, xmm0, 2           # logical right shift: 1 bit for sign; leading exponent bit is zero
        # expand from SSE to AVX: copy the low 128 bits into the upper half of ymm0
        vinsertf128 ymm0, ymm0, xmm0, 0x1
        # expand from AVX to AVX-512: copy the low 256 bits into the upper half of zmm0
        vinsertf64x4 zmm0, zmm0, ymm0, 0x1
        # copy DP 1.0 into both operand registers
        vmovapd zmm1, zmm0
        vmovapd zmm2, zmm0
loop:
        inc     i
        vfmadd231sd xmm0, xmm1, xmm2    # xmm0 += xmm1 * xmm2 (low double)
        vfmadd231sd xmm0, xmm2, xmm1    # xmm0 += xmm2 * xmm1 (low double)
        vmulsd  xmm0, xmm0, xmm1        # xmm0 *= xmm1 (low double)
        cmp     i, N
        jl      loop                    # signed compare: repeat while counter < count
done:
        # The ymm/zmm writes above leave the upper vector bits dirty. Clear them
        # before returning so that any legacy (non-VEX) SSE code in the caller
        # does not run with dirty upper state. The low 128 bits of every
        # register, including the xmm0 result, are preserved. On the early-exit
        # path this is a harmless no-op for this function.
        vzeroupper
        mov     rsp, rbp
        pop     rbp
        ret
.size latency, .-latency

View File

@@ -1,43 +0,0 @@
#define INSTR vfmadd213sd
#define NINST 1
#define N edi
#define i r8d
.intel_syntax noprefix

# Benchmark kernel: a serial chain of one scalar-double FMA and one scalar
# multiply, both accumulating into xmm0, repeated once per loop iteration.
# Before the loop the operand registers are filled at full 512-bit width,
# while the loop itself only issues scalar (xmm) instructions.
#
# Register roles (names come from the macros above):
#   edi (N)  - requested iteration count; presumably the first integer
#              argument under the System V AMD64 ABI -- TODO confirm caller
#   r8d (i)  - iteration counter, starts at zero
#   zmm0     - accumulator; only the low double changes inside the loop
#   zmm1/2   - constant operands, every 64-bit lane holds DP 1.0
#
# Requires AVX-512F (vinsertf64x4 and zmm register moves).
#
# NOTE(review): INSTR is not referenced in this file and names vfmadd213sd,
# whereas the loop issues vfmadd231sd; NINST is 1 although the loop body has
# two floating-point instructions. Confirm what the harness expects.

.globl ninst
.data
ninst:
        .long   NINST                   # value of the exported symbol ninst
.text
.globl latency
.type latency, @function
.align 32
latency:
        push    rbp
        mov     rbp, rsp
        xor     i, i                    # iteration counter starts at zero
        test    N, N
        jle     done                    # signed check: nothing to run for a count <= 0
        # create SSE DP 1.0 (0x3FF0000000000000 in both 64-bit lanes)
        vpcmpeqw xmm0, xmm0, xmm0       # all ones
        vpsllq  xmm0, xmm0, 54          # logical left shift: 11111110..0 (54 = 64 - (11 - 1))
        vpsrlq  xmm0, xmm0, 2           # logical right shift: 1 bit for sign; leading exponent bit is zero
        # expand from SSE to AVX: copy the low 128 bits into the upper half of ymm0
        vinsertf128 ymm0, ymm0, xmm0, 0x1
        # expand from AVX to AVX-512: copy the low 256 bits into the upper half of zmm0
        vinsertf64x4 zmm0, zmm0, ymm0, 0x1
        # copy DP 1.0 into both operand registers
        vmovapd zmm1, zmm0
        vmovapd zmm2, zmm0
loop:
        inc     i
        vfmadd231sd xmm0, xmm1, xmm2    # xmm0 += xmm1 * xmm2 (low double)
        vmulsd  xmm0, xmm0, xmm1        # xmm0 *= xmm1 (low double)
        cmp     i, N
        jl      loop                    # signed compare: repeat while counter < count
done:
        # The ymm/zmm writes above leave the upper vector bits dirty. Clear them
        # before returning so that any legacy (non-VEX) SSE code in the caller
        # does not run with dirty upper state. The low 128 bits of every
        # register, including the xmm0 result, are preserved. On the early-exit
        # path this is a harmless no-op for this function.
        vzeroupper
        mov     rsp, rbp
        pop     rbp
        ret
.size latency, .-latency

View File

@@ -1,5 +1,5 @@
#define INSTR vfmadd213sd
#define NINST 1
#define INSTR mulsd
#define NINST 6
#define N edi
#define i r8d
@@ -19,22 +19,42 @@ latency:
xor i, i
test N, N
jle done
# create SSE DP 1.0
# create SP 1.0
vpcmpeqw xmm0, xmm0, xmm0 # all ones
vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54 = 64 - (11 - 1))
vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero
# copy DP 1.0
# create SP 2.0
vaddpd xmm1, xmm0, xmm0
# create SP 0.5
vdivpd xmm2, xmm0, xmm1
# Mark registers as scalar
movsd xmm0, xmm0
movsd xmm1, xmm0
movsd xmm2, xmm0
movsd xmm1, xmm1
movsd xmm2, xmm2
sub rsp, 8
loop:
inc i
vfmadd231sd xmm0, xmm1, xmm2
vmulsd xmm0, xmm0, xmm1
movsd xmm0, [rsp]
movsd [rsp], xmm0
movsd xmm0, [rsp]
movsd [rsp], xmm0
movsd xmm0, [rsp]
movsd [rsp], xmm0
cmp i, N
movsd xmm0, [rsp]
movsd [rsp], xmm0
movsd xmm0, [rsp]
movsd [rsp], xmm0
movsd xmm0, [rsp]
movsd [rsp], xmm0
jl loop
done:
add rsp, 8
mov rsp, rbp
pop rbp
ret