Files
asmbench/random_pf1.txt
2020-02-14 16:57:29 +01:00

451 lines
9.7 KiB
Plaintext

## Selected Instructions
VPERMILPSri, MULPSrr, ANDPDrr, VPSIGNBrr, PSIGNBrr, PMOVZXWDrr, PMINUWrr, PADDSWrr, VPSHUFHWri, MOVUPDrr
## Generated Assembly (1x parallel)
## Generated benchmark kernel (x86-64, AT&T syntax, Mach-O directives).
## _test: %rdi holds the loop bound. Returns 0 in %eax when %rdi <= 0; otherwise
## executes the inline-asm body once per pass and returns the index of the last
## pass (bound - 1) in %rax.
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 13
.section __TEXT,__literal4,4byte_literals
.p2align 2
## 32-bit seed constant: bit pattern 0x3F802000, i.e. 1.0009765625 as an IEEE-754 single.
LCPI0_0:
.long 1065361408
.section __TEXT,__text,regular,pure_instructions
.globl _test
.p2align 4, 0x90
_test:
.cfi_startproc
## Signed test of the loop bound: zero or negative skips the loop entirely.
testq %rdi, %rdi
jle LBB0_1
## Seed %xmm0 by broadcasting the constant; its address is loaded as a 64-bit absolute immediate.
movabsq $LCPI0_0, %rax
vbroadcastss (%rax), %xmm0
## %rcx is the loop counter; starting at -1 makes the first pass compute index 0.
movq $-1, %rcx
.p2align 4, 0x90
LBB0_3:
## InlineAsm Start
## Every instruction below names only %xmm0 as source and destination, so each one
## consumes the register written by the one before it.
## NOTE(review): VEX-encoded (v-prefixed) and legacy-SSE encodings are interleaved here;
## whether that influences the measured cycle count is not visible from this listing.
vpermilps $1, %xmm0, %xmm0
mulps %xmm0, %xmm0
andpd %xmm0, %xmm0
vpsignb %xmm0, %xmm0, %xmm0
psignb %xmm0, %xmm0
pmovzxwd %xmm0, %xmm0
pminuw %xmm0, %xmm0
paddsw %xmm0, %xmm0
vpshufhw $1, %xmm0, %xmm0
movupd %xmm0, %xmm0
## InlineAsm End
## Loop control: %rax = %rcx + 1 is this pass's index, %rcx + 2 is compared against
## the bound, then %rcx is reloaded from %rax. The mov sits between cmp and jl but
## does not alter the flags the branch reads.
leaq 1(%rcx), %rax
addq $2, %rcx
cmpq %rdi, %rcx
movq %rax, %rcx
## Signed compare: continue while index + 1 < %rdi.
jl LBB0_3
## Loop exit: %rax still holds the index of the last pass.
retq
LBB0_1:
## Bound was <= 0: return 0 without executing the body.
xorl %eax, %eax
retq
.cfi_endproc
.subsections_via_symbols
## Detailed Results
{'arguments': (24655919,),
'frequency': 2600000000.0,
'iterations': 24655919,
'parallel_factor': 1,
'returned': [24655918, 24655918, 24655918, 24655918],
'runtimes': [0.13202582497615367,
0.13208268792368472,
0.13151856907643378,
0.13161470007617027]}
minimal throughput: 13.87 cy
## Selected Instructions
VFMADD132PDYr, VPADDWYrr, VFMADD132PSYr, VPADDDYrr, VSUBPDYrr, VPACKUSDWYrr, VPMULHUWYrr, VMINPDYrr, VPUNPCKLWDYrr, VBLENDVPSYrr
## Generated Assembly (1x parallel)
## Generated benchmark kernel (x86-64, AT&T syntax, Mach-O directives).
## _test: %rdi holds the loop bound. Returns 0 in %eax when %rdi <= 0; otherwise
## executes the inline-asm body once per pass and returns the index of the last
## pass (bound - 1) in %rax.
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 13
.section __TEXT,__literal4,4byte_literals
.p2align 2
## 32-bit seed constant: bit pattern 0x3F802000, i.e. 1.0009765625 as an IEEE-754 single.
LCPI0_0:
.long 1065361408
.section __TEXT,__text,regular,pure_instructions
.globl _test
.p2align 4, 0x90
_test:
.cfi_startproc
## Signed test of the loop bound: zero or negative skips the loop entirely.
testq %rdi, %rdi
jle LBB0_1
## Seed all eight 32-bit lanes of %ymm0 from the constant (address loaded as a 64-bit absolute immediate).
movabsq $LCPI0_0, %rax
vbroadcastss (%rax), %ymm0
## %rcx is the loop counter; starting at -1 makes the first pass compute index 0.
movq $-1, %rcx
.p2align 4, 0x90
LBB0_3:
## InlineAsm Start
## All ten instructions are 256-bit VEX forms whose only register operand is %ymm0,
## so each one consumes the register written by the one before it.
vfmadd132pd %ymm0, %ymm0, %ymm0
vpaddw %ymm0, %ymm0, %ymm0
vfmadd132ps %ymm0, %ymm0, %ymm0
vpaddd %ymm0, %ymm0, %ymm0
vsubpd %ymm0, %ymm0, %ymm0
vpackusdw %ymm0, %ymm0, %ymm0
vpmulhuw %ymm0, %ymm0, %ymm0
vminpd %ymm0, %ymm0, %ymm0
vpunpcklwd %ymm0, %ymm0, %ymm0
vblendvps %ymm0, %ymm0, %ymm0, %ymm0
## InlineAsm End
## Loop control: %rax = %rcx + 1 is this pass's index, %rcx + 2 is compared against
## the bound, then %rcx is reloaded from %rax. The mov sits between cmp and jl but
## does not alter the flags the branch reads.
leaq 1(%rcx), %rax
addq $2, %rcx
cmpq %rdi, %rcx
movq %rax, %rcx
## Signed compare: continue while index + 1 < %rdi.
jl LBB0_3
## The body wrote 256-bit registers, so the upper halves are cleared before returning.
vzeroupper
retq
LBB0_1:
## Bound was <= 0: return 0 without executing the body (no ymm register was touched on this path).
xorl %eax, %eax
retq
.cfi_endproc
.subsections_via_symbols
## Detailed Results
{'arguments': (10000000,),
'frequency': 2600000000.0,
'iterations': 10000000,
'parallel_factor': 1,
'returned': [9999999, 9999999, 9999999, 9999999],
'runtimes': [0.11892832000739872,
0.11891822703182697,
0.11902078497223556,
0.12094117503147572]}
minimal throughput: 30.92 cy
## Selected Instructions
VCVTSI642SDrr, VFMADD213SDr, DIVSDrr, VCVTSI642SDrr, MAXSDrr, VFNMADD213SDr, VFMADD132SDr, VMAXSDrr, VFNMADD132SDr, SQRTSDr
## Generated Assembly (1x parallel)
## Generated benchmark kernel (x86-64, AT&T syntax, Mach-O directives).
## _test: %rdi holds the loop bound. Returns 0 in %eax when %rdi <= 0; otherwise
## executes the inline-asm body once per pass and returns the index of the last
## pass (bound - 1) in %rax.
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 13
.section __TEXT,__literal8,8byte_literals
.p2align 3
## 64-bit seed constant: bit pattern 0x3FF0040000000000, i.e. 1.0009765625 as an IEEE-754 double.
LCPI0_0:
.quad 4607186816846528512
.section __TEXT,__text,regular,pure_instructions
.globl _test
.p2align 4, 0x90
_test:
.cfi_startproc
## Signed test of the loop bound: zero or negative skips the loop entirely.
testq %rdi, %rdi
jle LBB0_1
## %rcx is the loop counter; starting at -1 makes the first pass compute index 0.
movq $-1, %rcx
## Seed the low double of %xmm0 from the constant (address loaded as a 64-bit absolute immediate).
movabsq $LCPI0_0, %rax
vmovsd (%rax), %xmm0
## %rdx = 3 (the 32-bit write zero-extends); it is the integer source of both
## vcvtsi2sdq conversions and is never modified inside the loop.
movl $3, %edx
.p2align 4, 0x90
LBB0_3:
## InlineAsm Start
## Every instruction writes %xmm0 and names %xmm0 as a vector source, so each one
## consumes the register written by the one before it. The two vcvtsi2sdq take
## their value from %rdx but still list %xmm0 as the merge source.
## NOTE(review): VEX-encoded (v-prefixed) and legacy-SSE encodings are interleaved here;
## whether that influences the measured cycle count is not visible from this listing.
vcvtsi2sdq %rdx, %xmm0, %xmm0
vfmadd213sd %xmm0, %xmm0, %xmm0
divsd %xmm0, %xmm0
vcvtsi2sdq %rdx, %xmm0, %xmm0
maxsd %xmm0, %xmm0
vfnmadd213sd %xmm0, %xmm0, %xmm0
vfmadd132sd %xmm0, %xmm0, %xmm0
vmaxsd %xmm0, %xmm0, %xmm0
vfnmadd132sd %xmm0, %xmm0, %xmm0
sqrtsd %xmm0, %xmm0
## InlineAsm End
## Loop control: %rax = %rcx + 1 is this pass's index, %rcx + 2 is compared against
## the bound, then %rcx is reloaded from %rax. The mov sits between cmp and jl but
## does not alter the flags the branch reads.
leaq 1(%rcx), %rax
addq $2, %rcx
cmpq %rdi, %rcx
movq %rax, %rcx
## Signed compare: continue while index + 1 < %rdi.
jl LBB0_3
## Loop exit: %rax still holds the index of the last pass.
retq
LBB0_1:
## Bound was <= 0: return 0 without executing the body.
xorl %eax, %eax
retq
.cfi_endproc
.subsections_via_symbols
## Detailed Results
{'arguments': (5841530,),
'frequency': 2600000000.0,
'iterations': 5841530,
'parallel_factor': 1,
'returned': [5841529, 5841529, 5841529, 5841529],
'runtimes': [0.13433505699504167,
0.13318849296774715,
0.13303690601605922,
0.13309408095665276]}
minimal throughput: 59.21 cy
## Selected Instructions
RCPSSr, VCVTSI2SSrr, MULSSrr, VCVTSD2SSrr, VROUNDSSr, VRCPSSr, VCVTSI2SSrr, VSQRTSSr, VFNMADD231SSr, VSQRTSSr
## Generated Assembly (1x parallel)
## Generated benchmark kernel (x86-64, AT&T syntax, Mach-O directives).
## _test: %rdi holds the loop bound. Returns 0 in %eax when %rdi <= 0; otherwise
## executes the inline-asm body once per pass and returns the index of the last
## pass (bound - 1) in %rax.
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 13
.section __TEXT,__literal4,4byte_literals
.p2align 2
## 32-bit seed constant: bit pattern 0x3F802000, i.e. 1.0009765625 as an IEEE-754 single.
LCPI0_0:
.long 1065361408
.section __TEXT,__literal8,8byte_literals
.p2align 3
## 64-bit seed constant: bit pattern 0x3FF0040000000000, i.e. 1.0009765625 as an IEEE-754 double.
LCPI0_1:
.quad 4607186816846528512
.section __TEXT,__text,regular,pure_instructions
.globl _test
.p2align 4, 0x90
_test:
.cfi_startproc
## Signed test of the loop bound: zero or negative skips the loop entirely.
testq %rdi, %rdi
jle LBB0_1
## %rcx is the loop counter; starting at -1 makes the first pass compute index 0.
movq $-1, %rcx
## %xmm1 (the register the body chains through) is seeded with the single-precision constant.
movabsq $LCPI0_0, %rax
vmovss (%rax), %xmm1
## %edx = 3 is the integer source of both vcvtsi2ssl conversions; never modified in the loop.
movl $3, %edx
## %xmm0 is seeded with the double-precision constant; the body only reads it (vcvtsd2ss source).
movabsq $LCPI0_1, %rax
vmovsd (%rax), %xmm0
.p2align 4, 0x90
LBB0_3:
## InlineAsm Start
## Every instruction writes %xmm1 and names %xmm1 as a vector source, so each one
## consumes the register written by the one before it. The conversions take their
## value from %edx or %xmm0 but still list %xmm1 as the merge source.
## NOTE(review): VEX-encoded (v-prefixed) and legacy-SSE encodings are interleaved here;
## whether that influences the measured cycle count is not visible from this listing.
rcpss %xmm1, %xmm1
vcvtsi2ssl %edx, %xmm1, %xmm1
mulss %xmm1, %xmm1
vcvtsd2ss %xmm0, %xmm1, %xmm1
vroundss $1, %xmm1, %xmm1, %xmm1
vrcpss %xmm1, %xmm1, %xmm1
vcvtsi2ssl %edx, %xmm1, %xmm1
vsqrtss %xmm1, %xmm1, %xmm1
vfnmadd231ss %xmm1, %xmm1, %xmm1
vsqrtss %xmm1, %xmm1, %xmm1
## InlineAsm End
## Loop control: %rax = %rcx + 1 is this pass's index, %rcx + 2 is compared against
## the bound, then %rcx is reloaded from %rax. The mov sits between cmp and jl but
## does not alter the flags the branch reads.
leaq 1(%rcx), %rax
addq $2, %rcx
cmpq %rdi, %rcx
movq %rax, %rcx
## Signed compare: continue while index + 1 < %rdi.
jl LBB0_3
## Loop exit: %rax still holds the index of the last pass.
retq
LBB0_1:
## Bound was <= 0: return 0 without executing the body.
xorl %eax, %eax
retq
.cfi_endproc
.subsections_via_symbols
## Detailed Results
{'arguments': (6011291,),
'frequency': 2600000000.0,
'iterations': 6011291,
'parallel_factor': 1,
'returned': [6011290, 6011290, 6011290, 6011290],
'runtimes': [0.13239118899218738,
0.13244657206814736,
0.1326694720191881,
0.13262002903502434]}
minimal throughput: 57.26 cy
## Selected Instructions
ROR16ri, CMOVS16rr, SBB16ri, ADC16ri8, XOR16ri8, BTR16rr, XOR16ri8, SAR16r1, DEC16r, SUB16ri
## Generated Assembly (1x parallel)
## Generated benchmark kernel (x86-64, AT&T syntax, Mach-O directives).
## _test: %rdi holds the loop bound. Returns 0 in %eax when %rdi <= 0; otherwise
## executes the inline-asm body once per pass and returns the index of the last
## pass (bound - 1) in %rax.
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 13
.globl _test
.p2align 4, 0x90
_test:
.cfi_startproc
## Signed test of the loop bound: zero or negative skips the loop entirely.
testq %rdi, %rdi
jle LBB0_1
## %cx = 3 is the 16-bit operand the body works on (16-bit write; upper bits of %rcx are not set here).
movw $3, %cx
## %rdx is the loop counter in this variant; starting at -1 makes the first pass compute index 0.
movq $-1, %rdx
.p2align 4, 0x90
LBB0_3:
## InlineAsm Start
## All ten instructions are 16-bit forms with %cx as their only register operand,
## so each one consumes the register written by the one before it. cmovsw, sbbw and
## adcw additionally read flags left by earlier instructions.
rorw %cx
cmovsw %cx, %cx
sbbw $1, %cx
adcw $1, %cx
xorw $1, %cx
btrw %cx, %cx
xorw $1, %cx
sarw %cx
decw %cx
subw $1, %cx
## InlineAsm End
## Loop control: %rax = %rdx + 1 is this pass's index, %rdx + 2 is compared against
## the bound, then %rdx is reloaded from %rax. The mov sits between cmp and jl but
## does not alter the flags the branch reads.
leaq 1(%rdx), %rax
addq $2, %rdx
cmpq %rdi, %rdx
movq %rax, %rdx
## Signed compare: continue while index + 1 < %rdi.
jl LBB0_3
## Loop exit: %rax still holds the index of the last pass.
retq
LBB0_1:
## Bound was <= 0: return 0 without executing the body.
xorl %eax, %eax
retq
.cfi_endproc
.subsections_via_symbols
## Detailed Results
{'arguments': (31283731,),
'frequency': 2600000000.0,
'iterations': 31283731,
'parallel_factor': 1,
'returned': [31283730, 31283730, 31283730, 31283730],
'runtimes': [0.13291946100071073,
0.13294463406782597,
0.1332225619116798,
0.13287500606384128]}
minimal throughput: 11.04 cy
## Selected Instructions
SHLX32rr, CMOVO32rr, MOV32rr, CMOVS32rr, CRC32r32r8, SHR32r1, ADD32rr, CRC32r32r8, RCR32ri, SHR32r1
## Generated Assembly (1x parallel)
## Generated benchmark kernel (x86-64, AT&T syntax, Mach-O directives).
## _test: %rdi holds the loop bound. Returns 0 in %eax when %rdi <= 0; otherwise
## executes the inline-asm body once per pass and returns the index of the last
## pass (bound - 1) in %rax.
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 13
.globl _test
.p2align 4, 0x90
_test:
.cfi_startproc
## Signed test of the loop bound: zero or negative skips the loop entirely.
testq %rdi, %rdi
jle LBB0_1
## %esi = 3 is the 32-bit value the body chains through.
movl $3, %esi
## %rdx is the loop counter in this variant; starting at -1 makes the first pass compute index 0.
movq $-1, %rdx
## %cl = 3 is the byte source of both crc32b instructions; never modified in the loop.
movb $3, %cl
.p2align 4, 0x90
LBB0_3:
## InlineAsm Start
## The chain runs %esi -> %eax (shlxl, cmovol) -> %esi (movl and everything after).
## %eax is only scratch here: the leaq after the body overwrites %rax before it is
## used as the return value. cmovol, cmovsl and rcrl additionally read flags left by
## earlier instructions.
shlxl %esi, %esi, %eax
cmovol %eax, %eax
movl %eax, %esi
cmovsl %esi, %esi
crc32b %cl, %esi
shrl %esi
addl %esi, %esi
crc32b %cl, %esi
rcrl %esi
shrl %esi
## InlineAsm End
## Loop control: %rax = %rdx + 1 is this pass's index, %rdx + 2 is compared against
## the bound, then %rdx is reloaded from %rax. The mov sits between cmp and jl but
## does not alter the flags the branch reads.
leaq 1(%rdx), %rax
addq $2, %rdx
cmpq %rdi, %rdx
movq %rax, %rdx
## Signed compare: continue while index + 1 < %rdi.
jl LBB0_3
## Loop exit: %rax still holds the index of the last pass.
retq
LBB0_1:
## Bound was <= 0: return 0 without executing the body.
xorl %eax, %eax
retq
.cfi_endproc
.subsections_via_symbols
## Detailed Results
{'arguments': (24008543,),
'frequency': 2600000000.0,
'iterations': 24008543,
'parallel_factor': 1,
'returned': [24008542, 24008542, 24008542, 24008542],
'runtimes': [0.13333229208365083,
0.13314284407533705,
0.13381975598167628,
0.13447994901798666]}
minimal throughput: 14.42 cy
## Selected Instructions
SHRX64rr, SBB64ri32, AND64ri8, MOV64rc, INC64r, SUB64ri32, POPCNT64rr, OR64ri8, BTS64rr, ROL64ri
## Generated Assembly (1x parallel)
## Generated benchmark kernel (x86-64, AT&T syntax, Mach-O directives).
## _test: %rdi holds the loop bound. Returns 0 in %eax when %rdi <= 0; otherwise
## executes the inline-asm body once per pass and returns the index of the last
## pass (bound - 1) in %rax.
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 13
.globl _test
.p2align 4, 0x90
_test:
.cfi_startproc
## Signed test of the loop bound: zero or negative skips the loop entirely.
testq %rdi, %rdi
jle LBB0_1
## %rcx is the loop counter; starting at -1 makes the first pass compute index 0.
movq $-1, %rcx
## %rdx = 3 (the 32-bit write zero-extends) is the 64-bit value the body chains through.
movl $3, %edx
.p2align 4, 0x90
LBB0_3:
## InlineAsm Start
## The chain runs %rdx -> %rax (shrxq through subq) -> %rdx (popcntq through rolq).
## %rax is only scratch here: the leaq after the body overwrites it before it is
## used as the return value. sbbq additionally reads the carry flag left by an
## earlier instruction.
shrxq %rdx, %rdx, %rax
sbbq $1, %rax
andq $1, %rax
movq %rax, %rax
incq %rax
subq $1, %rax
popcntq %rax, %rdx
orq $1, %rdx
btsq %rdx, %rdx
rolq %rdx
## InlineAsm End
## Loop control: %rax = %rcx + 1 is this pass's index, %rcx + 2 is compared against
## the bound, then %rcx is reloaded from %rax. The mov sits between cmp and jl but
## does not alter the flags the branch reads.
leaq 1(%rcx), %rax
addq $2, %rcx
cmpq %rdi, %rcx
movq %rax, %rcx
## Signed compare: continue while index + 1 < %rdi.
jl LBB0_3
## Loop exit: %rax still holds the index of the last pass.
retq
LBB0_1:
## Bound was <= 0: return 0 without executing the body.
xorl %eax, %eax
retq
.cfi_endproc
.subsections_via_symbols
## Detailed Results
{'arguments': (27539225,),
'frequency': 2600000000.0,
'iterations': 27539225,
'parallel_factor': 1,
'returned': [27539224, 27539224, 27539224, 27539224],
'runtimes': [0.1335972750093788,
0.13322542910464108,
0.13357082300353795,
0.13376462296582758]}
minimal throughput: 12.58 cy
## Selected Instructions
SAR8r1, SHR8ri, INC8r, AND8rr, RCR8ri, ROL8ri, SUB8ri, SBB8rr, NEG8r, NOT8r
## Generated Assembly (1x parallel)
## Generated benchmark kernel (x86-64, AT&T syntax, Mach-O directives).
## _test: %rdi holds the loop bound. Returns 0 in %eax when %rdi <= 0; otherwise
## executes the inline-asm body once per pass and returns the index of the last
## pass (bound - 1) in %rax.
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 13
.globl _test
.p2align 4, 0x90
_test:
.cfi_startproc
## Signed test of the loop bound: zero or negative skips the loop entirely.
testq %rdi, %rdi
jle LBB0_1
## %cl = 3 is the 8-bit operand the body works on (8-bit write; the rest of %rcx is not set here).
movb $3, %cl
## %rdx is the loop counter in this variant; starting at -1 makes the first pass compute index 0.
movq $-1, %rdx
.p2align 4, 0x90
LBB0_3:
## InlineAsm Start
## All ten instructions are 8-bit forms with %cl as their only register operand,
## so each one consumes the register written by the one before it. rcrb and sbbb
## additionally read the carry flag left by an earlier instruction.
sarb %cl
shrb %cl
incb %cl
andb %cl, %cl
rcrb %cl
rolb %cl
subb $1, %cl
sbbb %cl, %cl
negb %cl
notb %cl
## InlineAsm End
## Loop control: %rax = %rdx + 1 is this pass's index, %rdx + 2 is compared against
## the bound, then %rdx is reloaded from %rax. The mov sits between cmp and jl but
## does not alter the flags the branch reads.
leaq 1(%rdx), %rax
addq $2, %rdx
cmpq %rdi, %rdx
movq %rax, %rdx
## Signed compare: continue while index + 1 < %rdi.
jl LBB0_3
## Loop exit: %rax still holds the index of the last pass.
retq
LBB0_1:
## Bound was <= 0: return 0 without executing the body.
xorl %eax, %eax
retq
.cfi_endproc
.subsections_via_symbols
## Detailed Results
{'arguments': (30431254,),
'frequency': 2600000000.0,
'iterations': 30431254,
'parallel_factor': 1,
'returned': [30431253, 30431253, 30431253, 30431253],
'runtimes': [0.13894746906589717,
0.1348069809610024,
0.13318019802682102,
0.13318415405228734]}
minimal throughput: 11.38 cy