mirror of
https://github.com/RRZE-HPC/asmbench.git
synced 2025-07-21 12:41:06 +02:00
451 lines
9.7 KiB
Plaintext
451 lines
9.7 KiB
Plaintext
## Selected Instructions
|
|
VPERMILPSri, MULPSrr, ANDPDrr, VPSIGNBrr, PSIGNBrr, PMOVZXWDrr, PMINUWrr, PADDSWrr, VPSHUFHWri, MOVUPDrr
|
|
## Generated Assembly (1x parallel)
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.macosx_version_min 10, 13
|
|
.section __TEXT,__literal4,4byte_literals
|
|
.p2align 2
|
|
LCPI0_0:
|
|
.long 1065361408
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.globl _test
|
|
.p2align 4, 0x90
|
|
_test:
|
|
.cfi_startproc
|
|
testq %rdi, %rdi
|
|
jle LBB0_1
|
|
movabsq $LCPI0_0, %rax
|
|
vbroadcastss (%rax), %xmm0
|
|
movq $-1, %rcx
|
|
.p2align 4, 0x90
|
|
LBB0_3:
|
|
## InlineAsm Start
|
|
vpermilps $1, %xmm0, %xmm0
|
|
mulps %xmm0, %xmm0
|
|
andpd %xmm0, %xmm0
|
|
vpsignb %xmm0, %xmm0, %xmm0
|
|
psignb %xmm0, %xmm0
|
|
pmovzxwd %xmm0, %xmm0
|
|
pminuw %xmm0, %xmm0
|
|
paddsw %xmm0, %xmm0
|
|
vpshufhw $1, %xmm0, %xmm0
|
|
movupd %xmm0, %xmm0
|
|
## InlineAsm End
|
|
leaq 1(%rcx), %rax
|
|
addq $2, %rcx
|
|
cmpq %rdi, %rcx
|
|
movq %rax, %rcx
|
|
jl LBB0_3
|
|
retq
|
|
LBB0_1:
|
|
xorl %eax, %eax
|
|
retq
|
|
.cfi_endproc
|
|
|
|
|
|
.subsections_via_symbols
|
|
|
|
## Detailed Results
|
|
{'arguments': (24655919,),
|
|
'frequency': 2600000000.0,
|
|
'iterations': 24655919,
|
|
'parallel_factor': 1,
|
|
'returned': [24655918, 24655918, 24655918, 24655918],
|
|
'runtimes': [0.13202582497615367,
|
|
0.13208268792368472,
|
|
0.13151856907643378,
|
|
0.13161470007617027]}
|
|
minimal throughput: 13.87 cy
|
|
## Selected Instructions
|
|
VFMADD132PDYr, VPADDWYrr, VFMADD132PSYr, VPADDDYrr, VSUBPDYrr, VPACKUSDWYrr, VPMULHUWYrr, VMINPDYrr, VPUNPCKLWDYrr, VBLENDVPSYrr
|
|
## Generated Assembly (1x parallel)
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.macosx_version_min 10, 13
|
|
.section __TEXT,__literal4,4byte_literals
|
|
.p2align 2
|
|
LCPI0_0:
|
|
.long 1065361408
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.globl _test
|
|
.p2align 4, 0x90
|
|
_test:
|
|
.cfi_startproc
|
|
testq %rdi, %rdi
|
|
jle LBB0_1
|
|
movabsq $LCPI0_0, %rax
|
|
vbroadcastss (%rax), %ymm0
|
|
movq $-1, %rcx
|
|
.p2align 4, 0x90
|
|
LBB0_3:
|
|
## InlineAsm Start
|
|
vfmadd132pd %ymm0, %ymm0, %ymm0
|
|
vpaddw %ymm0, %ymm0, %ymm0
|
|
vfmadd132ps %ymm0, %ymm0, %ymm0
|
|
vpaddd %ymm0, %ymm0, %ymm0
|
|
vsubpd %ymm0, %ymm0, %ymm0
|
|
vpackusdw %ymm0, %ymm0, %ymm0
|
|
vpmulhuw %ymm0, %ymm0, %ymm0
|
|
vminpd %ymm0, %ymm0, %ymm0
|
|
vpunpcklwd %ymm0, %ymm0, %ymm0
|
|
vblendvps %ymm0, %ymm0, %ymm0, %ymm0
|
|
## InlineAsm End
|
|
leaq 1(%rcx), %rax
|
|
addq $2, %rcx
|
|
cmpq %rdi, %rcx
|
|
movq %rax, %rcx
|
|
jl LBB0_3
|
|
vzeroupper
|
|
retq
|
|
LBB0_1:
|
|
xorl %eax, %eax
|
|
retq
|
|
.cfi_endproc
|
|
|
|
|
|
.subsections_via_symbols
|
|
|
|
## Detailed Results
|
|
{'arguments': (10000000,),
|
|
'frequency': 2600000000.0,
|
|
'iterations': 10000000,
|
|
'parallel_factor': 1,
|
|
'returned': [9999999, 9999999, 9999999, 9999999],
|
|
'runtimes': [0.11892832000739872,
|
|
0.11891822703182697,
|
|
0.11902078497223556,
|
|
0.12094117503147572]}
|
|
minimal throughput: 30.92 cy
|
|
## Selected Instructions
|
|
VCVTSI642SDrr, VFMADD213SDr, DIVSDrr, VCVTSI642SDrr, MAXSDrr, VFNMADD213SDr, VFMADD132SDr, VMAXSDrr, VFNMADD132SDr, SQRTSDr
|
|
## Generated Assembly (1x parallel)
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.macosx_version_min 10, 13
|
|
.section __TEXT,__literal8,8byte_literals
|
|
.p2align 3
|
|
LCPI0_0:
|
|
.quad 4607186816846528512
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.globl _test
|
|
.p2align 4, 0x90
|
|
_test:
|
|
.cfi_startproc
|
|
testq %rdi, %rdi
|
|
jle LBB0_1
|
|
movq $-1, %rcx
|
|
movabsq $LCPI0_0, %rax
|
|
vmovsd (%rax), %xmm0
|
|
movl $3, %edx
|
|
.p2align 4, 0x90
|
|
LBB0_3:
|
|
## InlineAsm Start
|
|
vcvtsi2sdq %rdx, %xmm0, %xmm0
|
|
vfmadd213sd %xmm0, %xmm0, %xmm0
|
|
divsd %xmm0, %xmm0
|
|
vcvtsi2sdq %rdx, %xmm0, %xmm0
|
|
maxsd %xmm0, %xmm0
|
|
vfnmadd213sd %xmm0, %xmm0, %xmm0
|
|
vfmadd132sd %xmm0, %xmm0, %xmm0
|
|
vmaxsd %xmm0, %xmm0, %xmm0
|
|
vfnmadd132sd %xmm0, %xmm0, %xmm0
|
|
sqrtsd %xmm0, %xmm0
|
|
## InlineAsm End
|
|
leaq 1(%rcx), %rax
|
|
addq $2, %rcx
|
|
cmpq %rdi, %rcx
|
|
movq %rax, %rcx
|
|
jl LBB0_3
|
|
retq
|
|
LBB0_1:
|
|
xorl %eax, %eax
|
|
retq
|
|
.cfi_endproc
|
|
|
|
|
|
.subsections_via_symbols
|
|
|
|
## Detailed Results
|
|
{'arguments': (5841530,),
|
|
'frequency': 2600000000.0,
|
|
'iterations': 5841530,
|
|
'parallel_factor': 1,
|
|
'returned': [5841529, 5841529, 5841529, 5841529],
|
|
'runtimes': [0.13433505699504167,
|
|
0.13318849296774715,
|
|
0.13303690601605922,
|
|
0.13309408095665276]}
|
|
minimal throughput: 59.21 cy
|
|
## Selected Instructions
|
|
RCPSSr, VCVTSI2SSrr, MULSSrr, VCVTSD2SSrr, VROUNDSSr, VRCPSSr, VCVTSI2SSrr, VSQRTSSr, VFNMADD231SSr, VSQRTSSr
|
|
## Generated Assembly (1x parallel)
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.macosx_version_min 10, 13
|
|
.section __TEXT,__literal4,4byte_literals
|
|
.p2align 2
|
|
LCPI0_0:
|
|
.long 1065361408
|
|
.section __TEXT,__literal8,8byte_literals
|
|
.p2align 3
|
|
LCPI0_1:
|
|
.quad 4607186816846528512
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.globl _test
|
|
.p2align 4, 0x90
|
|
_test:
|
|
.cfi_startproc
|
|
testq %rdi, %rdi
|
|
jle LBB0_1
|
|
movq $-1, %rcx
|
|
movabsq $LCPI0_0, %rax
|
|
vmovss (%rax), %xmm1
|
|
movl $3, %edx
|
|
movabsq $LCPI0_1, %rax
|
|
vmovsd (%rax), %xmm0
|
|
.p2align 4, 0x90
|
|
LBB0_3:
|
|
## InlineAsm Start
|
|
rcpss %xmm1, %xmm1
|
|
vcvtsi2ssl %edx, %xmm1, %xmm1
|
|
mulss %xmm1, %xmm1
|
|
vcvtsd2ss %xmm0, %xmm1, %xmm1
|
|
vroundss $1, %xmm1, %xmm1, %xmm1
|
|
vrcpss %xmm1, %xmm1, %xmm1
|
|
vcvtsi2ssl %edx, %xmm1, %xmm1
|
|
vsqrtss %xmm1, %xmm1, %xmm1
|
|
vfnmadd231ss %xmm1, %xmm1, %xmm1
|
|
vsqrtss %xmm1, %xmm1, %xmm1
|
|
## InlineAsm End
|
|
leaq 1(%rcx), %rax
|
|
addq $2, %rcx
|
|
cmpq %rdi, %rcx
|
|
movq %rax, %rcx
|
|
jl LBB0_3
|
|
retq
|
|
LBB0_1:
|
|
xorl %eax, %eax
|
|
retq
|
|
.cfi_endproc
|
|
|
|
|
|
.subsections_via_symbols
|
|
|
|
## Detailed Results
|
|
{'arguments': (6011291,),
|
|
'frequency': 2600000000.0,
|
|
'iterations': 6011291,
|
|
'parallel_factor': 1,
|
|
'returned': [6011290, 6011290, 6011290, 6011290],
|
|
'runtimes': [0.13239118899218738,
|
|
0.13244657206814736,
|
|
0.1326694720191881,
|
|
0.13262002903502434]}
|
|
minimal throughput: 57.26 cy
|
|
## Selected Instructions
|
|
ROR16ri, CMOVS16rr, SBB16ri, ADC16ri8, XOR16ri8, BTR16rr, XOR16ri8, SAR16r1, DEC16r, SUB16ri
|
|
## Generated Assembly (1x parallel)
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.macosx_version_min 10, 13
|
|
.globl _test
|
|
.p2align 4, 0x90
|
|
_test:
|
|
.cfi_startproc
|
|
testq %rdi, %rdi
|
|
jle LBB0_1
|
|
movw $3, %cx
|
|
movq $-1, %rdx
|
|
.p2align 4, 0x90
|
|
LBB0_3:
|
|
## InlineAsm Start
|
|
rorw %cx
|
|
cmovsw %cx, %cx
|
|
sbbw $1, %cx
|
|
adcw $1, %cx
|
|
xorw $1, %cx
|
|
btrw %cx, %cx
|
|
xorw $1, %cx
|
|
sarw %cx
|
|
decw %cx
|
|
subw $1, %cx
|
|
## InlineAsm End
|
|
leaq 1(%rdx), %rax
|
|
addq $2, %rdx
|
|
cmpq %rdi, %rdx
|
|
movq %rax, %rdx
|
|
jl LBB0_3
|
|
retq
|
|
LBB0_1:
|
|
xorl %eax, %eax
|
|
retq
|
|
.cfi_endproc
|
|
|
|
|
|
.subsections_via_symbols
|
|
|
|
## Detailed Results
|
|
{'arguments': (31283731,),
|
|
'frequency': 2600000000.0,
|
|
'iterations': 31283731,
|
|
'parallel_factor': 1,
|
|
'returned': [31283730, 31283730, 31283730, 31283730],
|
|
'runtimes': [0.13291946100071073,
|
|
0.13294463406782597,
|
|
0.1332225619116798,
|
|
0.13287500606384128]}
|
|
minimal throughput: 11.04 cy
|
|
## Selected Instructions
|
|
SHLX32rr, CMOVO32rr, MOV32rr, CMOVS32rr, CRC32r32r8, SHR32r1, ADD32rr, CRC32r32r8, RCR32ri, SHR32r1
|
|
## Generated Assembly (1x parallel)
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.macosx_version_min 10, 13
|
|
.globl _test
|
|
.p2align 4, 0x90
|
|
_test:
|
|
.cfi_startproc
|
|
testq %rdi, %rdi
|
|
jle LBB0_1
|
|
movl $3, %esi
|
|
movq $-1, %rdx
|
|
movb $3, %cl
|
|
.p2align 4, 0x90
|
|
LBB0_3:
|
|
## InlineAsm Start
|
|
shlxl %esi, %esi, %eax
|
|
cmovol %eax, %eax
|
|
movl %eax, %esi
|
|
cmovsl %esi, %esi
|
|
crc32b %cl, %esi
|
|
shrl %esi
|
|
addl %esi, %esi
|
|
crc32b %cl, %esi
|
|
rcrl %esi
|
|
shrl %esi
|
|
## InlineAsm End
|
|
leaq 1(%rdx), %rax
|
|
addq $2, %rdx
|
|
cmpq %rdi, %rdx
|
|
movq %rax, %rdx
|
|
jl LBB0_3
|
|
retq
|
|
LBB0_1:
|
|
xorl %eax, %eax
|
|
retq
|
|
.cfi_endproc
|
|
|
|
|
|
.subsections_via_symbols
|
|
|
|
## Detailed Results
|
|
{'arguments': (24008543,),
|
|
'frequency': 2600000000.0,
|
|
'iterations': 24008543,
|
|
'parallel_factor': 1,
|
|
'returned': [24008542, 24008542, 24008542, 24008542],
|
|
'runtimes': [0.13333229208365083,
|
|
0.13314284407533705,
|
|
0.13381975598167628,
|
|
0.13447994901798666]}
|
|
minimal throughput: 14.42 cy
|
|
## Selected Instructions
|
|
SHRX64rr, SBB64ri32, AND64ri8, MOV64rc, INC64r, SUB64ri32, POPCNT64rr, OR64ri8, BTS64rr, ROL64ri
|
|
## Generated Assembly (1x parallel)
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.macosx_version_min 10, 13
|
|
.globl _test
|
|
.p2align 4, 0x90
|
|
_test:
|
|
.cfi_startproc
|
|
testq %rdi, %rdi
|
|
jle LBB0_1
|
|
movq $-1, %rcx
|
|
movl $3, %edx
|
|
.p2align 4, 0x90
|
|
LBB0_3:
|
|
## InlineAsm Start
|
|
shrxq %rdx, %rdx, %rax
|
|
sbbq $1, %rax
|
|
andq $1, %rax
|
|
movq %rax, %rax
|
|
incq %rax
|
|
subq $1, %rax
|
|
popcntq %rax, %rdx
|
|
orq $1, %rdx
|
|
btsq %rdx, %rdx
|
|
rolq %rdx
|
|
## InlineAsm End
|
|
leaq 1(%rcx), %rax
|
|
addq $2, %rcx
|
|
cmpq %rdi, %rcx
|
|
movq %rax, %rcx
|
|
jl LBB0_3
|
|
retq
|
|
LBB0_1:
|
|
xorl %eax, %eax
|
|
retq
|
|
.cfi_endproc
|
|
|
|
|
|
.subsections_via_symbols
|
|
|
|
## Detailed Results
|
|
{'arguments': (27539225,),
|
|
'frequency': 2600000000.0,
|
|
'iterations': 27539225,
|
|
'parallel_factor': 1,
|
|
'returned': [27539224, 27539224, 27539224, 27539224],
|
|
'runtimes': [0.1335972750093788,
|
|
0.13322542910464108,
|
|
0.13357082300353795,
|
|
0.13376462296582758]}
|
|
minimal throughput: 12.58 cy
|
|
## Selected Instructions
|
|
SAR8r1, SHR8ri, INC8r, AND8rr, RCR8ri, ROL8ri, SUB8ri, SBB8rr, NEG8r, NOT8r
|
|
## Generated Assembly (1x parallel)
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.macosx_version_min 10, 13
|
|
.globl _test
|
|
.p2align 4, 0x90
|
|
_test:
|
|
.cfi_startproc
|
|
testq %rdi, %rdi
|
|
jle LBB0_1
|
|
movb $3, %cl
|
|
movq $-1, %rdx
|
|
.p2align 4, 0x90
|
|
LBB0_3:
|
|
## InlineAsm Start
|
|
sarb %cl
|
|
shrb %cl
|
|
incb %cl
|
|
andb %cl, %cl
|
|
rcrb %cl
|
|
rolb %cl
|
|
subb $1, %cl
|
|
sbbb %cl, %cl
|
|
negb %cl
|
|
notb %cl
|
|
## InlineAsm End
|
|
leaq 1(%rdx), %rax
|
|
addq $2, %rdx
|
|
cmpq %rdi, %rdx
|
|
movq %rax, %rdx
|
|
jl LBB0_3
|
|
retq
|
|
LBB0_1:
|
|
xorl %eax, %eax
|
|
retq
|
|
.cfi_endproc
|
|
|
|
|
|
.subsections_via_symbols
|
|
|
|
## Detailed Results
|
|
{'arguments': (30431254,),
|
|
'frequency': 2600000000.0,
|
|
'iterations': 30431254,
|
|
'parallel_factor': 1,
|
|
'returned': [30431253, 30431253, 30431253, 30431253],
|
|
'runtimes': [0.13894746906589717,
|
|
0.1348069809610024,
|
|
0.13318019802682102,
|
|
0.13318415405228734]}
|
|
minimal throughput: 11.38 cy
|