mirror of
https://github.com/RRZE-HPC/asmbench.git
synced 2025-07-21 04:31:05 +02:00
1139 lines
24 KiB
Plaintext
1139 lines
24 KiB
Plaintext
## Selected Instructions
|
|
VPERMILPSri, MULPSrr, ANDPDrr, VPSIGNBrr, PSIGNBrr, PMOVZXWDrr, PMINUWrr, PADDSWrr, VPSHUFHWri, MOVUPDrr
|
|
## Generated Assembly (8x parallel)
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.macosx_version_min 10, 13
|
|
.section __TEXT,__literal4,4byte_literals
|
|
.p2align 2
|
|
LCPI0_0:
|
|
.long 1065361408
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.globl _test
|
|
.p2align 4, 0x90
|
|
_test:
|
|
.cfi_startproc
|
|
testq %rdi, %rdi
|
|
jle LBB0_1
|
|
movabsq $LCPI0_0, %rax
|
|
vbroadcastss (%rax), %xmm0
|
|
movq $-1, %rcx
|
|
vmovaps %xmm0, %xmm1
|
|
vmovaps %xmm0, %xmm2
|
|
vmovaps %xmm0, %xmm4
|
|
vmovaps %xmm0, %xmm3
|
|
vmovaps %xmm0, %xmm5
|
|
vmovaps %xmm0, %xmm6
|
|
vmovaps %xmm0, %xmm7
|
|
.p2align 4, 0x90
|
|
LBB0_3:
|
|
## InlineAsm Start
|
|
vpermilps $1, %xmm0, %xmm0
|
|
mulps %xmm0, %xmm0
|
|
andpd %xmm0, %xmm0
|
|
vpsignb %xmm0, %xmm0, %xmm0
|
|
psignb %xmm0, %xmm0
|
|
pmovzxwd %xmm0, %xmm0
|
|
pminuw %xmm0, %xmm0
|
|
paddsw %xmm0, %xmm0
|
|
vpshufhw $1, %xmm0, %xmm0
|
|
vpermilps $1, %xmm1, %xmm1
|
|
movupd %xmm0, %xmm0
|
|
mulps %xmm1, %xmm1
|
|
andpd %xmm1, %xmm1
|
|
vpsignb %xmm1, %xmm1, %xmm1
|
|
psignb %xmm1, %xmm1
|
|
pmovzxwd %xmm1, %xmm1
|
|
pminuw %xmm1, %xmm1
|
|
paddsw %xmm1, %xmm1
|
|
vpshufhw $1, %xmm1, %xmm1
|
|
movupd %xmm1, %xmm1
|
|
vpermilps $1, %xmm2, %xmm2
|
|
mulps %xmm2, %xmm2
|
|
andpd %xmm2, %xmm2
|
|
vpsignb %xmm2, %xmm2, %xmm2
|
|
psignb %xmm2, %xmm2
|
|
pmovzxwd %xmm2, %xmm2
|
|
pminuw %xmm2, %xmm2
|
|
paddsw %xmm2, %xmm2
|
|
vpshufhw $1, %xmm2, %xmm2
|
|
movupd %xmm2, %xmm2
|
|
vpermilps $1, %xmm4, %xmm4
|
|
mulps %xmm4, %xmm4
|
|
andpd %xmm4, %xmm4
|
|
vpsignb %xmm4, %xmm4, %xmm4
|
|
psignb %xmm4, %xmm4
|
|
pmovzxwd %xmm4, %xmm4
|
|
pminuw %xmm4, %xmm4
|
|
paddsw %xmm4, %xmm4
|
|
vpshufhw $1, %xmm4, %xmm4
|
|
vpermilps $1, %xmm3, %xmm3
|
|
movupd %xmm4, %xmm4
|
|
mulps %xmm3, %xmm3
|
|
andpd %xmm3, %xmm3
|
|
vpsignb %xmm3, %xmm3, %xmm3
|
|
psignb %xmm3, %xmm3
|
|
pmovzxwd %xmm3, %xmm3
|
|
pminuw %xmm3, %xmm3
|
|
paddsw %xmm3, %xmm3
|
|
vpshufhw $1, %xmm3, %xmm3
|
|
movupd %xmm3, %xmm3
|
|
vpermilps $1, %xmm5, %xmm5
|
|
mulps %xmm5, %xmm5
|
|
andpd %xmm5, %xmm5
|
|
vpsignb %xmm5, %xmm5, %xmm5
|
|
psignb %xmm5, %xmm5
|
|
pmovzxwd %xmm5, %xmm5
|
|
pminuw %xmm5, %xmm5
|
|
paddsw %xmm5, %xmm5
|
|
vpshufhw $1, %xmm5, %xmm5
|
|
movupd %xmm5, %xmm5
|
|
vpermilps $1, %xmm6, %xmm6
|
|
mulps %xmm6, %xmm6
|
|
andpd %xmm6, %xmm6
|
|
vpsignb %xmm6, %xmm6, %xmm6
|
|
psignb %xmm6, %xmm6
|
|
pmovzxwd %xmm6, %xmm6
|
|
pminuw %xmm6, %xmm6
|
|
paddsw %xmm6, %xmm6
|
|
vpshufhw $1, %xmm6, %xmm6
|
|
vpermilps $1, %xmm7, %xmm7
|
|
movupd %xmm6, %xmm6
|
|
mulps %xmm7, %xmm7
|
|
andpd %xmm7, %xmm7
|
|
vpsignb %xmm7, %xmm7, %xmm7
|
|
psignb %xmm7, %xmm7
|
|
pmovzxwd %xmm7, %xmm7
|
|
pminuw %xmm7, %xmm7
|
|
paddsw %xmm7, %xmm7
|
|
vpshufhw $1, %xmm7, %xmm7
|
|
## InlineAsm End
|
|
leaq 1(%rcx), %rax
|
|
addq $2, %rcx
|
|
cmpq %rdi, %rcx
|
|
movq %rax, %rcx
|
|
## InlineAsm Start
|
|
movupd %xmm7, %xmm7
|
|
## InlineAsm End
|
|
jl LBB0_3
|
|
retq
|
|
LBB0_1:
|
|
xorl %eax, %eax
|
|
retq
|
|
.cfi_endproc
|
|
|
|
|
|
.subsections_via_symbols
|
|
|
|
## Detailed Results
|
|
{'arguments': (10000000,),
|
|
'frequency': 2600000000.0,
|
|
'iterations': 10000000,
|
|
'parallel_factor': 8,
|
|
'returned': [9999999, 9999999, 9999999, 9999999],
|
|
'runtimes': [0.12005576398223639,
|
|
0.12028825294692069,
|
|
0.1209630100056529,
|
|
0.11989319801796228]}
|
|
minimal throughput: 3.90 cy
|
|
## Selected Instructions
|
|
VFMADD132PDYr, VPADDWYrr, VFMADD132PSYr, VPADDDYrr, VSUBPDYrr, VPACKUSDWYrr, VPMULHUWYrr, VMINPDYrr, VPUNPCKLWDYrr, VBLENDVPSYrr
|
|
## Generated Assembly (8x parallel)
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.macosx_version_min 10, 13
|
|
.section __TEXT,__literal4,4byte_literals
|
|
.p2align 2
|
|
LCPI0_0:
|
|
.long 1065361408
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.globl _test
|
|
.p2align 4, 0x90
|
|
_test:
|
|
.cfi_startproc
|
|
testq %rdi, %rdi
|
|
jle LBB0_1
|
|
movabsq $LCPI0_0, %rax
|
|
vbroadcastss (%rax), %ymm0
|
|
movq $-1, %rcx
|
|
vmovaps %ymm0, %ymm1
|
|
vmovaps %ymm0, %ymm2
|
|
vmovaps %ymm0, %ymm3
|
|
vmovaps %ymm0, %ymm6
|
|
vmovaps %ymm0, %ymm5
|
|
vmovaps %ymm0, %ymm7
|
|
vmovaps %ymm0, %ymm8
|
|
.p2align 4, 0x90
|
|
LBB0_3:
|
|
vmovaps %ymm2, %ymm4
|
|
## InlineAsm Start
|
|
vfmadd132pd %ymm3, %ymm6, %ymm4
|
|
## InlineAsm End
|
|
vmovaps %ymm1, %ymm9
|
|
## InlineAsm Start
|
|
vfmadd132pd %ymm2, %ymm3, %ymm9
|
|
vfmadd132pd %ymm6, %ymm5, %ymm3
|
|
vfmadd132pd %ymm5, %ymm7, %ymm6
|
|
vfmadd132pd %ymm7, %ymm8, %ymm5
|
|
vfmadd132pd %ymm8, %ymm0, %ymm7
|
|
vfmadd132pd %ymm0, %ymm1, %ymm8
|
|
vfmadd132pd %ymm1, %ymm2, %ymm0
|
|
vpaddw %ymm0, %ymm0, %ymm0
|
|
vfmadd132ps %ymm0, %ymm0, %ymm0
|
|
vpaddd %ymm0, %ymm0, %ymm0
|
|
vsubpd %ymm0, %ymm0, %ymm0
|
|
vpackusdw %ymm0, %ymm0, %ymm0
|
|
vpmulhuw %ymm0, %ymm0, %ymm0
|
|
vminpd %ymm0, %ymm0, %ymm0
|
|
vpunpcklwd %ymm0, %ymm0, %ymm0
|
|
vblendvps %ymm0, %ymm0, %ymm0, %ymm0
|
|
vpaddw %ymm3, %ymm3, %ymm1
|
|
vfmadd132ps %ymm1, %ymm1, %ymm1
|
|
vpaddd %ymm1, %ymm1, %ymm1
|
|
vsubpd %ymm1, %ymm1, %ymm1
|
|
vpackusdw %ymm1, %ymm1, %ymm1
|
|
vpmulhuw %ymm1, %ymm1, %ymm1
|
|
vminpd %ymm1, %ymm1, %ymm1
|
|
vpunpcklwd %ymm1, %ymm1, %ymm1
|
|
vblendvps %ymm1, %ymm1, %ymm1, %ymm1
|
|
vpaddw %ymm7, %ymm7, %ymm2
|
|
vfmadd132ps %ymm2, %ymm2, %ymm2
|
|
vpaddd %ymm2, %ymm2, %ymm2
|
|
vsubpd %ymm2, %ymm2, %ymm2
|
|
vpackusdw %ymm2, %ymm2, %ymm2
|
|
vpmulhuw %ymm2, %ymm2, %ymm2
|
|
vminpd %ymm2, %ymm2, %ymm2
|
|
vpunpcklwd %ymm2, %ymm2, %ymm2
|
|
vblendvps %ymm2, %ymm2, %ymm2, %ymm2
|
|
vpaddw %ymm9, %ymm9, %ymm3
|
|
vfmadd132ps %ymm3, %ymm3, %ymm3
|
|
vpaddd %ymm3, %ymm3, %ymm3
|
|
vsubpd %ymm3, %ymm3, %ymm3
|
|
vpackusdw %ymm3, %ymm3, %ymm3
|
|
vpmulhuw %ymm3, %ymm3, %ymm3
|
|
vminpd %ymm3, %ymm3, %ymm3
|
|
vpunpcklwd %ymm3, %ymm3, %ymm3
|
|
vblendvps %ymm3, %ymm3, %ymm3, %ymm3
|
|
vpaddw %ymm6, %ymm6, %ymm6
|
|
vfmadd132ps %ymm6, %ymm6, %ymm6
|
|
vpaddd %ymm6, %ymm6, %ymm6
|
|
vsubpd %ymm6, %ymm6, %ymm6
|
|
vpackusdw %ymm6, %ymm6, %ymm6
|
|
vpmulhuw %ymm6, %ymm6, %ymm6
|
|
vminpd %ymm6, %ymm6, %ymm6
|
|
vpunpcklwd %ymm6, %ymm6, %ymm6
|
|
vblendvps %ymm6, %ymm6, %ymm6, %ymm6
|
|
vpaddw %ymm8, %ymm8, %ymm7
|
|
vfmadd132ps %ymm7, %ymm7, %ymm7
|
|
vpaddd %ymm7, %ymm7, %ymm7
|
|
vsubpd %ymm7, %ymm7, %ymm7
|
|
vpackusdw %ymm7, %ymm7, %ymm7
|
|
vpmulhuw %ymm7, %ymm7, %ymm7
|
|
vminpd %ymm7, %ymm7, %ymm7
|
|
vpunpcklwd %ymm7, %ymm7, %ymm7
|
|
vpaddw %ymm5, %ymm5, %ymm8
|
|
vblendvps %ymm7, %ymm7, %ymm7, %ymm5
|
|
vpaddw %ymm4, %ymm4, %ymm4
|
|
vfmadd132ps %ymm4, %ymm4, %ymm4
|
|
vpaddd %ymm4, %ymm4, %ymm4
|
|
vsubpd %ymm4, %ymm4, %ymm4
|
|
vpackusdw %ymm4, %ymm4, %ymm4
|
|
vpmulhuw %ymm4, %ymm4, %ymm4
|
|
vminpd %ymm4, %ymm4, %ymm4
|
|
vpunpcklwd %ymm4, %ymm4, %ymm4
|
|
vblendvps %ymm4, %ymm4, %ymm4, %ymm7
|
|
vfmadd132ps %ymm8, %ymm8, %ymm8
|
|
vpaddd %ymm8, %ymm8, %ymm4
|
|
vsubpd %ymm4, %ymm4, %ymm4
|
|
vpackusdw %ymm4, %ymm4, %ymm4
|
|
vpmulhuw %ymm4, %ymm4, %ymm4
|
|
vminpd %ymm4, %ymm4, %ymm4
|
|
vpunpcklwd %ymm4, %ymm4, %ymm4
|
|
vblendvps %ymm4, %ymm4, %ymm4, %ymm8
|
|
## InlineAsm End
|
|
leaq 1(%rcx), %rax
|
|
addq $2, %rcx
|
|
cmpq %rdi, %rcx
|
|
movq %rax, %rcx
|
|
jl LBB0_3
|
|
vzeroupper
|
|
retq
|
|
LBB0_1:
|
|
xorl %eax, %eax
|
|
retq
|
|
.cfi_endproc
|
|
|
|
|
|
.subsections_via_symbols
|
|
|
|
## Detailed Results
|
|
{'arguments': (10000000,),
|
|
'frequency': 2600000000.0,
|
|
'iterations': 10000000,
|
|
'parallel_factor': 8,
|
|
'returned': [9999999, 9999999, 9999999, 9999999],
|
|
'runtimes': [0.17754955508280545,
|
|
0.17602652800269425,
|
|
0.17718603508546948,
|
|
0.17694135499186814]}
|
|
minimal throughput: 5.72 cy
|
|
## Selected Instructions
|
|
VCVTSI642SDrr, VFMADD213SDr, DIVSDrr, VCVTSI642SDrr, MAXSDrr, VFNMADD213SDr, VFMADD132SDr, VMAXSDrr, VFNMADD132SDr, SQRTSDr
|
|
## Generated Assembly (8x parallel)
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.macosx_version_min 10, 13
|
|
.section __TEXT,__literal8,8byte_literals
|
|
.p2align 3
|
|
LCPI0_0:
|
|
.quad 4607186816846528512
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.globl _test
|
|
.p2align 4, 0x90
|
|
_test:
|
|
.cfi_startproc
|
|
testq %rdi, %rdi
|
|
jle LBB0_1
|
|
movq $-1, %rcx
|
|
movabsq $LCPI0_0, %rax
|
|
vmovsd (%rax), %xmm0
|
|
movl $3, %edx
|
|
vmovaps %xmm0, %xmm1
|
|
vmovaps %xmm0, %xmm2
|
|
vmovaps %xmm0, %xmm3
|
|
vmovaps %xmm0, %xmm4
|
|
vmovaps %xmm0, %xmm5
|
|
vmovaps %xmm0, %xmm6
|
|
vmovaps %xmm0, %xmm7
|
|
.p2align 4, 0x90
|
|
LBB0_3:
|
|
## InlineAsm Start
|
|
vcvtsi2sdq %rdx, %xmm0, %xmm0
|
|
vfmadd213sd %xmm0, %xmm0, %xmm0
|
|
divsd %xmm0, %xmm0
|
|
vcvtsi2sdq %rdx, %xmm0, %xmm0
|
|
maxsd %xmm0, %xmm0
|
|
vfnmadd213sd %xmm0, %xmm0, %xmm0
|
|
vfmadd132sd %xmm0, %xmm0, %xmm0
|
|
vmaxsd %xmm0, %xmm0, %xmm0
|
|
vfnmadd132sd %xmm0, %xmm0, %xmm0
|
|
sqrtsd %xmm0, %xmm0
|
|
vcvtsi2sdq %rdx, %xmm1, %xmm1
|
|
vfmadd213sd %xmm1, %xmm1, %xmm1
|
|
divsd %xmm1, %xmm1
|
|
vcvtsi2sdq %rdx, %xmm1, %xmm1
|
|
maxsd %xmm1, %xmm1
|
|
vfnmadd213sd %xmm1, %xmm1, %xmm1
|
|
vfmadd132sd %xmm1, %xmm1, %xmm1
|
|
vmaxsd %xmm1, %xmm1, %xmm1
|
|
vfnmadd132sd %xmm1, %xmm1, %xmm1
|
|
sqrtsd %xmm1, %xmm1
|
|
vcvtsi2sdq %rdx, %xmm2, %xmm2
|
|
vfmadd213sd %xmm2, %xmm2, %xmm2
|
|
divsd %xmm2, %xmm2
|
|
vcvtsi2sdq %rdx, %xmm2, %xmm2
|
|
maxsd %xmm2, %xmm2
|
|
vfnmadd213sd %xmm2, %xmm2, %xmm2
|
|
vfmadd132sd %xmm2, %xmm2, %xmm2
|
|
vmaxsd %xmm2, %xmm2, %xmm2
|
|
vfnmadd132sd %xmm2, %xmm2, %xmm2
|
|
sqrtsd %xmm2, %xmm2
|
|
vcvtsi2sdq %rdx, %xmm3, %xmm3
|
|
vfmadd213sd %xmm3, %xmm3, %xmm3
|
|
divsd %xmm3, %xmm3
|
|
vcvtsi2sdq %rdx, %xmm3, %xmm3
|
|
maxsd %xmm3, %xmm3
|
|
vfnmadd213sd %xmm3, %xmm3, %xmm3
|
|
vfmadd132sd %xmm3, %xmm3, %xmm3
|
|
vmaxsd %xmm3, %xmm3, %xmm3
|
|
vfnmadd132sd %xmm3, %xmm3, %xmm3
|
|
sqrtsd %xmm3, %xmm3
|
|
vcvtsi2sdq %rdx, %xmm4, %xmm4
|
|
vfmadd213sd %xmm4, %xmm4, %xmm4
|
|
divsd %xmm4, %xmm4
|
|
vcvtsi2sdq %rdx, %xmm4, %xmm4
|
|
maxsd %xmm4, %xmm4
|
|
vfnmadd213sd %xmm4, %xmm4, %xmm4
|
|
vfmadd132sd %xmm4, %xmm4, %xmm4
|
|
vmaxsd %xmm4, %xmm4, %xmm4
|
|
vfnmadd132sd %xmm4, %xmm4, %xmm4
|
|
sqrtsd %xmm4, %xmm4
|
|
vcvtsi2sdq %rdx, %xmm5, %xmm5
|
|
vfmadd213sd %xmm5, %xmm5, %xmm5
|
|
divsd %xmm5, %xmm5
|
|
vcvtsi2sdq %rdx, %xmm5, %xmm5
|
|
maxsd %xmm5, %xmm5
|
|
vfnmadd213sd %xmm5, %xmm5, %xmm5
|
|
vfmadd132sd %xmm5, %xmm5, %xmm5
|
|
vmaxsd %xmm5, %xmm5, %xmm5
|
|
vfnmadd132sd %xmm5, %xmm5, %xmm5
|
|
sqrtsd %xmm5, %xmm5
|
|
vcvtsi2sdq %rdx, %xmm6, %xmm6
|
|
vfmadd213sd %xmm6, %xmm6, %xmm6
|
|
divsd %xmm6, %xmm6
|
|
vcvtsi2sdq %rdx, %xmm6, %xmm6
|
|
maxsd %xmm6, %xmm6
|
|
vfnmadd213sd %xmm6, %xmm6, %xmm6
|
|
vfmadd132sd %xmm6, %xmm6, %xmm6
|
|
vmaxsd %xmm6, %xmm6, %xmm6
|
|
vfnmadd132sd %xmm6, %xmm6, %xmm6
|
|
sqrtsd %xmm6, %xmm6
|
|
vcvtsi2sdq %rdx, %xmm7, %xmm7
|
|
vfmadd213sd %xmm7, %xmm7, %xmm7
|
|
divsd %xmm7, %xmm7
|
|
vcvtsi2sdq %rdx, %xmm7, %xmm7
|
|
maxsd %xmm7, %xmm7
|
|
vfnmadd213sd %xmm7, %xmm7, %xmm7
|
|
vfmadd132sd %xmm7, %xmm7, %xmm7
|
|
vmaxsd %xmm7, %xmm7, %xmm7
|
|
vfnmadd132sd %xmm7, %xmm7, %xmm7
|
|
sqrtsd %xmm7, %xmm7
|
|
## InlineAsm End
|
|
leaq 1(%rcx), %rax
|
|
addq $2, %rcx
|
|
cmpq %rdi, %rcx
|
|
movq %rax, %rcx
|
|
jl LBB0_3
|
|
retq
|
|
LBB0_1:
|
|
xorl %eax, %eax
|
|
retq
|
|
.cfi_endproc
|
|
|
|
|
|
.subsections_via_symbols
|
|
|
|
## Detailed Results
|
|
{'arguments': (4751663,),
|
|
'frequency': 2600000000.0,
|
|
'iterations': 4751663,
|
|
'parallel_factor': 8,
|
|
'returned': [4751662, 4751662, 4751662, 4751662],
|
|
'runtimes': [0.1328908569412306,
|
|
0.1338977849809453,
|
|
0.1339660519734025,
|
|
0.13359365100041032]}
|
|
minimal throughput: 9.09 cy
|
|
## Selected Instructions
|
|
RCPSSr, VCVTSI2SSrr, MULSSrr, VCVTSD2SSrr, VROUNDSSr, VRCPSSr, VCVTSI2SSrr, VSQRTSSr, VFNMADD231SSr, VSQRTSSr
|
|
## Generated Assembly (8x parallel)
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.macosx_version_min 10, 13
|
|
.section __TEXT,__literal4,4byte_literals
|
|
.p2align 2
|
|
LCPI0_0:
|
|
.long 1065361408
|
|
.section __TEXT,__literal8,8byte_literals
|
|
.p2align 3
|
|
LCPI0_1:
|
|
.quad 4607186816846528512
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.globl _test
|
|
.p2align 4, 0x90
|
|
_test:
|
|
.cfi_startproc
|
|
testq %rdi, %rdi
|
|
jle LBB0_1
|
|
movq $-1, %rcx
|
|
movabsq $LCPI0_0, %rax
|
|
vmovss (%rax), %xmm2
|
|
movl $3, %edx
|
|
movabsq $LCPI0_1, %rax
|
|
vmovsd (%rax), %xmm0
|
|
vmovaps %xmm2, %xmm8
|
|
vmovaps %xmm2, %xmm9
|
|
vmovaps %xmm2, %xmm4
|
|
vmovaps %xmm2, %xmm5
|
|
vmovaps %xmm2, %xmm6
|
|
vmovaps %xmm2, %xmm7
|
|
vmovaps %xmm2, %xmm1
|
|
.p2align 4, 0x90
|
|
LBB0_3:
|
|
## InlineAsm Start
|
|
rcpss %xmm2, %xmm2
|
|
vcvtsi2ssl %edx, %xmm2, %xmm2
|
|
mulss %xmm2, %xmm2
|
|
vcvtsd2ss %xmm0, %xmm2, %xmm2
|
|
vroundss $1, %xmm2, %xmm2, %xmm2
|
|
vrcpss %xmm2, %xmm2, %xmm2
|
|
vcvtsi2ssl %edx, %xmm2, %xmm2
|
|
vsqrtss %xmm2, %xmm2, %xmm2
|
|
vfnmadd231ss %xmm2, %xmm2, %xmm2
|
|
vsqrtss %xmm2, %xmm2, %xmm2
|
|
rcpss %xmm8, %xmm3
|
|
vcvtsi2ssl %edx, %xmm3, %xmm3
|
|
mulss %xmm3, %xmm3
|
|
vcvtsd2ss %xmm0, %xmm3, %xmm3
|
|
vroundss $1, %xmm3, %xmm3, %xmm3
|
|
vrcpss %xmm3, %xmm3, %xmm3
|
|
vcvtsi2ssl %edx, %xmm3, %xmm3
|
|
vsqrtss %xmm3, %xmm3, %xmm3
|
|
vfnmadd231ss %xmm3, %xmm3, %xmm3
|
|
vsqrtss %xmm3, %xmm3, %xmm8
|
|
rcpss %xmm9, %xmm3
|
|
vcvtsi2ssl %edx, %xmm3, %xmm3
|
|
mulss %xmm3, %xmm3
|
|
vcvtsd2ss %xmm0, %xmm3, %xmm3
|
|
vroundss $1, %xmm3, %xmm3, %xmm3
|
|
vrcpss %xmm3, %xmm3, %xmm3
|
|
vcvtsi2ssl %edx, %xmm3, %xmm3
|
|
vsqrtss %xmm3, %xmm3, %xmm3
|
|
vfnmadd231ss %xmm3, %xmm3, %xmm3
|
|
vsqrtss %xmm3, %xmm3, %xmm9
|
|
rcpss %xmm4, %xmm3
|
|
vcvtsi2ssl %edx, %xmm3, %xmm3
|
|
mulss %xmm3, %xmm3
|
|
vcvtsd2ss %xmm0, %xmm3, %xmm3
|
|
vroundss $1, %xmm3, %xmm3, %xmm3
|
|
vrcpss %xmm3, %xmm3, %xmm3
|
|
vcvtsi2ssl %edx, %xmm3, %xmm3
|
|
vsqrtss %xmm3, %xmm3, %xmm3
|
|
vfnmadd231ss %xmm3, %xmm3, %xmm3
|
|
vsqrtss %xmm3, %xmm3, %xmm4
|
|
rcpss %xmm5, %xmm3
|
|
vcvtsi2ssl %edx, %xmm3, %xmm3
|
|
mulss %xmm3, %xmm3
|
|
vcvtsd2ss %xmm0, %xmm3, %xmm3
|
|
vroundss $1, %xmm3, %xmm3, %xmm3
|
|
vrcpss %xmm3, %xmm3, %xmm3
|
|
vcvtsi2ssl %edx, %xmm3, %xmm3
|
|
vsqrtss %xmm3, %xmm3, %xmm3
|
|
vfnmadd231ss %xmm3, %xmm3, %xmm3
|
|
vsqrtss %xmm3, %xmm3, %xmm5
|
|
rcpss %xmm6, %xmm3
|
|
vcvtsi2ssl %edx, %xmm3, %xmm3
|
|
mulss %xmm3, %xmm3
|
|
vcvtsd2ss %xmm0, %xmm3, %xmm3
|
|
vroundss $1, %xmm3, %xmm3, %xmm3
|
|
vrcpss %xmm3, %xmm3, %xmm3
|
|
vcvtsi2ssl %edx, %xmm3, %xmm3
|
|
vsqrtss %xmm3, %xmm3, %xmm3
|
|
vfnmadd231ss %xmm3, %xmm3, %xmm3
|
|
vsqrtss %xmm3, %xmm3, %xmm6
|
|
rcpss %xmm7, %xmm3
|
|
vcvtsi2ssl %edx, %xmm3, %xmm3
|
|
mulss %xmm3, %xmm3
|
|
vcvtsd2ss %xmm0, %xmm3, %xmm3
|
|
vroundss $1, %xmm3, %xmm3, %xmm3
|
|
vrcpss %xmm3, %xmm3, %xmm3
|
|
vcvtsi2ssl %edx, %xmm3, %xmm3
|
|
vsqrtss %xmm3, %xmm3, %xmm3
|
|
vfnmadd231ss %xmm3, %xmm3, %xmm3
|
|
vsqrtss %xmm3, %xmm3, %xmm7
|
|
rcpss %xmm1, %xmm1
|
|
vcvtsi2ssl %edx, %xmm1, %xmm1
|
|
mulss %xmm1, %xmm1
|
|
vcvtsd2ss %xmm0, %xmm1, %xmm1
|
|
vroundss $1, %xmm1, %xmm1, %xmm1
|
|
vrcpss %xmm1, %xmm1, %xmm1
|
|
vcvtsi2ssl %edx, %xmm1, %xmm1
|
|
vsqrtss %xmm1, %xmm1, %xmm1
|
|
vfnmadd231ss %xmm1, %xmm1, %xmm1
|
|
vsqrtss %xmm1, %xmm1, %xmm1
|
|
## InlineAsm End
|
|
leaq 1(%rcx), %rax
|
|
addq $2, %rcx
|
|
cmpq %rdi, %rcx
|
|
movq %rax, %rcx
|
|
jl LBB0_3
|
|
retq
|
|
LBB0_1:
|
|
xorl %eax, %eax
|
|
retq
|
|
.cfi_endproc
|
|
|
|
|
|
.subsections_via_symbols
|
|
|
|
## Detailed Results
|
|
{'arguments': (1245590,),
|
|
'frequency': 2600000000.0,
|
|
'iterations': 1245590,
|
|
'parallel_factor': 8,
|
|
'returned': [1245589, 1245589, 1245589, 1245589],
|
|
'runtimes': [0.1349610739853233,
|
|
0.1329138990258798,
|
|
0.13283832801971585,
|
|
0.1326144520426169]}
|
|
minimal throughput: 34.60 cy
|
|
## Selected Instructions
|
|
ROR16ri, CMOVS16rr, SBB16ri, ADC16ri8, XOR16ri8, BTR16rr, XOR16ri8, SAR16r1, DEC16r, SUB16ri
|
|
## Generated Assembly (8x parallel)
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.macosx_version_min 10, 13
|
|
.globl _test
|
|
.p2align 4, 0x90
|
|
_test:
|
|
.cfi_startproc
|
|
pushq %rbp
|
|
.cfi_def_cfa_offset 16
|
|
pushq %rbx
|
|
.cfi_def_cfa_offset 24
|
|
.cfi_offset %rbx, -24
|
|
.cfi_offset %rbp, -16
|
|
testq %rdi, %rdi
|
|
jle LBB0_1
|
|
movw $3, %r9w
|
|
movq $-1, %r8
|
|
movw $3, %r10w
|
|
movw $3, %r11w
|
|
movw $3, %cx
|
|
movw $3, %si
|
|
movw $3, %dx
|
|
movw $3, %bx
|
|
movw $3, %bp
|
|
.p2align 4, 0x90
|
|
LBB0_3:
|
|
## InlineAsm Start
|
|
rorw %r9w
|
|
cmovsw %r9w, %r9w
|
|
sbbw $1, %r9w
|
|
adcw $1, %r9w
|
|
xorw $1, %r9w
|
|
btrw %r9w, %r9w
|
|
xorw $1, %r9w
|
|
sarw %r9w
|
|
decw %r9w
|
|
subw $1, %r9w
|
|
rorw %r10w
|
|
cmovsw %r10w, %r10w
|
|
sbbw $1, %r10w
|
|
adcw $1, %r10w
|
|
xorw $1, %r10w
|
|
btrw %r10w, %r10w
|
|
xorw $1, %r10w
|
|
sarw %r10w
|
|
decw %r10w
|
|
subw $1, %r10w
|
|
rorw %r11w
|
|
cmovsw %r11w, %r11w
|
|
sbbw $1, %r11w
|
|
adcw $1, %r11w
|
|
xorw $1, %r11w
|
|
btrw %r11w, %r11w
|
|
xorw $1, %r11w
|
|
sarw %r11w
|
|
decw %r11w
|
|
subw $1, %r11w
|
|
rorw %cx
|
|
cmovsw %cx, %cx
|
|
sbbw $1, %cx
|
|
adcw $1, %cx
|
|
xorw $1, %cx
|
|
btrw %cx, %cx
|
|
xorw $1, %cx
|
|
sarw %cx
|
|
decw %cx
|
|
subw $1, %cx
|
|
rorw %si
|
|
cmovsw %si, %si
|
|
sbbw $1, %si
|
|
adcw $1, %si
|
|
xorw $1, %si
|
|
btrw %si, %si
|
|
xorw $1, %si
|
|
sarw %si
|
|
decw %si
|
|
subw $1, %si
|
|
rorw %dx
|
|
cmovsw %dx, %dx
|
|
sbbw $1, %dx
|
|
adcw $1, %dx
|
|
xorw $1, %dx
|
|
btrw %dx, %dx
|
|
xorw $1, %dx
|
|
sarw %dx
|
|
decw %dx
|
|
subw $1, %dx
|
|
rorw %bx
|
|
cmovsw %bx, %bx
|
|
sbbw $1, %bx
|
|
adcw $1, %bx
|
|
xorw $1, %bx
|
|
btrw %bx, %bx
|
|
xorw $1, %bx
|
|
sarw %bx
|
|
decw %bx
|
|
subw $1, %bx
|
|
rorw %bp
|
|
cmovsw %bp, %bp
|
|
sbbw $1, %bp
|
|
adcw $1, %bp
|
|
xorw $1, %bp
|
|
btrw %bp, %bp
|
|
xorw $1, %bp
|
|
sarw %bp
|
|
decw %bp
|
|
subw $1, %bp
|
|
## InlineAsm End
|
|
leaq 1(%r8), %rax
|
|
addq $2, %r8
|
|
cmpq %rdi, %r8
|
|
movq %rax, %r8
|
|
jl LBB0_3
|
|
popq %rbx
|
|
popq %rbp
|
|
retq
|
|
LBB0_1:
|
|
xorl %eax, %eax
|
|
popq %rbx
|
|
popq %rbp
|
|
retq
|
|
.cfi_endproc
|
|
|
|
|
|
.subsections_via_symbols
|
|
|
|
## Detailed Results
|
|
{'arguments': (10000000,),
|
|
'frequency': 2600000000.0,
|
|
'iterations': 10000000,
|
|
'parallel_factor': 8,
|
|
'returned': [9999999, 9999999, 9999999, 9999999],
|
|
'runtimes': [0.18081542605068535,
|
|
0.17877629201393574,
|
|
0.17950556799769402,
|
|
0.1797733639832586]}
|
|
minimal throughput: 5.81 cy
|
|
## Selected Instructions
|
|
SHLX32rr, CMOVO32rr, MOV32rr, CMOVS32rr, CRC32r32r8, SHR32r1, ADD32rr, CRC32r32r8, RCR32ri, SHR32r1
|
|
## Generated Assembly (8x parallel)
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.macosx_version_min 10, 13
|
|
.globl _test
|
|
.p2align 4, 0x90
|
|
_test:
|
|
.cfi_startproc
|
|
pushq %rbp
|
|
.cfi_def_cfa_offset 16
|
|
pushq %r15
|
|
.cfi_def_cfa_offset 24
|
|
pushq %r14
|
|
.cfi_def_cfa_offset 32
|
|
pushq %r13
|
|
.cfi_def_cfa_offset 40
|
|
pushq %r12
|
|
.cfi_def_cfa_offset 48
|
|
pushq %rbx
|
|
.cfi_def_cfa_offset 56
|
|
.cfi_offset %rbx, -56
|
|
.cfi_offset %r12, -48
|
|
.cfi_offset %r13, -40
|
|
.cfi_offset %r14, -32
|
|
.cfi_offset %r15, -24
|
|
.cfi_offset %rbp, -16
|
|
testq %rdi, %rdi
|
|
jle LBB0_1
|
|
movl $3, %r10d
|
|
movq $-1, %r9
|
|
movb $3, %r8b
|
|
movl $3, %r12d
|
|
movl $3, %r13d
|
|
movl $3, %esi
|
|
movl $3, %ebx
|
|
movl $3, %ebp
|
|
movl $3, %ecx
|
|
movl $3, %edx
|
|
.p2align 4, 0x90
|
|
LBB0_3:
|
|
## InlineAsm Start
|
|
shlxl %edx, %ecx, %r11d
|
|
shlxl %ebp, %ebx, %r14d
|
|
shlxl %esi, %r13d, %r15d
|
|
shlxl %r12d, %r10d, %eax
|
|
shlxl %edx, %ecx, %ecx
|
|
shlxl %ebp, %ebx, %edx
|
|
shlxl %esi, %r13d, %esi
|
|
shlxl %r12d, %r10d, %ebp
|
|
cmovol %ebp, %ebp
|
|
movl %ebp, %r10d
|
|
cmovsl %r10d, %r10d
|
|
crc32b %r8b, %r10d
|
|
shrl %r10d
|
|
addl %r10d, %r10d
|
|
crc32b %r8b, %r10d
|
|
rcrl %r10d
|
|
shrl %r10d
|
|
cmovol %esi, %esi
|
|
movl %esi, %r12d
|
|
cmovsl %r12d, %r12d
|
|
crc32b %r8b, %r12d
|
|
shrl %r12d
|
|
addl %r12d, %r12d
|
|
crc32b %r8b, %r12d
|
|
rcrl %r12d
|
|
shrl %r12d
|
|
cmovol %edx, %edx
|
|
movl %edx, %r13d
|
|
cmovsl %r13d, %r13d
|
|
crc32b %r8b, %r13d
|
|
shrl %r13d
|
|
addl %r13d, %r13d
|
|
crc32b %r8b, %r13d
|
|
rcrl %r13d
|
|
shrl %r13d
|
|
cmovol %ecx, %ecx
|
|
movl %ecx, %esi
|
|
cmovsl %esi, %esi
|
|
crc32b %r8b, %esi
|
|
shrl %esi
|
|
addl %esi, %esi
|
|
crc32b %r8b, %esi
|
|
rcrl %esi
|
|
shrl %esi
|
|
cmovol %eax, %eax
|
|
movl %eax, %ebx
|
|
cmovsl %ebx, %ebx
|
|
crc32b %r8b, %ebx
|
|
shrl %ebx
|
|
addl %ebx, %ebx
|
|
crc32b %r8b, %ebx
|
|
rcrl %ebx
|
|
shrl %ebx
|
|
cmovol %r15d, %r15d
|
|
movl %r15d, %ebp
|
|
cmovsl %ebp, %ebp
|
|
crc32b %r8b, %ebp
|
|
shrl %ebp
|
|
addl %ebp, %ebp
|
|
crc32b %r8b, %ebp
|
|
rcrl %ebp
|
|
shrl %ebp
|
|
cmovol %r14d, %r14d
|
|
movl %r14d, %ecx
|
|
cmovsl %ecx, %ecx
|
|
crc32b %r8b, %ecx
|
|
shrl %ecx
|
|
addl %ecx, %ecx
|
|
crc32b %r8b, %ecx
|
|
rcrl %ecx
|
|
shrl %ecx
|
|
cmovol %r11d, %r11d
|
|
movl %r11d, %edx
|
|
cmovsl %edx, %edx
|
|
crc32b %r8b, %edx
|
|
shrl %edx
|
|
addl %edx, %edx
|
|
crc32b %r8b, %edx
|
|
rcrl %edx
|
|
shrl %edx
|
|
## InlineAsm End
|
|
leaq 1(%r9), %rax
|
|
addq $2, %r9
|
|
cmpq %rdi, %r9
|
|
movq %rax, %r9
|
|
jl LBB0_3
|
|
jmp LBB0_4
|
|
LBB0_1:
|
|
xorl %eax, %eax
|
|
LBB0_4:
|
|
popq %rbx
|
|
popq %r12
|
|
popq %r13
|
|
popq %r14
|
|
popq %r15
|
|
popq %rbp
|
|
retq
|
|
.cfi_endproc
|
|
|
|
|
|
.subsections_via_symbols
|
|
|
|
## Detailed Results
|
|
{'arguments': (5002974,),
|
|
'frequency': 2600000000.0,
|
|
'iterations': 5002974,
|
|
'parallel_factor': 8,
|
|
'returned': [5002973, 5002973, 5002973, 5002973],
|
|
'runtimes': [0.1367008569650352,
|
|
0.13341521099209785,
|
|
0.13342649908736348,
|
|
0.133624273003079]}
|
|
minimal throughput: 8.67 cy
|
|
## Selected Instructions
|
|
SHRX64rr, SBB64ri32, AND64ri8, MOV64rc, INC64r, SUB64ri32, POPCNT64rr, OR64ri8, BTS64rr, ROL64ri
|
|
## Generated Assembly (8x parallel)
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.macosx_version_min 10, 13
|
|
.globl _test
|
|
.p2align 4, 0x90
|
|
_test:
|
|
.cfi_startproc
|
|
pushq %r15
|
|
.cfi_def_cfa_offset 16
|
|
pushq %r14
|
|
.cfi_def_cfa_offset 24
|
|
pushq %r13
|
|
.cfi_def_cfa_offset 32
|
|
pushq %r12
|
|
.cfi_def_cfa_offset 40
|
|
pushq %rbx
|
|
.cfi_def_cfa_offset 48
|
|
.cfi_offset %rbx, -48
|
|
.cfi_offset %r12, -40
|
|
.cfi_offset %r13, -32
|
|
.cfi_offset %r14, -24
|
|
.cfi_offset %r15, -16
|
|
testq %rdi, %rdi
|
|
jle LBB0_1
|
|
movq $-1, %r8
|
|
movl $3, %r9d
|
|
movl $3, %r11d
|
|
movl $3, %r12d
|
|
movl $3, %r13d
|
|
movl $3, %esi
|
|
movl $3, %ebx
|
|
movl $3, %ecx
|
|
movl $3, %edx
|
|
.p2align 4, 0x90
|
|
LBB0_3:
|
|
## InlineAsm Start
|
|
shrxq %rdx, %rcx, %r10
|
|
shrxq %rbx, %rsi, %r14
|
|
shrxq %r13, %r12, %r15
|
|
shrxq %r11, %r9, %rax
|
|
shrxq %rdx, %rcx, %rcx
|
|
shrxq %rbx, %rsi, %rdx
|
|
shrxq %r13, %r12, %rsi
|
|
shrxq %r11, %r9, %rbx
|
|
sbbq $1, %rbx
|
|
andq $1, %rbx
|
|
movq %rbx, %rbx
|
|
incq %rbx
|
|
subq $1, %rbx
|
|
popcntq %rbx, %r9
|
|
orq $1, %r9
|
|
btsq %r9, %r9
|
|
rolq %r9
|
|
sbbq $1, %rsi
|
|
andq $1, %rsi
|
|
movq %rsi, %rsi
|
|
incq %rsi
|
|
subq $1, %rsi
|
|
popcntq %rsi, %r11
|
|
orq $1, %r11
|
|
btsq %r11, %r11
|
|
rolq %r11
|
|
sbbq $1, %rdx
|
|
andq $1, %rdx
|
|
movq %rdx, %rdx
|
|
incq %rdx
|
|
subq $1, %rdx
|
|
popcntq %rdx, %r12
|
|
orq $1, %r12
|
|
btsq %r12, %r12
|
|
rolq %r12
|
|
sbbq $1, %rcx
|
|
andq $1, %rcx
|
|
movq %rcx, %rcx
|
|
incq %rcx
|
|
subq $1, %rcx
|
|
popcntq %rcx, %r13
|
|
orq $1, %r13
|
|
btsq %r13, %r13
|
|
rolq %r13
|
|
sbbq $1, %rax
|
|
andq $1, %rax
|
|
movq %rax, %rax
|
|
incq %rax
|
|
subq $1, %rax
|
|
popcntq %rax, %rsi
|
|
orq $1, %rsi
|
|
btsq %rsi, %rsi
|
|
rolq %rsi
|
|
sbbq $1, %r15
|
|
andq $1, %r15
|
|
movq %r15, %rax
|
|
incq %rax
|
|
subq $1, %rax
|
|
popcntq %rax, %rbx
|
|
orq $1, %rbx
|
|
btsq %rbx, %rbx
|
|
rolq %rbx
|
|
sbbq $1, %r14
|
|
andq $1, %r14
|
|
movq %r14, %rax
|
|
incq %rax
|
|
subq $1, %rax
|
|
popcntq %rax, %rcx
|
|
orq $1, %rcx
|
|
btsq %rcx, %rcx
|
|
rolq %rcx
|
|
sbbq $1, %r10
|
|
andq $1, %r10
|
|
movq %r10, %rax
|
|
incq %rax
|
|
subq $1, %rax
|
|
popcntq %rax, %rdx
|
|
orq $1, %rdx
|
|
btsq %rdx, %rdx
|
|
rolq %rdx
|
|
## InlineAsm End
|
|
leaq 1(%r8), %rax
|
|
addq $2, %r8
|
|
cmpq %rdi, %r8
|
|
movq %rax, %r8
|
|
jl LBB0_3
|
|
jmp LBB0_4
|
|
LBB0_1:
|
|
xorl %eax, %eax
|
|
LBB0_4:
|
|
popq %rbx
|
|
popq %r12
|
|
popq %r13
|
|
popq %r14
|
|
popq %r15
|
|
retq
|
|
.cfi_endproc
|
|
|
|
|
|
.subsections_via_symbols
|
|
|
|
## Detailed Results
|
|
{'arguments': (6115790,),
|
|
'frequency': 2600000000.0,
|
|
'iterations': 6115790,
|
|
'parallel_factor': 8,
|
|
'returned': [6115789, 6115789, 6115789, 6115789],
|
|
'runtimes': [0.13210842409171164,
|
|
0.13202582008671016,
|
|
0.13190629601012915,
|
|
0.1326259489869699]}
|
|
minimal throughput: 7.01 cy
|
|
## Selected Instructions
|
|
SAR8r1, SHR8ri, INC8r, AND8rr, RCR8ri, ROL8ri, SUB8ri, SBB8rr, NEG8r, NOT8r
|
|
## Generated Assembly (8x parallel)
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.macosx_version_min 10, 13
|
|
.globl _test
|
|
.p2align 4, 0x90
|
|
_test:
|
|
.cfi_startproc
|
|
pushq %rbp
|
|
.cfi_def_cfa_offset 16
|
|
pushq %rbx
|
|
.cfi_def_cfa_offset 24
|
|
.cfi_offset %rbx, -24
|
|
.cfi_offset %rbp, -16
|
|
testq %rdi, %rdi
|
|
jle LBB0_1
|
|
movb $3, %r8b
|
|
movq $-1, %rsi
|
|
movb $3, %r9b
|
|
movb $3, %r10b
|
|
movb $3, %r11b
|
|
movb $3, %bpl
|
|
movb $3, %dl
|
|
movb $3, %bl
|
|
movb $3, %cl
|
|
.p2align 4, 0x90
|
|
LBB0_3:
|
|
## InlineAsm Start
|
|
sarb %r8b
|
|
shrb %r8b
|
|
incb %r8b
|
|
andb %r8b, %r8b
|
|
rcrb %r8b
|
|
rolb %r8b
|
|
subb $1, %r8b
|
|
sbbb %r8b, %r8b
|
|
negb %r8b
|
|
notb %r8b
|
|
sarb %r9b
|
|
shrb %r9b
|
|
incb %r9b
|
|
andb %r9b, %r9b
|
|
rcrb %r9b
|
|
rolb %r9b
|
|
subb $1, %r9b
|
|
sbbb %r9b, %r9b
|
|
negb %r9b
|
|
notb %r9b
|
|
sarb %r10b
|
|
shrb %r10b
|
|
incb %r10b
|
|
andb %r10b, %r10b
|
|
rcrb %r10b
|
|
rolb %r10b
|
|
subb $1, %r10b
|
|
sbbb %r10b, %r10b
|
|
negb %r10b
|
|
notb %r10b
|
|
sarb %r11b
|
|
shrb %r11b
|
|
incb %r11b
|
|
andb %r11b, %r11b
|
|
rcrb %r11b
|
|
rolb %r11b
|
|
subb $1, %r11b
|
|
sbbb %r11b, %r11b
|
|
negb %r11b
|
|
notb %r11b
|
|
sarb %bpl
|
|
shrb %bpl
|
|
incb %bpl
|
|
andb %bpl, %bpl
|
|
rcrb %bpl
|
|
rolb %bpl
|
|
subb $1, %bpl
|
|
sbbb %bpl, %bpl
|
|
negb %bpl
|
|
notb %bpl
|
|
sarb %dl
|
|
shrb %dl
|
|
incb %dl
|
|
andb %dl, %dl
|
|
rcrb %dl
|
|
rolb %dl
|
|
subb $1, %dl
|
|
sbbb %dl, %dl
|
|
negb %dl
|
|
notb %dl
|
|
sarb %bl
|
|
shrb %bl
|
|
incb %bl
|
|
andb %bl, %bl
|
|
rcrb %bl
|
|
rolb %bl
|
|
subb $1, %bl
|
|
sbbb %bl, %bl
|
|
negb %bl
|
|
notb %bl
|
|
sarb %cl
|
|
shrb %cl
|
|
incb %cl
|
|
andb %cl, %cl
|
|
rcrb %cl
|
|
rolb %cl
|
|
subb $1, %cl
|
|
sbbb %cl, %cl
|
|
negb %cl
|
|
notb %cl
|
|
## InlineAsm End
|
|
leaq 1(%rsi), %rax
|
|
addq $2, %rsi
|
|
cmpq %rdi, %rsi
|
|
movq %rax, %rsi
|
|
jl LBB0_3
|
|
popq %rbx
|
|
popq %rbp
|
|
retq
|
|
LBB0_1:
|
|
xorl %eax, %eax
|
|
popq %rbx
|
|
popq %rbp
|
|
retq
|
|
.cfi_endproc
|
|
|
|
|
|
.subsections_via_symbols
|
|
|
|
## Detailed Results
|
|
{'arguments': (10000000,),
|
|
'frequency': 2600000000.0,
|
|
'iterations': 10000000,
|
|
'parallel_factor': 8,
|
|
'returned': [9999999, 9999999, 9999999, 9999999],
|
|
'runtimes': [0.12684946099761873,
|
|
0.1267317159799859,
|
|
0.1264580050483346,
|
|
0.1265630420530215]}
|
|
minimal throughput: 4.11 cy
|