Files
asmbench/run_SC18_SRC.py
2018-09-25 10:23:40 +02:00

213 lines
8.1 KiB
Python
Executable File

#!/usr/bin/env python3
import collections
import itertools
from asmbench import op, bench, oldjit
def _latency_throughput_pair(benchmark_cls, latency_shape, throughput_shape,
                             **config):
    """Build the (latency, throughput) benchmark pair for one configuration.

    Both benchmarks are instances of ``benchmark_cls`` created with the same
    ``config`` keyword arguments; they differ only in ``parallel`` and
    ``serial``, which are taken from the ``(parallel, serial)`` tuples
    ``latency_shape`` and ``throughput_shape``.

    Returns:
        A 2-tuple ``(latency_benchmark, throughput_benchmark)``, constructed
        in that order.
    """
    # dict(config, ...) appends parallel/serial after the shared arguments,
    # so the constructor sees the keywords in the same order as a call that
    # spells them all out by hand.
    return tuple(
        benchmark_cls(**dict(config, parallel=parallel, serial=serial))
        for parallel, serial in (latency_shape, throughput_shape))


def _cycles_per_iteration(module, repeat=3):
    """Build and run ``module``; return its best result per executed instance.

    The smallest entry of ``result['runtimes']`` is multiplied by
    ``result['frequency']`` and divided by
    ``result['iterations'] * module.parallel * module.serial``.
    Presumably runtimes are seconds and frequency is Hz, which yields
    cycles -- confirm against ``build_and_execute``.
    """
    result = module.build_and_execute(repeat=repeat)
    return min(result['runtimes']) * result['frequency'] / (
        result['iterations'] * module.parallel * module.serial)


def jit_based_benchs():
    """Run the ``oldjit`` address-generation and load benchmarks.

    For every configuration a latency variant and a throughput variant are
    built up front (all benchmarks are constructed before any is executed).
    Each pair is then run in insertion order and one line is printed per
    configuration with the LAT and TP values in cycles.

    Returns:
        None. Results are only printed.
    """
    # One entry per address-generation form: (result name, operand config).
    # Every form is measured with parallel=1/serial=5 for latency and
    # parallel=10/serial=1 for throughput.
    lea_variants = (
        ('lea_b', dict(
            offset=None,
            base=('r', 'i64', '666'),
            index=None,
            width=None,
            destination='base')),
        ('lea_b+off', dict(
            offset=('i', None, '23'),
            base=('r', 'i64', '666'),
            index=None,
            width=None,
            destination='base')),
        ('lea_idx*w', dict(
            offset=None,
            base=None,
            index=('r', 'i64', '1'),
            width=('i', None, '4'),
            destination='index')),
        ('lea_off+idx*w', dict(
            offset=('i', 'i64', '-0x8'),
            base=None,
            index=('r', 'i64', '51'),
            width=('i', None, '4'),
            destination='index')),
        ('lea_b+idx*w', dict(
            offset=None,
            base=('r', 'i64', '23'),
            index=('r', 'i64', '12'),
            width=('i', None, '4'),
            destination='base')),
        ('lea_b+off+idx*w', dict(
            offset=('i', None, '42'),
            base=('r', 'i64', '23'),
            index=('r', 'i64', '12'),
            width=('i', None, '4'),
            destination='base')),
    )
    # Load benchmarks: (result name, chain structure).
    load_variants = (
        ('LD_linear', 'linear'),
        ('LD_random', 'random'),
    )

    modules = collections.OrderedDict()
    for name, operands in lea_variants:
        modules[name] = _latency_throughput_pair(
            oldjit.AddressGenerationBenchmark, (1, 5), (10, 1), **operands)
    for name, structure in load_variants:
        modules[name] = _latency_throughput_pair(
            oldjit.LoadBenchmark, (1, 2), (4, 2),
            chain_length=2048,  # 2048 * 8B = 16kB
            structure=structure)

    for name, (lat_module, tp_module) in modules.items():
        # Latency variant is executed before the throughput variant.
        cy_per_it_lat = _cycles_per_iteration(lat_module)
        cy_per_it_tp = _cycles_per_iteration(tp_module)
        print('{key:<16} LAT {cy_per_it_lat:.3f} cy TP {cy_per_it_tp:.3f} cy'.format(
            key=name,
            cy_per_it_lat=cy_per_it_lat,
            cy_per_it_tp=cy_per_it_tp))
if __name__ == '__main__':
    # Script entry point: measure every instruction on its own, run the
    # JIT-based benchmarks, then measure every unordered pair of
    # instructions (including an instruction paired with itself).
    bench.setup_llvm()

    # (LLVM instruction name, instruction template) pairs; the template is
    # parsed by op.Instruction.from_string below.
    # NOTE(review): 'SUB32ri' uses an i64 destination while 'ADD32ri' uses
    # i32 -- confirm this is intentional.
    # NOTE(review): the VFMADD213PS* entries use double-precision vector
    # types -- confirm this is intentional.
    instruction_specs = [
        ('ADD32ri', 'add {src:i32:1}, {srcdst:i32:r}'),
        ('ADD64ri32', 'add {src:i32:1}, {srcdst:i64:r}'),
        ('INC64r', 'inc {srcdst:i64:r}'),
        ('SUB32ri', 'sub {src:i32:1}, {srcdst:i64:r}'),
        ('MOV64ri32', 'mov {src:i32:1}, {srcdst:i64:r}'),
        ('VINSERTF128rr', 'vinsertf128 {src:i8:0}, {src:<2 x double>:x}, {src:<4 x double>:x}, {dst:<4 x double>:x}'),
        ('VCVTSI642SSrr', 'vcvtsi2ss {src:i64:r}, {src:float:x}, {dst:float:x}'),
        ('VADDPDYrr', 'vaddpd {src:<4 x double>:x}, {src:<4 x double>:x}, {dst:<4 x double>:x}'),
        ('VADDSDrr', 'vaddsd {src:double:x}, {src:double:x}, {dst:double:x}'),
        ('VADDSSrr', 'vaddss {src:float:x}, {src:float:x}, {dst:float:x}'),
        ('VFMADD213PDYr', 'vfmadd213pd {src:<4 x double>:x}, {src:<4 x double>:x}, {srcdst:<4 x double>:x}'),
        ('VFMADD213PDr', 'vfmadd213pd {src:<2 x double>:x}, {src:<2 x double>:x}, {srcdst:<2 x double>:x}'),
        ('VFMADD213PSYr', 'vfmadd213ps {src:<4 x double>:x}, {src:<4 x double>:x}, {srcdst:<4 x double>:x}'),
        ('VFMADD213PSr', 'vfmadd213ps {src:<2 x double>:x}, {src:<2 x double>:x}, {srcdst:<2 x double>:x}'),
        ('VFMADD213SDr', 'vfmadd213sd {src:double:x}, {src:double:x}, {srcdst:double:x}'),
        ('VFMADD213SSr', 'vfmadd213ss {src:float:x}, {src:float:x}, {srcdst:float:x}'),
        ('VMULPDYrr', 'vmulpd {src:<4 x double>:x}, {src:<4 x double>:x}, {dst:<4 x double>:x}'),
        ('VMULSDrr', 'vmulsd {src:double:x}, {src:double:x}, {dst:double:x}'),
        ('VMULSSrr', 'vmulss {src:float:x}, {src:float:x}, {dst:float:x}'),
        ('VSUBSDrr', 'vsubsd {src:double:x}, {src:double:x}, {dst:double:x}'),
        ('VSUBSSrr', 'vsubss {src:float:x}, {src:float:x}, {dst:float:x}'),
        ('VDIVPDYrr', 'vdivpd {src:<4 x double>:x}, {src:<4 x double>:x}, {dst:<4 x double>:x}'),
        ('VDIVSDrr', 'vdivsd {src:double:x}, {src:double:x}, {dst:double:x}'),
        ('VDIVSSrr', 'vdivss {src:float:x}, {src:float:x}, {dst:float:x}'),
    ]
    # Triples of (LLVM name, template string, parsed instruction).
    instructions = [
        (llvm_name, template, op.Instruction.from_string(template))
        for llvm_name, template in instruction_specs
    ]

    # Keyword arguments shared by every bench.bench_instructions call, so the
    # single-instruction and pair measurements use identical settings.
    bench_options = dict(
        serial_factor=8, throughput_serial_factor=8, parallel_factor=10,
        verbosity=0, repeat=10, min_elapsed=0.3, max_elapsed=0.5)

    # Pass 1: each instruction alone; LLVM name -> (latency, throughput).
    instructions_measured = collections.OrderedDict()
    for llvm_name, _, instruction in instructions:
        latency, throughput = bench.bench_instructions(
            [instruction], **bench_options)
        print('{:<16} LAT {:.3f} cy TP {:.3f} cy'.format(
            llvm_name, latency, throughput))
        instructions_measured[llvm_name] = (latency, throughput)

    jit_based_benchs()

    # Pass 2: every unordered pair of instructions, self-pairs included.
    two_combinations_measured = collections.OrderedDict()
    for first, second in itertools.combinations_with_replacement(instructions, 2):
        name_a, template_a, instruction_a = first
        name_b, template_b, instruction_b = second
        latency, throughput = bench.bench_instructions(
            [instruction_a, instruction_b], **bench_options)
        # Throughputs measured for each instruction alone in pass 1.
        single_throughputs = (
            instructions_measured[name_a][1],
            instructions_measured[name_b][1])
        # Extra throughput of the pair over its slower member, normalised by
        # the faster member's throughput.
        same_port_metric = (
            (throughput - max(single_throughputs)) / min(single_throughputs))
        print('{:<16} {:<16} LAT {:.3f} cy TP {:.3f} cy SPM {:>5.2f}'.format(
            name_a, name_b, latency, throughput, same_port_metric))
        two_combinations_measured[(name_a, template_a), (name_b, template_b)] = (
            latency, throughput, same_port_metric)