Files
OSACA/osaca/db_interface.py

460 lines
16 KiB
Python
Executable File

#!/usr/bin/env python3
import math
import os
import sys
import warnings
import ruamel.yaml
from osaca.semantics import MachineModel
def add_entry_to_db(arch: str, entry):
    """Adds entry to the user database in ~/.osaca/data

    Keys missing from ``entry`` ('operands', 'throughput', 'latency',
    'port_pressure', 'uops') are filled in with empty defaults on the passed
    dict itself. The entry is then appended to the 'instruction_forms' list
    of the database and the database file is rewritten.

    Args:
        arch: string representation of the architecture as abbreviation.
            Database for this architecture must already exist.
        entry: DB entry which will be added. Should consist at best out of
            'name', 'operand(s)' ('register', 'memory', 'immediate', 'identifier', ...),
            'throughput', 'latency', 'port_pressure'.

    Raises:
        AssertionError: if no database file exists for ``arch``.
        ValueError: if ``entry`` has no 'name' key.
    """
    # load yaml
    arch = arch.lower()
    filepath = os.path.expanduser('~/.osaca/data/' + arch + '.yml')
    assert os.path.exists(filepath)
    yaml = _create_yaml_object()
    with open(filepath, 'r') as f:
        data = yaml.load(f)
    # check parameter of entry
    if 'name' not in entry:
        raise ValueError('No name for instruction specified. No import possible')
    entry.setdefault('operands', [])
    for key in ('throughput', 'latency', 'port_pressure', 'uops'):
        entry.setdefault(key, None)
    data['instruction_forms'].append(entry)
    # dump() needs the output stream: open(..., 'w') has already truncated the
    # file, so without the stream the database would be left empty.
    with open(filepath, 'w') as f:
        yaml.dump(data, f)
def add_entries_to_db(arch: str, entries: list) -> None:
    """Adds entries to the user database in ~/.osaca/data

    Entries without a 'name' key are reported on stderr, removed from
    ``entries`` and not imported. For all other entries, missing keys
    ('operands', 'throughput', 'latency', 'port_pressure', 'uops') are filled
    in with empty defaults on the entry dict itself before it is appended to
    the 'instruction_forms' list of the database.

    Args:
        arch: string representation of the architecture as abbreviation.
            Database for this architecture must already exist.
        entries: :class:`list` of DB entries which will be added. Should consist at best out of
            'name', 'operand(s)' ('register', 'memory', 'immediate', 'identifier', ...),
            'throughput', 'latency', 'port_pressure', 'uops'.

    Raises:
        AssertionError: if no database file exists for ``arch``.
    """
    # load yaml
    arch = arch.lower()
    filepath = os.path.expanduser('~/.osaca/data/' + arch + '.yml')
    assert os.path.exists(filepath)
    yaml = _create_yaml_object()
    with open(filepath, 'r') as f:
        data = yaml.load(f)
    # check parameter of entry and append it to list
    # Iterate over a snapshot: invalid entries are removed from ``entries``
    # below, and removing from the list being iterated would make the loop
    # skip the element following each removed one.
    for entry in list(entries):
        if 'name' not in entry:
            print(
                'No name for instruction \n\t{}\nspecified. No import possible'.format(entry),
                file=sys.stderr,
            )
            # remove entry from list
            entries.remove(entry)
            continue
        entry.setdefault('operands', [])
        for key in ('throughput', 'latency', 'port_pressure', 'uops'):
            entry.setdefault(key, None)
        data['instruction_forms'].append(entry)
    # dump() needs the output stream: open(..., 'w') has already truncated the
    # file, so without the stream the database would be left empty.
    with open(filepath, 'w') as f:
        yaml.dump(data, f)
def sanity_check(arch: str, verbose=False):
    """Check the DB of ``arch`` against its ISA DB and print a report.

    Loads the machine model of ``arch`` and the machine model of the ISA it
    reports, runs the arch-side and ISA-side checks and hands all findings to
    the report printer.

    Args:
        arch: architecture identifier passed to MachineModel.
        verbose: forwarded to the report printer to request the detailed
            listing in addition to the summary.
    """
    # machine model of the architecture itself
    arch_mm = MachineModel(arch=arch)
    instruction_forms = arch_mm['instruction_forms']
    # machine model of the ISA the architecture implements
    isa_mm = MachineModel(arch='isa/{}'.format(arch_mm.get_ISA()))
    num_of_instr = len(instruction_forms)
    # findings of the arch DB: missing TP / LT / port pressure, suspicious
    # instructions and duplicates
    arch_findings = _check_sanity_arch_db(arch_mm, isa_mm)
    # findings of the ISA DB: duplicates and forms only present in the ISA DB
    isa_findings = _check_sanity_isa_db(arch_mm, isa_mm)
    report_args = (num_of_instr,) + tuple(arch_findings) + tuple(isa_findings)
    _print_sanity_report(*report_args, verbose=verbose)
def import_benchmark_output(arch, bench_type, filepath):
    """Import measurements from a benchmark output file into the user DB of ``arch``.

    Args:
        arch: architecture identifier; used to look up the ISA via
            MachineModel and passed on to add_entries_to_db().
        bench_type: 'ibench' or 'asmbench'. Only 'ibench' is implemented.
        filepath: path of the benchmark output file to read.

    Raises:
        AssertionError: if ``filepath`` does not exist.
        ValueError: if ``bench_type`` is not one of the supported types.
        NotImplementedError: if ``bench_type`` is 'asmbench'.
    """
    supported_bench_outputs = ['ibench', 'asmbench']
    assert os.path.exists(filepath)
    if bench_type not in supported_bench_outputs:
        raise ValueError('Benchmark type is not supported.')
    with open(filepath, 'r') as f:
        input_data = f.readlines()
    db_entries = None
    if bench_type == 'ibench':
        # The operand codes in the benchmark names are ISA specific, so the
        # parser has to be told which ISA the architecture implements.
        isa = MachineModel(arch=arch).get_ISA()
        db_entries = _get_ibench_output(input_data, isa)
    elif bench_type == 'asmbench':
        raise NotImplementedError
    # write entries to DB
    add_entries_to_db(arch, list(db_entries.values()))


##################
# HELPERS IBENCH #
##################
def _get_ibench_output(input_data, isa):
    """Turn the lines of an ibench output file into DB entries.

    A measurement line is parsed as ``<mnemonic>-<operands>-<kind>: <value> ...``:
    the text before the first ':' names the benchmark, its first two
    '-'-separated parts identify the instruction form (operand codes are
    separated by '_'), and the second whitespace-separated token of the line
    is the measured value. Benchmark names containing 'TP' set the throughput,
    names containing 'LT' set the latency. Lines containing 'Using frequency'
    and blank lines are skipped.

    Args:
        input_data: iterable of output lines.
        isa: ISA name ('x86' or 'aarch64') used to decode the operand codes.

    Returns:
        dict mapping ``'<mnemonic>-<operands>'`` to the DB entry of that
        instruction form. A measurement rejected by _validate_measurement()
        is stored as None and a warning is issued.
    """
    db_entries = {}
    for line in input_data:
        # Lines read with readlines() keep their newline, so a blank line is
        # only recognisable after stripping whitespace.
        if 'Using frequency' in line or not line.strip():
            continue
        instruction = line.split(':')[0]
        name_parts = instruction.split('-')
        key = '-'.join(name_parts[:2])
        if key in db_entries:
            # add only TP/LT value
            entry = db_entries[key]
        else:
            mnemonic = name_parts[0]
            operands = [_create_db_operand(op, isa) for op in name_parts[1].split('_')]
            entry = {
                'name': mnemonic,
                'operands': operands,
                'throughput': None,
                'latency': None,
                'port_pressure': None,
            }
        if 'TP' in instruction:
            entry['throughput'] = _validate_measurement(float(line.split()[1]), True)
            if not entry['throughput']:
                warnings.warn(
                    'Your THROUGHPUT measurement for {} looks suspicious'.format(key)
                    + ' and was not added. Please inspect your benchmark.'
                )
        elif 'LT' in instruction:
            entry['latency'] = _validate_measurement(float(line.split()[1]), False)
            if not entry['latency']:
                warnings.warn(
                    'Your LATENCY measurement for {} looks suspicious'.format(key)
                    + ' and was not added. Please inspect your benchmark.'
                )
        db_entries[key] = entry
    return db_entries


def _validate_measurement(measurement, is_tp):
    """Snap a measured value to the nearest plausible value or reject it.

    Args:
        measurement: measured value as float.
        is_tp: True for a throughput value, False for a latency value.

    Returns:
        For a latency: the measurement rounded to the nearest integer (as
        float) if it lies within 5% above its floor or within 5% below its
        ceiling. For a throughput: the reciprocal 1/n (n = 1..10, rounded to
        five digits) whose +-5% band contains the measurement. None if no such
        value exists.
    """
    if not is_tp:
        if (
            math.floor(measurement) * 1.05 >= measurement
            or math.ceil(measurement) * 0.95 <= measurement
        ):
            # Value is probably correct, so round it to the estimated value
            return float(round(measurement))
    # Check reciprocal only if it is a throughput value
    else:
        reciprocals = [1 / x for x in range(1, 11)]
        for reci in reciprocals:
            if reci * 0.95 <= measurement <= reci * 1.05:
                # Value is probably correct, so round it to the estimated value
                return round(reci, 5)
    # No value close to an integer or its reciprocal found, we assume the
    # measurement is incorrect
    return None


def _create_db_operand(operand, isa):
    """Translate an operand code of a benchmark name into a DB operand dict.

    Args:
        operand: operand code as used in the benchmark name.
        isa: ISA name; 'aarch64' and 'x86' are supported (case-insensitive).

    Raises:
        ValueError: if ``isa`` is not supported or ``operand`` is not a valid
            operand code for that ISA.
    """
    isa = isa.lower()
    if isa == 'aarch64':
        return _create_db_operand_aarch64(operand)
    elif isa == 'x86':
        return _create_db_operand_x86(operand)
    # Returning None here would silently store empty operands in the DB.
    raise ValueError('ISA {} is not supported'.format(isa))


def _create_db_operand_aarch64(operand):
    """Translate an AArch64 operand code into a DB operand dict.

    Codes: 'i' immediate; one of 'w', 'x', 'b', 'h', 's', 'd', 'q' a register
    with that prefix; 'v<shape>' a 'v' register with the given one-letter
    shape; 'm...' a memory operand whose further letters select base ('b'),
    offset ('o'), index ('i'), scale of 8 ('s'), pre-indexing ('r') and
    post-indexing ('p').

    Raises:
        ValueError: if ``operand`` is none of the above.
    """
    if operand == 'i':
        return {'class': 'immediate', 'imd': 'int'}
    # Require a single character: a plain substring test would also accept
    # the empty string and multi-letter runs such as 'wx'.
    elif len(operand) == 1 and operand in 'wxbhsdq':
        return {'class': 'register', 'prefix': operand}
    elif operand.startswith('v'):
        return {'class': 'register', 'prefix': 'v', 'shape': operand[1:2]}
    elif operand.startswith('m'):
        return {
            'class': 'memory',
            'base': 'gpr' if 'b' in operand else None,
            'offset': 'imd' if 'o' in operand else None,
            'index': 'gpr' if 'i' in operand else None,
            'scale': 8 if 's' in operand else 1,
            'pre-indexed': 'r' in operand,
            'post-indexed': 'p' in operand,
        }
    else:
        raise ValueError('Parameter {} is not a valid operand code'.format(operand))


def _create_db_operand_x86(operand):
    """Translate an x86 operand code into a DB operand dict.

    Codes: 'r' general purpose register; 'x', 'y', 'z' the xmm, ymm and zmm
    registers; 'i' immediate; 'm...' a memory operand whose further letters
    select base ('b'), offset ('o'), index ('i') and scale of 8 ('s').

    Raises:
        ValueError: if ``operand`` is none of the above.
    """
    if operand == 'r':
        return {'class': 'register', 'name': 'gpr'}
    # Require a single character: a plain substring test would also accept
    # the empty string and multi-letter runs such as 'xy'.
    elif len(operand) == 1 and operand in 'xyz':
        return {'class': 'register', 'name': operand + 'mm'}
    elif operand == 'i':
        return {'class': 'immediate', 'imd': 'int'}
    elif operand.startswith('m'):
        return {
            'class': 'memory',
            'base': 'gpr' if 'b' in operand else None,
            'offset': 'imd' if 'o' in operand else None,
            'index': 'gpr' if 'i' in operand else None,
            'scale': 8 if 's' in operand else 1,
        }
    else:
        raise ValueError('Parameter {} is not a valid operand code'.format(operand))
########################
# HELPERS SANITY CHECK #
########################
def _check_sanity_arch_db(arch_mm, isa_mm):
    """Collect problematic instruction forms of the architecture DB.

    Args:
        arch_mm: machine model of the architecture; read via
            ``['ports']``, ``['instruction_forms']``, ``get_ISA()`` and
            ``_check_for_duplicate()``.
        isa_mm: machine model of the ISA; queried with ``get_instruction()``.

    Returns:
        Tuple of five lists of instruction forms:
        forms without throughput, forms without latency, forms without port
        pressure, forms whose name starts with an ISA specific "suspicious"
        prefix but which have no entry in the ISA DB, and forms flagged as
        duplicates (each distinct form listed once, in reverse order of
        first appearance).

    A warning is issued for every form whose port pressure list does not have
    one value per port.
    """
    suspicious_prefixes_x86 = ['vfm', 'fm']
    suspicious_prefixes_arm = ['fml', 'ldp', 'stp', 'str']
    isa = arch_mm.get_ISA().lower()
    if isa == 'aarch64':
        suspicious_prefixes = suspicious_prefixes_arm
    elif isa == 'x86':
        suspicious_prefixes = suspicious_prefixes_x86
    else:
        # No prefix list is known for other ISAs; skip the prefix check
        # instead of failing on an unbound variable.
        suspicious_prefixes = []
    # str.startswith() takes a tuple of alternatives
    suspicious_prefixes = tuple(suspicious_prefixes)
    port_num = len(arch_mm['ports'])
    # returned lists
    missing_throughput = []
    missing_latency = []
    missing_port_pressure = []
    suspicious_instructions = []
    flagged_duplicates = []
    for instr_form in arch_mm['instruction_forms']:
        # check value in DB entry
        if instr_form['throughput'] is None:
            missing_throughput.append(instr_form)
        if instr_form['latency'] is None:
            missing_latency.append(instr_form)
        if instr_form['port_pressure'] is None:
            missing_port_pressure.append(instr_form)
        elif len(instr_form['port_pressure']) != port_num:
            warnings.warn(
                'Invalid number of ports:\n {}'.format(_get_full_instruction_name(instr_form))
            )
        # check entry against ISA DB: a form with a suspicious prefix that is
        # not listed in the ISA DB is marked as suspicious
        if (
            instr_form['name'].startswith(suspicious_prefixes)
            and isa_mm.get_instruction(instr_form['name'], instr_form['operands']) is None
        ):
            suspicious_instructions.append(instr_form)
        # check for duplicates in DB
        if arch_mm._check_for_duplicate(instr_form['name'], instr_form['operands']):
            flagged_duplicates.append(instr_form)
    # every occurrence of a duplicated form was flagged --> uniquify
    duplicate_instr_arch = []
    for instr_form in flagged_duplicates:
        if instr_form not in duplicate_instr_arch:
            duplicate_instr_arch.append(instr_form)
    duplicate_instr_arch.reverse()
    return (
        missing_throughput,
        missing_latency,
        missing_port_pressure,
        suspicious_instructions,
        duplicate_instr_arch,
    )
def _check_sanity_isa_db(arch_mm, isa_mm):
    """Collect problematic instruction forms of the ISA DB.

    Args:
        arch_mm: machine model of the architecture; queried with
            ``get_instruction()``.
        isa_mm: machine model of the ISA; read via ``['instruction_forms']``
            and ``_check_for_duplicate()``.

    Returns:
        Tuple ``(duplicate_instr_isa, only_in_isa)``: the forms flagged as
        duplicates in the ISA DB (each distinct form once, in reverse order
        of first appearance) and the forms for which the architecture DB
        returns no instruction.
    """
    only_in_isa = []
    flagged = []
    for form in isa_mm['instruction_forms']:
        name, operands = form['name'], form['operands']
        # form unknown to the arch DB?
        if arch_mm.get_instruction(name, operands) is None:
            only_in_isa.append(form)
        # form flagged as duplicate within the ISA DB?
        if isa_mm._check_for_duplicate(name, operands):
            flagged.append(form)
    # a duplicated form was collected once per occurrence; keep one of each
    duplicate_instr_isa = []
    for form in flagged:
        if form not in duplicate_instr_isa:
            duplicate_instr_isa.append(form)
    duplicate_instr_isa.reverse()
    return duplicate_instr_isa, only_in_isa
def _print_sanity_report(
    total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa, verbose=False
):
    """Print the summary of a DB sanity check to stdout.

    Args:
        total: number of instruction forms in the architecture DB.
        m_tp: instruction forms without throughput value.
        m_l: instruction forms without latency value.
        m_pp: instruction forms without port pressure assignment.
        suspic_instr: instruction forms which might miss an ISA DB entry.
        dup_arch: duplicate instruction forms in the uarch DB.
        dup_isa: duplicate instruction forms in the ISA DB.
        only_isa: instruction forms of the ISA DB missing in the uarch DB.
        verbose: if True, the detailed listing is printed after the summary.
    """

    def share(forms):
        # An empty DB (total == 0) is reported as 0% instead of dividing by zero.
        percent = round(100 * len(forms) / total) if total else 0
        return percent, len(forms), total

    # non-verbose summary
    print('SUMMARY\n----------------------')
    print('{}% ({}/{}) of instruction forms have no throughput value.'.format(*share(m_tp)))
    print('{}% ({}/{}) of instruction forms have no latency value.'.format(*share(m_l)))
    print(
        '{}% ({}/{}) of instruction forms have no port pressure assignment.'.format(*share(m_pp))
    )
    print(
        '{}% ({}/{}) of instruction forms might miss an ISA DB entry.'.format(
            *share(suspic_instr)
        )
    )
    print('{} duplicate instruction forms in uarch DB.'.format(len(dup_arch)))
    print('{} duplicate instruction forms in ISA DB.'.format(len(dup_isa)))
    print(
        '{} instruction forms in ISA DB are not referenced by instruction '.format(len(only_isa))
        + 'forms in uarch DB.'
    )
    print('----------------------\n')
    # verbose version
    if verbose:
        _print_sanity_report_verbose(
            total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa
        )
def _print_sanity_report_verbose(
    total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa
):
    """Print the detailed listing of a DB sanity check to stdout.

    For every non-empty category a heading is printed, followed by one line
    per instruction form holding its full name wrapped in the ANSI colour
    code of the category. Empty categories produce no output. ``total`` is
    accepted for signature symmetry with the summary printer but not used.
    """
    reset = '\033[0m'
    # (heading, ANSI colour, instruction forms) in output order
    sections = (
        ('Instruction forms without throughput value:\n', '\033[1;34;1m', m_tp),
        ('Instruction forms without latency value:\n', '\033[1;31;1m', m_l),
        ('Instruction forms without port pressure assignment:\n', '\033[1;35;1m', m_pp),
        ('Instruction forms which might miss an ISA DB entry:\n', '\033[1;36;1m', suspic_instr),
        ('Duplicate instruction forms in uarch DB:\n', '\033[33m', dup_arch),
        ('Duplicate instruction forms in ISA DB:\n', '\033[1;33;1m', dup_isa),
        ('Instruction forms existing in ISA DB but not in uarch DB:\n', '\033[36m', only_isa),
    )
    for heading, colour, forms in sections:
        if len(forms) != 0:
            print(heading, end='')
        for instr_form in forms:
            print('{}{}{}'.format(colour, _get_full_instruction_name(instr_form), reset))
###################
# GENERIC HELPERS #
###################
def _get_full_instruction_name(instruction_form):
    """Build a readable one-line name of an instruction form.

    The result is ``'<name> <operand>,<operand>,...'`` where every operand is
    rendered as ``<class>(<key>:<value>,...)`` from all of its keys except
    'class', in the operand's own key order.
    """
    rendered = []
    for operand in instruction_form['operands']:
        attributes = [key + ':' + str(operand[key]) for key in operand if key != 'class']
        rendered.append('{}({})'.format(operand['class'], ','.join(attributes)))
    return '{} {}'.format(instruction_form['name'], ','.join(rendered))
def __represent_none(self, data):
    """Representer callback that emits ``None`` as the YAML null scalar ``~``.

    ``self`` is the object the callback is invoked on; it has to provide
    ``represent_scalar()``. ``data`` is not inspected.
    """
    null_tag = u'tag:yaml.org,2002:null'
    null_text = u'~'
    return self.represent_scalar(null_tag, null_text)
def _create_yaml_object():
    """Return a ruamel.yaml ``YAML`` object with a custom representer for ``None``."""
    none_type = type(None)
    yaml_obj = ruamel.yaml.YAML()
    # dump None values through __represent_none
    yaml_obj.representer.add_representer(none_type, __represent_none)
    return yaml_obj
def __dump_data_to_yaml(filepath, data):
    """Write ``data`` to ``filepath`` as YAML in three consecutive documents.

    First all keys except 'instruction_forms' and 'port_model_scheme' are
    written, then 'port_model_scheme' with ``default_style='|'``, finally
    'instruction_forms'. ``data`` itself is not modified.

    Raises:
        KeyError: if 'instruction_forms' or 'port_model_scheme' is missing.
    """
    # NOTE(review): uses the module-level ruamel.yaml.dump() function rather
    # than a YAML() object as the rest of this file does -- confirm it is
    # still supported by the installed ruamel.yaml version.
    # first add 'normal' meta data in the right order (no ordered dict yet)
    meta_data = dict(data)
    instruction_forms = meta_data.pop('instruction_forms')
    port_model_scheme = meta_data.pop('port_model_scheme')
    with open(filepath, 'w') as out:
        ruamel.yaml.dump(meta_data, out, allow_unicode=True)
    with open(filepath, 'a') as out:
        # now add port model scheme in |-scheme for better readability
        ruamel.yaml.dump(
            {'port_model_scheme': port_model_scheme},
            out,
            allow_unicode=True,
            default_style='|',
        )
        # finally, add instruction forms
        ruamel.yaml.dump({'instruction_forms': instruction_forms}, out, allow_unicode=True)