From d418c16f4af91579e107e735670eb74b995c4b3d Mon Sep 17 00:00:00 2001 From: JanLJL Date: Thu, 26 Aug 2021 16:58:19 +0200 Subject: [PATCH] applied flake8 and black rules --- docs/version_from_src.py | 3 +- osaca/data/generate_mov_entries.py | 8 +- osaca/data/model_importer.py | 36 +- osaca/db_interface.py | 16 +- osaca/frontend.py | 11 +- osaca/osaca.py | 25 +- osaca/parser/parser_AArch64.py | 50 ++- osaca/parser/parser_x86att.py | 16 +- osaca/semantics/arch_semantics.py | 11 +- osaca/semantics/hw_model.py | 66 ++- osaca/semantics/isa_semantics.py | 118 ++--- osaca/semantics/kernel_dg.py | 57 ++- osaca/utils.py | 5 +- setup.py | 14 +- tests/test_cli.py | 48 +- tests/test_db_interface.py | 15 +- tests/test_files/kernel_x86_memdep.s | 20 +- tests/test_frontend.py | 9 +- tests/test_marker_utils.py | 13 +- tests/test_parser_AArch64.py | 10 +- tests/test_parser_x86att.py | 26 +- tests/test_semantics.py | 46 +- validation/build_and_run.py | 629 ++++++++++++++++----------- 23 files changed, 781 insertions(+), 471 deletions(-) diff --git a/docs/version_from_src.py b/docs/version_from_src.py index 97a4cda..156a4e2 100644 --- a/docs/version_from_src.py +++ b/docs/version_from_src.py @@ -7,7 +7,8 @@ import re def __read(*names, **kwargs): """Reads in file""" with io.open( - os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8") + os.path.join(os.path.dirname(__file__), *names), + encoding=kwargs.get("encoding", "utf8"), ) as fp: return fp.read() diff --git a/osaca/data/generate_mov_entries.py b/osaca/data/generate_mov_entries.py index 13921ce..bf7cbf6 100755 --- a/osaca/data/generate_mov_entries.py +++ b/osaca/data/generate_mov_entries.py @@ -88,7 +88,7 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder): comment = None if load: - if 'ymm' in operand_types: + if "ymm" in operand_types: port2D3D_pressure = 2 else: port2D3D_pressure = 1 @@ -96,7 +96,7 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder): latency += 4 comment = "with load" if store: - if 'ymm' in operand_types: + if "ymm" in operand_types: port4_pressure = 2 else: port4_pressure = 1 @@ -716,14 +716,14 @@ skx_mov_instructions = list( # ('movapd xmm xmm', ('1*p5', 1)), # ('vmovapd xmm xmm', ('1*p5', 1)), # ('vmovapd ymm ymm', ('1*p5', 1)), - ('vmovapd zmm zmm', ('', 0)), + ("vmovapd zmm zmm", ("", 0)), # https://www.felixcloutier.com/x86/movaps # TODO with masking! # TODO the following may eliminate or be bound to 1*p0156: # ('movaps xmm xmm', ('1*p5', 1)), # ('vmovaps xmm xmm', ('1*p5', 1)), # ('vmovaps ymm ymm', ('1*p5', 1)), - ('vmovaps zmm zmm', ('', 0)), + ("vmovaps zmm zmm", ("", 0)), # https://www.felixcloutier.com/x86/movbe ("movbe gpr mem", ("1*p15", 4)), ("movbe mem gpr", ("1*p15", 4)), diff --git a/osaca/data/model_importer.py b/osaca/data/model_importer.py index 92f5f25..d10555e 100755 --- a/osaca/data/model_importer.py +++ b/osaca/data/model_importer.py @@ -140,9 +140,11 @@ def extract_model(tree, arch, skip_mem=True): print("Couldn't find port utilization, skip: ", iform, file=sys.stderr) continue # skip if measured TP is smaller than computed - if [float(x.attrib["TP_ports"]) > min(float(x.attrib["TP_loop"]), - float(x.attrib["TP_unrolled"])) - for x in arch_tag.findall("measurement")][0]: + if [ + float(x.attrib["TP_ports"]) + > min(float(x.attrib["TP_loop"]), float(x.attrib["TP_unrolled"])) + for x in arch_tag.findall("measurement") + ][0]: print( "Calculated TP is greater than measured TP.", iform, @@ -160,13 +162,15 @@ def extract_model(tree, arch, skip_mem=True): throughput = float(measurement_tag.attrib["TP_ports"]) else: throughput = min( - measurement_tag.attrib.get("TP_loop", float('inf')), - measurement_tag.attrib.get("TP_unroll", float('inf')), - measurement_tag.attrib.get("TP", float('inf')), + measurement_tag.attrib.get("TP_loop", float("inf")), + measurement_tag.attrib.get("TP_unroll", float("inf")), + measurement_tag.attrib.get("TP", float("inf")), ) - if throughput == float('inf'): + if throughput == float("inf"): throughput = None - uops = int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None + uops = ( + int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None + ) if "ports" in measurement_tag.attrib: port_pressure.append(port_pressure_from_tag_attributes(measurement_tag.attrib)) latencies = [ @@ -202,7 +206,11 @@ def extract_model(tree, arch, skip_mem=True): # Check if all are equal if port_pressure: if port_pressure[1:] != port_pressure[:-1]: - print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr) + print( + "Contradicting port occupancies, using latest IACA:", + iform, + file=sys.stderr, + ) port_pressure = port_pressure[-1] else: # print("No data available for this architecture:", mnemonic, file=sys.stderr) @@ -222,10 +230,12 @@ def extract_model(tree, arch, skip_mem=True): port_4 = True # Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4) if port_23 and not port_4: - if arch.upper() in ["SNB", "IVB"] and any( - [p.get('name', '') == 'ymm' for p in parameters]) and \ - not '128' in mnemonic: - # x = 2 if SNB or IVB and ymm regiser in any operand and not '128' in + if ( + arch.upper() in ["SNB", "IVB"] + and any([p.get("name", "") == "ymm" for p in parameters]) + and not ("128" in mnemonic) + ): + # x = 2 if SNB or IVB and ymm regiser in any operand and not '128' in # instruction name port2D3D_pressure = 2 else: diff --git a/osaca/db_interface.py b/osaca/db_interface.py index 65b63c6..09c352d 100755 --- a/osaca/db_interface.py +++ b/osaca/db_interface.py @@ -125,7 +125,10 @@ def _get_asmbench_output(input_data, isa): db_entries = {} for i in range(0, len(input_data), 4): if input_data[i + 3].strip() != "": - print("asmbench output not in the correct format! Format must be: ", file=sys.stderr) + print( + "asmbench output not in the correct format! Format must be: ", + file=sys.stderr, + ) print( "-------------\nMNEMONIC[-OP1[_OP2][...]]\nLatency: X cycles\n" "Throughput: Y cycles\n\n-------------", @@ -540,7 +543,16 @@ def _get_sanity_report( def _get_sanity_report_verbose( - total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa, bad_operands, colors=False + total, + m_tp, + m_l, + m_pp, + suspic_instr, + dup_arch, + dup_isa, + only_isa, + bad_operands, + colors=False, ): """Get the verbose part of the sanity report with all missing instruction forms.""" BRIGHT_CYAN = "\033[1;36;1m" if colors else "" diff --git a/osaca/frontend.py b/osaca/frontend.py index fa6b014..81f20a5 100755 --- a/osaca/frontend.py +++ b/osaca/frontend.py @@ -202,7 +202,12 @@ class Frontend(object): ) def combined_view( - self, kernel, cp_kernel: KernelDG, dep_dict, ignore_unknown=False, show_cmnts=True + self, + kernel, + cp_kernel: KernelDG, + dep_dict, + ignore_unknown=False, + show_cmnts=True, ): """ Build combined view of kernel including port pressure (TP), a CP column and a @@ -238,8 +243,8 @@ class Frontend(object): lcd_sum = 0.0 lcd_lines = {} if dep_dict: - longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]['latency']) - lcd_sum = dep_dict[longest_lcd]['latency'] + longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]["latency"]) + lcd_sum = dep_dict[longest_lcd]["latency"] lcd_lines = { instr["line_number"]: lat for instr, lat in dep_dict[longest_lcd]["dependencies"] } diff --git a/osaca/osaca.py b/osaca/osaca.py index f905104..765cff7 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -10,7 +10,13 @@ from functools import lru_cache from osaca.db_interface import import_benchmark_output, sanity_check from osaca.frontend import Frontend from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT -from osaca.semantics import INSTR_FLAGS, ArchSemantics, KernelDG, MachineModel, reduce_to_section +from osaca.semantics import ( + INSTR_FLAGS, + ArchSemantics, + KernelDG, + MachineModel, + reduce_to_section, +) SUPPORTED_ARCHS = [ @@ -37,7 +43,8 @@ DEFAULT_ARCHS = { def __read(*names, **kwargs): """Reads in file""" with io.open( - os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8") + os.path.join(os.path.dirname(__file__), *names), + encoding=kwargs.get("encoding", "utf8"), ) as fp: return fp.read() @@ -79,7 +86,10 @@ def create_parser(parser=None): # Add arguments parser.add_argument( - "-V", "--version", action="version", version="%(prog)s " + __find_version("__init__.py") + "-V", + "--version", + action="version", + version="%(prog)s " + __find_version("__init__.py"), ) parser.add_argument( "--arch", @@ -167,7 +177,9 @@ def create_parser(parser=None): help="Write analysis to this file (default to stdout).", ) parser.add_argument( - "file", type=argparse.FileType("r"), help="Path to object (ASM or instruction file)." + "file", + type=argparse.FileType("r"), + help="Path to object (ASM or instruction file).", ) return parser @@ -347,7 +359,10 @@ def run(args, output_file=sys.stdout): # Sanity check on DB verbose = True if args.verbose > 0 else False sanity_check( - args.arch, verbose=verbose, internet_check=args.internet_check, output_file=output_file + args.arch, + verbose=verbose, + internet_check=args.internet_check, + output_file=output_file, ) elif "import_data" in args: # Import microbench output file into DB diff --git a/osaca/parser/parser_AArch64.py b/osaca/parser/parser_AArch64.py index ce3376c..0f92edb 100755 --- a/osaca/parser/parser_AArch64.py +++ b/osaca/parser/parser_AArch64.py @@ -26,9 +26,9 @@ class ParserAArch64(BaseParser): pp.ZeroOrMore(pp.Word(pp.printables)) ).setResultsName(self.COMMENT_ID) # Define ARM assembly identifier - decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)).setResultsName( - "value" - ) + decimal_number = pp.Combine( + pp.Optional(pp.Literal("-")) + pp.Word(pp.nums) + ).setResultsName("value") hex_number = pp.Combine(pp.Literal("0x") + pp.Word(pp.hexnums)).setResultsName("value") relocation = pp.Combine(pp.Literal(":") + pp.Word(pp.alphanums + "_") + pp.Literal(":")) first = pp.Word(pp.alphas + "_.", exact=1) @@ -152,7 +152,9 @@ class ParserAArch64(BaseParser): pp.Literal("{") + ( pp.delimitedList(pp.Combine(self.list_element), delim=",").setResultsName("list") - ^ pp.delimitedList(pp.Combine(self.list_element), delim="-").setResultsName("range") + ^ pp.delimitedList(pp.Combine(self.list_element), delim="-").setResultsName( + "range" + ) ) + pp.Literal("}") + pp.Optional(index) @@ -256,9 +258,7 @@ class ParserAArch64(BaseParser): # 2. Parse label if result is None: try: - result = self.process_operand( - self.label.parseString(line, parseAll=True).asDict() - ) + result = self.process_operand(self.label.parseString(line, parseAll=True).asDict()) result = AttrDict.convert_dict(result) instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name if self.COMMENT_ID in result[self.LABEL_ID]: @@ -293,7 +293,9 @@ class ParserAArch64(BaseParser): try: result = self.parse_instruction(line) except (pp.ParseException, KeyError) as e: - raise ValueError("Unable to parse {!r} on line {}".format(line, line_number)) from e + raise ValueError( + "Unable to parse {!r} on line {}".format(line, line_number) + ) from e instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID] instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID] instruction_form[self.COMMENT_ID] = result[self.COMMENT_ID] @@ -390,9 +392,9 @@ class ParserAArch64(BaseParser): new_dict["pre_indexed"] = True if "post_indexed" in memory_address: if "value" in memory_address["post_indexed"]: - new_dict["post_indexed"] = {"value": int( - memory_address["post_indexed"]["value"], 0 - )} + new_dict["post_indexed"] = { + "value": int(memory_address["post_indexed"]["value"], 0) + } else: new_dict["post_indexed"] = memory_address["post_indexed"] return AttrDict({self.MEMORY_ID: new_dict}) @@ -408,27 +410,27 @@ class ParserAArch64(BaseParser): Resolve range or list register operand to list of registers. Returns None if neither list nor range """ - if 'register' in operand: - if 'list' in operand.register: - index = operand.register.get('index') + if "register" in operand: + if "list" in operand.register: + index = operand.register.get("index") range_list = [] for reg in operand.register.list: reg = deepcopy(reg) if index is not None: - reg['index'] = int(index, 0) + reg["index"] = int(index, 0) range_list.append(AttrDict({self.REGISTER_ID: reg})) return range_list - elif 'range' in operand.register: + elif "range" in operand.register: base_register = operand.register.range[0] - index = operand.register.get('index') + index = operand.register.get("index") range_list = [] start_name = base_register.name end_name = operand.register.range[1].name for name in range(int(start_name), int(end_name) + 1): reg = deepcopy(base_register) if index is not None: - reg['index'] = int(index, 0) - reg['name'] = str(name) + reg["index"] = int(index, 0) + reg["name"] = str(name) range_list.append(AttrDict({self.REGISTER_ID: reg})) return range_list # neither register list nor range, return unmodified @@ -482,10 +484,12 @@ class ParserAArch64(BaseParser): return AttrDict({self.IMMEDIATE_ID: immediate}) else: # change 'mantissa' key to 'value' - return AttrDict({ - self.IMMEDIATE_ID: AttrDict({ - "value": immediate[dict_name]["mantissa"], - "type": dict_name})} + return AttrDict( + { + self.IMMEDIATE_ID: AttrDict( + {"value": immediate[dict_name]["mantissa"], "type": dict_name} + ) + } ) def process_label(self, label): diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index 5c2a493..f12d9aa 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -23,9 +23,9 @@ class ParserX86ATT(BaseParser): def construct_parser(self): """Create parser for ARM AArch64 ISA.""" - decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)).setResultsName( - "value" - ) + decimal_number = pp.Combine( + pp.Optional(pp.Literal("-")) + pp.Word(pp.nums) + ).setResultsName("value") hex_number = pp.Combine( pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums) ).setResultsName("value") @@ -41,7 +41,8 @@ class ParserX86ATT(BaseParser): identifier = pp.Group( pp.Optional(id_offset).setResultsName("offset") + pp.Combine( - pp.delimitedList(pp.Combine(first + pp.Optional(rest)), delim="::"), joinString="::" + pp.delimitedList(pp.Combine(first + pp.Optional(rest)), delim="::"), + joinString="::", ).setResultsName("name") + pp.Optional(relocation).setResultsName("relocation") ).setResultsName("identifier") @@ -443,7 +444,12 @@ class ParserX86ATT(BaseParser): """Check if register is a vector register""" if register is None: return False - if register["name"].rstrip(string.digits).lower() in ["mm", "xmm", "ymm", "zmm"]: + if register["name"].rstrip(string.digits).lower() in [ + "mm", + "xmm", + "ymm", + "zmm", + ]: return True return False diff --git a/osaca/semantics/arch_semantics.py b/osaca/semantics/arch_semantics.py index 29c01cf..103c71f 100755 --- a/osaca/semantics/arch_semantics.py +++ b/osaca/semantics/arch_semantics.py @@ -47,7 +47,9 @@ class ArchSemantics(ISASemantics): indices = [port_list.index(p) for p in ports] # check if port sum of used ports for uop are unbalanced port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel))) - instr_ports = self._to_list(itemgetter(*indices)(instruction_form["port_pressure"])) + instr_ports = self._to_list( + itemgetter(*indices)(instruction_form["port_pressure"]) + ) if len(set(port_sums)) > 1: # balance ports # init list for keeping track of the current change @@ -270,7 +272,8 @@ class ArchSemantics(ISASemantics): reg_type ] st_data_port_pressure = [ - pp * multiplier for pp in st_data_port_pressure] + pp * multiplier for pp in st_data_port_pressure + ] data_port_pressure = [ sum(x) for x in zip(data_port_pressure, st_data_port_pressure) ] @@ -343,7 +346,9 @@ class ArchSemantics(ISASemantics): def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags): """Apply performance data to instruction if it was found in the archDB""" throughput = instruction_data["throughput"] - port_pressure = self._machine_model.average_port_pressure(instruction_data["port_pressure"]) + port_pressure = self._machine_model.average_port_pressure( + instruction_data["port_pressure"] + ) instruction_form["port_uops"] = instruction_data["port_pressure"] try: assert isinstance(port_pressure, list) diff --git a/osaca/semantics/hw_model.py b/osaca/semantics/hw_model.py index b95a8a3..948c2de 100755 --- a/osaca/semantics/hw_model.py +++ b/osaca/semantics/hw_model.py @@ -1,20 +1,19 @@ #!/usr/bin/env python3 +import hashlib import os import pickle import re import string +from collections import defaultdict from copy import deepcopy from itertools import product -import hashlib from pathlib import Path -from collections import defaultdict import ruamel.yaml -from ruamel.yaml.compat import StringIO - from osaca import __version__, utils from osaca.parser import ParserX86ATT +from ruamel.yaml.compat import StringIO class MachineModel(object): @@ -37,7 +36,13 @@ class MachineModel(object): "hidden_loads": None, "load_latency": {}, "load_throughput": [ - {"base": b, "index": i, "offset": o, "scale": s, "port_pressure": []} + { + "base": b, + "index": i, + "offset": o, + "scale": s, + "port_pressure": [], + } for b, i, o, s in product(["gpr"], ["gpr", None], ["imd", None], [1, 8]) ], "load_throughput_default": [], @@ -128,7 +133,8 @@ class MachineModel(object): instruction_form for instruction_form in name_matched_iforms if self._match_operands( - instruction_form["operands"] if "operands" in instruction_form else [], operands + instruction_form["operands"] if "operands" in instruction_form else [], + operands, ) ) except StopIteration: @@ -150,7 +156,13 @@ class MachineModel(object): return average_pressure def set_instruction( - self, name, operands=None, latency=None, port_pressure=None, throughput=None, uops=None + self, + name, + operands=None, + latency=None, + port_pressure=None, + throughput=None, + uops=None, ): """Import instruction form information.""" # If it already exists. Overwrite information. @@ -500,7 +512,11 @@ class MachineModel(object): """Check if the types of operand ``i_operand`` and ``operand`` match.""" # check for wildcard if self.WILDCARD in operand: - if "class" in i_operand and i_operand["class"] == "register" or "register" in i_operand: + if ( + "class" in i_operand + and i_operand["class"] == "register" + or "register" in i_operand + ): return True else: return False @@ -527,20 +543,27 @@ class MachineModel(object): return self._is_AArch64_mem_type(i_operand, operand["memory"]) # immediate if i_operand["class"] == "immediate" and i_operand["imd"] == self.WILDCARD: - return "value" in operand or \ - ("immediate" in operand and "value" in operand["immediate"]) + return "value" in operand or ( + "immediate" in operand and "value" in operand["immediate"] + ) if i_operand["class"] == "immediate" and i_operand["imd"] == "int": - return ("value" in operand and operand.get("type", None) == "int") or \ - ("immediate" in operand and "value" in operand["immediate"] and - operand["immediate"].get("type", None) == "int") + return ("value" in operand and operand.get("type", None) == "int") or ( + "immediate" in operand + and "value" in operand["immediate"] + and operand["immediate"].get("type", None) == "int" + ) if i_operand["class"] == "immediate" and i_operand["imd"] == "float": - return ("float" in operand and operand.get("type", None) == "float") or \ - ("immediate" in operand and "float" in operand["immediate"] and - operand["immediate"].get("type", None) == "float") + return ("float" in operand and operand.get("type", None) == "float") or ( + "immediate" in operand + and "float" in operand["immediate"] + and operand["immediate"].get("type", None) == "float" + ) if i_operand["class"] == "immediate" and i_operand["imd"] == "double": - return ("double" in operand and operand.get("type", None) == "double") or \ - ("immediate" in operand and "double" in operand["immediate"] and - operand["immediate"].get("type", None) == "double") + return ("double" in operand and operand.get("type", None) == "double") or ( + "immediate" in operand + and "double" in operand["immediate"] + and operand["immediate"].get("type", None) == "double" + ) # identifier if "identifier" in operand or ( "immediate" in operand and "identifier" in operand["immediate"] @@ -577,7 +600,10 @@ class MachineModel(object): def _compare_db_entries(self, operand_1, operand_2): """Check if operand types in DB format (i.e., not parsed) match.""" operand_attributes = list( - filter(lambda x: True if x != "source" and x != "destination" else False, operand_1) + filter( + lambda x: True if x != "source" and x != "destination" else False, + operand_1, + ) ) for key in operand_attributes: try: diff --git a/osaca/semantics/isa_semantics.py b/osaca/semantics/isa_semantics.py index 5889eb3..b792de9 100755 --- a/osaca/semantics/isa_semantics.py +++ b/osaca/semantics/isa_semantics.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 from itertools import chain -from copy import deepcopy from osaca import utils from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT @@ -100,53 +99,68 @@ class ISASemantics(object): # post-process pre- and post-indexing for aarch64 memory operands if self._isa == "aarch64": for operand in [op for op in op_dict["source"] if "memory" in op]: - post_indexed = ("post_indexed" in operand["memory"] and - operand["memory"]["post_indexed"]) - pre_indexed = ("pre_indexed" in operand["memory"] and - operand["memory"]["pre_indexed"]) + post_indexed = ( + "post_indexed" in operand["memory"] and operand["memory"]["post_indexed"] + ) + pre_indexed = ( + "pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"] + ) if post_indexed or pre_indexed: op_dict["src_dst"].append( - AttrDict.convert_dict({ - "register": operand["memory"]["base"], - "pre_indexed": pre_indexed, - "post_indexed": post_indexed}) + AttrDict.convert_dict( + { + "register": operand["memory"]["base"], + "pre_indexed": pre_indexed, + "post_indexed": post_indexed, + } + ) ) for operand in [op for op in op_dict["destination"] if "memory" in op]: - post_indexed = ("post_indexed" in operand["memory"] and - operand["memory"]["post_indexed"]) - pre_indexed = ("pre_indexed" in operand["memory"] and - operand["memory"]["pre_indexed"]) + post_indexed = ( + "post_indexed" in operand["memory"] and operand["memory"]["post_indexed"] + ) + pre_indexed = ( + "pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"] + ) if post_indexed or pre_indexed: op_dict["src_dst"].append( - AttrDict.convert_dict({ - "register": operand["memory"]["base"], - "pre_indexed": pre_indexed, - "post_indexed": post_indexed}) + AttrDict.convert_dict( + { + "register": operand["memory"]["base"], + "pre_indexed": pre_indexed, + "post_indexed": post_indexed, + } + ) ) - + # store operand list in dict and reassign operand key/value pair instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict) # assign LD/ST flags - instruction_form["flags"] = instruction_form["flags"] if "flags" in instruction_form else [] + instruction_form["flags"] = ( + instruction_form["flags"] if "flags" in instruction_form else [] + ) if self._has_load(instruction_form): instruction_form["flags"] += [INSTR_FLAGS.HAS_LD] if self._has_store(instruction_form): instruction_form["flags"] += [INSTR_FLAGS.HAS_ST] - def get_reg_changes(self, instruction_form, only_postindexed=False): """ Returns register changes, as dict, for insruction_form, based on operation defined in isa. - + Empty dict if no changes of registers occured. None for registers with unknown changes. If only_postindexed is True, only considers changes due to post_indexed memory references. """ - if instruction_form.get('instruction') is None: + if instruction_form.get("instruction") is None: return {} - dest_reg_names = [op.register.get('prefix', '') + op.register.name - for op in chain(instruction_form.semantic_operands.destination, - instruction_form.semantic_operands.src_dst) - if 'register' in op] + dest_reg_names = [ + op.register.get("prefix", "") + op.register.name + for op in chain( + instruction_form.semantic_operands.destination, + instruction_form.semantic_operands.src_dst, + ) + if "register" in op + ] isa_data = self._isa_model.get_instruction( instruction_form["instruction"], instruction_form["operands"] ) @@ -162,50 +176,50 @@ class ISASemantics(object): if only_postindexed: for o in instruction_form.operands: - if 'post_indexed' in o.get('memory', {}): - base_name = o.memory.base.get('prefix', '') + o.memory.base.name - return {base_name: { - 'name': o.memory.base.get('prefix', '') + o.memory.base.name, - 'value': o.memory.post_indexed.value - }} + if "post_indexed" in o.get("memory", {}): + base_name = o.memory.base.get("prefix", "") + o.memory.base.name + return { + base_name: { + "name": o.memory.base.get("prefix", "") + o.memory.base.name, + "value": o.memory.post_indexed.value, + } + } return {} reg_operand_names = {} # e.g., {'rax': 'op1'} operand_state = {} # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged for o in instruction_form.operands: - if 'pre_indexed' in o.get('memory', {}): + if "pre_indexed" in o.get("memory", {}): # Assuming no isa_data.operation if isa_data.get("operation", None) is not None: raise ValueError( "ISA information for pre-indexed instruction {!r} has operation set." - "This is currently not supprted.".format(instruction_form.line)) - base_name = o.memory.base.get('prefix', '') + o.memory.base.name - reg_operand_names = {base_name: 'op1'} - operand_state = {'op1': { - 'name': base_name, - 'value': o.memory.offset.value - }} + "This is currently not supprted.".format(instruction_form.line) + ) + base_name = o.memory.base.get("prefix", "") + o.memory.base.name + reg_operand_names = {base_name: "op1"} + operand_state = {"op1": {"name": base_name, "value": o.memory.offset.value}} - if isa_data is not None and 'operation' in isa_data: + if isa_data is not None and "operation" in isa_data: for i, o in enumerate(instruction_form.operands): operand_name = "op{}".format(i + 1) if "register" in o: - o_reg_name = o["register"].get('prefix', '') + o["register"]["name"] + o_reg_name = o["register"].get("prefix", "") + o["register"]["name"] reg_operand_names[o_reg_name] = operand_name - operand_state[operand_name] = { - 'name': o_reg_name, - 'value': 0} + operand_state[operand_name] = {"name": o_reg_name, "value": 0} elif "immediate" in o: - operand_state[operand_name] = {'value': o["immediate"]["value"]} + operand_state[operand_name] = {"value": o["immediate"]["value"]} elif "memory" in o: # TODO lea needs some thinking about pass - operand_changes = exec(isa_data['operation'], {}, operand_state) + exec(isa_data["operation"], {}, operand_state) - change_dict = {reg_name: operand_state.get(reg_operand_names.get(reg_name)) - for reg_name in dest_reg_names} + change_dict = { + reg_name: operand_state.get(reg_operand_names.get(reg_name)) + for reg_name in dest_reg_names + } return change_dict def _apply_found_ISA_data(self, isa_data, operands): @@ -231,8 +245,10 @@ class ISASemantics(object): if "hidden_operands" in isa_data: op_dict["destination"] += [ AttrDict.convert_dict( - {hop["class"]: {k: hop[k] for k in ["class", "source", "destination"]}}) - for hop in isa_data["hidden_operands"]] + {hop["class"]: {k: hop[k] for k in ["class", "source", "destination"]}} + ) + for hop in isa_data["hidden_operands"] + ] return op_dict for i, op in enumerate(isa_data["operands"]): diff --git a/osaca/semantics/kernel_dg.py b/osaca/semantics/kernel_dg.py index b3a8af6..e95034e 100755 --- a/osaca/semantics/kernel_dg.py +++ b/osaca/semantics/kernel_dg.py @@ -16,7 +16,12 @@ class KernelDG(nx.DiGraph): INSTRUCTION_THRESHOLD = 50 def __init__( - self, parsed_kernel, parser, hw_model: MachineModel, semantics: ArchSemantics, timeout=10 + self, + parsed_kernel, + parser, + hw_model: MachineModel, + semantics: ArchSemantics, + timeout=10, ): self.timed_out = False self.kernel = parsed_kernel @@ -73,7 +78,7 @@ class KernelDG(nx.DiGraph): else instruction_form["latency_wo_load"] ) if "storeload_dep" in dep_flags: - edge_weight += self.model.get('store_to_load_forward_latency', 0) + edge_weight += self.model.get("store_to_load_forward_latency", 0) dg.add_edge( instruction_form["line_number"], dep["line_number"], @@ -98,7 +103,7 @@ class KernelDG(nx.DiGraph): tmp_kernel = [] + kernel for orig_iform in kernel: temp_iform = copy.copy(orig_iform) - temp_iform['line_number'] += offset + temp_iform["line_number"] += offset tmp_kernel.append(temp_iform) # get dependency graph dg = self.create_DG(tmp_kernel) @@ -118,12 +123,15 @@ class KernelDG(nx.DiGraph): with Manager() as manager: all_paths = manager.list() processes = [ - Process(target=self._extend_path, args=(all_paths, instr_section, dg, offset)) + Process( + target=self._extend_path, + args=(all_paths, instr_section, dg, offset), + ) for instr_section in instrs ] for p in processes: p.start() - if (timeout == -1): + if timeout == -1: # no timeout for p in processes: p.join() @@ -162,7 +170,7 @@ class KernelDG(nx.DiGraph): # extend path by edge bound latencies (e.g., store-to-load latency) lat_path = [] for s, d in nx.utils.pairwise(path): - edge_lat = dg.edges[s, d]['latency'] + edge_lat = dg.edges[s, d]["latency"] # map source node back to original line numbers if s >= offset: s -= offset @@ -310,17 +318,17 @@ class KernelDG(nx.DiGraph): if change is None or reg_state.get(reg, {}) is None: reg_state[reg] = None else: - reg_state.setdefault(reg, {'name': reg, 'value': 0}) - if change['name'] != reg: + reg_state.setdefault(reg, {"name": reg, "value": 0}) + if change["name"] != reg: # renaming occured, ovrwrite value with up-to-now change of source register - reg_state[reg]['name'] = change['name'] - src_reg_state = reg_state.get(change['name'], {'value': 0}) + reg_state[reg]["name"] = change["name"] + src_reg_state = reg_state.get(change["name"], {"value": 0}) if src_reg_state is None: # original register's state was changed beyond reconstruction reg_state[reg] = None continue - reg_state[reg]['value'] = src_reg_state['value'] - reg_state[reg]['value'] += change['value'] + reg_state[reg]["value"] = src_reg_state["value"] + reg_state[reg]["value"] += change["value"] return reg_state def get_dependent_instruction_forms(self, instr_form=None, line_number=None): @@ -340,7 +348,8 @@ class KernelDG(nx.DiGraph): if instruction_form.semantic_operands is None: return is_read for src in chain( - instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst + instruction_form.semantic_operands.source, + instruction_form.semantic_operands.src_dst, ): if "register" in src: is_read = self.parser.is_reg_dependend_of(register, src.register) or is_read @@ -372,7 +381,8 @@ class KernelDG(nx.DiGraph): if instruction_form.semantic_operands is None: return False for src in chain( - instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst + instruction_form.semantic_operands.source, + instruction_form.semantic_operands.src_dst, ): # Here we check for mem dependecies only if "memory" not in src: @@ -387,23 +397,23 @@ class KernelDG(nx.DiGraph): addr_change -= mem.offset.value if mem.base and src.base: base_change = register_changes.get( - src.base.get('prefix', '') + src.base.name, - {'name': src.base.get('prefix', '') + src.base.name, 'value': 0}, + src.base.get("prefix", "") + src.base.name, + {"name": src.base.get("prefix", "") + src.base.name, "value": 0}, ) if base_change is None: # Unknown change occurred continue - if mem.base.get('prefix', '') + mem.base['name'] != base_change['name']: + if mem.base.get("prefix", "") + mem.base["name"] != base_change["name"]: # base registers do not match continue - addr_change += base_change['value'] + addr_change += base_change["value"] elif mem.base or src.base: # base registers do not match continue if mem.index and src.index: index_change = register_changes.get( - src.index.get('prefix', '') + src.index.name, - {'name': src.index.get('prefix', '') + src.index.name, 'value': 0}, + src.index.get("prefix", "") + src.index.name, + {"name": src.index.get("prefix", "") + src.index.name, "value": 0}, ) if index_change is None: # Unknown change occurred @@ -411,10 +421,10 @@ class KernelDG(nx.DiGraph): if mem.scale != src.scale: # scale factors do not match continue - if mem.index.get('prefix', '') + mem.index['name'] != index_change['name']: + if mem.index.get("prefix", "") + mem.index["name"] != index_change["name"]: # index registers do not match continue - addr_change += index_change['value'] * src.scale + addr_change += index_change["value"] * src.scale elif mem.index or src.index: # index registers do not match continue @@ -443,7 +453,8 @@ class KernelDG(nx.DiGraph): ) # Check also for possible pre- or post-indexing in memory addresses for src in chain( - instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst + instruction_form.semantic_operands.source, + instruction_form.semantic_operands.src_dst, ): if "memory" in src: if "pre_indexed" in src.memory or "post_indexed" in src.memory: diff --git a/osaca/utils.py b/osaca/utils.py index c235534..ecd2eab 100644 --- a/osaca/utils.py +++ b/osaca/utils.py @@ -1,7 +1,10 @@ #!/usr/bin/env python3 import os.path -DATA_DIRS = [os.path.expanduser("~/.osaca/data"), os.path.join(os.path.dirname(__file__), "data")] +DATA_DIRS = [ + os.path.expanduser("~/.osaca/data"), + os.path.join(os.path.dirname(__file__), "data"), +] CACHE_DIR = os.path.expanduser("~/.osaca/cache") diff --git a/setup.py b/setup.py index e26528b..df74dc6 100755 --- a/setup.py +++ b/setup.py @@ -18,7 +18,8 @@ here = os.path.abspath(os.path.dirname(__file__)) # Stolen from pip def read(*names, **kwargs): with io.open( - os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8") + os.path.join(os.path.dirname(__file__), *names), + encoding=kwargs.get("encoding", "utf8"), ) as fp: return fp.read() @@ -38,13 +39,20 @@ def _run_build_cache(dir): # This is run inside the install staging directory (that had no .pyc files) # We don't want to generate any. # https://github.com/eliben/pycparser/pull/135 - check_call([sys.executable, "-B", "_build_cache.py"], cwd=os.path.join(dir, "osaca", "data")) + check_call( + [sys.executable, "-B", "_build_cache.py"], + cwd=os.path.join(dir, "osaca", "data"), + ) class install(_install): def run(self): _install.run(self) - self.execute(_run_build_cache, (self.install_lib,), msg="Build ISA and architecture cache") + self.execute( + _run_build_cache, + (self.install_lib,), + msg="Build ISA and architecture cache", + ) class sdist(_sdist): diff --git a/tests/test_cli.py b/tests/test_cli.py index 10a449c..8ab1f41 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -33,7 +33,13 @@ class TestCLI(unittest.TestCase): with self.assertRaises(ValueError): osaca.check_arguments(args, parser) args = parser.parse_args( - ["--arch", "csx", "--import", "WRONG_BENCH", self._find_file("gs", "csx", "gcc")] + [ + "--arch", + "csx", + "--import", + "WRONG_BENCH", + self._find_file("gs", "csx", "gcc"), + ] ) with self.assertRaises(ValueError): osaca.check_arguments(args, parser) @@ -65,7 +71,13 @@ class TestCLI(unittest.TestCase): def test_check_db(self): parser = osaca.create_parser(parser=ErrorRaisingArgumentParser()) args = parser.parse_args( - ["--arch", "tx2", "--db-check", "--verbose", self._find_test_file("triad_x86_iaca.s")] + [ + "--arch", + "tx2", + "--db-check", + "--verbose", + self._find_test_file("triad_x86_iaca.s"), + ] ) output = StringIO() osaca.run(args, output_file=output) @@ -134,7 +146,13 @@ class TestCLI(unittest.TestCase): for c in comps[a]: with self.subTest(kernel=k, arch=a, comp=c): args = parser.parse_args( - ["--arch", a, self._find_file(k, a, c), "--export-graph", "/dev/null"] + [ + "--arch", + a, + self._find_file(k, a, c), + "--export-graph", + "/dev/null", + ] ) output = StringIO() osaca.run(args, output_file=output) @@ -204,17 +222,13 @@ class TestCLI(unittest.TestCase): ) output = StringIO() osaca.run(args, output_file=output) - self.assertTrue( - output.getvalue().count("WARNING: LCD analysis timed out") == 1 - ) + self.assertTrue(output.getvalue().count("WARNING: LCD analysis timed out") == 1) args = parser.parse_args( ["--ignore-unknown", "--lcd-timeout", "-1", self._find_test_file(kernel)] ) output = StringIO() osaca.run(args, output_file=output) - self.assertTrue( - output.getvalue().count("WARNING: LCD analysis timed out") == 0 - ) + self.assertTrue(output.getvalue().count("WARNING: LCD analysis timed out") == 0) def test_lines_arg(self): # Run tests with --lines option @@ -227,12 +241,24 @@ class TestCLI(unittest.TestCase): args = [] args.append( parser.parse_args( - ["--lines", "146-154", "--arch", "csx", self._find_test_file(kernel_x86)] + [ + "--lines", + "146-154", + "--arch", + "csx", + self._find_test_file(kernel_x86), + ] ) ) args.append( parser.parse_args( - ["--lines", "146:154", "--arch", "csx", self._find_test_file(kernel_x86)] + [ + "--lines", + "146:154", + "--arch", + "csx", + self._find_test_file(kernel_x86), + ] ) ) args.append( diff --git a/tests/test_db_interface.py b/tests/test_db_interface.py index 7678ad0..a58a7a3 100755 --- a/tests/test_db_interface.py +++ b/tests/test_db_interface.py @@ -17,7 +17,13 @@ class TestDBInterface(unittest.TestCase): sample_entry = { "name": "DoItRightAndDoItFast", "operands": [ - {"class": "memory", "offset": "imd", "base": "gpr", "index": "gpr", "scale": 8}, + { + "class": "memory", + "offset": "imd", + "base": "gpr", + "index": "gpr", + "scale": 8, + }, {"class": "register", "name": "xmm"}, ], "throughput": 1.25, @@ -35,7 +41,12 @@ class TestDBInterface(unittest.TestCase): del self.entry_tx2["operands"][1]["name"] self.entry_tx2["operands"][1]["prefix"] = "x" # self.entry_zen1['port_pressure'] = [1, 1, 1, 1, 0, 1, 0, 0, 0, 0.5, 1, 0.5, 1] - self.entry_zen1["port_pressure"] = [[4, "0123"], [1, "4"], [1, "89"], [2, ["8D", "9D"]]] + self.entry_zen1["port_pressure"] = [ + [4, "0123"], + [1, "4"], + [1, "89"], + [2, ["8D", "9D"]], + ] ########### # Tests diff --git a/tests/test_files/kernel_x86_memdep.s b/tests/test_files/kernel_x86_memdep.s index bb9789e..cb1c1fe 100644 --- a/tests/test_files/kernel_x86_memdep.s +++ b/tests/test_files/kernel_x86_memdep.s @@ -1,15 +1,15 @@ # OSACA-BEGIN .L4: - vmovsd %xmm0, 8(%rax) - addq $8, %rax - vmovsd %xmm0, 8(%rax,%rcx,8) - vaddsd (%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == (%rax+8) - subq $-8, %rax - vaddsd -8(%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == -8(%rax+16) - dec %rcx - vaddsd 8(%rax,%rcx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) - movq %rcx, %rdx - vaddsd 8(%rax,%rdx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) + vmovsd %xmm0, 8(%rax) # line 3 <----------------------------------+ + addq $8, %rax # | + vmovsd %xmm0, 8(%rax,%rcx,8) # line 5 <-----------------------------------------------+ + vaddsd (%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == (%rax+8) ---+ | + subq $-8, %rax # | | + vaddsd -8(%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == -8(%rax+16) ---+ | + dec %rcx # | + vaddsd 8(%rax,%rcx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) --+ + movq %rcx, %rdx # | + vaddsd 8(%rax,%rdx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) --+ vmulsd %xmm1, %xmm0, %xmm0 addq $8, %rax cmpq %rsi, %rax diff --git a/tests/test_frontend.py b/tests/test_frontend.py index 3ab0441..30c7a46 100755 --- a/tests/test_frontend.py +++ b/tests/test_frontend.py @@ -34,7 +34,8 @@ class TestFrontend(unittest.TestCase): ) self.machine_model_tx2 = MachineModel(arch="tx2") self.semantics_csx = ArchSemantics( - self.machine_model_csx, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/x86.yml") + self.machine_model_csx, + path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/x86.yml"), ) self.semantics_tx2 = ArchSemantics( self.machine_model_tx2, @@ -71,7 +72,11 @@ class TestFrontend(unittest.TestCase): def test_frontend_AArch64(self): dg = KernelDG( - self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2, self.semantics_tx2) + self.kernel_AArch64, + self.parser_AArch64, + self.machine_model_tx2, + self.semantics_tx2, + ) fe = Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "tx2.yml")) fe.full_analysis(self.kernel_AArch64, dg, verbose=True) # TODO compare output with checked string diff --git a/tests/test_marker_utils.py b/tests/test_marker_utils.py index 5d38324..d843ec7 100755 --- a/tests/test_marker_utils.py +++ b/tests/test_marker_utils.py @@ -109,7 +109,8 @@ class TestMarkerUtils(unittest.TestCase): kernel_start = len( list( filter( - None, (prologue + mov_start_var + bytes_var_1).split("\n") + None, + (prologue + mov_start_var + bytes_var_1).split("\n"), ) ) ) @@ -142,7 +143,12 @@ class TestMarkerUtils(unittest.TestCase): epilogue = ".LE9:\t\t#12.2\n" "call dummy\n" kernel_length = len(list(filter(None, kernel.split("\n")))) - bytes_variations = [bytes_1_line, bytes_2_lines_1, bytes_2_lines_2, bytes_3_lines] + bytes_variations = [ + bytes_1_line, + bytes_2_lines_1, + bytes_2_lines_2, + bytes_3_lines, + ] mov_start_variations = [mov_start_1, mov_start_2] mov_end_variations = [mov_end_1, mov_end_2] # actual tests @@ -171,7 +177,8 @@ class TestMarkerUtils(unittest.TestCase): kernel_start = len( list( filter( - None, (prologue + mov_start_var + bytes_var_1).split("\n") + None, + (prologue + mov_start_var + bytes_var_1).split("\n"), ) ) ) diff --git a/tests/test_parser_AArch64.py b/tests/test_parser_AArch64.py index 9511574..fdcf7f1 100755 --- a/tests/test_parser_AArch64.py +++ b/tests/test_parser_AArch64.py @@ -24,7 +24,9 @@ class TestParserAArch64(unittest.TestCase): def test_comment_parser(self): self.assertEqual(self._get_comment(self.parser, "// some comments"), "some comments") - self.assertEqual(self._get_comment(self.parser, "\t\t//AA BB CC \t end \t"), "AA BB CC end") + self.assertEqual( + self._get_comment(self.parser, "\t\t//AA BB CC \t end \t"), "AA BB CC end" + ) self.assertEqual( self._get_comment(self.parser, "\t//// comment //// comment"), "// comment //// comment", @@ -36,7 +38,8 @@ class TestParserAArch64(unittest.TestCase): self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3") self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t//label1").name, ".L1") self.assertEqual( - " ".join(self._get_label(self.parser, ".L1:\t\t\t//label1").comment), "label1" + " ".join(self._get_label(self.parser, ".L1:\t\t\t//label1").comment), + "label1", ) with self.assertRaises(ParseException): self._get_label(self.parser, "\t.cfi_startproc") @@ -316,7 +319,8 @@ class TestParserAArch64(unittest.TestCase): value1 = self.parser.normalize_imd(imd_decimal_1) self.assertEqual(value1, self.parser.normalize_imd(imd_hex_1)) self.assertEqual( - self.parser.normalize_imd(imd_decimal_2), self.parser.normalize_imd(imd_hex_2) + self.parser.normalize_imd(imd_decimal_2), + self.parser.normalize_imd(imd_hex_2), ) self.assertEqual(self.parser.normalize_imd(imd_float_11), value1) self.assertEqual(self.parser.normalize_imd(imd_float_12), value1) diff --git a/tests/test_parser_x86att.py b/tests/test_parser_x86att.py index 57b2e71..1b47849 100755 --- a/tests/test_parser_x86att.py +++ b/tests/test_parser_x86att.py @@ -26,7 +26,8 @@ class TestParserX86ATT(unittest.TestCase): self.assertEqual(self._get_comment(self.parser, "# some comments"), "some comments") self.assertEqual(self._get_comment(self.parser, "\t\t#AA BB CC \t end \t"), "AA BB CC end") self.assertEqual( - self._get_comment(self.parser, "\t## comment ## comment"), "# comment ## comment" + self._get_comment(self.parser, "\t## comment ## comment"), + "# comment ## comment", ) def test_label_parser(self): @@ -35,7 +36,8 @@ class TestParserX86ATT(unittest.TestCase): self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3") self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t#label1").name, ".L1") self.assertEqual( - " ".join(self._get_label(self.parser, ".L1:\t\t\t#label1").comment), "label1" + " ".join(self._get_label(self.parser, ".L1:\t\t\t#label1").comment), + "label1", ) with self.assertRaises(ParseException): self._get_label(self.parser, "\t.cfi_startproc") @@ -47,7 +49,8 @@ class TestParserX86ATT(unittest.TestCase): self.assertEqual(len(self._get_directive(self.parser, "\t.align\t16,0x90").parameters), 2) self.assertEqual(len(self._get_directive(self.parser, ".text").parameters), 0) self.assertEqual( - len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters), 2 + len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters), + 2, ) self.assertEqual( self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters[1], @@ -62,7 +65,12 @@ class TestParserX86ATT(unittest.TestCase): self.parser, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support", ).parameters, - ["__TEXT", "__eh_frame", "coalesced", "no_toc+strip_static_syms+live_support"], + [ + "__TEXT", + "__eh_frame", + "coalesced", + "no_toc+strip_static_syms+live_support", + ], ) self.assertEqual( self._get_directive( @@ -74,7 +82,9 @@ class TestParserX86ATT(unittest.TestCase): self._get_directive(self.parser, "\t.align\t16,0x90").parameters[1], "0x90" ) self.assertEqual( - self._get_directive(self.parser, " .byte 100,103,144 #IACA START")["name"], + self._get_directive(self.parser, " .byte 100,103,144 #IACA START")[ + "name" + ], "byte", ) self.assertEqual( @@ -242,10 +252,12 @@ class TestParserX86ATT(unittest.TestCase): imd_decimal_2 = {"value": "8"} imd_hex_2 = {"value": "8"} self.assertEqual( - self.parser.normalize_imd(imd_decimal_1), self.parser.normalize_imd(imd_hex_1) + self.parser.normalize_imd(imd_decimal_1), + self.parser.normalize_imd(imd_hex_1), ) self.assertEqual( - self.parser.normalize_imd(imd_decimal_2), self.parser.normalize_imd(imd_hex_2) + self.parser.normalize_imd(imd_decimal_2), + self.parser.normalize_imd(imd_hex_2), ) def test_reg_dependency(self): diff --git a/tests/test_semantics.py b/tests/test_semantics.py index 46c58d6..54e851f 100755 --- a/tests/test_semantics.py +++ b/tests/test_semantics.py @@ -11,8 +11,14 @@ from copy import deepcopy import networkx as nx from osaca.osaca import get_unmatched_instruction_ratio from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT -from osaca.semantics import (INSTR_FLAGS, ArchSemantics, ISASemantics, - KernelDG, MachineModel, reduce_to_section) +from osaca.semantics import ( + INSTR_FLAGS, + ArchSemantics, + ISASemantics, + KernelDG, + MachineModel, + reduce_to_section, +) class TestSemanticTools(unittest.TestCase): @@ -66,7 +72,8 @@ class TestSemanticTools(unittest.TestCase): ) cls.semantics_x86 = ISASemantics("x86") cls.semantics_csx = ArchSemantics( - cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml") + cls.machine_model_csx, + path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml"), ) cls.semantics_aarch64 = ISASemantics("aarch64") cls.semantics_tx2 = ArchSemantics( @@ -173,7 +180,12 @@ class TestSemanticTools(unittest.TestCase): ) self.assertEqual( test_mm_x86.get_store_throughput( - {"base": {"prefix": "NOT_IN_DB"}, "offset": None, "index": "NOT_NONE", "scale": 1} + { + "base": {"prefix": "NOT_IN_DB"}, + "offset": None, + "index": "NOT_NONE", + "scale": 1, + } ), [[1, "23"], [1, "4"]], ) @@ -185,7 +197,12 @@ class TestSemanticTools(unittest.TestCase): ) self.assertEqual( test_mm_arm.get_store_throughput( - {"base": {"prefix": "NOT_IN_DB"}, "offset": None, "index": None, "scale": 1} + { + "base": {"prefix": "NOT_IN_DB"}, + "offset": None, + "index": None, + "scale": 1, + } ), [[1, "34"], [1, "5"]], ) @@ -310,7 +327,10 @@ class TestSemanticTools(unittest.TestCase): def test_memdependency_x86(self): dg = KernelDG( - self.kernel_x86_memdep, self.parser_x86, self.machine_model_csx, self.semantics_csx + self.kernel_x86_memdep, + self.parser_x86, + self.machine_model_csx, + self.semantics_csx, ) self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg)) self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {6, 8}) @@ -322,7 +342,10 @@ class TestSemanticTools(unittest.TestCase): def test_kernelDG_AArch64(self): dg = KernelDG( - self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2, self.semantics_tx2 + self.kernel_AArch64, + self.parser_AArch64, + self.machine_model_tx2, + self.semantics_tx2, ) self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg)) self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {7, 8}) @@ -400,7 +423,7 @@ class TestSemanticTools(unittest.TestCase): # based on line 6 self.assertEqual(lc_deps[6]["latency"], 28.0) self.assertEqual( - [(iform.line_number, lat) for iform, lat in lc_deps[6]['dependencies']], + [(iform.line_number, lat) for iform, lat in lc_deps[6]["dependencies"]], [(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)], ) @@ -423,7 +446,8 @@ class TestSemanticTools(unittest.TestCase): # w/o flag dependencies: ID 5 w/ len=1 # TODO discuss self.assertEqual( - lc_deps[lcd_id2]["root"], dg.dg.nodes(data=True)[lcd_id2]["instruction_form"] + lc_deps[lcd_id2]["root"], + dg.dg.nodes(data=True)[lcd_id2]["instruction_form"], ) self.assertEqual(len(lc_deps[lcd_id2]["dependencies"]), 1) self.assertEqual( @@ -438,7 +462,7 @@ class TestSemanticTools(unittest.TestCase): self.parser_x86, self.machine_model_csx, self.semantics_x86, - timeout=10 + timeout=10, ) end_time = time.perf_counter() time_10 = end_time - start_time @@ -448,7 +472,7 @@ class TestSemanticTools(unittest.TestCase): self.parser_x86, self.machine_model_csx, self.semantics_x86, - timeout=2 + timeout=2, ) end_time = time.perf_counter() time_2 = end_time - start_time diff --git a/validation/build_and_run.py b/validation/build_and_run.py index 313b369..6e7775b 100755 --- a/validation/build_and_run.py +++ b/validation/build_and_run.py @@ -1,33 +1,26 @@ #!/usr/bin/env python3 -import sys import os -import re -from subprocess import check_call, check_output, CalledProcessError, STDOUT -from itertools import chain -import shutil -from functools import lru_cache -from glob import glob -from pathlib import Path -from pprint import pprint -import socket import pickle +import re +import shutil +import socket +import sys from copy import deepcopy +from glob import glob +from itertools import chain +from pathlib import Path +from subprocess import STDOUT, CalledProcessError, check_call, check_output import requests -import numpy as np -import pandas as pd - -from osaca.osaca import reduce_to_section - -from kerncraft.models import benchmark from kerncraft.incore_model import ( - parse_asm, asm_instrumentation, iaca_analyse_instrumented_binary, + llvm_mca_analyse_instrumented_assembly, osaca_analyse_instrumented_assembly, - llvm_mca_analyse_instrumented_assembly + parse_asm, ) - +from kerncraft.models import benchmark +from osaca.osaca import reduce_to_section # Scaling of inner dimension for 1D, 2D and 3D kernels # * consider kernels to be compiled with multiple compilers and different options @@ -39,37 +32,50 @@ from kerncraft.incore_model import ( # Collect inner loop body assembly for each kernel/compiler/options combination # * analyze with OSACA, IACA and LLVM-MCA -hosts_arch_map = {r"skylakesp2": "SKX", - r"ivyep1": "IVB", - r"naples1": "ZEN", - r"rome1": "ZEN2", - r"warmup": "TX2", - r"qp4-node-[0-9]+": "A64FX"} +hosts_arch_map = { + r"skylakesp2": "SKX", + r"ivyep1": "IVB", + r"naples1": "ZEN", + r"rome1": "ZEN2", + r"warmup": "TX2", + r"qp4-node-[0-9]+": "A64FX", +} arch_info = { - 'SKX': { - 'prepare': ['likwid-setFrequencies -f 2.4 -t 0'.split()], - 'IACA': 'SKX', - 'OSACA': 'SKX', - 'LLVM-MCA': '-mcpu=skylake-avx512', - 'Ithemal': 'skl', - 'isa': 'x86', - 'perfevents': [], + "SKX": { + "prepare": ["likwid-setFrequencies -f 2.4 -t 0".split()], + "IACA": "SKX", + "OSACA": "SKX", + "LLVM-MCA": "-mcpu=skylake-avx512", + "Ithemal": "skl", + "isa": "x86", + "perfevents": [], "cflags": { - 'icc': { - "Ofast": "-Ofast -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(), - "O3": "-O3 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(), - "O2": "-O2 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(), - "O1": "-O1 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(), + "icc": { + "Ofast": ( + "-Ofast -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline " + "-ffreestanding -falign-loops" + ).split(), + "O3": ( + "-O3 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline " + "-ffreestanding -falign-loops" + ).split(), + "O2": ( + "-O2 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline " + "-ffreestanding -falign-loops" + ).split(), + "O1": ( + "-O1 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline " + "-ffreestanding -falign-loops" + ).split(), }, - 'clang': { + "clang": { "Ofast": "-Ofast -march=skylake-avx512 -ffreestanding".split(), "O3": "-O3 -march=skylake-avx512 -ffreestanding".split(), "O2": "-O2 -march=skylake-avx512 -ffreestanding".split(), "O1": "-O1 -march=skylake-avx512 -ffreestanding".split(), - }, - 'gcc': { + "gcc": { "Ofast": "-Ofast -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(), "O3": "-O3 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(), "O2": "-O2 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(), @@ -77,17 +83,19 @@ arch_info = { }, }, }, - 'IVB': { - 'prepare': ['likwid-setFrequencies -f 3.0 -t 0'.split()], - 'IACA': 'IVB', - 'OSACA': 'IVB', - 'LLVM-MCA': '-mcpu=ivybridge', - 'Ithemal': 'ivb', - 'isa': 'x86', - 'perfevents': [], + "IVB": { + "prepare": ["likwid-setFrequencies -f 3.0 -t 0".split()], + "IACA": "IVB", + "OSACA": "IVB", + "LLVM-MCA": "-mcpu=ivybridge", + "Ithemal": "ivb", + "isa": "x86", + "perfevents": [], "cflags": { "icc": { - "Ofast": "-Ofast -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), + "Ofast": ( + "-Ofast -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops" + ).split(), "O3": "-O3 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "O2": "-O2 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "O1": "-O1 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), @@ -106,14 +114,14 @@ arch_info = { }, }, }, - 'ZEN': { - 'prepare': ['likwid-setFrequencies -f 2.3 -t 0'.split()], - 'IACA': None, - 'OSACA': 'ZEN1', - 'LLVM-MCA': '-mcpu=znver1', - 'Ithemal': None, - 'isa': 'x86', - 'perfevents': [], + "ZEN": { + "prepare": ["likwid-setFrequencies -f 2.3 -t 0".split()], + "IACA": None, + "OSACA": "ZEN1", + "LLVM-MCA": "-mcpu=znver1", + "Ithemal": None, + "isa": "x86", + "perfevents": [], "cflags": { "clang": { "Ofast": "-Ofast -march=znver1 -ffreestanding".split(), @@ -128,21 +136,23 @@ arch_info = { "O1": "-O1 -march=znver1 -ffreestanding -falign-loops=16".split(), }, "icc": { - "Ofast": "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), + "Ofast": ( + "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops" + ).split(), "O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), }, }, }, - 'ZEN2': { - 'prepare': ['likwid-setFrequencies -f 2.35 -t 0'.split()], - 'IACA': None, - 'OSACA': 'ZEN2', - 'LLVM-MCA': '-mcpu=znver2', - 'Ithemal': None, - 'isa': 'x86', - 'perfevents': [], + "ZEN2": { + "prepare": ["likwid-setFrequencies -f 2.35 -t 0".split()], + "IACA": None, + "OSACA": "ZEN2", + "LLVM-MCA": "-mcpu=znver2", + "Ithemal": None, + "isa": "x86", + "perfevents": [], "cflags": { "clang": { "Ofast": "-Ofast -march=znver2 -ffreestanding".split(), @@ -157,22 +167,24 @@ arch_info = { "O1": "-O1 -march=znver2 -ffreestanding -falign-loops=16".split(), }, "icc": { - "Ofast": "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), + "Ofast": ( + "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops" + ).split(), "O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), "O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(), }, }, }, - 'TX2': { - 'Clock [MHz]': 2200, # reading out via perf. counters is not supported - 'IACA': None, - 'OSACA': 'TX2', - 'assign_optimal_throughput': True, - 'LLVM-MCA': '-mcpu=thunderx2t99 -march=aarch64', - 'Ithemal': None, - 'isa': 'aarch64', - 'perfevents': [], + "TX2": { + "Clock [MHz]": 2200, # reading out via perf. counters is not supported + "IACA": None, + "OSACA": "TX2", + "assign_optimal_throughput": True, + "LLVM-MCA": "-mcpu=thunderx2t99 -march=aarch64", + "Ithemal": None, + "isa": "aarch64", + "perfevents": [], "cflags": { "clang": { "Ofast": "-Ofast -target aarch64-unknown-linux-gnu -ffreestanding".split(), @@ -188,16 +200,16 @@ arch_info = { }, }, }, - 'A64FX': { - 'Clock [MHz]': 1800, # reading out via perf. counters is not supported - 'L2_volume_metric': 'L1<->L2 data volume [GBytes]', - 'IACA': None, - 'OSACA': 'A64FX', - 'assign_optimal_throughput': False, - 'LLVM-MCA': '-mcpu=a64fx -march=aarch64', - 'Ithemal': None, - 'isa': 'aarch64', - 'perfevents': [], + "A64FX": { + "Clock [MHz]": 1800, # reading out via perf. counters is not supported + "L2_volume_metric": "L1<->L2 data volume [GBytes]", + "IACA": None, + "OSACA": "A64FX", + "assign_optimal_throughput": False, + "LLVM-MCA": "-mcpu=a64fx -march=aarch64", + "Ithemal": None, + "isa": "aarch64", + "perfevents": [], "cflags": { "gcc": { "Ofast": "-Ofast -msve-vector-bits=512 -march=armv8.2-a+sve -ffreestanding".split(), @@ -211,7 +223,7 @@ arch_info = { "O2": "-O2 -target aarch64-unknown-linux-gnu -ffreestanding".split(), "O1": "-O1 -target aarch64-unknown-linux-gnu -ffreestanding".split(), }, - } + }, }, } @@ -231,12 +243,13 @@ def get_kernels(kernels=None): if kernels is None: kernels = [] for f in glob("kernels/*.c"): - f = f.rsplit('.', 1)[0].split('/', 1)[1] + f = f.rsplit(".", 1)[0].split("/", 1)[1] if f == "dummy": continue kernels.append(f) return kernels + # Columns: # arch # kernel @@ -259,6 +272,7 @@ def get_kernels(kernels=None): # allruns [list (length, repetitions, cy/it, L2 B/it)] # perfevents [dict event: counter/it] + def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mca=True): arch = get_current_arch() if arch is None: @@ -268,90 +282,132 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc islocal = True arches = [arch] ainfo = arch_info.get(arch) - if 'prepare' in ainfo: - for cmd in ainfo['prepare']: + if "prepare" in ainfo: + for cmd in ainfo["prepare"]: check_call(cmd) for arch in arches: ainfo = arch_info.get(arch) print(arch) data_path = Path(f"build/{arch}/data.pkl") if data_path.exists(): - with data_path.open('rb') as f: + with data_path.open("rb") as f: data = pickle.load(f) else: data = [] data_lastsaved = deepcopy(data) - for compiler, compiler_cflags in ainfo['cflags'].items(): + for compiler, compiler_cflags in ainfo["cflags"].items(): if not shutil.which(compiler) and islocal: print(compiler, "not found in path! Skipping...") continue for cflags_name, cflags in compiler_cflags.items(): for kernel in get_kernels(): - print(f"{kernel:<15} {arch:>5} {compiler:>5} {cflags_name:>6}", - end=": ", flush=True) - row = list([r for r in data - if r['arch'] == arch and r['kernel'] == kernel and - r['compiler'] == compiler and r['cflags_name'] == cflags_name]) + print( + f"{kernel:<15} {arch:>5} {compiler:>5} {cflags_name:>6}", + end=": ", + flush=True, + ) + row = list( + [ + r + for r in data + if r["arch"] == arch + and r["kernel"] == kernel + and r["compiler"] == compiler + and r["cflags_name"] == cflags_name + ] + ) if row: row = row[0] else: - orig_row = None row = { - 'arch': arch, - 'kernel': kernel, - 'compiler': compiler, - 'cflags_name': cflags_name, - 'element_size': 8, + "arch": arch, + "kernel": kernel, + "compiler": compiler, + "cflags_name": cflags_name, + "element_size": 8, } data.append(row) # Build print("build", end="", flush=True) asm_path, exec_path, overwrite = build_kernel( - kernel, arch, compiler, cflags, cflags_name, dontbuild=not islocal) + kernel, + arch, + compiler, + cflags, + cflags_name, + dontbuild=not islocal, + ) if overwrite: # clear all measurment information - row['best_length'] = None - row['best_runtime'] = None - row['L2_traffic'] = None - row['allruns'] = None - row['perfevents'] = None + row["best_length"] = None + row["best_runtime"] = None + row["L2_traffic"] = None + row["allruns"] = None + row["perfevents"] = None # Mark for IACA, OSACA and LLVM-MCA print("mark", end="", flush=True) try: - marked_asmfile, marked_objfile, row['pointer_increment'], overwrite = mark( - asm_path, compiler, cflags, isa=ainfo['isa'], overwrite=overwrite) - row['marking_error'] = None + ( + marked_asmfile, + marked_objfile, + row["pointer_increment"], + overwrite, + ) = mark( + asm_path, + compiler, + cflags, + isa=ainfo["isa"], + overwrite=overwrite, + ) + row["marking_error"] = None except ValueError as e: - row['marking_error'] = str(e) + row["marking_error"] = str(e) print(":", e) continue if overwrite: # clear all model generated information - for model in ['IACA', 'OSACA', 'LLVM-MCA', 'Ithemal']: - for k in ['ports', 'prediction', 'throughput', 'cp', 'lcd', 'raw']: - row[model+'_'+k] = None - - for model in ['IACA', 'OSACA', 'LLVM-MCA', 'Ithemal']: - for k in ['ports', 'prediction', 'throughput', 'cp', 'lcd', 'raw']: - if model+'_'+k not in row: - row[model+'_'+k] = None + for model in ["IACA", "OSACA", "LLVM-MCA", "Ithemal"]: + for k in [ + "ports", + "prediction", + "throughput", + "cp", + "lcd", + "raw", + ]: + row[model + "_" + k] = None + + for model in ["IACA", "OSACA", "LLVM-MCA", "Ithemal"]: + for k in [ + "ports", + "prediction", + "throughput", + "cp", + "lcd", + "raw", + ]: + if model + "_" + k not in row: + row[model + "_" + k] = None # Analyze with IACA, if requested and configured - if iaca and ainfo['IACA'] is not None: + if iaca and ainfo["IACA"] is not None: print("IACA", end="", flush=True) - if not row.get('IACA_ports'): - row['IACA_raw'] = iaca_analyse_instrumented_binary( - marked_objfile, micro_architecture=ainfo['IACA']) - row['IACA_ports'] = \ - {k: v/(row['pointer_increment']/row['element_size']) - for k,v in row['IACA_raw']['port cycles'].items()} - row['IACA_prediction'] = row['IACA_raw']['throughput']/( - row['pointer_increment']/row['element_size']) - row['IACA_throughput'] = max(row['IACA_ports'].values()) + if not row.get("IACA_ports"): + row["IACA_raw"] = iaca_analyse_instrumented_binary( + marked_objfile, micro_architecture=ainfo["IACA"] + ) + row["IACA_ports"] = { + k: v / (row["pointer_increment"] / row["element_size"]) + for k, v in row["IACA_raw"]["port cycles"].items() + } + row["IACA_prediction"] = row["IACA_raw"]["throughput"] / ( + row["pointer_increment"] / row["element_size"] + ) + row["IACA_throughput"] = max(row["IACA_ports"].values()) print(". ", end="", flush=True) else: print("! ", end="", flush=True) @@ -359,56 +415,70 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc # Analyze with OSACA, if requested if osaca: print("OSACA", end="", flush=True) - if not row.get('OSACA_ports'): - row['OSACA_raw'] = osaca_analyse_instrumented_assembly( - marked_asmfile, micro_architecture=ainfo['OSACA'], - assign_optimal_throughput=ainfo.get('assign_optimal_throughput', - True)) - row['OSACA_ports'] = \ - {k: v/(row['pointer_increment']/row['element_size']) - for k,v in row['OSACA_raw']['port cycles'].items()} - row['OSACA_prediction'] = row['OSACA_raw']['throughput']/( - row['pointer_increment']/row['element_size']) - row['OSACA_throughput'] = max(row['OSACA_ports'].values()) - row['OSACA_cp'] = row['OSACA_raw']['cp_latency']/( - row['pointer_increment']/row['element_size']) - row['OSACA_lcd'] = row['OSACA_raw']['lcd']/( - row['pointer_increment']/row['element_size']) + if not row.get("OSACA_ports"): + row["OSACA_raw"] = osaca_analyse_instrumented_assembly( + marked_asmfile, + micro_architecture=ainfo["OSACA"], + assign_optimal_throughput=ainfo.get( + "assign_optimal_throughput", True + ), + ) + row["OSACA_ports"] = { + k: v / (row["pointer_increment"] / row["element_size"]) + for k, v in row["OSACA_raw"]["port cycles"].items() + } + row["OSACA_prediction"] = row["OSACA_raw"]["throughput"] / ( + row["pointer_increment"] / row["element_size"] + ) + row["OSACA_throughput"] = max(row["OSACA_ports"].values()) + row["OSACA_cp"] = row["OSACA_raw"]["cp_latency"] / ( + row["pointer_increment"] / row["element_size"] + ) + row["OSACA_lcd"] = row["OSACA_raw"]["lcd"] / ( + row["pointer_increment"] / row["element_size"] + ) print(". ", end="", flush=True) else: print("! ", end="", flush=True) # Analyze with LLVM-MCA, if requested and configured - if llvm_mca and ainfo['LLVM-MCA'] is not None: + if llvm_mca and ainfo["LLVM-MCA"] is not None: print("LLVM-MCA", end="", flush=True) - if not row.get('LLVM-MCA_ports'): - row['LLVM-MCA_raw'] = llvm_mca_analyse_instrumented_assembly( + if not row.get("LLVM-MCA_ports"): + row["LLVM-MCA_raw"] = llvm_mca_analyse_instrumented_assembly( marked_asmfile, - micro_architecture=ainfo['LLVM-MCA'], - isa=ainfo['isa']) - row['LLVM-MCA_ports'] = \ - {k: v/(row['pointer_increment']/row['element_size']) - for k,v in row['LLVM-MCA_raw']['port cycles'].items()} - row['LLVM-MCA_prediction'] =row['LLVM-MCA_raw']['throughput']/( - row['pointer_increment']/row['element_size']) - row['LLVM-MCA_throughput'] = max(row['LLVM-MCA_ports'].values()) - row['LLVM-MCA_cp'] = row['LLVM-MCA_raw']['cp_latency']/( - row['pointer_increment']/row['element_size']) - row['LLVM-MCA_lcd'] = row['LLVM-MCA_raw']['lcd']/( - row['pointer_increment']/row['element_size']) + micro_architecture=ainfo["LLVM-MCA"], + isa=ainfo["isa"], + ) + row["LLVM-MCA_ports"] = { + k: v / (row["pointer_increment"] / row["element_size"]) + for k, v in row["LLVM-MCA_raw"]["port cycles"].items() + } + row["LLVM-MCA_prediction"] = row["LLVM-MCA_raw"]["throughput"] / ( + row["pointer_increment"] / row["element_size"] + ) + row["LLVM-MCA_throughput"] = max(row["LLVM-MCA_ports"].values()) + row["LLVM-MCA_cp"] = row["LLVM-MCA_raw"]["cp_latency"] / ( + row["pointer_increment"] / row["element_size"] + ) + row["LLVM-MCA_lcd"] = row["LLVM-MCA_raw"]["lcd"] / ( + row["pointer_increment"] / row["element_size"] + ) print(". ", end="", flush=True) else: print("! ", end="", flush=True) - + # Analyze with Ithemal, if not running local and configured - if ainfo['Ithemal'] is not None and not islocal: + if ainfo["Ithemal"] is not None and not islocal: print("Ithemal", end="", flush=True) - if not row.get('Ithemal_prediction'): + if not row.get("Ithemal_prediction"): with open(marked_asmfile) as f: - parsed_code = parse_asm(f.read(), ainfo['isa']) - kernel = reduce_to_section(parsed_code, ainfo['isa']) - row['Ithemal_prediction'] = get_ithemal_prediction( - get_intel_style_code(marked_objfile), model=ainfo['Ithemal']) + parsed_code = parse_asm(f.read(), ainfo["isa"]) + kernel = reduce_to_section(parsed_code, ainfo["isa"]) + row["Ithemal_prediction"] = get_ithemal_prediction( + get_intel_style_code(marked_objfile), + model=ainfo["Ithemal"], + ) print(". ", end="", flush=True) else: print("! ", end="", flush=True) @@ -416,43 +486,45 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc if measurements and islocal: # run measurements if on same hardware print("scale", end="", flush=True) - if not row.get('allruns'): + if not row.get("allruns"): # find best length with concurrent L2 measurement scaling_runs, best = scalingrun(exec_path) - row['best_length'] = best[0] - row['best_runtime'] = best[2] - row['L2_traffic'] = best[3] - row['allruns'] = scaling_runs + row["best_length"] = best[0] + row["best_runtime"] = best[2] + row["L2_traffic"] = best[3] + row["allruns"] = scaling_runs print(f"({best[0]}). ", end="", flush=True) else: - print(f"({row.get('best_length', None)})! ", end="", flush=True) + print( + f"({row.get('best_length', None)})! ", + end="", + flush=True, + ) print() # dump to file if data != data_lastsaved: - print('saving... ', end="", flush=True) - with data_path.open('wb') as f: + print("saving... ", end="", flush=True) + with data_path.open("wb") as f: try: pickle.dump(data, f) data_lastsaved = deepcopy(data) - print('saved!') + print("saved!") except KeyboardInterrupt: f.seek(0) pickle.dump(data, f) - print('saved!') + print("saved!") sys.exit() - -def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1*1024+1)): - #print('{:>8} {:>10} {:>10}'.format("x", "cy/it", "L2 B/it")) - parameters = chain(*[[total_iterations//i, i] for i in lengths]) +def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1 * 1024 + 1)): + # print('{:>8} {:>10} {:>10}'.format("x", "cy/it", "L2 B/it")) + parameters = chain(*[[total_iterations // i, i] for i in lengths]) # TODO use arch specific events and grooup - r, o = perfctr(chain([kernel_exec], map(str, parameters)), - 1, group="L2") + r, o = perfctr(chain([kernel_exec], map(str, parameters)), 1, group="L2") global_infos = {} - for m in [re.match(r"(:?([a-z_\-0-9]+):)?([a-z]+): ([a-z\_\-0-9]+)", l) for l in o]: + for m in [re.match(r"(:?([a-z_\-0-9]+):)?([a-z]+): ([a-z\_\-0-9]+)", line) for line in o]: if m is not None: try: v = int(m.group(4)) @@ -464,37 +536,45 @@ def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1*1024+1 r[m.group(2)][m.group(3)] = v results = [] - best = (float('inf'), None) + best = (float("inf"), None) for markername, mmetrics in r.items(): - kernelname, repetitions, *_, xlength = markername.split('_') + kernelname, repetitions, *_, xlength = markername.split("_") repetitions = int(repetitions) xlength = int(xlength) - total_iterations = mmetrics['repetitions'] * mmetrics['iterations'] - if 'Clock [MHz]' in mmetrics: - clock_hz = mmetrics['Clock [MHz]']*1e6 + total_iterations = mmetrics["repetitions"] * mmetrics["iterations"] + if "Clock [MHz]" in mmetrics: + clock_hz = mmetrics["Clock [MHz]"] * 1e6 else: - clock_hz = arch_info[get_current_arch()]['Clock [MHz]']*1e6 - cyperit = mmetrics['Runtime (RDTSC) [s]'] * clock_hz / total_iterations + clock_hz = arch_info[get_current_arch()]["Clock [MHz]"] * 1e6 + cyperit = mmetrics["Runtime (RDTSC) [s]"] * clock_hz / total_iterations # TODO use arch specific events and grooup - if 'L2D load data volume [GBytes]' in mmetrics: - l2perit = (mmetrics['L2D load data volume [GBytes]'] + - mmetrics.get('L2D evict data volume [GBytes]', 0))*1e9 / total_iterations + if "L2D load data volume [GBytes]" in mmetrics: + l2perit = ( + ( + mmetrics["L2D load data volume [GBytes]"] + + mmetrics.get("L2D evict data volume [GBytes]", 0) + ) + * 1e9 + / total_iterations + ) else: - l2perit = \ - mmetrics[arch_info[get_current_arch()]['L2_volume_metric']]*1e9 / total_iterations - results.append( - (xlength, repetitions, cyperit, l2perit) - ) + l2perit = ( + mmetrics[arch_info[get_current_arch()]["L2_volume_metric"]] + * 1e9 + / total_iterations + ) + results.append((xlength, repetitions, cyperit, l2perit)) if cyperit < best[0]: best = cyperit, results[-1] return results, best[1] + def mark(asm_path, compiler, cflags, isa, overwrite=False): # Mark assembly for IACA, OSACA and LLVM-MCA marked_asm_path = Path(asm_path).with_suffix(".marked.s") if not marked_asm_path.exists() or overwrite: overwrite = True - with open(asm_path) as fa, open(marked_asm_path, 'w') as fm: + with open(asm_path) as fa, open(marked_asm_path, "w") as fm: try: _, pointer_increment = asm_instrumentation(fa, fm, isa=isa) except KeyboardInterrupt: @@ -505,37 +585,46 @@ def mark(asm_path, compiler, cflags, isa, overwrite=False): # use maked assembly and extract asm_block and pointer_increment with open(marked_asm_path) as f: marked_asm = f.read() - m = re.search(r'pointer_increment=([0-9]+)', marked_asm) + m = re.search(r"pointer_increment=([0-9]+)", marked_asm) if m: pointer_increment = int(m.group(1)) else: os.unlink(marked_asm_path) raise ValueError( - "Could not find `pointer_increment=`. Plase place into file.") + "Could not find `pointer_increment=`. Plase place into file." + ) print("! ", end="", flush=True) # Compile marked assembly to object for IACA marked_obj = Path(asm_path).with_suffix(".marked.o") if not marked_obj.exists(): - check_call([compiler] + ['-c', str(marked_asm_path), '-o', str(marked_obj)]) - + check_call([compiler] + ["-c", str(marked_asm_path), "-o", str(marked_obj)]) + return str(marked_asm_path), str(marked_obj), pointer_increment, overwrite -def build_kernel(kernel, architecture, compiler, cflags, cflags_name, overwrite=False, - dontbuild=False): +def build_kernel( + kernel, + architecture, + compiler, + cflags, + cflags_name, + overwrite=False, + dontbuild=False, +): build_path = f"build/{architecture}/{compiler}/{cflags_name}" kernel_assembly = f"{build_path}/{kernel}.s" - kernel_object= f"{build_path}/{kernel}.o" + kernel_object = f"{build_path}/{kernel}.o" executable = f"{build_path}/{kernel}" Path(build_path).mkdir(parents=True, exist_ok=True) if not overwrite: # Overwrite if any kernel specific file is missing overwrite = ( - not os.path.exists(kernel_object) or - not os.path.exists(kernel_assembly) or - not os.path.exists(executable)) + not os.path.exists(kernel_object) + or not os.path.exists(kernel_assembly) + or not os.path.exists(executable) + ) if dontbuild and overwrite: raise ValueError("Must build, but not allowed.") @@ -545,39 +634,43 @@ def build_kernel(kernel, architecture, compiler, cflags, cflags_name, overwrite= if not Path(f"{build_path}/compiler_version").exists(): # Document compiler version - with open(f"{build_path}/compiler_version", 'w') as f: - f.write(check_output([compiler, "-v"], encoding='utf8', stderr=STDOUT)) + with open(f"{build_path}/compiler_version", "w") as f: + f.write(check_output([compiler, "-v"], encoding="utf8", stderr=STDOUT)) if overwrite: # build object + assembly - check_call([compiler] + - cflags + - ["-c", f"kernels/{kernel}.c", "-o", kernel_object]) - check_call([compiler] + - cflags + - ["-c", f"kernels/{kernel}.c", "-S", "-o", kernel_assembly]) + check_call([compiler] + cflags + ["-c", f"kernels/{kernel}.c", "-o", kernel_object]) + check_call( + [compiler] + cflags + ["-c", f"kernels/{kernel}.c", "-S", "-o", kernel_assembly] + ) # build main and link executable executable_cflags = [ os.environ["LIKWID_DEFINES"], os.environ["LIKWID_INC"], - os.environ["LIKWID_LIB"] - ] + ['-Ofast'] - check_call([compiler] + executable_cflags + [ - f"{build_path}/dummy.o", - kernel_object, - "-DMAIN", - f"kernels/{kernel}.c", - "-llikwid", - "-o", executable]) + os.environ["LIKWID_LIB"], + ] + ["-Ofast"] + check_call( + [compiler] + + executable_cflags + + [ + f"{build_path}/dummy.o", + kernel_object, + "-DMAIN", + f"kernels/{kernel}.c", + "-llikwid", + "-o", + executable, + ] + ) print(". ", end="", flush=True) else: print("! ", end="", flush=True) - + return kernel_assembly, executable, overwrite -def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0): +def perfctr(cmd, cores, group="MEM", code_markers=True, verbose=0): """ Run *cmd* with likwid-perfctr and returns result as dict. @@ -586,30 +679,32 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0): if CLI argument cores > 1, running with multi-core, otherwise single-core """ # Making sure likwid-perfctr is available: - if benchmark.find_executable('likwid-perfctr') is None: - print("likwid-perfctr was not found. Make sure likwid is installed and found in PATH.", - file=sys.stderr) + if benchmark.find_executable("likwid-perfctr") is None: + print( + "likwid-perfctr was not found. Make sure likwid is installed and found in PATH.", + file=sys.stderr, + ) sys.exit(1) # FIXME currently only single core measurements support! - perf_cmd = ['likwid-perfctr', '-f', '-O', '-g', group] + perf_cmd = ["likwid-perfctr", "-f", "-O", "-g", group] - cpu = 'S0:0' + cpu = "S0:0" if cores > 1: - cpu += '-'+str(cores-1) + cpu += "-" + str(cores - 1) # Pinned and measured on cpu - perf_cmd += ['-C', cpu] + perf_cmd += ["-C", cpu] # code must be marked using likwid markers - perf_cmd.append('-m') + perf_cmd.append("-m") perf_cmd += cmd if verbose > 1: - print(' '.join(perf_cmd)) + print(" ".join(perf_cmd)) try: - with benchmark.fix_env_variable('OMP_NUM_THREADS', None): - output = check_output(perf_cmd).decode('utf-8').split('\n') + with benchmark.fix_env_variable("OMP_NUM_THREADS", None): + output = check_output(perf_cmd).decode("utf-8").split("\n") except CalledProcessError as e: print("Executing benchmark failed: {!s}".format(e), file=sys.stderr) sys.exit(1) @@ -626,7 +721,7 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0): m = re.match(r"TABLE,Region ([a-z\-0-9_]+),", line) if m: cur_region_name = m.group(1) - line = line.split(',') + line = line.split(",") try: # Metrics cur_region_data[line[0]] = float(line[1]) @@ -639,12 +734,13 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0): continue try: # Event counters - if line[2] == '-' or line[2] == 'nan': + if line[2] == "-" or line[2] == "nan": counter_value = 0 else: counter_value = int(line[2]) - if re.fullmatch(r'[A-Z0-9_]+', line[0]) and \ - re.fullmatch(r'[A-Z0-9]+(:[A-Z0-9]+=[0-9A-Fa-fx]+)*', line[1]): + if re.fullmatch(r"[A-Z0-9_]+", line[0]) and re.fullmatch( + r"[A-Z0-9]+(:[A-Z0-9]+=[0-9A-Fa-fx]+)*", line[1] + ): cur_region_data.setdefault(line[0], {}) cur_region_data[line[0]][line[1]] = counter_value continue @@ -659,49 +755,52 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0): def remove_html_tags(text): - return re.sub('<.*?>', '', text) + return re.sub("<.*?>", "", text) def get_intel_style_code(marked_objfile): # Disassembl with Intel syntax - cmd = ("objdump -d --demangle --no-leading-addr --no-leading-headers --no-show-raw-insn " - "--x86-asm-syntax=intel").split(" ") + [marked_objfile] + cmd = ( + "objdump -d --demangle --no-leading-addr --no-leading-headers --no-show-raw-insn " + "--x86-asm-syntax=intel" + ).split(" ") + [marked_objfile] asm_raw = check_output(cmd).decode() - asm_raw = '\n'.join([l.strip() for l in asm_raw.split('\n')]) + asm_raw = "\n".join([line.strip() for line in asm_raw.split("\n")]) kernel_raw = asm_raw[ - asm_raw.index('mov\tebx, 111\nnop')+len('mov\tebx, 111\nnop') : - asm_raw.index('mov\tebx, 222\nnop') + asm_raw.index("mov\tebx, 111\nnop") + + len("mov\tebx, 111\nnop") : asm_raw.index("mov\tebx, 222\nnop") ] - kernel_lines = kernel_raw.split('\n') + kernel_lines = kernel_raw.split("\n") # Ignore label and jump - return '\n'.join(kernel_lines[:-2]) + return "\n".join(kernel_lines[:-2]) -def get_ithemal_prediction(code, model='skl'): +def get_ithemal_prediction(code, model="skl"): url = "http://3.18.198.23/predict" - assert model in ['skl', 'hsw', 'ivb'] - r = requests.post(url, {'code': code, 'model': model}) + assert model in ["skl", "hsw", "ivb"] + r = requests.post(url, {"code": code, "model": model}) raw_text = remove_html_tags(r.text) m = re.search("Could not generate a prediction: (.*)", raw_text) if m: - print(" error:", m.group(1).strip(), end=' ') - return float('nan') - m = re.search("Prediction: ([0-9\.]+) cycles per iteration", raw_text) + print(" error:", m.group(1).strip(), end=" ") + return float("nan") + m = re.search("Prediction: ([0-9.]+) cycles per iteration", raw_text) if m: return float(m.group(1)) else: - return float('nan') + return float("nan") def main(): # Check for correct LLVM-MCA version try: - llvm_mca = 'LLVM version 12.0.0' in check_output(['llvm-mca', '-version']).decode() + llvm_mca = "LLVM version 12.0.0" in check_output(["llvm-mca", "-version"]).decode() except FileNotFoundError: llvm_mca = False - - build_mark_run_all_kernels(measurements='--no-measurements' not in sys.argv, llvm_mca=llvm_mca) + + build_mark_run_all_kernels(measurements="--no-measurements" not in sys.argv, llvm_mca=llvm_mca) sys.exit() + if __name__ == "__main__": - main() \ No newline at end of file + main()