applied flake8 and black rules

This commit is contained in:
JanLJL
2021-08-26 16:58:19 +02:00
parent 34523e1b23
commit d418c16f4a
23 changed files with 781 additions and 471 deletions

View File

@@ -7,7 +7,8 @@ import re
def __read(*names, **kwargs):
"""Reads in file"""
with io.open(
os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
os.path.join(os.path.dirname(__file__), *names),
encoding=kwargs.get("encoding", "utf8"),
) as fp:
return fp.read()

View File

@@ -88,7 +88,7 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
comment = None
if load:
if 'ymm' in operand_types:
if "ymm" in operand_types:
port2D3D_pressure = 2
else:
port2D3D_pressure = 1
@@ -96,7 +96,7 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
latency += 4
comment = "with load"
if store:
if 'ymm' in operand_types:
if "ymm" in operand_types:
port4_pressure = 2
else:
port4_pressure = 1
@@ -716,14 +716,14 @@ skx_mov_instructions = list(
# ('movapd xmm xmm', ('1*p5', 1)),
# ('vmovapd xmm xmm', ('1*p5', 1)),
# ('vmovapd ymm ymm', ('1*p5', 1)),
('vmovapd zmm zmm', ('', 0)),
("vmovapd zmm zmm", ("", 0)),
# https://www.felixcloutier.com/x86/movaps
# TODO with masking!
# TODO the following may eliminate or be bound to 1*p0156:
# ('movaps xmm xmm', ('1*p5', 1)),
# ('vmovaps xmm xmm', ('1*p5', 1)),
# ('vmovaps ymm ymm', ('1*p5', 1)),
('vmovaps zmm zmm', ('', 0)),
("vmovaps zmm zmm", ("", 0)),
# https://www.felixcloutier.com/x86/movbe
("movbe gpr mem", ("1*p15", 4)),
("movbe mem gpr", ("1*p15", 4)),

View File

@@ -140,9 +140,11 @@ def extract_model(tree, arch, skip_mem=True):
print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
continue
# skip if measured TP is smaller than computed
if [float(x.attrib["TP_ports"]) > min(float(x.attrib["TP_loop"]),
float(x.attrib["TP_unrolled"]))
for x in arch_tag.findall("measurement")][0]:
if [
float(x.attrib["TP_ports"])
> min(float(x.attrib["TP_loop"]), float(x.attrib["TP_unrolled"]))
for x in arch_tag.findall("measurement")
][0]:
print(
"Calculated TP is greater than measured TP.",
iform,
@@ -160,13 +162,15 @@ def extract_model(tree, arch, skip_mem=True):
throughput = float(measurement_tag.attrib["TP_ports"])
else:
throughput = min(
measurement_tag.attrib.get("TP_loop", float('inf')),
measurement_tag.attrib.get("TP_unroll", float('inf')),
measurement_tag.attrib.get("TP", float('inf')),
measurement_tag.attrib.get("TP_loop", float("inf")),
measurement_tag.attrib.get("TP_unroll", float("inf")),
measurement_tag.attrib.get("TP", float("inf")),
)
if throughput == float('inf'):
if throughput == float("inf"):
throughput = None
uops = int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None
uops = (
int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None
)
if "ports" in measurement_tag.attrib:
port_pressure.append(port_pressure_from_tag_attributes(measurement_tag.attrib))
latencies = [
@@ -202,7 +206,11 @@ def extract_model(tree, arch, skip_mem=True):
# Check if all are equal
if port_pressure:
if port_pressure[1:] != port_pressure[:-1]:
print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr)
print(
"Contradicting port occupancies, using latest IACA:",
iform,
file=sys.stderr,
)
port_pressure = port_pressure[-1]
else:
# print("No data available for this architecture:", mnemonic, file=sys.stderr)
@@ -222,10 +230,12 @@ def extract_model(tree, arch, skip_mem=True):
port_4 = True
# Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
if port_23 and not port_4:
if arch.upper() in ["SNB", "IVB"] and any(
[p.get('name', '') == 'ymm' for p in parameters]) and \
not '128' in mnemonic:
# x = 2 if SNB or IVB and ymm register in any operand and not '128' in
if (
arch.upper() in ["SNB", "IVB"]
and any([p.get("name", "") == "ymm" for p in parameters])
and not ("128" in mnemonic)
):
# x = 2 if SNB or IVB and ymm register in any operand and not '128' in
# instruction name
port2D3D_pressure = 2
else:

View File

@@ -125,7 +125,10 @@ def _get_asmbench_output(input_data, isa):
db_entries = {}
for i in range(0, len(input_data), 4):
if input_data[i + 3].strip() != "":
print("asmbench output not in the correct format! Format must be: ", file=sys.stderr)
print(
"asmbench output not in the correct format! Format must be: ",
file=sys.stderr,
)
print(
"-------------\nMNEMONIC[-OP1[_OP2][...]]\nLatency: X cycles\n"
"Throughput: Y cycles\n\n-------------",
@@ -540,7 +543,16 @@ def _get_sanity_report(
def _get_sanity_report_verbose(
total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa, bad_operands, colors=False
total,
m_tp,
m_l,
m_pp,
suspic_instr,
dup_arch,
dup_isa,
only_isa,
bad_operands,
colors=False,
):
"""Get the verbose part of the sanity report with all missing instruction forms."""
BRIGHT_CYAN = "\033[1;36;1m" if colors else ""

View File

@@ -202,7 +202,12 @@ class Frontend(object):
)
def combined_view(
self, kernel, cp_kernel: KernelDG, dep_dict, ignore_unknown=False, show_cmnts=True
self,
kernel,
cp_kernel: KernelDG,
dep_dict,
ignore_unknown=False,
show_cmnts=True,
):
"""
Build combined view of kernel including port pressure (TP), a CP column and a
@@ -238,8 +243,8 @@ class Frontend(object):
lcd_sum = 0.0
lcd_lines = {}
if dep_dict:
longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]['latency'])
lcd_sum = dep_dict[longest_lcd]['latency']
longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]["latency"])
lcd_sum = dep_dict[longest_lcd]["latency"]
lcd_lines = {
instr["line_number"]: lat for instr, lat in dep_dict[longest_lcd]["dependencies"]
}

View File

@@ -10,7 +10,13 @@ from functools import lru_cache
from osaca.db_interface import import_benchmark_output, sanity_check
from osaca.frontend import Frontend
from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT
from osaca.semantics import INSTR_FLAGS, ArchSemantics, KernelDG, MachineModel, reduce_to_section
from osaca.semantics import (
INSTR_FLAGS,
ArchSemantics,
KernelDG,
MachineModel,
reduce_to_section,
)
SUPPORTED_ARCHS = [
@@ -37,7 +43,8 @@ DEFAULT_ARCHS = {
def __read(*names, **kwargs):
"""Reads in file"""
with io.open(
os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
os.path.join(os.path.dirname(__file__), *names),
encoding=kwargs.get("encoding", "utf8"),
) as fp:
return fp.read()
@@ -79,7 +86,10 @@ def create_parser(parser=None):
# Add arguments
parser.add_argument(
"-V", "--version", action="version", version="%(prog)s " + __find_version("__init__.py")
"-V",
"--version",
action="version",
version="%(prog)s " + __find_version("__init__.py"),
)
parser.add_argument(
"--arch",
@@ -167,7 +177,9 @@ def create_parser(parser=None):
help="Write analysis to this file (default to stdout).",
)
parser.add_argument(
"file", type=argparse.FileType("r"), help="Path to object (ASM or instruction file)."
"file",
type=argparse.FileType("r"),
help="Path to object (ASM or instruction file).",
)
return parser
@@ -347,7 +359,10 @@ def run(args, output_file=sys.stdout):
# Sanity check on DB
verbose = True if args.verbose > 0 else False
sanity_check(
args.arch, verbose=verbose, internet_check=args.internet_check, output_file=output_file
args.arch,
verbose=verbose,
internet_check=args.internet_check,
output_file=output_file,
)
elif "import_data" in args:
# Import microbench output file into DB

View File

@@ -26,9 +26,9 @@ class ParserAArch64(BaseParser):
pp.ZeroOrMore(pp.Word(pp.printables))
).setResultsName(self.COMMENT_ID)
# Define ARM assembly identifier
decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)).setResultsName(
"value"
)
decimal_number = pp.Combine(
pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)
).setResultsName("value")
hex_number = pp.Combine(pp.Literal("0x") + pp.Word(pp.hexnums)).setResultsName("value")
relocation = pp.Combine(pp.Literal(":") + pp.Word(pp.alphanums + "_") + pp.Literal(":"))
first = pp.Word(pp.alphas + "_.", exact=1)
@@ -152,7 +152,9 @@ class ParserAArch64(BaseParser):
pp.Literal("{")
+ (
pp.delimitedList(pp.Combine(self.list_element), delim=",").setResultsName("list")
^ pp.delimitedList(pp.Combine(self.list_element), delim="-").setResultsName("range")
^ pp.delimitedList(pp.Combine(self.list_element), delim="-").setResultsName(
"range"
)
)
+ pp.Literal("}")
+ pp.Optional(index)
@@ -256,9 +258,7 @@ class ParserAArch64(BaseParser):
# 2. Parse label
if result is None:
try:
result = self.process_operand(
self.label.parseString(line, parseAll=True).asDict()
)
result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
result = AttrDict.convert_dict(result)
instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name
if self.COMMENT_ID in result[self.LABEL_ID]:
@@ -293,7 +293,9 @@ class ParserAArch64(BaseParser):
try:
result = self.parse_instruction(line)
except (pp.ParseException, KeyError) as e:
raise ValueError("Unable to parse {!r} on line {}".format(line, line_number)) from e
raise ValueError(
"Unable to parse {!r} on line {}".format(line, line_number)
) from e
instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID]
instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID]
instruction_form[self.COMMENT_ID] = result[self.COMMENT_ID]
@@ -390,9 +392,9 @@ class ParserAArch64(BaseParser):
new_dict["pre_indexed"] = True
if "post_indexed" in memory_address:
if "value" in memory_address["post_indexed"]:
new_dict["post_indexed"] = {"value": int(
memory_address["post_indexed"]["value"], 0
)}
new_dict["post_indexed"] = {
"value": int(memory_address["post_indexed"]["value"], 0)
}
else:
new_dict["post_indexed"] = memory_address["post_indexed"]
return AttrDict({self.MEMORY_ID: new_dict})
@@ -408,27 +410,27 @@ class ParserAArch64(BaseParser):
Resolve range or list register operand to list of registers.
Returns None if neither list nor range
"""
if 'register' in operand:
if 'list' in operand.register:
index = operand.register.get('index')
if "register" in operand:
if "list" in operand.register:
index = operand.register.get("index")
range_list = []
for reg in operand.register.list:
reg = deepcopy(reg)
if index is not None:
reg['index'] = int(index, 0)
reg["index"] = int(index, 0)
range_list.append(AttrDict({self.REGISTER_ID: reg}))
return range_list
elif 'range' in operand.register:
elif "range" in operand.register:
base_register = operand.register.range[0]
index = operand.register.get('index')
index = operand.register.get("index")
range_list = []
start_name = base_register.name
end_name = operand.register.range[1].name
for name in range(int(start_name), int(end_name) + 1):
reg = deepcopy(base_register)
if index is not None:
reg['index'] = int(index, 0)
reg['name'] = str(name)
reg["index"] = int(index, 0)
reg["name"] = str(name)
range_list.append(AttrDict({self.REGISTER_ID: reg}))
return range_list
# neither register list nor range, return unmodified
@@ -482,10 +484,12 @@ class ParserAArch64(BaseParser):
return AttrDict({self.IMMEDIATE_ID: immediate})
else:
# change 'mantissa' key to 'value'
return AttrDict({
self.IMMEDIATE_ID: AttrDict({
"value": immediate[dict_name]["mantissa"],
"type": dict_name})}
return AttrDict(
{
self.IMMEDIATE_ID: AttrDict(
{"value": immediate[dict_name]["mantissa"], "type": dict_name}
)
}
)
def process_label(self, label):

View File

@@ -23,9 +23,9 @@ class ParserX86ATT(BaseParser):
def construct_parser(self):
"""Create parser for x86 AT&T ISA."""
decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)).setResultsName(
"value"
)
decimal_number = pp.Combine(
pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)
).setResultsName("value")
hex_number = pp.Combine(
pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums)
).setResultsName("value")
@@ -41,7 +41,8 @@ class ParserX86ATT(BaseParser):
identifier = pp.Group(
pp.Optional(id_offset).setResultsName("offset")
+ pp.Combine(
pp.delimitedList(pp.Combine(first + pp.Optional(rest)), delim="::"), joinString="::"
pp.delimitedList(pp.Combine(first + pp.Optional(rest)), delim="::"),
joinString="::",
).setResultsName("name")
+ pp.Optional(relocation).setResultsName("relocation")
).setResultsName("identifier")
@@ -443,7 +444,12 @@ class ParserX86ATT(BaseParser):
"""Check if register is a vector register"""
if register is None:
return False
if register["name"].rstrip(string.digits).lower() in ["mm", "xmm", "ymm", "zmm"]:
if register["name"].rstrip(string.digits).lower() in [
"mm",
"xmm",
"ymm",
"zmm",
]:
return True
return False

View File

@@ -47,7 +47,9 @@ class ArchSemantics(ISASemantics):
indices = [port_list.index(p) for p in ports]
# check if port sum of used ports for uop are unbalanced
port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel)))
instr_ports = self._to_list(itemgetter(*indices)(instruction_form["port_pressure"]))
instr_ports = self._to_list(
itemgetter(*indices)(instruction_form["port_pressure"])
)
if len(set(port_sums)) > 1:
# balance ports
# init list for keeping track of the current change
@@ -270,7 +272,8 @@ class ArchSemantics(ISASemantics):
reg_type
]
st_data_port_pressure = [
pp * multiplier for pp in st_data_port_pressure]
pp * multiplier for pp in st_data_port_pressure
]
data_port_pressure = [
sum(x) for x in zip(data_port_pressure, st_data_port_pressure)
]
@@ -343,7 +346,9 @@ class ArchSemantics(ISASemantics):
def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags):
"""Apply performance data to instruction if it was found in the archDB"""
throughput = instruction_data["throughput"]
port_pressure = self._machine_model.average_port_pressure(instruction_data["port_pressure"])
port_pressure = self._machine_model.average_port_pressure(
instruction_data["port_pressure"]
)
instruction_form["port_uops"] = instruction_data["port_pressure"]
try:
assert isinstance(port_pressure, list)

View File

@@ -1,20 +1,19 @@
#!/usr/bin/env python3
import hashlib
import os
import pickle
import re
import string
from collections import defaultdict
from copy import deepcopy
from itertools import product
import hashlib
from pathlib import Path
from collections import defaultdict
import ruamel.yaml
from ruamel.yaml.compat import StringIO
from osaca import __version__, utils
from osaca.parser import ParserX86ATT
from ruamel.yaml.compat import StringIO
class MachineModel(object):
@@ -37,7 +36,13 @@ class MachineModel(object):
"hidden_loads": None,
"load_latency": {},
"load_throughput": [
{"base": b, "index": i, "offset": o, "scale": s, "port_pressure": []}
{
"base": b,
"index": i,
"offset": o,
"scale": s,
"port_pressure": [],
}
for b, i, o, s in product(["gpr"], ["gpr", None], ["imd", None], [1, 8])
],
"load_throughput_default": [],
@@ -128,7 +133,8 @@ class MachineModel(object):
instruction_form
for instruction_form in name_matched_iforms
if self._match_operands(
instruction_form["operands"] if "operands" in instruction_form else [], operands
instruction_form["operands"] if "operands" in instruction_form else [],
operands,
)
)
except StopIteration:
@@ -150,7 +156,13 @@ class MachineModel(object):
return average_pressure
def set_instruction(
self, name, operands=None, latency=None, port_pressure=None, throughput=None, uops=None
self,
name,
operands=None,
latency=None,
port_pressure=None,
throughput=None,
uops=None,
):
"""Import instruction form information."""
# If it already exists. Overwrite information.
@@ -500,7 +512,11 @@ class MachineModel(object):
"""Check if the types of operand ``i_operand`` and ``operand`` match."""
# check for wildcard
if self.WILDCARD in operand:
if "class" in i_operand and i_operand["class"] == "register" or "register" in i_operand:
if (
"class" in i_operand
and i_operand["class"] == "register"
or "register" in i_operand
):
return True
else:
return False
@@ -527,20 +543,27 @@ class MachineModel(object):
return self._is_AArch64_mem_type(i_operand, operand["memory"])
# immediate
if i_operand["class"] == "immediate" and i_operand["imd"] == self.WILDCARD:
return "value" in operand or \
("immediate" in operand and "value" in operand["immediate"])
return "value" in operand or (
"immediate" in operand and "value" in operand["immediate"]
)
if i_operand["class"] == "immediate" and i_operand["imd"] == "int":
return ("value" in operand and operand.get("type", None) == "int") or \
("immediate" in operand and "value" in operand["immediate"] and
operand["immediate"].get("type", None) == "int")
return ("value" in operand and operand.get("type", None) == "int") or (
"immediate" in operand
and "value" in operand["immediate"]
and operand["immediate"].get("type", None) == "int"
)
if i_operand["class"] == "immediate" and i_operand["imd"] == "float":
return ("float" in operand and operand.get("type", None) == "float") or \
("immediate" in operand and "float" in operand["immediate"] and
operand["immediate"].get("type", None) == "float")
return ("float" in operand and operand.get("type", None) == "float") or (
"immediate" in operand
and "float" in operand["immediate"]
and operand["immediate"].get("type", None) == "float"
)
if i_operand["class"] == "immediate" and i_operand["imd"] == "double":
return ("double" in operand and operand.get("type", None) == "double") or \
("immediate" in operand and "double" in operand["immediate"] and
operand["immediate"].get("type", None) == "double")
return ("double" in operand and operand.get("type", None) == "double") or (
"immediate" in operand
and "double" in operand["immediate"]
and operand["immediate"].get("type", None) == "double"
)
# identifier
if "identifier" in operand or (
"immediate" in operand and "identifier" in operand["immediate"]
@@ -577,7 +600,10 @@ class MachineModel(object):
def _compare_db_entries(self, operand_1, operand_2):
"""Check if operand types in DB format (i.e., not parsed) match."""
operand_attributes = list(
filter(lambda x: True if x != "source" and x != "destination" else False, operand_1)
filter(
lambda x: True if x != "source" and x != "destination" else False,
operand_1,
)
)
for key in operand_attributes:
try:

View File

@@ -1,6 +1,5 @@
#!/usr/bin/env python3
from itertools import chain
from copy import deepcopy
from osaca import utils
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
@@ -100,53 +99,68 @@ class ISASemantics(object):
# post-process pre- and post-indexing for aarch64 memory operands
if self._isa == "aarch64":
for operand in [op for op in op_dict["source"] if "memory" in op]:
post_indexed = ("post_indexed" in operand["memory"] and
operand["memory"]["post_indexed"])
pre_indexed = ("pre_indexed" in operand["memory"] and
operand["memory"]["pre_indexed"])
post_indexed = (
"post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
)
pre_indexed = (
"pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
)
if post_indexed or pre_indexed:
op_dict["src_dst"].append(
AttrDict.convert_dict({
"register": operand["memory"]["base"],
"pre_indexed": pre_indexed,
"post_indexed": post_indexed})
AttrDict.convert_dict(
{
"register": operand["memory"]["base"],
"pre_indexed": pre_indexed,
"post_indexed": post_indexed,
}
)
)
for operand in [op for op in op_dict["destination"] if "memory" in op]:
post_indexed = ("post_indexed" in operand["memory"] and
operand["memory"]["post_indexed"])
pre_indexed = ("pre_indexed" in operand["memory"] and
operand["memory"]["pre_indexed"])
post_indexed = (
"post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
)
pre_indexed = (
"pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
)
if post_indexed or pre_indexed:
op_dict["src_dst"].append(
AttrDict.convert_dict({
"register": operand["memory"]["base"],
"pre_indexed": pre_indexed,
"post_indexed": post_indexed})
AttrDict.convert_dict(
{
"register": operand["memory"]["base"],
"pre_indexed": pre_indexed,
"post_indexed": post_indexed,
}
)
)
# store operand list in dict and reassign operand key/value pair
instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict)
# assign LD/ST flags
instruction_form["flags"] = instruction_form["flags"] if "flags" in instruction_form else []
instruction_form["flags"] = (
instruction_form["flags"] if "flags" in instruction_form else []
)
if self._has_load(instruction_form):
instruction_form["flags"] += [INSTR_FLAGS.HAS_LD]
if self._has_store(instruction_form):
instruction_form["flags"] += [INSTR_FLAGS.HAS_ST]
def get_reg_changes(self, instruction_form, only_postindexed=False):
"""
Returns register changes, as dict, for instruction_form, based on operation defined in isa.
Empty dict if no changes of registers occurred. None for registers with unknown changes.
If only_postindexed is True, only considers changes due to post_indexed memory references.
"""
if instruction_form.get('instruction') is None:
if instruction_form.get("instruction") is None:
return {}
dest_reg_names = [op.register.get('prefix', '') + op.register.name
for op in chain(instruction_form.semantic_operands.destination,
instruction_form.semantic_operands.src_dst)
if 'register' in op]
dest_reg_names = [
op.register.get("prefix", "") + op.register.name
for op in chain(
instruction_form.semantic_operands.destination,
instruction_form.semantic_operands.src_dst,
)
if "register" in op
]
isa_data = self._isa_model.get_instruction(
instruction_form["instruction"], instruction_form["operands"]
)
@@ -162,50 +176,50 @@ class ISASemantics(object):
if only_postindexed:
for o in instruction_form.operands:
if 'post_indexed' in o.get('memory', {}):
base_name = o.memory.base.get('prefix', '') + o.memory.base.name
return {base_name: {
'name': o.memory.base.get('prefix', '') + o.memory.base.name,
'value': o.memory.post_indexed.value
}}
if "post_indexed" in o.get("memory", {}):
base_name = o.memory.base.get("prefix", "") + o.memory.base.name
return {
base_name: {
"name": o.memory.base.get("prefix", "") + o.memory.base.name,
"value": o.memory.post_indexed.value,
}
}
return {}
reg_operand_names = {} # e.g., {'rax': 'op1'}
operand_state = {} # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged
for o in instruction_form.operands:
if 'pre_indexed' in o.get('memory', {}):
if "pre_indexed" in o.get("memory", {}):
# Assuming no isa_data.operation
if isa_data.get("operation", None) is not None:
raise ValueError(
"ISA information for pre-indexed instruction {!r} has operation set."
"This is currently not supprted.".format(instruction_form.line))
base_name = o.memory.base.get('prefix', '') + o.memory.base.name
reg_operand_names = {base_name: 'op1'}
operand_state = {'op1': {
'name': base_name,
'value': o.memory.offset.value
}}
"This is currently not supprted.".format(instruction_form.line)
)
base_name = o.memory.base.get("prefix", "") + o.memory.base.name
reg_operand_names = {base_name: "op1"}
operand_state = {"op1": {"name": base_name, "value": o.memory.offset.value}}
if isa_data is not None and 'operation' in isa_data:
if isa_data is not None and "operation" in isa_data:
for i, o in enumerate(instruction_form.operands):
operand_name = "op{}".format(i + 1)
if "register" in o:
o_reg_name = o["register"].get('prefix', '') + o["register"]["name"]
o_reg_name = o["register"].get("prefix", "") + o["register"]["name"]
reg_operand_names[o_reg_name] = operand_name
operand_state[operand_name] = {
'name': o_reg_name,
'value': 0}
operand_state[operand_name] = {"name": o_reg_name, "value": 0}
elif "immediate" in o:
operand_state[operand_name] = {'value': o["immediate"]["value"]}
operand_state[operand_name] = {"value": o["immediate"]["value"]}
elif "memory" in o:
# TODO lea needs some thinking about
pass
operand_changes = exec(isa_data['operation'], {}, operand_state)
exec(isa_data["operation"], {}, operand_state)
change_dict = {reg_name: operand_state.get(reg_operand_names.get(reg_name))
for reg_name in dest_reg_names}
change_dict = {
reg_name: operand_state.get(reg_operand_names.get(reg_name))
for reg_name in dest_reg_names
}
return change_dict
def _apply_found_ISA_data(self, isa_data, operands):
@@ -231,8 +245,10 @@ class ISASemantics(object):
if "hidden_operands" in isa_data:
op_dict["destination"] += [
AttrDict.convert_dict(
{hop["class"]: {k: hop[k] for k in ["class", "source", "destination"]}})
for hop in isa_data["hidden_operands"]]
{hop["class"]: {k: hop[k] for k in ["class", "source", "destination"]}}
)
for hop in isa_data["hidden_operands"]
]
return op_dict
for i, op in enumerate(isa_data["operands"]):

View File

@@ -16,7 +16,12 @@ class KernelDG(nx.DiGraph):
INSTRUCTION_THRESHOLD = 50
def __init__(
self, parsed_kernel, parser, hw_model: MachineModel, semantics: ArchSemantics, timeout=10
self,
parsed_kernel,
parser,
hw_model: MachineModel,
semantics: ArchSemantics,
timeout=10,
):
self.timed_out = False
self.kernel = parsed_kernel
@@ -73,7 +78,7 @@ class KernelDG(nx.DiGraph):
else instruction_form["latency_wo_load"]
)
if "storeload_dep" in dep_flags:
edge_weight += self.model.get('store_to_load_forward_latency', 0)
edge_weight += self.model.get("store_to_load_forward_latency", 0)
dg.add_edge(
instruction_form["line_number"],
dep["line_number"],
@@ -98,7 +103,7 @@ class KernelDG(nx.DiGraph):
tmp_kernel = [] + kernel
for orig_iform in kernel:
temp_iform = copy.copy(orig_iform)
temp_iform['line_number'] += offset
temp_iform["line_number"] += offset
tmp_kernel.append(temp_iform)
# get dependency graph
dg = self.create_DG(tmp_kernel)
@@ -118,12 +123,15 @@ class KernelDG(nx.DiGraph):
with Manager() as manager:
all_paths = manager.list()
processes = [
Process(target=self._extend_path, args=(all_paths, instr_section, dg, offset))
Process(
target=self._extend_path,
args=(all_paths, instr_section, dg, offset),
)
for instr_section in instrs
]
for p in processes:
p.start()
if (timeout == -1):
if timeout == -1:
# no timeout
for p in processes:
p.join()
@@ -162,7 +170,7 @@ class KernelDG(nx.DiGraph):
# extend path by edge bound latencies (e.g., store-to-load latency)
lat_path = []
for s, d in nx.utils.pairwise(path):
edge_lat = dg.edges[s, d]['latency']
edge_lat = dg.edges[s, d]["latency"]
# map source node back to original line numbers
if s >= offset:
s -= offset
@@ -310,17 +318,17 @@ class KernelDG(nx.DiGraph):
if change is None or reg_state.get(reg, {}) is None:
reg_state[reg] = None
else:
reg_state.setdefault(reg, {'name': reg, 'value': 0})
if change['name'] != reg:
reg_state.setdefault(reg, {"name": reg, "value": 0})
if change["name"] != reg:
# renaming occurred, overwrite value with up-to-now change of source register
reg_state[reg]['name'] = change['name']
src_reg_state = reg_state.get(change['name'], {'value': 0})
reg_state[reg]["name"] = change["name"]
src_reg_state = reg_state.get(change["name"], {"value": 0})
if src_reg_state is None:
# original register's state was changed beyond reconstruction
reg_state[reg] = None
continue
reg_state[reg]['value'] = src_reg_state['value']
reg_state[reg]['value'] += change['value']
reg_state[reg]["value"] = src_reg_state["value"]
reg_state[reg]["value"] += change["value"]
return reg_state
def get_dependent_instruction_forms(self, instr_form=None, line_number=None):
@@ -340,7 +348,8 @@ class KernelDG(nx.DiGraph):
if instruction_form.semantic_operands is None:
return is_read
for src in chain(
instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst
instruction_form.semantic_operands.source,
instruction_form.semantic_operands.src_dst,
):
if "register" in src:
is_read = self.parser.is_reg_dependend_of(register, src.register) or is_read
@@ -372,7 +381,8 @@ class KernelDG(nx.DiGraph):
if instruction_form.semantic_operands is None:
return False
for src in chain(
instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst
instruction_form.semantic_operands.source,
instruction_form.semantic_operands.src_dst,
):
# Here we check for mem dependencies only
if "memory" not in src:
@@ -387,23 +397,23 @@ class KernelDG(nx.DiGraph):
addr_change -= mem.offset.value
if mem.base and src.base:
base_change = register_changes.get(
src.base.get('prefix', '') + src.base.name,
{'name': src.base.get('prefix', '') + src.base.name, 'value': 0},
src.base.get("prefix", "") + src.base.name,
{"name": src.base.get("prefix", "") + src.base.name, "value": 0},
)
if base_change is None:
# Unknown change occurred
continue
if mem.base.get('prefix', '') + mem.base['name'] != base_change['name']:
if mem.base.get("prefix", "") + mem.base["name"] != base_change["name"]:
# base registers do not match
continue
addr_change += base_change['value']
addr_change += base_change["value"]
elif mem.base or src.base:
# base registers do not match
continue
if mem.index and src.index:
index_change = register_changes.get(
src.index.get('prefix', '') + src.index.name,
{'name': src.index.get('prefix', '') + src.index.name, 'value': 0},
src.index.get("prefix", "") + src.index.name,
{"name": src.index.get("prefix", "") + src.index.name, "value": 0},
)
if index_change is None:
# Unknown change occurred
@@ -411,10 +421,10 @@ class KernelDG(nx.DiGraph):
if mem.scale != src.scale:
# scale factors do not match
continue
if mem.index.get('prefix', '') + mem.index['name'] != index_change['name']:
if mem.index.get("prefix", "") + mem.index["name"] != index_change["name"]:
# index registers do not match
continue
addr_change += index_change['value'] * src.scale
addr_change += index_change["value"] * src.scale
elif mem.index or src.index:
# index registers do not match
continue
@@ -443,7 +453,8 @@ class KernelDG(nx.DiGraph):
)
# Check also for possible pre- or post-indexing in memory addresses
for src in chain(
instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst
instruction_form.semantic_operands.source,
instruction_form.semantic_operands.src_dst,
):
if "memory" in src:
if "pre_indexed" in src.memory or "post_indexed" in src.memory:

View File

@@ -1,7 +1,10 @@
#!/usr/bin/env python3
import os.path
DATA_DIRS = [os.path.expanduser("~/.osaca/data"), os.path.join(os.path.dirname(__file__), "data")]
DATA_DIRS = [
os.path.expanduser("~/.osaca/data"),
os.path.join(os.path.dirname(__file__), "data"),
]
CACHE_DIR = os.path.expanduser("~/.osaca/cache")

View File

@@ -18,7 +18,8 @@ here = os.path.abspath(os.path.dirname(__file__))
# Stolen from pip
def read(*names, **kwargs):
with io.open(
os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
os.path.join(os.path.dirname(__file__), *names),
encoding=kwargs.get("encoding", "utf8"),
) as fp:
return fp.read()
@@ -38,13 +39,20 @@ def _run_build_cache(dir):
# This is run inside the install staging directory (that had no .pyc files)
# We don't want to generate any.
# https://github.com/eliben/pycparser/pull/135
check_call([sys.executable, "-B", "_build_cache.py"], cwd=os.path.join(dir, "osaca", "data"))
check_call(
[sys.executable, "-B", "_build_cache.py"],
cwd=os.path.join(dir, "osaca", "data"),
)
class install(_install):
def run(self):
_install.run(self)
self.execute(_run_build_cache, (self.install_lib,), msg="Build ISA and architecture cache")
self.execute(
_run_build_cache,
(self.install_lib,),
msg="Build ISA and architecture cache",
)
class sdist(_sdist):

View File

@@ -33,7 +33,13 @@ class TestCLI(unittest.TestCase):
with self.assertRaises(ValueError):
osaca.check_arguments(args, parser)
args = parser.parse_args(
["--arch", "csx", "--import", "WRONG_BENCH", self._find_file("gs", "csx", "gcc")]
[
"--arch",
"csx",
"--import",
"WRONG_BENCH",
self._find_file("gs", "csx", "gcc"),
]
)
with self.assertRaises(ValueError):
osaca.check_arguments(args, parser)
@@ -65,7 +71,13 @@ class TestCLI(unittest.TestCase):
def test_check_db(self):
parser = osaca.create_parser(parser=ErrorRaisingArgumentParser())
args = parser.parse_args(
["--arch", "tx2", "--db-check", "--verbose", self._find_test_file("triad_x86_iaca.s")]
[
"--arch",
"tx2",
"--db-check",
"--verbose",
self._find_test_file("triad_x86_iaca.s"),
]
)
output = StringIO()
osaca.run(args, output_file=output)
@@ -134,7 +146,13 @@ class TestCLI(unittest.TestCase):
for c in comps[a]:
with self.subTest(kernel=k, arch=a, comp=c):
args = parser.parse_args(
["--arch", a, self._find_file(k, a, c), "--export-graph", "/dev/null"]
[
"--arch",
a,
self._find_file(k, a, c),
"--export-graph",
"/dev/null",
]
)
output = StringIO()
osaca.run(args, output_file=output)
@@ -204,17 +222,13 @@ class TestCLI(unittest.TestCase):
)
output = StringIO()
osaca.run(args, output_file=output)
self.assertTrue(
output.getvalue().count("WARNING: LCD analysis timed out") == 1
)
self.assertTrue(output.getvalue().count("WARNING: LCD analysis timed out") == 1)
args = parser.parse_args(
["--ignore-unknown", "--lcd-timeout", "-1", self._find_test_file(kernel)]
)
output = StringIO()
osaca.run(args, output_file=output)
self.assertTrue(
output.getvalue().count("WARNING: LCD analysis timed out") == 0
)
self.assertTrue(output.getvalue().count("WARNING: LCD analysis timed out") == 0)
def test_lines_arg(self):
# Run tests with --lines option
@@ -227,12 +241,24 @@ class TestCLI(unittest.TestCase):
args = []
args.append(
parser.parse_args(
["--lines", "146-154", "--arch", "csx", self._find_test_file(kernel_x86)]
[
"--lines",
"146-154",
"--arch",
"csx",
self._find_test_file(kernel_x86),
]
)
)
args.append(
parser.parse_args(
["--lines", "146:154", "--arch", "csx", self._find_test_file(kernel_x86)]
[
"--lines",
"146:154",
"--arch",
"csx",
self._find_test_file(kernel_x86),
]
)
)
args.append(

View File

@@ -17,7 +17,13 @@ class TestDBInterface(unittest.TestCase):
sample_entry = {
"name": "DoItRightAndDoItFast",
"operands": [
{"class": "memory", "offset": "imd", "base": "gpr", "index": "gpr", "scale": 8},
{
"class": "memory",
"offset": "imd",
"base": "gpr",
"index": "gpr",
"scale": 8,
},
{"class": "register", "name": "xmm"},
],
"throughput": 1.25,
@@ -35,7 +41,12 @@ class TestDBInterface(unittest.TestCase):
del self.entry_tx2["operands"][1]["name"]
self.entry_tx2["operands"][1]["prefix"] = "x"
# self.entry_zen1['port_pressure'] = [1, 1, 1, 1, 0, 1, 0, 0, 0, 0.5, 1, 0.5, 1]
self.entry_zen1["port_pressure"] = [[4, "0123"], [1, "4"], [1, "89"], [2, ["8D", "9D"]]]
self.entry_zen1["port_pressure"] = [
[4, "0123"],
[1, "4"],
[1, "89"],
[2, ["8D", "9D"]],
]
###########
# Tests

View File

@@ -1,15 +1,15 @@
# OSACA-BEGIN
.L4:
vmovsd %xmm0, 8(%rax)
addq $8, %rax
vmovsd %xmm0, 8(%rax,%rcx,8)
vaddsd (%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == (%rax+8)
subq $-8, %rax
vaddsd -8(%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == -8(%rax+16)
dec %rcx
vaddsd 8(%rax,%rcx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8)
movq %rcx, %rdx
vaddsd 8(%rax,%rdx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8)
vmovsd %xmm0, 8(%rax) # line 3 <----------------------------------+
addq $8, %rax # |
vmovsd %xmm0, 8(%rax,%rcx,8) # line 5 <-----------------------------------------------+
vaddsd (%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == (%rax+8) ---+ |
subq $-8, %rax # | |
vaddsd -8(%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == -8(%rax+16) ---+ |
dec %rcx # |
vaddsd 8(%rax,%rcx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) --+
movq %rcx, %rdx # |
vaddsd 8(%rax,%rdx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) --+
vmulsd %xmm1, %xmm0, %xmm0
addq $8, %rax
cmpq %rsi, %rax

View File

@@ -34,7 +34,8 @@ class TestFrontend(unittest.TestCase):
)
self.machine_model_tx2 = MachineModel(arch="tx2")
self.semantics_csx = ArchSemantics(
self.machine_model_csx, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/x86.yml")
self.machine_model_csx,
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/x86.yml"),
)
self.semantics_tx2 = ArchSemantics(
self.machine_model_tx2,
@@ -71,7 +72,11 @@ class TestFrontend(unittest.TestCase):
def test_frontend_AArch64(self):
dg = KernelDG(
self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2, self.semantics_tx2)
self.kernel_AArch64,
self.parser_AArch64,
self.machine_model_tx2,
self.semantics_tx2,
)
fe = Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "tx2.yml"))
fe.full_analysis(self.kernel_AArch64, dg, verbose=True)
# TODO compare output with checked string

View File

@@ -109,7 +109,8 @@ class TestMarkerUtils(unittest.TestCase):
kernel_start = len(
list(
filter(
None, (prologue + mov_start_var + bytes_var_1).split("\n")
None,
(prologue + mov_start_var + bytes_var_1).split("\n"),
)
)
)
@@ -142,7 +143,12 @@ class TestMarkerUtils(unittest.TestCase):
epilogue = ".LE9:\t\t#12.2\n" "call dummy\n"
kernel_length = len(list(filter(None, kernel.split("\n"))))
bytes_variations = [bytes_1_line, bytes_2_lines_1, bytes_2_lines_2, bytes_3_lines]
bytes_variations = [
bytes_1_line,
bytes_2_lines_1,
bytes_2_lines_2,
bytes_3_lines,
]
mov_start_variations = [mov_start_1, mov_start_2]
mov_end_variations = [mov_end_1, mov_end_2]
# actual tests
@@ -171,7 +177,8 @@ class TestMarkerUtils(unittest.TestCase):
kernel_start = len(
list(
filter(
None, (prologue + mov_start_var + bytes_var_1).split("\n")
None,
(prologue + mov_start_var + bytes_var_1).split("\n"),
)
)
)

View File

@@ -24,7 +24,9 @@ class TestParserAArch64(unittest.TestCase):
def test_comment_parser(self):
self.assertEqual(self._get_comment(self.parser, "// some comments"), "some comments")
self.assertEqual(self._get_comment(self.parser, "\t\t//AA BB CC \t end \t"), "AA BB CC end")
self.assertEqual(
self._get_comment(self.parser, "\t\t//AA BB CC \t end \t"), "AA BB CC end"
)
self.assertEqual(
self._get_comment(self.parser, "\t//// comment //// comment"),
"// comment //// comment",
@@ -36,7 +38,8 @@ class TestParserAArch64(unittest.TestCase):
self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3")
self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t//label1").name, ".L1")
self.assertEqual(
" ".join(self._get_label(self.parser, ".L1:\t\t\t//label1").comment), "label1"
" ".join(self._get_label(self.parser, ".L1:\t\t\t//label1").comment),
"label1",
)
with self.assertRaises(ParseException):
self._get_label(self.parser, "\t.cfi_startproc")
@@ -316,7 +319,8 @@ class TestParserAArch64(unittest.TestCase):
value1 = self.parser.normalize_imd(imd_decimal_1)
self.assertEqual(value1, self.parser.normalize_imd(imd_hex_1))
self.assertEqual(
self.parser.normalize_imd(imd_decimal_2), self.parser.normalize_imd(imd_hex_2)
self.parser.normalize_imd(imd_decimal_2),
self.parser.normalize_imd(imd_hex_2),
)
self.assertEqual(self.parser.normalize_imd(imd_float_11), value1)
self.assertEqual(self.parser.normalize_imd(imd_float_12), value1)

View File

@@ -26,7 +26,8 @@ class TestParserX86ATT(unittest.TestCase):
self.assertEqual(self._get_comment(self.parser, "# some comments"), "some comments")
self.assertEqual(self._get_comment(self.parser, "\t\t#AA BB CC \t end \t"), "AA BB CC end")
self.assertEqual(
self._get_comment(self.parser, "\t## comment ## comment"), "# comment ## comment"
self._get_comment(self.parser, "\t## comment ## comment"),
"# comment ## comment",
)
def test_label_parser(self):
@@ -35,7 +36,8 @@ class TestParserX86ATT(unittest.TestCase):
self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3")
self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t#label1").name, ".L1")
self.assertEqual(
" ".join(self._get_label(self.parser, ".L1:\t\t\t#label1").comment), "label1"
" ".join(self._get_label(self.parser, ".L1:\t\t\t#label1").comment),
"label1",
)
with self.assertRaises(ParseException):
self._get_label(self.parser, "\t.cfi_startproc")
@@ -47,7 +49,8 @@ class TestParserX86ATT(unittest.TestCase):
self.assertEqual(len(self._get_directive(self.parser, "\t.align\t16,0x90").parameters), 2)
self.assertEqual(len(self._get_directive(self.parser, ".text").parameters), 0)
self.assertEqual(
len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters), 2
len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters),
2,
)
self.assertEqual(
self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters[1],
@@ -62,7 +65,12 @@ class TestParserX86ATT(unittest.TestCase):
self.parser,
"\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support",
).parameters,
["__TEXT", "__eh_frame", "coalesced", "no_toc+strip_static_syms+live_support"],
[
"__TEXT",
"__eh_frame",
"coalesced",
"no_toc+strip_static_syms+live_support",
],
)
self.assertEqual(
self._get_directive(
@@ -74,7 +82,9 @@ class TestParserX86ATT(unittest.TestCase):
self._get_directive(self.parser, "\t.align\t16,0x90").parameters[1], "0x90"
)
self.assertEqual(
self._get_directive(self.parser, " .byte 100,103,144 #IACA START")["name"],
self._get_directive(self.parser, " .byte 100,103,144 #IACA START")[
"name"
],
"byte",
)
self.assertEqual(
@@ -242,10 +252,12 @@ class TestParserX86ATT(unittest.TestCase):
imd_decimal_2 = {"value": "8"}
imd_hex_2 = {"value": "8"}
self.assertEqual(
self.parser.normalize_imd(imd_decimal_1), self.parser.normalize_imd(imd_hex_1)
self.parser.normalize_imd(imd_decimal_1),
self.parser.normalize_imd(imd_hex_1),
)
self.assertEqual(
self.parser.normalize_imd(imd_decimal_2), self.parser.normalize_imd(imd_hex_2)
self.parser.normalize_imd(imd_decimal_2),
self.parser.normalize_imd(imd_hex_2),
)
def test_reg_dependency(self):

View File

@@ -11,8 +11,14 @@ from copy import deepcopy
import networkx as nx
from osaca.osaca import get_unmatched_instruction_ratio
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
from osaca.semantics import (INSTR_FLAGS, ArchSemantics, ISASemantics,
KernelDG, MachineModel, reduce_to_section)
from osaca.semantics import (
INSTR_FLAGS,
ArchSemantics,
ISASemantics,
KernelDG,
MachineModel,
reduce_to_section,
)
class TestSemanticTools(unittest.TestCase):
@@ -66,7 +72,8 @@ class TestSemanticTools(unittest.TestCase):
)
cls.semantics_x86 = ISASemantics("x86")
cls.semantics_csx = ArchSemantics(
cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml")
cls.machine_model_csx,
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml"),
)
cls.semantics_aarch64 = ISASemantics("aarch64")
cls.semantics_tx2 = ArchSemantics(
@@ -173,7 +180,12 @@ class TestSemanticTools(unittest.TestCase):
)
self.assertEqual(
test_mm_x86.get_store_throughput(
{"base": {"prefix": "NOT_IN_DB"}, "offset": None, "index": "NOT_NONE", "scale": 1}
{
"base": {"prefix": "NOT_IN_DB"},
"offset": None,
"index": "NOT_NONE",
"scale": 1,
}
),
[[1, "23"], [1, "4"]],
)
@@ -185,7 +197,12 @@ class TestSemanticTools(unittest.TestCase):
)
self.assertEqual(
test_mm_arm.get_store_throughput(
{"base": {"prefix": "NOT_IN_DB"}, "offset": None, "index": None, "scale": 1}
{
"base": {"prefix": "NOT_IN_DB"},
"offset": None,
"index": None,
"scale": 1,
}
),
[[1, "34"], [1, "5"]],
)
@@ -310,7 +327,10 @@ class TestSemanticTools(unittest.TestCase):
def test_memdependency_x86(self):
dg = KernelDG(
self.kernel_x86_memdep, self.parser_x86, self.machine_model_csx, self.semantics_csx
self.kernel_x86_memdep,
self.parser_x86,
self.machine_model_csx,
self.semantics_csx,
)
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {6, 8})
@@ -322,7 +342,10 @@ class TestSemanticTools(unittest.TestCase):
def test_kernelDG_AArch64(self):
dg = KernelDG(
self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2, self.semantics_tx2
self.kernel_AArch64,
self.parser_AArch64,
self.machine_model_tx2,
self.semantics_tx2,
)
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {7, 8})
@@ -400,7 +423,7 @@ class TestSemanticTools(unittest.TestCase):
# based on line 6
self.assertEqual(lc_deps[6]["latency"], 28.0)
self.assertEqual(
[(iform.line_number, lat) for iform, lat in lc_deps[6]['dependencies']],
[(iform.line_number, lat) for iform, lat in lc_deps[6]["dependencies"]],
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)],
)
@@ -423,7 +446,8 @@ class TestSemanticTools(unittest.TestCase):
# w/o flag dependencies: ID 5 w/ len=1
# TODO discuss
self.assertEqual(
lc_deps[lcd_id2]["root"], dg.dg.nodes(data=True)[lcd_id2]["instruction_form"]
lc_deps[lcd_id2]["root"],
dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
)
self.assertEqual(len(lc_deps[lcd_id2]["dependencies"]), 1)
self.assertEqual(
@@ -438,7 +462,7 @@ class TestSemanticTools(unittest.TestCase):
self.parser_x86,
self.machine_model_csx,
self.semantics_x86,
timeout=10
timeout=10,
)
end_time = time.perf_counter()
time_10 = end_time - start_time
@@ -448,7 +472,7 @@ class TestSemanticTools(unittest.TestCase):
self.parser_x86,
self.machine_model_csx,
self.semantics_x86,
timeout=2
timeout=2,
)
end_time = time.perf_counter()
time_2 = end_time - start_time

View File

@@ -1,33 +1,26 @@
#!/usr/bin/env python3
import sys
import os
import re
from subprocess import check_call, check_output, CalledProcessError, STDOUT
from itertools import chain
import shutil
from functools import lru_cache
from glob import glob
from pathlib import Path
from pprint import pprint
import socket
import pickle
import re
import shutil
import socket
import sys
from copy import deepcopy
from glob import glob
from itertools import chain
from pathlib import Path
from subprocess import STDOUT, CalledProcessError, check_call, check_output
import requests
import numpy as np
import pandas as pd
from osaca.osaca import reduce_to_section
from kerncraft.models import benchmark
from kerncraft.incore_model import (
parse_asm,
asm_instrumentation,
iaca_analyse_instrumented_binary,
llvm_mca_analyse_instrumented_assembly,
osaca_analyse_instrumented_assembly,
llvm_mca_analyse_instrumented_assembly
parse_asm,
)
from kerncraft.models import benchmark
from osaca.osaca import reduce_to_section
# Scaling of inner dimension for 1D, 2D and 3D kernels
# * consider kernels to be compiled with multiple compilers and different options
@@ -39,37 +32,50 @@ from kerncraft.incore_model import (
# Collect inner loop body assembly for each kernel/compiler/options combination
# * analyze with OSACA, IACA and LLVM-MCA
hosts_arch_map = {r"skylakesp2": "SKX",
r"ivyep1": "IVB",
r"naples1": "ZEN",
r"rome1": "ZEN2",
r"warmup": "TX2",
r"qp4-node-[0-9]+": "A64FX"}
hosts_arch_map = {
r"skylakesp2": "SKX",
r"ivyep1": "IVB",
r"naples1": "ZEN",
r"rome1": "ZEN2",
r"warmup": "TX2",
r"qp4-node-[0-9]+": "A64FX",
}
arch_info = {
'SKX': {
'prepare': ['likwid-setFrequencies -f 2.4 -t 0'.split()],
'IACA': 'SKX',
'OSACA': 'SKX',
'LLVM-MCA': '-mcpu=skylake-avx512',
'Ithemal': 'skl',
'isa': 'x86',
'perfevents': [],
"SKX": {
"prepare": ["likwid-setFrequencies -f 2.4 -t 0".split()],
"IACA": "SKX",
"OSACA": "SKX",
"LLVM-MCA": "-mcpu=skylake-avx512",
"Ithemal": "skl",
"isa": "x86",
"perfevents": [],
"cflags": {
'icc': {
"Ofast": "-Ofast -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
"O3": "-O3 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
"O2": "-O2 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
"O1": "-O1 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
"icc": {
"Ofast": (
"-Ofast -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
"-ffreestanding -falign-loops"
).split(),
"O3": (
"-O3 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
"-ffreestanding -falign-loops"
).split(),
"O2": (
"-O2 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
"-ffreestanding -falign-loops"
).split(),
"O1": (
"-O1 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
"-ffreestanding -falign-loops"
).split(),
},
'clang': {
"clang": {
"Ofast": "-Ofast -march=skylake-avx512 -ffreestanding".split(),
"O3": "-O3 -march=skylake-avx512 -ffreestanding".split(),
"O2": "-O2 -march=skylake-avx512 -ffreestanding".split(),
"O1": "-O1 -march=skylake-avx512 -ffreestanding".split(),
},
'gcc': {
"gcc": {
"Ofast": "-Ofast -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
"O3": "-O3 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
"O2": "-O2 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
@@ -77,17 +83,19 @@ arch_info = {
},
},
},
'IVB': {
'prepare': ['likwid-setFrequencies -f 3.0 -t 0'.split()],
'IACA': 'IVB',
'OSACA': 'IVB',
'LLVM-MCA': '-mcpu=ivybridge',
'Ithemal': 'ivb',
'isa': 'x86',
'perfevents': [],
"IVB": {
"prepare": ["likwid-setFrequencies -f 3.0 -t 0".split()],
"IACA": "IVB",
"OSACA": "IVB",
"LLVM-MCA": "-mcpu=ivybridge",
"Ithemal": "ivb",
"isa": "x86",
"perfevents": [],
"cflags": {
"icc": {
"Ofast": "-Ofast -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
"Ofast": (
"-Ofast -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops"
).split(),
"O3": "-O3 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
"O2": "-O2 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
"O1": "-O1 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
@@ -106,14 +114,14 @@ arch_info = {
},
},
},
'ZEN': {
'prepare': ['likwid-setFrequencies -f 2.3 -t 0'.split()],
'IACA': None,
'OSACA': 'ZEN1',
'LLVM-MCA': '-mcpu=znver1',
'Ithemal': None,
'isa': 'x86',
'perfevents': [],
"ZEN": {
"prepare": ["likwid-setFrequencies -f 2.3 -t 0".split()],
"IACA": None,
"OSACA": "ZEN1",
"LLVM-MCA": "-mcpu=znver1",
"Ithemal": None,
"isa": "x86",
"perfevents": [],
"cflags": {
"clang": {
"Ofast": "-Ofast -march=znver1 -ffreestanding".split(),
@@ -128,21 +136,23 @@ arch_info = {
"O1": "-O1 -march=znver1 -ffreestanding -falign-loops=16".split(),
},
"icc": {
"Ofast": "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
"Ofast": (
"-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops"
).split(),
"O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
"O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
"O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
},
},
},
'ZEN2': {
'prepare': ['likwid-setFrequencies -f 2.35 -t 0'.split()],
'IACA': None,
'OSACA': 'ZEN2',
'LLVM-MCA': '-mcpu=znver2',
'Ithemal': None,
'isa': 'x86',
'perfevents': [],
"ZEN2": {
"prepare": ["likwid-setFrequencies -f 2.35 -t 0".split()],
"IACA": None,
"OSACA": "ZEN2",
"LLVM-MCA": "-mcpu=znver2",
"Ithemal": None,
"isa": "x86",
"perfevents": [],
"cflags": {
"clang": {
"Ofast": "-Ofast -march=znver2 -ffreestanding".split(),
@@ -157,22 +167,24 @@ arch_info = {
"O1": "-O1 -march=znver2 -ffreestanding -falign-loops=16".split(),
},
"icc": {
"Ofast": "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
"Ofast": (
"-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops"
).split(),
"O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
"O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
"O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
},
},
},
'TX2': {
'Clock [MHz]': 2200, # reading out via perf. counters is not supported
'IACA': None,
'OSACA': 'TX2',
'assign_optimal_throughput': True,
'LLVM-MCA': '-mcpu=thunderx2t99 -march=aarch64',
'Ithemal': None,
'isa': 'aarch64',
'perfevents': [],
"TX2": {
"Clock [MHz]": 2200, # reading out via perf. counters is not supported
"IACA": None,
"OSACA": "TX2",
"assign_optimal_throughput": True,
"LLVM-MCA": "-mcpu=thunderx2t99 -march=aarch64",
"Ithemal": None,
"isa": "aarch64",
"perfevents": [],
"cflags": {
"clang": {
"Ofast": "-Ofast -target aarch64-unknown-linux-gnu -ffreestanding".split(),
@@ -188,16 +200,16 @@ arch_info = {
},
},
},
'A64FX': {
'Clock [MHz]': 1800, # reading out via perf. counters is not supported
'L2_volume_metric': 'L1<->L2 data volume [GBytes]',
'IACA': None,
'OSACA': 'A64FX',
'assign_optimal_throughput': False,
'LLVM-MCA': '-mcpu=a64fx -march=aarch64',
'Ithemal': None,
'isa': 'aarch64',
'perfevents': [],
"A64FX": {
"Clock [MHz]": 1800, # reading out via perf. counters is not supported
"L2_volume_metric": "L1<->L2 data volume [GBytes]",
"IACA": None,
"OSACA": "A64FX",
"assign_optimal_throughput": False,
"LLVM-MCA": "-mcpu=a64fx -march=aarch64",
"Ithemal": None,
"isa": "aarch64",
"perfevents": [],
"cflags": {
"gcc": {
"Ofast": "-Ofast -msve-vector-bits=512 -march=armv8.2-a+sve -ffreestanding".split(),
@@ -211,7 +223,7 @@ arch_info = {
"O2": "-O2 -target aarch64-unknown-linux-gnu -ffreestanding".split(),
"O1": "-O1 -target aarch64-unknown-linux-gnu -ffreestanding".split(),
},
}
},
},
}
@@ -231,12 +243,13 @@ def get_kernels(kernels=None):
if kernels is None:
kernels = []
for f in glob("kernels/*.c"):
f = f.rsplit('.', 1)[0].split('/', 1)[1]
f = f.rsplit(".", 1)[0].split("/", 1)[1]
if f == "dummy":
continue
kernels.append(f)
return kernels
# Columns:
# arch
# kernel
@@ -259,6 +272,7 @@ def get_kernels(kernels=None):
# allruns [list (length, repetitions, cy/it, L2 B/it)]
# perfevents [dict event: counter/it]
def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mca=True):
arch = get_current_arch()
if arch is None:
@@ -268,90 +282,132 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
islocal = True
arches = [arch]
ainfo = arch_info.get(arch)
if 'prepare' in ainfo:
for cmd in ainfo['prepare']:
if "prepare" in ainfo:
for cmd in ainfo["prepare"]:
check_call(cmd)
for arch in arches:
ainfo = arch_info.get(arch)
print(arch)
data_path = Path(f"build/{arch}/data.pkl")
if data_path.exists():
with data_path.open('rb') as f:
with data_path.open("rb") as f:
data = pickle.load(f)
else:
data = []
data_lastsaved = deepcopy(data)
for compiler, compiler_cflags in ainfo['cflags'].items():
for compiler, compiler_cflags in ainfo["cflags"].items():
if not shutil.which(compiler) and islocal:
print(compiler, "not found in path! Skipping...")
continue
for cflags_name, cflags in compiler_cflags.items():
for kernel in get_kernels():
print(f"{kernel:<15} {arch:>5} {compiler:>5} {cflags_name:>6}",
end=": ", flush=True)
row = list([r for r in data
if r['arch'] == arch and r['kernel'] == kernel and
r['compiler'] == compiler and r['cflags_name'] == cflags_name])
print(
f"{kernel:<15} {arch:>5} {compiler:>5} {cflags_name:>6}",
end=": ",
flush=True,
)
row = list(
[
r
for r in data
if r["arch"] == arch
and r["kernel"] == kernel
and r["compiler"] == compiler
and r["cflags_name"] == cflags_name
]
)
if row:
row = row[0]
else:
orig_row = None
row = {
'arch': arch,
'kernel': kernel,
'compiler': compiler,
'cflags_name': cflags_name,
'element_size': 8,
"arch": arch,
"kernel": kernel,
"compiler": compiler,
"cflags_name": cflags_name,
"element_size": 8,
}
data.append(row)
# Build
print("build", end="", flush=True)
asm_path, exec_path, overwrite = build_kernel(
kernel, arch, compiler, cflags, cflags_name, dontbuild=not islocal)
kernel,
arch,
compiler,
cflags,
cflags_name,
dontbuild=not islocal,
)
if overwrite:
# clear all measurement information
row['best_length'] = None
row['best_runtime'] = None
row['L2_traffic'] = None
row['allruns'] = None
row['perfevents'] = None
row["best_length"] = None
row["best_runtime"] = None
row["L2_traffic"] = None
row["allruns"] = None
row["perfevents"] = None
# Mark for IACA, OSACA and LLVM-MCA
print("mark", end="", flush=True)
try:
marked_asmfile, marked_objfile, row['pointer_increment'], overwrite = mark(
asm_path, compiler, cflags, isa=ainfo['isa'], overwrite=overwrite)
row['marking_error'] = None
(
marked_asmfile,
marked_objfile,
row["pointer_increment"],
overwrite,
) = mark(
asm_path,
compiler,
cflags,
isa=ainfo["isa"],
overwrite=overwrite,
)
row["marking_error"] = None
except ValueError as e:
row['marking_error'] = str(e)
row["marking_error"] = str(e)
print(":", e)
continue
if overwrite:
# clear all model generated information
for model in ['IACA', 'OSACA', 'LLVM-MCA', 'Ithemal']:
for k in ['ports', 'prediction', 'throughput', 'cp', 'lcd', 'raw']:
row[model+'_'+k] = None
for model in ['IACA', 'OSACA', 'LLVM-MCA', 'Ithemal']:
for k in ['ports', 'prediction', 'throughput', 'cp', 'lcd', 'raw']:
if model+'_'+k not in row:
row[model+'_'+k] = None
for model in ["IACA", "OSACA", "LLVM-MCA", "Ithemal"]:
for k in [
"ports",
"prediction",
"throughput",
"cp",
"lcd",
"raw",
]:
row[model + "_" + k] = None
for model in ["IACA", "OSACA", "LLVM-MCA", "Ithemal"]:
for k in [
"ports",
"prediction",
"throughput",
"cp",
"lcd",
"raw",
]:
if model + "_" + k not in row:
row[model + "_" + k] = None
# Analyze with IACA, if requested and configured
if iaca and ainfo['IACA'] is not None:
if iaca and ainfo["IACA"] is not None:
print("IACA", end="", flush=True)
if not row.get('IACA_ports'):
row['IACA_raw'] = iaca_analyse_instrumented_binary(
marked_objfile, micro_architecture=ainfo['IACA'])
row['IACA_ports'] = \
{k: v/(row['pointer_increment']/row['element_size'])
for k,v in row['IACA_raw']['port cycles'].items()}
row['IACA_prediction'] = row['IACA_raw']['throughput']/(
row['pointer_increment']/row['element_size'])
row['IACA_throughput'] = max(row['IACA_ports'].values())
if not row.get("IACA_ports"):
row["IACA_raw"] = iaca_analyse_instrumented_binary(
marked_objfile, micro_architecture=ainfo["IACA"]
)
row["IACA_ports"] = {
k: v / (row["pointer_increment"] / row["element_size"])
for k, v in row["IACA_raw"]["port cycles"].items()
}
row["IACA_prediction"] = row["IACA_raw"]["throughput"] / (
row["pointer_increment"] / row["element_size"]
)
row["IACA_throughput"] = max(row["IACA_ports"].values())
print(". ", end="", flush=True)
else:
print("! ", end="", flush=True)
@@ -359,56 +415,70 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
# Analyze with OSACA, if requested
if osaca:
print("OSACA", end="", flush=True)
if not row.get('OSACA_ports'):
row['OSACA_raw'] = osaca_analyse_instrumented_assembly(
marked_asmfile, micro_architecture=ainfo['OSACA'],
assign_optimal_throughput=ainfo.get('assign_optimal_throughput',
True))
row['OSACA_ports'] = \
{k: v/(row['pointer_increment']/row['element_size'])
for k,v in row['OSACA_raw']['port cycles'].items()}
row['OSACA_prediction'] = row['OSACA_raw']['throughput']/(
row['pointer_increment']/row['element_size'])
row['OSACA_throughput'] = max(row['OSACA_ports'].values())
row['OSACA_cp'] = row['OSACA_raw']['cp_latency']/(
row['pointer_increment']/row['element_size'])
row['OSACA_lcd'] = row['OSACA_raw']['lcd']/(
row['pointer_increment']/row['element_size'])
if not row.get("OSACA_ports"):
row["OSACA_raw"] = osaca_analyse_instrumented_assembly(
marked_asmfile,
micro_architecture=ainfo["OSACA"],
assign_optimal_throughput=ainfo.get(
"assign_optimal_throughput", True
),
)
row["OSACA_ports"] = {
k: v / (row["pointer_increment"] / row["element_size"])
for k, v in row["OSACA_raw"]["port cycles"].items()
}
row["OSACA_prediction"] = row["OSACA_raw"]["throughput"] / (
row["pointer_increment"] / row["element_size"]
)
row["OSACA_throughput"] = max(row["OSACA_ports"].values())
row["OSACA_cp"] = row["OSACA_raw"]["cp_latency"] / (
row["pointer_increment"] / row["element_size"]
)
row["OSACA_lcd"] = row["OSACA_raw"]["lcd"] / (
row["pointer_increment"] / row["element_size"]
)
print(". ", end="", flush=True)
else:
print("! ", end="", flush=True)
# Analyze with LLVM-MCA, if requested and configured
if llvm_mca and ainfo['LLVM-MCA'] is not None:
if llvm_mca and ainfo["LLVM-MCA"] is not None:
print("LLVM-MCA", end="", flush=True)
if not row.get('LLVM-MCA_ports'):
row['LLVM-MCA_raw'] = llvm_mca_analyse_instrumented_assembly(
if not row.get("LLVM-MCA_ports"):
row["LLVM-MCA_raw"] = llvm_mca_analyse_instrumented_assembly(
marked_asmfile,
micro_architecture=ainfo['LLVM-MCA'],
isa=ainfo['isa'])
row['LLVM-MCA_ports'] = \
{k: v/(row['pointer_increment']/row['element_size'])
for k,v in row['LLVM-MCA_raw']['port cycles'].items()}
row['LLVM-MCA_prediction'] =row['LLVM-MCA_raw']['throughput']/(
row['pointer_increment']/row['element_size'])
row['LLVM-MCA_throughput'] = max(row['LLVM-MCA_ports'].values())
row['LLVM-MCA_cp'] = row['LLVM-MCA_raw']['cp_latency']/(
row['pointer_increment']/row['element_size'])
row['LLVM-MCA_lcd'] = row['LLVM-MCA_raw']['lcd']/(
row['pointer_increment']/row['element_size'])
micro_architecture=ainfo["LLVM-MCA"],
isa=ainfo["isa"],
)
row["LLVM-MCA_ports"] = {
k: v / (row["pointer_increment"] / row["element_size"])
for k, v in row["LLVM-MCA_raw"]["port cycles"].items()
}
row["LLVM-MCA_prediction"] = row["LLVM-MCA_raw"]["throughput"] / (
row["pointer_increment"] / row["element_size"]
)
row["LLVM-MCA_throughput"] = max(row["LLVM-MCA_ports"].values())
row["LLVM-MCA_cp"] = row["LLVM-MCA_raw"]["cp_latency"] / (
row["pointer_increment"] / row["element_size"]
)
row["LLVM-MCA_lcd"] = row["LLVM-MCA_raw"]["lcd"] / (
row["pointer_increment"] / row["element_size"]
)
print(". ", end="", flush=True)
else:
print("! ", end="", flush=True)
# Analyze with Ithemal, if not running local and configured
if ainfo['Ithemal'] is not None and not islocal:
if ainfo["Ithemal"] is not None and not islocal:
print("Ithemal", end="", flush=True)
if not row.get('Ithemal_prediction'):
if not row.get("Ithemal_prediction"):
with open(marked_asmfile) as f:
parsed_code = parse_asm(f.read(), ainfo['isa'])
kernel = reduce_to_section(parsed_code, ainfo['isa'])
row['Ithemal_prediction'] = get_ithemal_prediction(
get_intel_style_code(marked_objfile), model=ainfo['Ithemal'])
parsed_code = parse_asm(f.read(), ainfo["isa"])
kernel = reduce_to_section(parsed_code, ainfo["isa"])
row["Ithemal_prediction"] = get_ithemal_prediction(
get_intel_style_code(marked_objfile),
model=ainfo["Ithemal"],
)
print(". ", end="", flush=True)
else:
print("! ", end="", flush=True)
@@ -416,43 +486,45 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
if measurements and islocal:
# run measurements if on same hardware
print("scale", end="", flush=True)
if not row.get('allruns'):
if not row.get("allruns"):
# find best length with concurrent L2 measurement
scaling_runs, best = scalingrun(exec_path)
row['best_length'] = best[0]
row['best_runtime'] = best[2]
row['L2_traffic'] = best[3]
row['allruns'] = scaling_runs
row["best_length"] = best[0]
row["best_runtime"] = best[2]
row["L2_traffic"] = best[3]
row["allruns"] = scaling_runs
print(f"({best[0]}). ", end="", flush=True)
else:
print(f"({row.get('best_length', None)})! ", end="", flush=True)
print(
f"({row.get('best_length', None)})! ",
end="",
flush=True,
)
print()
# dump to file
if data != data_lastsaved:
print('saving... ', end="", flush=True)
with data_path.open('wb') as f:
print("saving... ", end="", flush=True)
with data_path.open("wb") as f:
try:
pickle.dump(data, f)
data_lastsaved = deepcopy(data)
print('saved!')
print("saved!")
except KeyboardInterrupt:
f.seek(0)
pickle.dump(data, f)
print('saved!')
print("saved!")
sys.exit()
def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1*1024+1)):
#print('{:>8} {:>10} {:>10}'.format("x", "cy/it", "L2 B/it"))
parameters = chain(*[[total_iterations//i, i] for i in lengths])
def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1 * 1024 + 1)):
# print('{:>8} {:>10} {:>10}'.format("x", "cy/it", "L2 B/it"))
parameters = chain(*[[total_iterations // i, i] for i in lengths])
# TODO use arch specific events and group
r, o = perfctr(chain([kernel_exec], map(str, parameters)),
1, group="L2")
r, o = perfctr(chain([kernel_exec], map(str, parameters)), 1, group="L2")
global_infos = {}
for m in [re.match(r"(:?([a-z_\-0-9]+):)?([a-z]+): ([a-z\_\-0-9]+)", l) for l in o]:
for m in [re.match(r"(:?([a-z_\-0-9]+):)?([a-z]+): ([a-z\_\-0-9]+)", line) for line in o]:
if m is not None:
try:
v = int(m.group(4))
@@ -464,37 +536,45 @@ def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1*1024+1
r[m.group(2)][m.group(3)] = v
results = []
best = (float('inf'), None)
best = (float("inf"), None)
for markername, mmetrics in r.items():
kernelname, repetitions, *_, xlength = markername.split('_')
kernelname, repetitions, *_, xlength = markername.split("_")
repetitions = int(repetitions)
xlength = int(xlength)
total_iterations = mmetrics['repetitions'] * mmetrics['iterations']
if 'Clock [MHz]' in mmetrics:
clock_hz = mmetrics['Clock [MHz]']*1e6
total_iterations = mmetrics["repetitions"] * mmetrics["iterations"]
if "Clock [MHz]" in mmetrics:
clock_hz = mmetrics["Clock [MHz]"] * 1e6
else:
clock_hz = arch_info[get_current_arch()]['Clock [MHz]']*1e6
cyperit = mmetrics['Runtime (RDTSC) [s]'] * clock_hz / total_iterations
clock_hz = arch_info[get_current_arch()]["Clock [MHz]"] * 1e6
cyperit = mmetrics["Runtime (RDTSC) [s]"] * clock_hz / total_iterations
# TODO use arch-specific events and group
if 'L2D load data volume [GBytes]' in mmetrics:
l2perit = (mmetrics['L2D load data volume [GBytes]'] +
mmetrics.get('L2D evict data volume [GBytes]', 0))*1e9 / total_iterations
if "L2D load data volume [GBytes]" in mmetrics:
l2perit = (
(
mmetrics["L2D load data volume [GBytes]"]
+ mmetrics.get("L2D evict data volume [GBytes]", 0)
)
* 1e9
/ total_iterations
)
else:
l2perit = \
mmetrics[arch_info[get_current_arch()]['L2_volume_metric']]*1e9 / total_iterations
results.append(
(xlength, repetitions, cyperit, l2perit)
)
l2perit = (
mmetrics[arch_info[get_current_arch()]["L2_volume_metric"]]
* 1e9
/ total_iterations
)
results.append((xlength, repetitions, cyperit, l2perit))
if cyperit < best[0]:
best = cyperit, results[-1]
return results, best[1]
def mark(asm_path, compiler, cflags, isa, overwrite=False):
# Mark assembly for IACA, OSACA and LLVM-MCA
marked_asm_path = Path(asm_path).with_suffix(".marked.s")
if not marked_asm_path.exists() or overwrite:
overwrite = True
with open(asm_path) as fa, open(marked_asm_path, 'w') as fm:
with open(asm_path) as fa, open(marked_asm_path, "w") as fm:
try:
_, pointer_increment = asm_instrumentation(fa, fm, isa=isa)
except KeyboardInterrupt:
@@ -505,37 +585,46 @@ def mark(asm_path, compiler, cflags, isa, overwrite=False):
# use marked assembly and extract asm_block and pointer_increment
with open(marked_asm_path) as f:
marked_asm = f.read()
m = re.search(r'pointer_increment=([0-9]+)', marked_asm)
m = re.search(r"pointer_increment=([0-9]+)", marked_asm)
if m:
pointer_increment = int(m.group(1))
else:
os.unlink(marked_asm_path)
raise ValueError(
"Could not find `pointer_increment=<byte increment>`. Plase place into file.")
"Could not find `pointer_increment=<byte increment>`. Plase place into file."
)
print("! ", end="", flush=True)
# Compile marked assembly to object for IACA
marked_obj = Path(asm_path).with_suffix(".marked.o")
if not marked_obj.exists():
check_call([compiler] + ['-c', str(marked_asm_path), '-o', str(marked_obj)])
check_call([compiler] + ["-c", str(marked_asm_path), "-o", str(marked_obj)])
return str(marked_asm_path), str(marked_obj), pointer_increment, overwrite
def build_kernel(kernel, architecture, compiler, cflags, cflags_name, overwrite=False,
dontbuild=False):
def build_kernel(
kernel,
architecture,
compiler,
cflags,
cflags_name,
overwrite=False,
dontbuild=False,
):
build_path = f"build/{architecture}/{compiler}/{cflags_name}"
kernel_assembly = f"{build_path}/{kernel}.s"
kernel_object= f"{build_path}/{kernel}.o"
kernel_object = f"{build_path}/{kernel}.o"
executable = f"{build_path}/{kernel}"
Path(build_path).mkdir(parents=True, exist_ok=True)
if not overwrite:
# Overwrite if any kernel specific file is missing
overwrite = (
not os.path.exists(kernel_object) or
not os.path.exists(kernel_assembly) or
not os.path.exists(executable))
not os.path.exists(kernel_object)
or not os.path.exists(kernel_assembly)
or not os.path.exists(executable)
)
if dontbuild and overwrite:
raise ValueError("Must build, but not allowed.")
@@ -545,39 +634,43 @@ def build_kernel(kernel, architecture, compiler, cflags, cflags_name, overwrite=
if not Path(f"{build_path}/compiler_version").exists():
# Document compiler version
with open(f"{build_path}/compiler_version", 'w') as f:
f.write(check_output([compiler, "-v"], encoding='utf8', stderr=STDOUT))
with open(f"{build_path}/compiler_version", "w") as f:
f.write(check_output([compiler, "-v"], encoding="utf8", stderr=STDOUT))
if overwrite:
# build object + assembly
check_call([compiler] +
cflags +
["-c", f"kernels/{kernel}.c", "-o", kernel_object])
check_call([compiler] +
cflags +
["-c", f"kernels/{kernel}.c", "-S", "-o", kernel_assembly])
check_call([compiler] + cflags + ["-c", f"kernels/{kernel}.c", "-o", kernel_object])
check_call(
[compiler] + cflags + ["-c", f"kernels/{kernel}.c", "-S", "-o", kernel_assembly]
)
# build main and link executable
executable_cflags = [
os.environ["LIKWID_DEFINES"],
os.environ["LIKWID_INC"],
os.environ["LIKWID_LIB"]
] + ['-Ofast']
check_call([compiler] + executable_cflags + [
f"{build_path}/dummy.o",
kernel_object,
"-DMAIN",
f"kernels/{kernel}.c",
"-llikwid",
"-o", executable])
os.environ["LIKWID_LIB"],
] + ["-Ofast"]
check_call(
[compiler]
+ executable_cflags
+ [
f"{build_path}/dummy.o",
kernel_object,
"-DMAIN",
f"kernels/{kernel}.c",
"-llikwid",
"-o",
executable,
]
)
print(". ", end="", flush=True)
else:
print("! ", end="", flush=True)
return kernel_assembly, executable, overwrite
def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
def perfctr(cmd, cores, group="MEM", code_markers=True, verbose=0):
"""
Run *cmd* with likwid-perfctr and returns result as dict.
@@ -586,30 +679,32 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
if CLI argument cores > 1, running with multi-core, otherwise single-core
"""
# Making sure likwid-perfctr is available:
if benchmark.find_executable('likwid-perfctr') is None:
print("likwid-perfctr was not found. Make sure likwid is installed and found in PATH.",
file=sys.stderr)
if benchmark.find_executable("likwid-perfctr") is None:
print(
"likwid-perfctr was not found. Make sure likwid is installed and found in PATH.",
file=sys.stderr,
)
sys.exit(1)
# FIXME currently only single core measurements support!
perf_cmd = ['likwid-perfctr', '-f', '-O', '-g', group]
perf_cmd = ["likwid-perfctr", "-f", "-O", "-g", group]
cpu = 'S0:0'
cpu = "S0:0"
if cores > 1:
cpu += '-'+str(cores-1)
cpu += "-" + str(cores - 1)
# Pinned and measured on cpu
perf_cmd += ['-C', cpu]
perf_cmd += ["-C", cpu]
# code must be marked using likwid markers
perf_cmd.append('-m')
perf_cmd.append("-m")
perf_cmd += cmd
if verbose > 1:
print(' '.join(perf_cmd))
print(" ".join(perf_cmd))
try:
with benchmark.fix_env_variable('OMP_NUM_THREADS', None):
output = check_output(perf_cmd).decode('utf-8').split('\n')
with benchmark.fix_env_variable("OMP_NUM_THREADS", None):
output = check_output(perf_cmd).decode("utf-8").split("\n")
except CalledProcessError as e:
print("Executing benchmark failed: {!s}".format(e), file=sys.stderr)
sys.exit(1)
@@ -626,7 +721,7 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
m = re.match(r"TABLE,Region ([a-z\-0-9_]+),", line)
if m:
cur_region_name = m.group(1)
line = line.split(',')
line = line.split(",")
try:
# Metrics
cur_region_data[line[0]] = float(line[1])
@@ -639,12 +734,13 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
continue
try:
# Event counters
if line[2] == '-' or line[2] == 'nan':
if line[2] == "-" or line[2] == "nan":
counter_value = 0
else:
counter_value = int(line[2])
if re.fullmatch(r'[A-Z0-9_]+', line[0]) and \
re.fullmatch(r'[A-Z0-9]+(:[A-Z0-9]+=[0-9A-Fa-fx]+)*', line[1]):
if re.fullmatch(r"[A-Z0-9_]+", line[0]) and re.fullmatch(
r"[A-Z0-9]+(:[A-Z0-9]+=[0-9A-Fa-fx]+)*", line[1]
):
cur_region_data.setdefault(line[0], {})
cur_region_data[line[0]][line[1]] = counter_value
continue
@@ -659,49 +755,52 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
def remove_html_tags(text):
    """Return *text* with all HTML/XML tags removed.

    Uses a non-greedy match so that each ``<...>`` pair is stripped
    individually; the text between tags is preserved.

    :param text: string possibly containing markup tags
    :return: the string with every ``<...>`` span deleted
    """
    # Deduplicated: the diff rendering had both the old and new return line here.
    return re.sub("<.*?>", "", text)
def get_intel_style_code(marked_objfile):
    """Disassemble *marked_objfile* and return the marked kernel in Intel syntax.

    Runs objdump with Intel syntax, strips per-line indentation, and extracts
    the instruction span between the IACA/OSACA start marker
    (``mov ebx, 111`` + ``nop``) and end marker (``mov ebx, 222`` + ``nop``).

    :param marked_objfile: path to an object file compiled from marked assembly
    :return: newline-joined kernel instructions (label and closing jump dropped)
    :raises ValueError: (from ``str.index``) if the markers are not found
    """
    # Disassemble with Intel syntax
    cmd = (
        "objdump -d --demangle --no-leading-addr --no-leading-headers --no-show-raw-insn "
        "--x86-asm-syntax=intel"
    ).split(" ") + [marked_objfile]
    asm_raw = check_output(cmd).decode()
    # Normalize: drop leading/trailing whitespace on every line
    asm_raw = "\n".join([line.strip() for line in asm_raw.split("\n")])
    # Slice out everything between start marker and end marker
    kernel_raw = asm_raw[
        asm_raw.index("mov\tebx, 111\nnop")
        + len("mov\tebx, 111\nnop") : asm_raw.index("mov\tebx, 222\nnop")
    ]
    kernel_lines = kernel_raw.split("\n")
    # Ignore label and jump at the end of the loop body
    return "\n".join(kernel_lines[:-2])
def get_ithemal_prediction(code, model="skl"):
    """Query the public Ithemal web service for a throughput prediction.

    Posts *code* to the Ithemal demo endpoint and parses the HTML response.

    :param code: assembly kernel (Intel syntax) to analyze
    :param model: microarchitecture model, one of 'skl', 'hsw' or 'ivb'
    :return: predicted cycles per iteration, or NaN if no prediction was made
    """
    url = "http://3.18.198.23/predict"
    assert model in ["skl", "hsw", "ivb"]
    r = requests.post(url, {"code": code, "model": model})
    raw_text = remove_html_tags(r.text)
    # Service reports errors inline in the page text
    m = re.search("Could not generate a prediction: (.*)", raw_text)
    if m:
        print(" error:", m.group(1).strip(), end=" ")
        return float("nan")
    m = re.search("Prediction: ([0-9.]+) cycles per iteration", raw_text)
    if m:
        return float(m.group(1))
    else:
        return float("nan")
def main():
    """Entry point: build, mark and run all kernels.

    Detects whether the required LLVM-MCA version is available and forwards
    that, plus the ``--no-measurements`` CLI flag, to
    ``build_mark_run_all_kernels``. Exits the interpreter when done.
    """
    # Check for correct LLVM-MCA version; treat a missing binary as "not available"
    try:
        llvm_mca = "LLVM version 12.0.0" in check_output(["llvm-mca", "-version"]).decode()
    except FileNotFoundError:
        llvm_mca = False

    build_mark_run_all_kernels(
        measurements="--no-measurements" not in sys.argv, llvm_mca=llvm_mca
    )
    sys.exit()
if __name__ == "__main__":
    # Deduplicated: the diff rendering showed the old and new call line twice.
    main()