From d418c16f4af91579e107e735670eb74b995c4b3d Mon Sep 17 00:00:00 2001
From: JanLJL <jan.laukemann@fau.de>
Date: Thu, 26 Aug 2021 16:58:19 +0200
Subject: [PATCH] applied flake8 and black rules

---
 docs/version_from_src.py             |   3 +-
 osaca/data/generate_mov_entries.py   |   8 +-
 osaca/data/model_importer.py         |  36 +-
 osaca/db_interface.py                |  16 +-
 osaca/frontend.py                    |  11 +-
 osaca/osaca.py                       |  25 +-
 osaca/parser/parser_AArch64.py       |  50 ++-
 osaca/parser/parser_x86att.py        |  16 +-
 osaca/semantics/arch_semantics.py    |  11 +-
 osaca/semantics/hw_model.py          |  66 ++-
 osaca/semantics/isa_semantics.py     | 118 ++---
 osaca/semantics/kernel_dg.py         |  57 ++-
 osaca/utils.py                       |   5 +-
 setup.py                             |  14 +-
 tests/test_cli.py                    |  48 +-
 tests/test_db_interface.py           |  15 +-
 tests/test_files/kernel_x86_memdep.s |  20 +-
 tests/test_frontend.py               |   9 +-
 tests/test_marker_utils.py           |  13 +-
 tests/test_parser_AArch64.py         |  10 +-
 tests/test_parser_x86att.py          |  26 +-
 tests/test_semantics.py              |  46 +-
 validation/build_and_run.py          | 629 ++++++++++++++++-----------
 23 files changed, 781 insertions(+), 471 deletions(-)

diff --git a/docs/version_from_src.py b/docs/version_from_src.py
index 97a4cda..156a4e2 100644
--- a/docs/version_from_src.py
+++ b/docs/version_from_src.py
@@ -7,7 +7,8 @@ import re
 def __read(*names, **kwargs):
     """Reads in file"""
     with io.open(
-        os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
+        os.path.join(os.path.dirname(__file__), *names),
+        encoding=kwargs.get("encoding", "utf8"),
     ) as fp:
         return fp.read()
 
diff --git a/osaca/data/generate_mov_entries.py b/osaca/data/generate_mov_entries.py
index 13921ce..bf7cbf6 100755
--- a/osaca/data/generate_mov_entries.py
+++ b/osaca/data/generate_mov_entries.py
@@ -88,7 +88,7 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
 
         comment = None
         if load:
-            if 'ymm' in operand_types:
+            if "ymm" in operand_types:
                 port2D3D_pressure = 2
             else:
                 port2D3D_pressure = 1
@@ -96,7 +96,7 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
             latency += 4
             comment = "with load"
         if store:
-            if 'ymm' in operand_types:
+            if "ymm" in operand_types:
                 port4_pressure = 2
             else:
                 port4_pressure = 1
@@ -716,14 +716,14 @@ skx_mov_instructions = list(
             # ('movapd xmm xmm', ('1*p5', 1)),
             # ('vmovapd xmm xmm', ('1*p5', 1)),
             # ('vmovapd ymm ymm', ('1*p5', 1)),
-            ('vmovapd zmm zmm', ('', 0)),
+            ("vmovapd zmm zmm", ("", 0)),
             # https://www.felixcloutier.com/x86/movaps
             # TODO with masking!
             # TODO the following may eliminate or be bound to 1*p0156:
             # ('movaps xmm xmm', ('1*p5', 1)),
             # ('vmovaps xmm xmm', ('1*p5', 1)),
             # ('vmovaps ymm ymm', ('1*p5', 1)),
-            ('vmovaps zmm zmm', ('', 0)),
+            ("vmovaps zmm zmm", ("", 0)),
             # https://www.felixcloutier.com/x86/movbe
             ("movbe gpr mem", ("1*p15", 4)),
             ("movbe mem gpr", ("1*p15", 4)),
diff --git a/osaca/data/model_importer.py b/osaca/data/model_importer.py
index 92f5f25..d10555e 100755
--- a/osaca/data/model_importer.py
+++ b/osaca/data/model_importer.py
@@ -140,9 +140,11 @@ def extract_model(tree, arch, skip_mem=True):
             print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
             continue
         # skip if measured TP is smaller than computed
-        if [float(x.attrib["TP_ports"]) > min(float(x.attrib["TP_loop"]),
-                                              float(x.attrib["TP_unrolled"]))
-                for x in arch_tag.findall("measurement")][0]:
+        if [
+            float(x.attrib["TP_ports"])
+            > min(float(x.attrib["TP_loop"]), float(x.attrib["TP_unrolled"]))
+            for x in arch_tag.findall("measurement")
+        ][0]:
             print(
                 "Calculated TP is greater than measured TP.",
                 iform,
@@ -160,13 +162,15 @@ def extract_model(tree, arch, skip_mem=True):
                 throughput = float(measurement_tag.attrib["TP_ports"])
             else:
                 throughput = min(
-                    measurement_tag.attrib.get("TP_loop", float('inf')),
-                    measurement_tag.attrib.get("TP_unroll", float('inf')),
-                    measurement_tag.attrib.get("TP", float('inf')),
+                    measurement_tag.attrib.get("TP_loop", float("inf")),
+                    measurement_tag.attrib.get("TP_unroll", float("inf")),
+                    measurement_tag.attrib.get("TP", float("inf")),
                 )
-                if throughput == float('inf'):
+                if throughput == float("inf"):
                     throughput = None
-            uops = int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None
+            uops = (
+                int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None
+            )
             if "ports" in measurement_tag.attrib:
                 port_pressure.append(port_pressure_from_tag_attributes(measurement_tag.attrib))
             latencies = [
@@ -202,7 +206,11 @@ def extract_model(tree, arch, skip_mem=True):
         # Check if all are equal
         if port_pressure:
             if port_pressure[1:] != port_pressure[:-1]:
-                print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr)
+                print(
+                    "Contradicting port occupancies, using latest IACA:",
+                    iform,
+                    file=sys.stderr,
+                )
             port_pressure = port_pressure[-1]
         else:
             # print("No data available for this architecture:", mnemonic, file=sys.stderr)
@@ -222,10 +230,12 @@ def extract_model(tree, arch, skip_mem=True):
                         port_4 = True
                 # Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
                 if port_23 and not port_4:
-                    if arch.upper() in ["SNB", "IVB"] and any(
-                            [p.get('name', '') == 'ymm' for p in parameters]) and \
-                            not '128' in mnemonic:
-                        # x = 2 if SNB or IVB and ymm regiser in any operand and not '128' in 
+                    if (
+                        arch.upper() in ["SNB", "IVB"]
+                        and any([p.get("name", "") == "ymm" for p in parameters])
+                        and not ("128" in mnemonic)
+                    ):
+                        # x = 2 if SNB or IVB and ymm register in any operand and not '128' in
                         # instruction name
                         port2D3D_pressure = 2
                     else:
diff --git a/osaca/db_interface.py b/osaca/db_interface.py
index 65b63c6..09c352d 100755
--- a/osaca/db_interface.py
+++ b/osaca/db_interface.py
@@ -125,7 +125,10 @@ def _get_asmbench_output(input_data, isa):
     db_entries = {}
     for i in range(0, len(input_data), 4):
         if input_data[i + 3].strip() != "":
-            print("asmbench output not in the correct format! Format must be: ", file=sys.stderr)
+            print(
+                "asmbench output not in the correct format! Format must be: ",
+                file=sys.stderr,
+            )
             print(
                 "-------------\nMNEMONIC[-OP1[_OP2][...]]\nLatency: X cycles\n"
                 "Throughput: Y cycles\n\n-------------",
@@ -540,7 +543,16 @@ def _get_sanity_report(
 
 
 def _get_sanity_report_verbose(
-    total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa, bad_operands, colors=False
+    total,
+    m_tp,
+    m_l,
+    m_pp,
+    suspic_instr,
+    dup_arch,
+    dup_isa,
+    only_isa,
+    bad_operands,
+    colors=False,
 ):
     """Get the verbose part of the sanity report with all missing instruction forms."""
     BRIGHT_CYAN = "\033[1;36;1m" if colors else ""
diff --git a/osaca/frontend.py b/osaca/frontend.py
index fa6b014..81f20a5 100755
--- a/osaca/frontend.py
+++ b/osaca/frontend.py
@@ -202,7 +202,12 @@ class Frontend(object):
         )
 
     def combined_view(
-        self, kernel, cp_kernel: KernelDG, dep_dict, ignore_unknown=False, show_cmnts=True
+        self,
+        kernel,
+        cp_kernel: KernelDG,
+        dep_dict,
+        ignore_unknown=False,
+        show_cmnts=True,
     ):
         """
         Build combined view of kernel including port pressure (TP), a CP column and a
@@ -238,8 +243,8 @@ class Frontend(object):
         lcd_sum = 0.0
         lcd_lines = {}
         if dep_dict:
-            longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]['latency'])
-            lcd_sum = dep_dict[longest_lcd]['latency']
+            longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]["latency"])
+            lcd_sum = dep_dict[longest_lcd]["latency"]
             lcd_lines = {
                 instr["line_number"]: lat for instr, lat in dep_dict[longest_lcd]["dependencies"]
             }
diff --git a/osaca/osaca.py b/osaca/osaca.py
index f905104..765cff7 100755
--- a/osaca/osaca.py
+++ b/osaca/osaca.py
@@ -10,7 +10,13 @@ from functools import lru_cache
 from osaca.db_interface import import_benchmark_output, sanity_check
 from osaca.frontend import Frontend
 from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT
-from osaca.semantics import INSTR_FLAGS, ArchSemantics, KernelDG, MachineModel, reduce_to_section
+from osaca.semantics import (
+    INSTR_FLAGS,
+    ArchSemantics,
+    KernelDG,
+    MachineModel,
+    reduce_to_section,
+)
 
 
 SUPPORTED_ARCHS = [
@@ -37,7 +43,8 @@ DEFAULT_ARCHS = {
 def __read(*names, **kwargs):
     """Reads in file"""
     with io.open(
-        os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
+        os.path.join(os.path.dirname(__file__), *names),
+        encoding=kwargs.get("encoding", "utf8"),
     ) as fp:
         return fp.read()
 
@@ -79,7 +86,10 @@ def create_parser(parser=None):
 
     # Add arguments
     parser.add_argument(
-        "-V", "--version", action="version", version="%(prog)s " + __find_version("__init__.py")
+        "-V",
+        "--version",
+        action="version",
+        version="%(prog)s " + __find_version("__init__.py"),
     )
     parser.add_argument(
         "--arch",
@@ -167,7 +177,9 @@ def create_parser(parser=None):
         help="Write analysis to this file (default to stdout).",
     )
     parser.add_argument(
-        "file", type=argparse.FileType("r"), help="Path to object (ASM or instruction file)."
+        "file",
+        type=argparse.FileType("r"),
+        help="Path to object (ASM or instruction file).",
     )
 
     return parser
@@ -347,7 +359,10 @@ def run(args, output_file=sys.stdout):
         # Sanity check on DB
         verbose = True if args.verbose > 0 else False
         sanity_check(
-            args.arch, verbose=verbose, internet_check=args.internet_check, output_file=output_file
+            args.arch,
+            verbose=verbose,
+            internet_check=args.internet_check,
+            output_file=output_file,
         )
     elif "import_data" in args:
         # Import microbench output file into DB
diff --git a/osaca/parser/parser_AArch64.py b/osaca/parser/parser_AArch64.py
index ce3376c..0f92edb 100755
--- a/osaca/parser/parser_AArch64.py
+++ b/osaca/parser/parser_AArch64.py
@@ -26,9 +26,9 @@ class ParserAArch64(BaseParser):
             pp.ZeroOrMore(pp.Word(pp.printables))
         ).setResultsName(self.COMMENT_ID)
         # Define ARM assembly identifier
-        decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)).setResultsName(
-            "value"
-        )
+        decimal_number = pp.Combine(
+            pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)
+        ).setResultsName("value")
         hex_number = pp.Combine(pp.Literal("0x") + pp.Word(pp.hexnums)).setResultsName("value")
         relocation = pp.Combine(pp.Literal(":") + pp.Word(pp.alphanums + "_") + pp.Literal(":"))
         first = pp.Word(pp.alphas + "_.", exact=1)
@@ -152,7 +152,9 @@ class ParserAArch64(BaseParser):
             pp.Literal("{")
             + (
                 pp.delimitedList(pp.Combine(self.list_element), delim=",").setResultsName("list")
-                ^ pp.delimitedList(pp.Combine(self.list_element), delim="-").setResultsName("range")
+                ^ pp.delimitedList(pp.Combine(self.list_element), delim="-").setResultsName(
+                    "range"
+                )
             )
             + pp.Literal("}")
             + pp.Optional(index)
@@ -256,9 +258,7 @@ class ParserAArch64(BaseParser):
         # 2. Parse label
         if result is None:
             try:
-                result = self.process_operand(
-                    self.label.parseString(line, parseAll=True).asDict()
-                )
+                result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
                 result = AttrDict.convert_dict(result)
                 instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name
                 if self.COMMENT_ID in result[self.LABEL_ID]:
@@ -293,7 +293,9 @@ class ParserAArch64(BaseParser):
             try:
                 result = self.parse_instruction(line)
             except (pp.ParseException, KeyError) as e:
-                raise ValueError("Unable to parse {!r} on line {}".format(line, line_number)) from e
+                raise ValueError(
+                    "Unable to parse {!r} on line {}".format(line, line_number)
+                ) from e
             instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID]
             instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID]
             instruction_form[self.COMMENT_ID] = result[self.COMMENT_ID]
@@ -390,9 +392,9 @@ class ParserAArch64(BaseParser):
             new_dict["pre_indexed"] = True
         if "post_indexed" in memory_address:
             if "value" in memory_address["post_indexed"]:
-                new_dict["post_indexed"] = {"value": int(
-                    memory_address["post_indexed"]["value"], 0
-                )}
+                new_dict["post_indexed"] = {
+                    "value": int(memory_address["post_indexed"]["value"], 0)
+                }
             else:
                 new_dict["post_indexed"] = memory_address["post_indexed"]
         return AttrDict({self.MEMORY_ID: new_dict})
@@ -408,27 +410,27 @@ class ParserAArch64(BaseParser):
         Resolve range or list register operand to list of registers.
         Returns None if neither list nor range
         """
-        if 'register' in operand:
-            if 'list' in operand.register:
-                index = operand.register.get('index')
+        if "register" in operand:
+            if "list" in operand.register:
+                index = operand.register.get("index")
                 range_list = []
                 for reg in operand.register.list:
                     reg = deepcopy(reg)
                     if index is not None:
-                        reg['index'] = int(index, 0)
+                        reg["index"] = int(index, 0)
                     range_list.append(AttrDict({self.REGISTER_ID: reg}))
                 return range_list
-            elif 'range' in operand.register:
+            elif "range" in operand.register:
                 base_register = operand.register.range[0]
-                index = operand.register.get('index')
+                index = operand.register.get("index")
                 range_list = []
                 start_name = base_register.name
                 end_name = operand.register.range[1].name
                 for name in range(int(start_name), int(end_name) + 1):
                     reg = deepcopy(base_register)
                     if index is not None:
-                        reg['index'] = int(index, 0)
-                    reg['name'] = str(name)
+                        reg["index"] = int(index, 0)
+                    reg["name"] = str(name)
                     range_list.append(AttrDict({self.REGISTER_ID: reg}))
                 return range_list
         # neither register list nor range, return unmodified
@@ -482,10 +484,12 @@ class ParserAArch64(BaseParser):
             return AttrDict({self.IMMEDIATE_ID: immediate})
         else:
             # change 'mantissa' key to 'value'
-            return AttrDict({
-                self.IMMEDIATE_ID: AttrDict({
-                    "value": immediate[dict_name]["mantissa"],
-                    "type": dict_name})}
+            return AttrDict(
+                {
+                    self.IMMEDIATE_ID: AttrDict(
+                        {"value": immediate[dict_name]["mantissa"], "type": dict_name}
+                    )
+                }
             )
 
     def process_label(self, label):
diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py
index 5c2a493..f12d9aa 100755
--- a/osaca/parser/parser_x86att.py
+++ b/osaca/parser/parser_x86att.py
@@ -23,9 +23,9 @@ class ParserX86ATT(BaseParser):
 
     def construct_parser(self):
         """Create parser for ARM AArch64 ISA."""
-        decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)).setResultsName(
-            "value"
-        )
+        decimal_number = pp.Combine(
+            pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)
+        ).setResultsName("value")
         hex_number = pp.Combine(
             pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums)
         ).setResultsName("value")
@@ -41,7 +41,8 @@ class ParserX86ATT(BaseParser):
         identifier = pp.Group(
             pp.Optional(id_offset).setResultsName("offset")
             + pp.Combine(
-                pp.delimitedList(pp.Combine(first + pp.Optional(rest)), delim="::"), joinString="::"
+                pp.delimitedList(pp.Combine(first + pp.Optional(rest)), delim="::"),
+                joinString="::",
             ).setResultsName("name")
             + pp.Optional(relocation).setResultsName("relocation")
         ).setResultsName("identifier")
@@ -443,7 +444,12 @@ class ParserX86ATT(BaseParser):
         """Check if register is a vector register"""
         if register is None:
             return False
-        if register["name"].rstrip(string.digits).lower() in ["mm", "xmm", "ymm", "zmm"]:
+        if register["name"].rstrip(string.digits).lower() in [
+            "mm",
+            "xmm",
+            "ymm",
+            "zmm",
+        ]:
             return True
         return False
 
diff --git a/osaca/semantics/arch_semantics.py b/osaca/semantics/arch_semantics.py
index 29c01cf..103c71f 100755
--- a/osaca/semantics/arch_semantics.py
+++ b/osaca/semantics/arch_semantics.py
@@ -47,7 +47,9 @@ class ArchSemantics(ISASemantics):
                 indices = [port_list.index(p) for p in ports]
                 # check if port sum of used ports for uop are unbalanced
                 port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel)))
-                instr_ports = self._to_list(itemgetter(*indices)(instruction_form["port_pressure"]))
+                instr_ports = self._to_list(
+                    itemgetter(*indices)(instruction_form["port_pressure"])
+                )
                 if len(set(port_sums)) > 1:
                     # balance ports
                     # init list for keeping track of the current change
@@ -270,7 +272,8 @@ class ArchSemantics(ISASemantics):
                                     reg_type
                                 ]
                                 st_data_port_pressure = [
-                                    pp * multiplier for pp in st_data_port_pressure]
+                                    pp * multiplier for pp in st_data_port_pressure
+                                ]
                             data_port_pressure = [
                                 sum(x) for x in zip(data_port_pressure, st_data_port_pressure)
                             ]
@@ -343,7 +346,9 @@ class ArchSemantics(ISASemantics):
     def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags):
         """Apply performance data to instruction if it was found in the archDB"""
         throughput = instruction_data["throughput"]
-        port_pressure = self._machine_model.average_port_pressure(instruction_data["port_pressure"])
+        port_pressure = self._machine_model.average_port_pressure(
+            instruction_data["port_pressure"]
+        )
         instruction_form["port_uops"] = instruction_data["port_pressure"]
         try:
             assert isinstance(port_pressure, list)
diff --git a/osaca/semantics/hw_model.py b/osaca/semantics/hw_model.py
index b95a8a3..948c2de 100755
--- a/osaca/semantics/hw_model.py
+++ b/osaca/semantics/hw_model.py
@@ -1,20 +1,19 @@
 #!/usr/bin/env python3
 
+import hashlib
 import os
 import pickle
 import re
 import string
+from collections import defaultdict
 from copy import deepcopy
 from itertools import product
-import hashlib
 from pathlib import Path
-from collections import defaultdict
 
 import ruamel.yaml
-from ruamel.yaml.compat import StringIO
-
 from osaca import __version__, utils
 from osaca.parser import ParserX86ATT
+from ruamel.yaml.compat import StringIO
 
 
 class MachineModel(object):
@@ -37,7 +36,13 @@ class MachineModel(object):
                 "hidden_loads": None,
                 "load_latency": {},
                 "load_throughput": [
-                    {"base": b, "index": i, "offset": o, "scale": s, "port_pressure": []}
+                    {
+                        "base": b,
+                        "index": i,
+                        "offset": o,
+                        "scale": s,
+                        "port_pressure": [],
+                    }
                     for b, i, o, s in product(["gpr"], ["gpr", None], ["imd", None], [1, 8])
                 ],
                 "load_throughput_default": [],
@@ -128,7 +133,8 @@ class MachineModel(object):
                 instruction_form
                 for instruction_form in name_matched_iforms
                 if self._match_operands(
-                    instruction_form["operands"] if "operands" in instruction_form else [], operands
+                    instruction_form["operands"] if "operands" in instruction_form else [],
+                    operands,
                 )
             )
         except StopIteration:
@@ -150,7 +156,13 @@ class MachineModel(object):
         return average_pressure
 
     def set_instruction(
-        self, name, operands=None, latency=None, port_pressure=None, throughput=None, uops=None
+        self,
+        name,
+        operands=None,
+        latency=None,
+        port_pressure=None,
+        throughput=None,
+        uops=None,
     ):
         """Import instruction form information."""
         # If it already exists. Overwrite information.
@@ -500,7 +512,11 @@ class MachineModel(object):
         """Check if the types of operand ``i_operand`` and ``operand`` match."""
         # check for wildcard
         if self.WILDCARD in operand:
-            if "class" in i_operand and i_operand["class"] == "register" or "register" in i_operand:
+            if (
+                "class" in i_operand
+                and i_operand["class"] == "register"
+                or "register" in i_operand
+            ):
                 return True
             else:
                 return False
@@ -527,20 +543,27 @@ class MachineModel(object):
             return self._is_AArch64_mem_type(i_operand, operand["memory"])
         # immediate
         if i_operand["class"] == "immediate" and i_operand["imd"] == self.WILDCARD:
-            return "value" in operand or \
-                ("immediate" in operand and "value" in operand["immediate"]) 
+            return "value" in operand or (
+                "immediate" in operand and "value" in operand["immediate"]
+            )
         if i_operand["class"] == "immediate" and i_operand["imd"] == "int":
-            return ("value" in operand and operand.get("type", None) == "int") or \
-                ("immediate" in operand and "value" in operand["immediate"] and
-                 operand["immediate"].get("type", None) == "int")
+            return ("value" in operand and operand.get("type", None) == "int") or (
+                "immediate" in operand
+                and "value" in operand["immediate"]
+                and operand["immediate"].get("type", None) == "int"
+            )
         if i_operand["class"] == "immediate" and i_operand["imd"] == "float":
-            return ("float" in operand and operand.get("type", None) == "float") or \
-                ("immediate" in operand and "float" in operand["immediate"] and
-                 operand["immediate"].get("type", None) == "float")
+            return ("float" in operand and operand.get("type", None) == "float") or (
+                "immediate" in operand
+                and "float" in operand["immediate"]
+                and operand["immediate"].get("type", None) == "float"
+            )
         if i_operand["class"] == "immediate" and i_operand["imd"] == "double":
-            return ("double" in operand and operand.get("type", None) == "double") or \
-                ("immediate" in operand and "double" in operand["immediate"] and
-                 operand["immediate"].get("type", None) == "double")
+            return ("double" in operand and operand.get("type", None) == "double") or (
+                "immediate" in operand
+                and "double" in operand["immediate"]
+                and operand["immediate"].get("type", None) == "double"
+            )
         # identifier
         if "identifier" in operand or (
             "immediate" in operand and "identifier" in operand["immediate"]
@@ -577,7 +600,10 @@ class MachineModel(object):
     def _compare_db_entries(self, operand_1, operand_2):
         """Check if operand types in DB format (i.e., not parsed) match."""
         operand_attributes = list(
-            filter(lambda x: True if x != "source" and x != "destination" else False, operand_1)
+            filter(
+                lambda x: True if x != "source" and x != "destination" else False,
+                operand_1,
+            )
         )
         for key in operand_attributes:
             try:
diff --git a/osaca/semantics/isa_semantics.py b/osaca/semantics/isa_semantics.py
index 5889eb3..b792de9 100755
--- a/osaca/semantics/isa_semantics.py
+++ b/osaca/semantics/isa_semantics.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python3
 from itertools import chain
-from copy import deepcopy
 
 from osaca import utils
 from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
@@ -100,53 +99,68 @@ class ISASemantics(object):
         # post-process pre- and post-indexing for aarch64 memory operands
         if self._isa == "aarch64":
             for operand in [op for op in op_dict["source"] if "memory" in op]:
-                post_indexed = ("post_indexed" in operand["memory"] and 
-                                operand["memory"]["post_indexed"])
-                pre_indexed = ("pre_indexed" in operand["memory"] and
-                               operand["memory"]["pre_indexed"])
+                post_indexed = (
+                    "post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
+                )
+                pre_indexed = (
+                    "pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
+                )
                 if post_indexed or pre_indexed:
                     op_dict["src_dst"].append(
-                        AttrDict.convert_dict({
-                            "register": operand["memory"]["base"],
-                            "pre_indexed": pre_indexed,
-                            "post_indexed": post_indexed})
+                        AttrDict.convert_dict(
+                            {
+                                "register": operand["memory"]["base"],
+                                "pre_indexed": pre_indexed,
+                                "post_indexed": post_indexed,
+                            }
+                        )
                     )
             for operand in [op for op in op_dict["destination"] if "memory" in op]:
-                post_indexed = ("post_indexed" in operand["memory"] and 
-                                operand["memory"]["post_indexed"])
-                pre_indexed = ("pre_indexed" in operand["memory"] and
-                               operand["memory"]["pre_indexed"])
+                post_indexed = (
+                    "post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
+                )
+                pre_indexed = (
+                    "pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
+                )
                 if post_indexed or pre_indexed:
                     op_dict["src_dst"].append(
-                        AttrDict.convert_dict({
-                            "register": operand["memory"]["base"],
-                            "pre_indexed": pre_indexed,
-                            "post_indexed": post_indexed})
+                        AttrDict.convert_dict(
+                            {
+                                "register": operand["memory"]["base"],
+                                "pre_indexed": pre_indexed,
+                                "post_indexed": post_indexed,
+                            }
+                        )
                     )
-            
+
         # store operand list in dict and reassign operand key/value pair
         instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict)
         # assign LD/ST flags
-        instruction_form["flags"] = instruction_form["flags"] if "flags" in instruction_form else []
+        instruction_form["flags"] = (
+            instruction_form["flags"] if "flags" in instruction_form else []
+        )
         if self._has_load(instruction_form):
             instruction_form["flags"] += [INSTR_FLAGS.HAS_LD]
         if self._has_store(instruction_form):
             instruction_form["flags"] += [INSTR_FLAGS.HAS_ST]
-        
 
     def get_reg_changes(self, instruction_form, only_postindexed=False):
         """
         Returns register changes, as dict, for insruction_form, based on operation defined in isa.
-        
+
         Empty dict if no changes of registers occured. None for registers with unknown changes.
         If only_postindexed is True, only considers changes due to post_indexed memory references.
         """
-        if instruction_form.get('instruction') is None:
+        if instruction_form.get("instruction") is None:
             return {}
-        dest_reg_names = [op.register.get('prefix', '') + op.register.name
-                          for op in chain(instruction_form.semantic_operands.destination,
-                                          instruction_form.semantic_operands.src_dst)
-                          if 'register' in op]
+        dest_reg_names = [
+            op.register.get("prefix", "") + op.register.name
+            for op in chain(
+                instruction_form.semantic_operands.destination,
+                instruction_form.semantic_operands.src_dst,
+            )
+            if "register" in op
+        ]
         isa_data = self._isa_model.get_instruction(
             instruction_form["instruction"], instruction_form["operands"]
         )
@@ -162,50 +176,50 @@ class ISASemantics(object):
 
         if only_postindexed:
             for o in instruction_form.operands:
-                if 'post_indexed' in o.get('memory', {}):
-                    base_name = o.memory.base.get('prefix', '') + o.memory.base.name
-                    return {base_name: {
-                        'name': o.memory.base.get('prefix', '') + o.memory.base.name,
-                        'value': o.memory.post_indexed.value
-                    }}
+                if "post_indexed" in o.get("memory", {}):
+                    base_name = o.memory.base.get("prefix", "") + o.memory.base.name
+                    return {
+                        base_name: {
+                            "name": o.memory.base.get("prefix", "") + o.memory.base.name,
+                            "value": o.memory.post_indexed.value,
+                        }
+                    }
             return {}
 
         reg_operand_names = {}  # e.g., {'rax': 'op1'}
         operand_state = {}  # e.g., {'op1': {'name': 'rax', 'value': 0}}  0 means unchanged
 
         for o in instruction_form.operands:
-            if 'pre_indexed' in o.get('memory', {}):
+            if "pre_indexed" in o.get("memory", {}):
                 # Assuming no isa_data.operation
                 if isa_data.get("operation", None) is not None:
                     raise ValueError(
                         "ISA information for pre-indexed instruction {!r} has operation set."
-                        "This is currently not supprted.".format(instruction_form.line))
-                base_name = o.memory.base.get('prefix', '') + o.memory.base.name
-                reg_operand_names = {base_name: 'op1'}
-                operand_state = {'op1': {
-                    'name': base_name,
-                    'value': o.memory.offset.value
-                }}
+                        "This is currently not supprted.".format(instruction_form.line)
+                    )
+                base_name = o.memory.base.get("prefix", "") + o.memory.base.name
+                reg_operand_names = {base_name: "op1"}
+                operand_state = {"op1": {"name": base_name, "value": o.memory.offset.value}}
 
-        if isa_data is not None and 'operation' in isa_data:
+        if isa_data is not None and "operation" in isa_data:
             for i, o in enumerate(instruction_form.operands):
                 operand_name = "op{}".format(i + 1)
                 if "register" in o:
-                    o_reg_name = o["register"].get('prefix', '') + o["register"]["name"]
+                    o_reg_name = o["register"].get("prefix", "") + o["register"]["name"]
                     reg_operand_names[o_reg_name] = operand_name
-                    operand_state[operand_name] = {
-                        'name': o_reg_name,
-                        'value': 0}
+                    operand_state[operand_name] = {"name": o_reg_name, "value": 0}
                 elif "immediate" in o:
-                    operand_state[operand_name] = {'value': o["immediate"]["value"]}
+                    operand_state[operand_name] = {"value": o["immediate"]["value"]}
                 elif "memory" in o:
                     # TODO lea needs some thinking about
                     pass
 
-            operand_changes = exec(isa_data['operation'], {}, operand_state)
+            exec(isa_data["operation"], {}, operand_state)
 
-        change_dict = {reg_name: operand_state.get(reg_operand_names.get(reg_name))
-                       for reg_name in dest_reg_names}
+        change_dict = {
+            reg_name: operand_state.get(reg_operand_names.get(reg_name))
+            for reg_name in dest_reg_names
+        }
         return change_dict
 
     def _apply_found_ISA_data(self, isa_data, operands):
@@ -231,8 +245,10 @@ class ISASemantics(object):
             if "hidden_operands" in isa_data:
                 op_dict["destination"] += [
                     AttrDict.convert_dict(
-                        {hop["class"]: {k: hop[k] for k in ["class", "source", "destination"]}})
-                     for hop in isa_data["hidden_operands"]]
+                        {hop["class"]: {k: hop[k] for k in ["class", "source", "destination"]}}
+                    )
+                    for hop in isa_data["hidden_operands"]
+                ]
             return op_dict
 
         for i, op in enumerate(isa_data["operands"]):
diff --git a/osaca/semantics/kernel_dg.py b/osaca/semantics/kernel_dg.py
index b3a8af6..e95034e 100755
--- a/osaca/semantics/kernel_dg.py
+++ b/osaca/semantics/kernel_dg.py
@@ -16,7 +16,12 @@ class KernelDG(nx.DiGraph):
     INSTRUCTION_THRESHOLD = 50
 
     def __init__(
-        self, parsed_kernel, parser, hw_model: MachineModel, semantics: ArchSemantics, timeout=10
+        self,
+        parsed_kernel,
+        parser,
+        hw_model: MachineModel,
+        semantics: ArchSemantics,
+        timeout=10,
     ):
         self.timed_out = False
         self.kernel = parsed_kernel
@@ -73,7 +78,7 @@ class KernelDG(nx.DiGraph):
                     else instruction_form["latency_wo_load"]
                 )
                 if "storeload_dep" in dep_flags:
-                    edge_weight += self.model.get('store_to_load_forward_latency', 0)
+                    edge_weight += self.model.get("store_to_load_forward_latency", 0)
                 dg.add_edge(
                     instruction_form["line_number"],
                     dep["line_number"],
@@ -98,7 +103,7 @@ class KernelDG(nx.DiGraph):
         tmp_kernel = [] + kernel
         for orig_iform in kernel:
             temp_iform = copy.copy(orig_iform)
-            temp_iform['line_number'] += offset
+            temp_iform["line_number"] += offset
             tmp_kernel.append(temp_iform)
         # get dependency graph
         dg = self.create_DG(tmp_kernel)
@@ -118,12 +123,15 @@ class KernelDG(nx.DiGraph):
             with Manager() as manager:
                 all_paths = manager.list()
                 processes = [
-                    Process(target=self._extend_path, args=(all_paths, instr_section, dg, offset))
+                    Process(
+                        target=self._extend_path,
+                        args=(all_paths, instr_section, dg, offset),
+                    )
                     for instr_section in instrs
                 ]
                 for p in processes:
                     p.start()
-                if (timeout == -1):
+                if timeout == -1:
                     # no timeout
                     for p in processes:
                         p.join()
@@ -162,7 +170,7 @@ class KernelDG(nx.DiGraph):
             # extend path by edge bound latencies (e.g., store-to-load latency)
             lat_path = []
             for s, d in nx.utils.pairwise(path):
-                edge_lat = dg.edges[s, d]['latency']
+                edge_lat = dg.edges[s, d]["latency"]
                 # map source node back to original line numbers
                 if s >= offset:
                     s -= offset
@@ -310,17 +318,17 @@ class KernelDG(nx.DiGraph):
             if change is None or reg_state.get(reg, {}) is None:
                 reg_state[reg] = None
             else:
-                reg_state.setdefault(reg, {'name': reg, 'value': 0})
-                if change['name'] != reg:
+                reg_state.setdefault(reg, {"name": reg, "value": 0})
+                if change["name"] != reg:
                     # renaming occured, ovrwrite value with up-to-now change of source register
-                    reg_state[reg]['name'] = change['name']
-                    src_reg_state = reg_state.get(change['name'], {'value': 0})
+                    reg_state[reg]["name"] = change["name"]
+                    src_reg_state = reg_state.get(change["name"], {"value": 0})
                     if src_reg_state is None:
                         # original register's state was changed beyond reconstruction
                         reg_state[reg] = None
                         continue
-                    reg_state[reg]['value'] = src_reg_state['value']
-                reg_state[reg]['value'] += change['value']
+                    reg_state[reg]["value"] = src_reg_state["value"]
+                reg_state[reg]["value"] += change["value"]
         return reg_state
 
     def get_dependent_instruction_forms(self, instr_form=None, line_number=None):
@@ -340,7 +348,8 @@ class KernelDG(nx.DiGraph):
         if instruction_form.semantic_operands is None:
             return is_read
         for src in chain(
-            instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst
+            instruction_form.semantic_operands.source,
+            instruction_form.semantic_operands.src_dst,
         ):
             if "register" in src:
                 is_read = self.parser.is_reg_dependend_of(register, src.register) or is_read
@@ -372,7 +381,8 @@ class KernelDG(nx.DiGraph):
         if instruction_form.semantic_operands is None:
             return False
         for src in chain(
-            instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst
+            instruction_form.semantic_operands.source,
+            instruction_form.semantic_operands.src_dst,
         ):
             # Here we check for mem dependecies only
             if "memory" not in src:
@@ -387,23 +397,23 @@ class KernelDG(nx.DiGraph):
                 addr_change -= mem.offset.value
             if mem.base and src.base:
                 base_change = register_changes.get(
-                    src.base.get('prefix', '') + src.base.name,
-                    {'name': src.base.get('prefix', '') + src.base.name, 'value': 0},
+                    src.base.get("prefix", "") + src.base.name,
+                    {"name": src.base.get("prefix", "") + src.base.name, "value": 0},
                 )
                 if base_change is None:
                     # Unknown change occurred
                     continue
-                if mem.base.get('prefix', '') + mem.base['name'] != base_change['name']:
+                if mem.base.get("prefix", "") + mem.base["name"] != base_change["name"]:
                     # base registers do not match
                     continue
-                addr_change += base_change['value']
+                addr_change += base_change["value"]
             elif mem.base or src.base:
                 # base registers do not match
                 continue
             if mem.index and src.index:
                 index_change = register_changes.get(
-                    src.index.get('prefix', '') + src.index.name,
-                    {'name': src.index.get('prefix', '') + src.index.name, 'value': 0},
+                    src.index.get("prefix", "") + src.index.name,
+                    {"name": src.index.get("prefix", "") + src.index.name, "value": 0},
                 )
                 if index_change is None:
                     # Unknown change occurred
@@ -411,10 +421,10 @@ class KernelDG(nx.DiGraph):
                 if mem.scale != src.scale:
                     # scale factors do not match
                     continue
-                if mem.index.get('prefix', '') + mem.index['name'] != index_change['name']:
+                if mem.index.get("prefix", "") + mem.index["name"] != index_change["name"]:
                     # index registers do not match
                     continue
-                addr_change += index_change['value'] * src.scale
+                addr_change += index_change["value"] * src.scale
             elif mem.index or src.index:
                 # index registers do not match
                 continue
@@ -443,7 +453,8 @@ class KernelDG(nx.DiGraph):
                     )
         # Check also for possible pre- or post-indexing in memory addresses
         for src in chain(
-            instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst
+            instruction_form.semantic_operands.source,
+            instruction_form.semantic_operands.src_dst,
         ):
             if "memory" in src:
                 if "pre_indexed" in src.memory or "post_indexed" in src.memory:
diff --git a/osaca/utils.py b/osaca/utils.py
index c235534..ecd2eab 100644
--- a/osaca/utils.py
+++ b/osaca/utils.py
@@ -1,7 +1,10 @@
 #!/usr/bin/env python3
 import os.path
 
-DATA_DIRS = [os.path.expanduser("~/.osaca/data"), os.path.join(os.path.dirname(__file__), "data")]
+DATA_DIRS = [
+    os.path.expanduser("~/.osaca/data"),
+    os.path.join(os.path.dirname(__file__), "data"),
+]
 CACHE_DIR = os.path.expanduser("~/.osaca/cache")
 
 
diff --git a/setup.py b/setup.py
index e26528b..df74dc6 100755
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,8 @@ here = os.path.abspath(os.path.dirname(__file__))
 # Stolen from pip
 def read(*names, **kwargs):
     with io.open(
-        os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
+        os.path.join(os.path.dirname(__file__), *names),
+        encoding=kwargs.get("encoding", "utf8"),
     ) as fp:
         return fp.read()
 
@@ -38,13 +39,20 @@ def _run_build_cache(dir):
     # This is run inside the install staging directory (that had no .pyc files)
     # We don't want to generate any.
     # https://github.com/eliben/pycparser/pull/135
-    check_call([sys.executable, "-B", "_build_cache.py"], cwd=os.path.join(dir, "osaca", "data"))
+    check_call(
+        [sys.executable, "-B", "_build_cache.py"],
+        cwd=os.path.join(dir, "osaca", "data"),
+    )
 
 
 class install(_install):
     def run(self):
         _install.run(self)
-        self.execute(_run_build_cache, (self.install_lib,), msg="Build ISA and architecture cache")
+        self.execute(
+            _run_build_cache,
+            (self.install_lib,),
+            msg="Build ISA and architecture cache",
+        )
 
 
 class sdist(_sdist):
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 10a449c..8ab1f41 100755
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -33,7 +33,13 @@ class TestCLI(unittest.TestCase):
         with self.assertRaises(ValueError):
             osaca.check_arguments(args, parser)
         args = parser.parse_args(
-            ["--arch", "csx", "--import", "WRONG_BENCH", self._find_file("gs", "csx", "gcc")]
+            [
+                "--arch",
+                "csx",
+                "--import",
+                "WRONG_BENCH",
+                self._find_file("gs", "csx", "gcc"),
+            ]
         )
         with self.assertRaises(ValueError):
             osaca.check_arguments(args, parser)
@@ -65,7 +71,13 @@ class TestCLI(unittest.TestCase):
     def test_check_db(self):
         parser = osaca.create_parser(parser=ErrorRaisingArgumentParser())
         args = parser.parse_args(
-            ["--arch", "tx2", "--db-check", "--verbose", self._find_test_file("triad_x86_iaca.s")]
+            [
+                "--arch",
+                "tx2",
+                "--db-check",
+                "--verbose",
+                self._find_test_file("triad_x86_iaca.s"),
+            ]
         )
         output = StringIO()
         osaca.run(args, output_file=output)
@@ -134,7 +146,13 @@ class TestCLI(unittest.TestCase):
                 for c in comps[a]:
                     with self.subTest(kernel=k, arch=a, comp=c):
                         args = parser.parse_args(
-                            ["--arch", a, self._find_file(k, a, c), "--export-graph", "/dev/null"]
+                            [
+                                "--arch",
+                                a,
+                                self._find_file(k, a, c),
+                                "--export-graph",
+                                "/dev/null",
+                            ]
                         )
                         output = StringIO()
                         osaca.run(args, output_file=output)
@@ -204,17 +222,13 @@ class TestCLI(unittest.TestCase):
         )
         output = StringIO()
         osaca.run(args, output_file=output)
-        self.assertTrue(
-            output.getvalue().count("WARNING: LCD analysis timed out") == 1
-        )
+        self.assertTrue(output.getvalue().count("WARNING: LCD analysis timed out") == 1)
         args = parser.parse_args(
             ["--ignore-unknown", "--lcd-timeout", "-1", self._find_test_file(kernel)]
         )
         output = StringIO()
         osaca.run(args, output_file=output)
-        self.assertTrue(
-            output.getvalue().count("WARNING: LCD analysis timed out") == 0
-        )
+        self.assertTrue(output.getvalue().count("WARNING: LCD analysis timed out") == 0)
 
     def test_lines_arg(self):
         # Run tests with --lines option
@@ -227,12 +241,24 @@ class TestCLI(unittest.TestCase):
         args = []
         args.append(
             parser.parse_args(
-                ["--lines", "146-154", "--arch", "csx", self._find_test_file(kernel_x86)]
+                [
+                    "--lines",
+                    "146-154",
+                    "--arch",
+                    "csx",
+                    self._find_test_file(kernel_x86),
+                ]
             )
         )
         args.append(
             parser.parse_args(
-                ["--lines", "146:154", "--arch", "csx", self._find_test_file(kernel_x86)]
+                [
+                    "--lines",
+                    "146:154",
+                    "--arch",
+                    "csx",
+                    self._find_test_file(kernel_x86),
+                ]
             )
         )
         args.append(
diff --git a/tests/test_db_interface.py b/tests/test_db_interface.py
index 7678ad0..a58a7a3 100755
--- a/tests/test_db_interface.py
+++ b/tests/test_db_interface.py
@@ -17,7 +17,13 @@ class TestDBInterface(unittest.TestCase):
         sample_entry = {
             "name": "DoItRightAndDoItFast",
             "operands": [
-                {"class": "memory", "offset": "imd", "base": "gpr", "index": "gpr", "scale": 8},
+                {
+                    "class": "memory",
+                    "offset": "imd",
+                    "base": "gpr",
+                    "index": "gpr",
+                    "scale": 8,
+                },
                 {"class": "register", "name": "xmm"},
             ],
             "throughput": 1.25,
@@ -35,7 +41,12 @@ class TestDBInterface(unittest.TestCase):
         del self.entry_tx2["operands"][1]["name"]
         self.entry_tx2["operands"][1]["prefix"] = "x"
         # self.entry_zen1['port_pressure'] = [1, 1, 1, 1, 0, 1, 0, 0, 0, 0.5, 1, 0.5, 1]
-        self.entry_zen1["port_pressure"] = [[4, "0123"], [1, "4"], [1, "89"], [2, ["8D", "9D"]]]
+        self.entry_zen1["port_pressure"] = [
+            [4, "0123"],
+            [1, "4"],
+            [1, "89"],
+            [2, ["8D", "9D"]],
+        ]
 
     ###########
     # Tests
diff --git a/tests/test_files/kernel_x86_memdep.s b/tests/test_files/kernel_x86_memdep.s
index bb9789e..cb1c1fe 100644
--- a/tests/test_files/kernel_x86_memdep.s
+++ b/tests/test_files/kernel_x86_memdep.s
@@ -1,15 +1,15 @@
 # OSACA-BEGIN
 .L4:
-	vmovsd %xmm0, 8(%rax)
-	addq $8, %rax
-	vmovsd %xmm0, 8(%rax,%rcx,8)
-	vaddsd (%rax), %xmm0, %xmm0  # depends on line 3, 8(%rax) == (%rax+8)
-	subq $-8, %rax
-	vaddsd -8(%rax), %xmm0, %xmm0  # depends on line 3, 8(%rax) == -8(%rax+16)
-	dec %rcx
-	vaddsd 8(%rax,%rcx,8), %xmm0, %xmm0  # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8)
-	movq %rcx, %rdx
-	vaddsd 8(%rax,%rdx,8), %xmm0, %xmm0  # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8)
+	vmovsd %xmm0, 8(%rax)         # line 3          <----------------------------------+
+	addq $8, %rax                 #                                                    |
+	vmovsd %xmm0, 8(%rax,%rcx,8)  # line 5          <-----------------------------------------------+
+	vaddsd (%rax), %xmm0, %xmm0         # depends on line 3, 8(%rax) == (%rax+8)    ---+            |
+	subq $-8, %rax                      #                                              |            |
+	vaddsd -8(%rax), %xmm0, %xmm0       # depends on line 3, 8(%rax) == -8(%rax+16) ---+            |
+	dec %rcx                            #                                                           |
+	vaddsd 8(%rax,%rcx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) --+
+	movq %rcx, %rdx                     #                                                           |
+	vaddsd 8(%rax,%rdx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) --+
 	vmulsd %xmm1, %xmm0, %xmm0
 	addq $8, %rax
 	cmpq %rsi, %rax
diff --git a/tests/test_frontend.py b/tests/test_frontend.py
index 3ab0441..30c7a46 100755
--- a/tests/test_frontend.py
+++ b/tests/test_frontend.py
@@ -34,7 +34,8 @@ class TestFrontend(unittest.TestCase):
         )
         self.machine_model_tx2 = MachineModel(arch="tx2")
         self.semantics_csx = ArchSemantics(
-            self.machine_model_csx, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/x86.yml")
+            self.machine_model_csx,
+            path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/x86.yml"),
         )
         self.semantics_tx2 = ArchSemantics(
             self.machine_model_tx2,
@@ -71,7 +72,11 @@ class TestFrontend(unittest.TestCase):
 
     def test_frontend_AArch64(self):
         dg = KernelDG(
-            self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2, self.semantics_tx2)
+            self.kernel_AArch64,
+            self.parser_AArch64,
+            self.machine_model_tx2,
+            self.semantics_tx2,
+        )
         fe = Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "tx2.yml"))
         fe.full_analysis(self.kernel_AArch64, dg, verbose=True)
         # TODO compare output with checked string
diff --git a/tests/test_marker_utils.py b/tests/test_marker_utils.py
index 5d38324..d843ec7 100755
--- a/tests/test_marker_utils.py
+++ b/tests/test_marker_utils.py
@@ -109,7 +109,8 @@ class TestMarkerUtils(unittest.TestCase):
                             kernel_start = len(
                                 list(
                                     filter(
-                                        None, (prologue + mov_start_var + bytes_var_1).split("\n")
+                                        None,
+                                        (prologue + mov_start_var + bytes_var_1).split("\n"),
                                     )
                                 )
                             )
@@ -142,7 +143,12 @@ class TestMarkerUtils(unittest.TestCase):
         epilogue = ".LE9:\t\t#12.2\n" "call    dummy\n"
         kernel_length = len(list(filter(None, kernel.split("\n"))))
 
-        bytes_variations = [bytes_1_line, bytes_2_lines_1, bytes_2_lines_2, bytes_3_lines]
+        bytes_variations = [
+            bytes_1_line,
+            bytes_2_lines_1,
+            bytes_2_lines_2,
+            bytes_3_lines,
+        ]
         mov_start_variations = [mov_start_1, mov_start_2]
         mov_end_variations = [mov_end_1, mov_end_2]
         # actual tests
@@ -171,7 +177,8 @@ class TestMarkerUtils(unittest.TestCase):
                             kernel_start = len(
                                 list(
                                     filter(
-                                        None, (prologue + mov_start_var + bytes_var_1).split("\n")
+                                        None,
+                                        (prologue + mov_start_var + bytes_var_1).split("\n"),
                                     )
                                 )
                             )
diff --git a/tests/test_parser_AArch64.py b/tests/test_parser_AArch64.py
index 9511574..fdcf7f1 100755
--- a/tests/test_parser_AArch64.py
+++ b/tests/test_parser_AArch64.py
@@ -24,7 +24,9 @@ class TestParserAArch64(unittest.TestCase):
 
     def test_comment_parser(self):
         self.assertEqual(self._get_comment(self.parser, "// some comments"), "some comments")
-        self.assertEqual(self._get_comment(self.parser, "\t\t//AA BB CC \t end \t"), "AA BB CC end")
+        self.assertEqual(
+            self._get_comment(self.parser, "\t\t//AA BB CC \t end \t"), "AA BB CC end"
+        )
         self.assertEqual(
             self._get_comment(self.parser, "\t//// comment //// comment"),
             "// comment //// comment",
@@ -36,7 +38,8 @@ class TestParserAArch64(unittest.TestCase):
         self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3")
         self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t//label1").name, ".L1")
         self.assertEqual(
-            " ".join(self._get_label(self.parser, ".L1:\t\t\t//label1").comment), "label1"
+            " ".join(self._get_label(self.parser, ".L1:\t\t\t//label1").comment),
+            "label1",
         )
         with self.assertRaises(ParseException):
             self._get_label(self.parser, "\t.cfi_startproc")
@@ -316,7 +319,8 @@ class TestParserAArch64(unittest.TestCase):
         value1 = self.parser.normalize_imd(imd_decimal_1)
         self.assertEqual(value1, self.parser.normalize_imd(imd_hex_1))
         self.assertEqual(
-            self.parser.normalize_imd(imd_decimal_2), self.parser.normalize_imd(imd_hex_2)
+            self.parser.normalize_imd(imd_decimal_2),
+            self.parser.normalize_imd(imd_hex_2),
         )
         self.assertEqual(self.parser.normalize_imd(imd_float_11), value1)
         self.assertEqual(self.parser.normalize_imd(imd_float_12), value1)
diff --git a/tests/test_parser_x86att.py b/tests/test_parser_x86att.py
index 57b2e71..1b47849 100755
--- a/tests/test_parser_x86att.py
+++ b/tests/test_parser_x86att.py
@@ -26,7 +26,8 @@ class TestParserX86ATT(unittest.TestCase):
         self.assertEqual(self._get_comment(self.parser, "# some comments"), "some comments")
         self.assertEqual(self._get_comment(self.parser, "\t\t#AA BB CC \t end \t"), "AA BB CC end")
         self.assertEqual(
-            self._get_comment(self.parser, "\t## comment ## comment"), "# comment ## comment"
+            self._get_comment(self.parser, "\t## comment ## comment"),
+            "# comment ## comment",
         )
 
     def test_label_parser(self):
@@ -35,7 +36,8 @@ class TestParserX86ATT(unittest.TestCase):
         self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3")
         self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t#label1").name, ".L1")
         self.assertEqual(
-            " ".join(self._get_label(self.parser, ".L1:\t\t\t#label1").comment), "label1"
+            " ".join(self._get_label(self.parser, ".L1:\t\t\t#label1").comment),
+            "label1",
         )
         with self.assertRaises(ParseException):
             self._get_label(self.parser, "\t.cfi_startproc")
@@ -47,7 +49,8 @@ class TestParserX86ATT(unittest.TestCase):
         self.assertEqual(len(self._get_directive(self.parser, "\t.align\t16,0x90").parameters), 2)
         self.assertEqual(len(self._get_directive(self.parser, ".text").parameters), 0)
         self.assertEqual(
-            len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters), 2
+            len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters),
+            2,
         )
         self.assertEqual(
             self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters[1],
@@ -62,7 +65,12 @@ class TestParserX86ATT(unittest.TestCase):
                 self.parser,
                 "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support",
             ).parameters,
-            ["__TEXT", "__eh_frame", "coalesced", "no_toc+strip_static_syms+live_support"],
+            [
+                "__TEXT",
+                "__eh_frame",
+                "coalesced",
+                "no_toc+strip_static_syms+live_support",
+            ],
         )
         self.assertEqual(
             self._get_directive(
@@ -74,7 +82,9 @@ class TestParserX86ATT(unittest.TestCase):
             self._get_directive(self.parser, "\t.align\t16,0x90").parameters[1], "0x90"
         )
         self.assertEqual(
-            self._get_directive(self.parser, "        .byte 100,103,144       #IACA START")["name"],
+            self._get_directive(self.parser, "        .byte 100,103,144       #IACA START")[
+                "name"
+            ],
             "byte",
         )
         self.assertEqual(
@@ -242,10 +252,12 @@ class TestParserX86ATT(unittest.TestCase):
         imd_decimal_2 = {"value": "8"}
         imd_hex_2 = {"value": "8"}
         self.assertEqual(
-            self.parser.normalize_imd(imd_decimal_1), self.parser.normalize_imd(imd_hex_1)
+            self.parser.normalize_imd(imd_decimal_1),
+            self.parser.normalize_imd(imd_hex_1),
         )
         self.assertEqual(
-            self.parser.normalize_imd(imd_decimal_2), self.parser.normalize_imd(imd_hex_2)
+            self.parser.normalize_imd(imd_decimal_2),
+            self.parser.normalize_imd(imd_hex_2),
         )
 
     def test_reg_dependency(self):
diff --git a/tests/test_semantics.py b/tests/test_semantics.py
index 46c58d6..54e851f 100755
--- a/tests/test_semantics.py
+++ b/tests/test_semantics.py
@@ -11,8 +11,14 @@ from copy import deepcopy
 import networkx as nx
 from osaca.osaca import get_unmatched_instruction_ratio
 from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
-from osaca.semantics import (INSTR_FLAGS, ArchSemantics, ISASemantics,
-                             KernelDG, MachineModel, reduce_to_section)
+from osaca.semantics import (
+    INSTR_FLAGS,
+    ArchSemantics,
+    ISASemantics,
+    KernelDG,
+    MachineModel,
+    reduce_to_section,
+)
 
 
 class TestSemanticTools(unittest.TestCase):
@@ -66,7 +72,8 @@ class TestSemanticTools(unittest.TestCase):
         )
         cls.semantics_x86 = ISASemantics("x86")
         cls.semantics_csx = ArchSemantics(
-            cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml")
+            cls.machine_model_csx,
+            path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml"),
         )
         cls.semantics_aarch64 = ISASemantics("aarch64")
         cls.semantics_tx2 = ArchSemantics(
@@ -173,7 +180,12 @@ class TestSemanticTools(unittest.TestCase):
         )
         self.assertEqual(
             test_mm_x86.get_store_throughput(
-                {"base": {"prefix": "NOT_IN_DB"}, "offset": None, "index": "NOT_NONE", "scale": 1}
+                {
+                    "base": {"prefix": "NOT_IN_DB"},
+                    "offset": None,
+                    "index": "NOT_NONE",
+                    "scale": 1,
+                }
             ),
             [[1, "23"], [1, "4"]],
         )
@@ -185,7 +197,12 @@ class TestSemanticTools(unittest.TestCase):
         )
         self.assertEqual(
             test_mm_arm.get_store_throughput(
-                {"base": {"prefix": "NOT_IN_DB"}, "offset": None, "index": None, "scale": 1}
+                {
+                    "base": {"prefix": "NOT_IN_DB"},
+                    "offset": None,
+                    "index": None,
+                    "scale": 1,
+                }
             ),
             [[1, "34"], [1, "5"]],
         )
@@ -310,7 +327,10 @@ class TestSemanticTools(unittest.TestCase):
 
     def test_memdependency_x86(self):
         dg = KernelDG(
-            self.kernel_x86_memdep, self.parser_x86, self.machine_model_csx, self.semantics_csx
+            self.kernel_x86_memdep,
+            self.parser_x86,
+            self.machine_model_csx,
+            self.semantics_csx,
         )
         self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
         self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {6, 8})
@@ -322,7 +342,10 @@ class TestSemanticTools(unittest.TestCase):
 
     def test_kernelDG_AArch64(self):
         dg = KernelDG(
-            self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2, self.semantics_tx2
+            self.kernel_AArch64,
+            self.parser_AArch64,
+            self.machine_model_tx2,
+            self.semantics_tx2,
         )
         self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
         self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {7, 8})
@@ -400,7 +423,7 @@ class TestSemanticTools(unittest.TestCase):
         # based on line 6
         self.assertEqual(lc_deps[6]["latency"], 28.0)
         self.assertEqual(
-            [(iform.line_number, lat) for iform, lat in lc_deps[6]['dependencies']],
+            [(iform.line_number, lat) for iform, lat in lc_deps[6]["dependencies"]],
             [(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)],
         )
 
@@ -423,7 +446,8 @@ class TestSemanticTools(unittest.TestCase):
         # w/o flag dependencies: ID 5 w/ len=1
         # TODO discuss
         self.assertEqual(
-            lc_deps[lcd_id2]["root"], dg.dg.nodes(data=True)[lcd_id2]["instruction_form"]
+            lc_deps[lcd_id2]["root"],
+            dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
         )
         self.assertEqual(len(lc_deps[lcd_id2]["dependencies"]), 1)
         self.assertEqual(
@@ -438,7 +462,7 @@ class TestSemanticTools(unittest.TestCase):
             self.parser_x86,
             self.machine_model_csx,
             self.semantics_x86,
-            timeout=10
+            timeout=10,
         )
         end_time = time.perf_counter()
         time_10 = end_time - start_time
@@ -448,7 +472,7 @@ class TestSemanticTools(unittest.TestCase):
             self.parser_x86,
             self.machine_model_csx,
             self.semantics_x86,
-            timeout=2
+            timeout=2,
         )
         end_time = time.perf_counter()
         time_2 = end_time - start_time
diff --git a/validation/build_and_run.py b/validation/build_and_run.py
index 313b369..6e7775b 100755
--- a/validation/build_and_run.py
+++ b/validation/build_and_run.py
@@ -1,33 +1,26 @@
 #!/usr/bin/env python3
-import sys
 import os
-import re
-from subprocess import check_call, check_output, CalledProcessError, STDOUT
-from itertools import chain
-import shutil
-from functools import lru_cache
-from glob import glob
-from pathlib import Path
-from pprint import pprint
-import socket
 import pickle
+import re
+import shutil
+import socket
+import sys
 from copy import deepcopy
+from glob import glob
+from itertools import chain
+from pathlib import Path
+from subprocess import STDOUT, CalledProcessError, check_call, check_output
 
 import requests
-import numpy as np
-import pandas as pd
-
-from osaca.osaca import reduce_to_section
-
-from kerncraft.models import benchmark
 from kerncraft.incore_model import (
-    parse_asm,
     asm_instrumentation,
     iaca_analyse_instrumented_binary,
+    llvm_mca_analyse_instrumented_assembly,
     osaca_analyse_instrumented_assembly,
-    llvm_mca_analyse_instrumented_assembly
+    parse_asm,
 )
-
+from kerncraft.models import benchmark
+from osaca.osaca import reduce_to_section
 
 # Scaling of inner dimension for 1D, 2D and 3D kernels
 #  * consider kernels to be compiled with multiple compilers and different options
@@ -39,37 +32,50 @@ from kerncraft.incore_model import (
 # Collect inner loop body assembly for each kernel/compiler/options combination
 #  * analyze with OSACA, IACA and LLVM-MCA
 
-hosts_arch_map = {r"skylakesp2": "SKX",
-                  r"ivyep1": "IVB",
-                  r"naples1": "ZEN",
-                  r"rome1": "ZEN2",
-                  r"warmup": "TX2",
-                  r"qp4-node-[0-9]+": "A64FX"}
+hosts_arch_map = {
+    r"skylakesp2": "SKX",
+    r"ivyep1": "IVB",
+    r"naples1": "ZEN",
+    r"rome1": "ZEN2",
+    r"warmup": "TX2",
+    r"qp4-node-[0-9]+": "A64FX",
+}
 
 arch_info = {
-    'SKX': {
-        'prepare': ['likwid-setFrequencies -f 2.4 -t 0'.split()],
-        'IACA': 'SKX',
-        'OSACA': 'SKX',
-        'LLVM-MCA': '-mcpu=skylake-avx512',
-        'Ithemal': 'skl',
-        'isa': 'x86',
-        'perfevents': [],
+    "SKX": {
+        "prepare": ["likwid-setFrequencies -f 2.4 -t 0".split()],
+        "IACA": "SKX",
+        "OSACA": "SKX",
+        "LLVM-MCA": "-mcpu=skylake-avx512",
+        "Ithemal": "skl",
+        "isa": "x86",
+        "perfevents": [],
         "cflags": {
-            'icc': {
-                "Ofast": "-Ofast -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
-                "O3": "-O3 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
-                "O2": "-O2 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
-                "O1": "-O1 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
+            "icc": {
+                "Ofast": (
+                    "-Ofast -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
+                    "-ffreestanding -falign-loops"
+                ).split(),
+                "O3": (
+                    "-O3 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
+                    "-ffreestanding -falign-loops"
+                ).split(),
+                "O2": (
+                    "-O2 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
+                    "-ffreestanding -falign-loops"
+                ).split(),
+                "O1": (
+                    "-O1 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
+                    "-ffreestanding -falign-loops"
+                ).split(),
             },
-            'clang': {
+            "clang": {
                 "Ofast": "-Ofast -march=skylake-avx512 -ffreestanding".split(),
                 "O3": "-O3 -march=skylake-avx512 -ffreestanding".split(),
                 "O2": "-O2 -march=skylake-avx512 -ffreestanding".split(),
                 "O1": "-O1 -march=skylake-avx512 -ffreestanding".split(),
-                
             },
-            'gcc': {
+            "gcc": {
                 "Ofast": "-Ofast -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
                 "O3": "-O3 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
                 "O2": "-O2 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
@@ -77,17 +83,19 @@ arch_info = {
             },
         },
     },
-    'IVB': {
-        'prepare': ['likwid-setFrequencies -f 3.0 -t 0'.split()],
-        'IACA': 'IVB',
-        'OSACA': 'IVB',
-        'LLVM-MCA': '-mcpu=ivybridge',
-        'Ithemal': 'ivb',
-        'isa': 'x86',
-        'perfevents': [],
+    "IVB": {
+        "prepare": ["likwid-setFrequencies -f 3.0 -t 0".split()],
+        "IACA": "IVB",
+        "OSACA": "IVB",
+        "LLVM-MCA": "-mcpu=ivybridge",
+        "Ithemal": "ivb",
+        "isa": "x86",
+        "perfevents": [],
         "cflags": {
             "icc": {
-                "Ofast": "-Ofast -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
+                "Ofast": (
+                    "-Ofast -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops"
+                ).split(),
                 "O3": "-O3 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
                 "O2": "-O2 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
                 "O1": "-O1 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
@@ -106,14 +114,14 @@ arch_info = {
             },
         },
     },
-    'ZEN': {
-        'prepare': ['likwid-setFrequencies -f 2.3 -t 0'.split()],
-        'IACA': None,
-        'OSACA': 'ZEN1',
-        'LLVM-MCA': '-mcpu=znver1',
-        'Ithemal': None,
-        'isa': 'x86',
-        'perfevents': [],
+    "ZEN": {
+        "prepare": ["likwid-setFrequencies -f 2.3 -t 0".split()],
+        "IACA": None,
+        "OSACA": "ZEN1",
+        "LLVM-MCA": "-mcpu=znver1",
+        "Ithemal": None,
+        "isa": "x86",
+        "perfevents": [],
         "cflags": {
             "clang": {
                 "Ofast": "-Ofast -march=znver1 -ffreestanding".split(),
@@ -128,21 +136,23 @@ arch_info = {
                 "O1": "-O1 -march=znver1 -ffreestanding -falign-loops=16".split(),
             },
             "icc": {
-                "Ofast": "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
+                "Ofast": (
+                    "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops"
+                ).split(),
                 "O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
                 "O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
                 "O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
             },
         },
     },
-    'ZEN2': {
-        'prepare': ['likwid-setFrequencies -f 2.35 -t 0'.split()],
-        'IACA': None,
-        'OSACA': 'ZEN2',
-        'LLVM-MCA': '-mcpu=znver2',
-        'Ithemal': None,
-        'isa': 'x86',
-        'perfevents': [],
+    "ZEN2": {
+        "prepare": ["likwid-setFrequencies -f 2.35 -t 0".split()],
+        "IACA": None,
+        "OSACA": "ZEN2",
+        "LLVM-MCA": "-mcpu=znver2",
+        "Ithemal": None,
+        "isa": "x86",
+        "perfevents": [],
         "cflags": {
             "clang": {
                 "Ofast": "-Ofast -march=znver2 -ffreestanding".split(),
@@ -157,22 +167,24 @@ arch_info = {
                 "O1": "-O1 -march=znver2 -ffreestanding -falign-loops=16".split(),
             },
             "icc": {
-                "Ofast": "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
+                "Ofast": (
+                    "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops"
+                ).split(),
                 "O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
                 "O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
                 "O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
             },
         },
     },
-    'TX2': {
-        'Clock [MHz]': 2200,  # reading out via perf. counters is not supported
-        'IACA': None,
-        'OSACA': 'TX2',
-        'assign_optimal_throughput': True,
-        'LLVM-MCA': '-mcpu=thunderx2t99 -march=aarch64',
-        'Ithemal': None,
-        'isa': 'aarch64',
-        'perfevents': [],
+    "TX2": {
+        "Clock [MHz]": 2200,  # reading out via perf. counters is not supported
+        "IACA": None,
+        "OSACA": "TX2",
+        "assign_optimal_throughput": True,
+        "LLVM-MCA": "-mcpu=thunderx2t99 -march=aarch64",
+        "Ithemal": None,
+        "isa": "aarch64",
+        "perfevents": [],
         "cflags": {
             "clang": {
                 "Ofast": "-Ofast -target aarch64-unknown-linux-gnu -ffreestanding".split(),
@@ -188,16 +200,16 @@ arch_info = {
             },
         },
     },
-    'A64FX': {
-        'Clock [MHz]': 1800,  # reading out via perf. counters is not supported
-        'L2_volume_metric': 'L1<->L2 data volume [GBytes]',
-        'IACA': None,
-        'OSACA': 'A64FX',
-        'assign_optimal_throughput': False,
-        'LLVM-MCA': '-mcpu=a64fx -march=aarch64',
-        'Ithemal': None,
-        'isa': 'aarch64',
-        'perfevents': [],
+    "A64FX": {
+        "Clock [MHz]": 1800,  # reading out via perf. counters is not supported
+        "L2_volume_metric": "L1<->L2 data volume [GBytes]",
+        "IACA": None,
+        "OSACA": "A64FX",
+        "assign_optimal_throughput": False,
+        "LLVM-MCA": "-mcpu=a64fx -march=aarch64",
+        "Ithemal": None,
+        "isa": "aarch64",
+        "perfevents": [],
         "cflags": {
             "gcc": {
                 "Ofast": "-Ofast -msve-vector-bits=512 -march=armv8.2-a+sve -ffreestanding".split(),
@@ -211,7 +223,7 @@ arch_info = {
                 "O2": "-O2 -target aarch64-unknown-linux-gnu -ffreestanding".split(),
                 "O1": "-O1 -target aarch64-unknown-linux-gnu -ffreestanding".split(),
             },
-        }
+        },
     },
 }
 
@@ -231,12 +243,13 @@ def get_kernels(kernels=None):
     if kernels is None:
         kernels = []
         for f in glob("kernels/*.c"):
-            f = f.rsplit('.', 1)[0].split('/', 1)[1]
+            f = f.rsplit(".", 1)[0].split("/", 1)[1]
             if f == "dummy":
                 continue
             kernels.append(f)
     return kernels
 
+
 # Columns:
 # arch
 # kernel
@@ -259,6 +272,7 @@ def get_kernels(kernels=None):
 # allruns [list (length, repetitions, cy/it, L2 B/it)]
 # perfevents [dict event: counter/it]
 
+
 def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mca=True):
     arch = get_current_arch()
     if arch is None:
@@ -268,90 +282,132 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
         islocal = True
         arches = [arch]
         ainfo = arch_info.get(arch)
-        if 'prepare' in ainfo:
-            for cmd in ainfo['prepare']:
+        if "prepare" in ainfo:
+            for cmd in ainfo["prepare"]:
                 check_call(cmd)
     for arch in arches:
         ainfo = arch_info.get(arch)
         print(arch)
         data_path = Path(f"build/{arch}/data.pkl")
         if data_path.exists():
-            with data_path.open('rb') as f:
+            with data_path.open("rb") as f:
                 data = pickle.load(f)
         else:
             data = []
         data_lastsaved = deepcopy(data)
-        for compiler, compiler_cflags in ainfo['cflags'].items():
+        for compiler, compiler_cflags in ainfo["cflags"].items():
             if not shutil.which(compiler) and islocal:
                 print(compiler, "not found in path! Skipping...")
                 continue
             for cflags_name, cflags in compiler_cflags.items():
                 for kernel in get_kernels():
-                    print(f"{kernel:<15} {arch:>5} {compiler:>5} {cflags_name:>6}",
-                        end=": ", flush=True)
-                    row = list([r for r in data
-                                if r['arch'] == arch and r['kernel'] == kernel and
-                                r['compiler'] == compiler and r['cflags_name'] == cflags_name])
+                    print(
+                        f"{kernel:<15} {arch:>5} {compiler:>5} {cflags_name:>6}",
+                        end=": ",
+                        flush=True,
+                    )
+                    row = list(
+                        [
+                            r
+                            for r in data
+                            if r["arch"] == arch
+                            and r["kernel"] == kernel
+                            and r["compiler"] == compiler
+                            and r["cflags_name"] == cflags_name
+                        ]
+                    )
                     if row:
                         row = row[0]
                     else:
-                        orig_row = None
                         row = {
-                            'arch': arch,
-                            'kernel': kernel,
-                            'compiler': compiler,
-                            'cflags_name': cflags_name,
-                            'element_size': 8,
+                            "arch": arch,
+                            "kernel": kernel,
+                            "compiler": compiler,
+                            "cflags_name": cflags_name,
+                            "element_size": 8,
                         }
                         data.append(row)
 
                     # Build
                     print("build", end="", flush=True)
                     asm_path, exec_path, overwrite = build_kernel(
-                        kernel, arch, compiler, cflags, cflags_name, dontbuild=not islocal)
+                        kernel,
+                        arch,
+                        compiler,
+                        cflags,
+                        cflags_name,
+                        dontbuild=not islocal,
+                    )
 
                     if overwrite:
                         # clear all measurment information
-                        row['best_length'] = None
-                        row['best_runtime'] = None
-                        row['L2_traffic'] = None
-                        row['allruns'] = None
-                        row['perfevents'] = None
+                        row["best_length"] = None
+                        row["best_runtime"] = None
+                        row["L2_traffic"] = None
+                        row["allruns"] = None
+                        row["perfevents"] = None
 
                     # Mark for IACA, OSACA and LLVM-MCA
                     print("mark", end="", flush=True)
                     try:
-                        marked_asmfile, marked_objfile, row['pointer_increment'], overwrite = mark(
-                            asm_path, compiler, cflags, isa=ainfo['isa'], overwrite=overwrite)
-                        row['marking_error'] = None
+                        (
+                            marked_asmfile,
+                            marked_objfile,
+                            row["pointer_increment"],
+                            overwrite,
+                        ) = mark(
+                            asm_path,
+                            compiler,
+                            cflags,
+                            isa=ainfo["isa"],
+                            overwrite=overwrite,
+                        )
+                        row["marking_error"] = None
                     except ValueError as e:
-                        row['marking_error'] = str(e)
+                        row["marking_error"] = str(e)
                         print(":", e)
                         continue
 
                     if overwrite:
                         # clear all model generated information
-                        for model in ['IACA', 'OSACA', 'LLVM-MCA', 'Ithemal']:
-                            for k in ['ports', 'prediction', 'throughput', 'cp', 'lcd', 'raw']:
-                                row[model+'_'+k] = None
-                    
-                    for model in ['IACA', 'OSACA', 'LLVM-MCA', 'Ithemal']:
-                        for k in ['ports', 'prediction', 'throughput', 'cp', 'lcd', 'raw']:
-                            if model+'_'+k not in row:
-                                row[model+'_'+k] = None
+                        for model in ["IACA", "OSACA", "LLVM-MCA", "Ithemal"]:
+                            for k in [
+                                "ports",
+                                "prediction",
+                                "throughput",
+                                "cp",
+                                "lcd",
+                                "raw",
+                            ]:
+                                row[model + "_" + k] = None
+
+                    for model in ["IACA", "OSACA", "LLVM-MCA", "Ithemal"]:
+                        for k in [
+                            "ports",
+                            "prediction",
+                            "throughput",
+                            "cp",
+                            "lcd",
+                            "raw",
+                        ]:
+                            if model + "_" + k not in row:
+                                row[model + "_" + k] = None
 
                     # Analyze with IACA, if requested and configured
-                    if iaca and ainfo['IACA'] is not None:
+                    if iaca and ainfo["IACA"] is not None:
                         print("IACA", end="", flush=True)
-                        if not row.get('IACA_ports'):
-                            row['IACA_raw'] = iaca_analyse_instrumented_binary(
-                                marked_objfile, micro_architecture=ainfo['IACA'])
-                            row['IACA_ports'] = \
-                                {k: v/(row['pointer_increment']/row['element_size'])
-                                for k,v in row['IACA_raw']['port cycles'].items()}
-                            row['IACA_prediction'] = row['IACA_raw']['throughput']/(
-                                row['pointer_increment']/row['element_size'])
-                            row['IACA_throughput'] = max(row['IACA_ports'].values())
+                        if not row.get("IACA_ports"):
+                            row["IACA_raw"] = iaca_analyse_instrumented_binary(
+                                marked_objfile, micro_architecture=ainfo["IACA"]
+                            )
+                            row["IACA_ports"] = {
+                                k: v / (row["pointer_increment"] / row["element_size"])
+                                for k, v in row["IACA_raw"]["port cycles"].items()
+                            }
+                            row["IACA_prediction"] = row["IACA_raw"]["throughput"] / (
+                                row["pointer_increment"] / row["element_size"]
+                            )
+                            row["IACA_throughput"] = max(row["IACA_ports"].values())
                             print(". ", end="", flush=True)
                         else:
                             print("! ", end="", flush=True)
@@ -359,56 +415,70 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
                     # Analyze with OSACA, if requested
                     if osaca:
                         print("OSACA", end="", flush=True)
-                        if not row.get('OSACA_ports'):
-                            row['OSACA_raw'] = osaca_analyse_instrumented_assembly(
-                                marked_asmfile, micro_architecture=ainfo['OSACA'],
-                                assign_optimal_throughput=ainfo.get('assign_optimal_throughput',
-                                                                    True))
-                            row['OSACA_ports'] = \
-                                {k: v/(row['pointer_increment']/row['element_size'])
-                                for k,v in row['OSACA_raw']['port cycles'].items()}
-                            row['OSACA_prediction'] = row['OSACA_raw']['throughput']/(
-                                row['pointer_increment']/row['element_size'])
-                            row['OSACA_throughput'] = max(row['OSACA_ports'].values())
-                            row['OSACA_cp'] = row['OSACA_raw']['cp_latency']/(
-                                row['pointer_increment']/row['element_size'])
-                            row['OSACA_lcd'] = row['OSACA_raw']['lcd']/(
-                                row['pointer_increment']/row['element_size'])
+                        if not row.get("OSACA_ports"):
+                            row["OSACA_raw"] = osaca_analyse_instrumented_assembly(
+                                marked_asmfile,
+                                micro_architecture=ainfo["OSACA"],
+                                assign_optimal_throughput=ainfo.get(
+                                    "assign_optimal_throughput", True
+                                ),
+                            )
+                            row["OSACA_ports"] = {
+                                k: v / (row["pointer_increment"] / row["element_size"])
+                                for k, v in row["OSACA_raw"]["port cycles"].items()
+                            }
+                            row["OSACA_prediction"] = row["OSACA_raw"]["throughput"] / (
+                                row["pointer_increment"] / row["element_size"]
+                            )
+                            row["OSACA_throughput"] = max(row["OSACA_ports"].values())
+                            row["OSACA_cp"] = row["OSACA_raw"]["cp_latency"] / (
+                                row["pointer_increment"] / row["element_size"]
+                            )
+                            row["OSACA_lcd"] = row["OSACA_raw"]["lcd"] / (
+                                row["pointer_increment"] / row["element_size"]
+                            )
                             print(". ", end="", flush=True)
                         else:
                             print("! ", end="", flush=True)
 
                     # Analyze with LLVM-MCA, if requested and configured
-                    if llvm_mca and ainfo['LLVM-MCA'] is not None:
+                    if llvm_mca and ainfo["LLVM-MCA"] is not None:
                         print("LLVM-MCA", end="", flush=True)
-                        if not row.get('LLVM-MCA_ports'):
-                            row['LLVM-MCA_raw'] = llvm_mca_analyse_instrumented_assembly(
+                        if not row.get("LLVM-MCA_ports"):
+                            row["LLVM-MCA_raw"] = llvm_mca_analyse_instrumented_assembly(
                                 marked_asmfile,
-                                micro_architecture=ainfo['LLVM-MCA'],
-                                isa=ainfo['isa'])
-                            row['LLVM-MCA_ports'] = \
-                                {k: v/(row['pointer_increment']/row['element_size'])
-                                for k,v in row['LLVM-MCA_raw']['port cycles'].items()}
-                            row['LLVM-MCA_prediction'] =row['LLVM-MCA_raw']['throughput']/(
-                                row['pointer_increment']/row['element_size'])
-                            row['LLVM-MCA_throughput'] = max(row['LLVM-MCA_ports'].values())
-                            row['LLVM-MCA_cp'] = row['LLVM-MCA_raw']['cp_latency']/(
-                                row['pointer_increment']/row['element_size'])
-                            row['LLVM-MCA_lcd'] = row['LLVM-MCA_raw']['lcd']/(
-                                row['pointer_increment']/row['element_size'])
+                                micro_architecture=ainfo["LLVM-MCA"],
+                                isa=ainfo["isa"],
+                            )
+                            row["LLVM-MCA_ports"] = {
+                                k: v / (row["pointer_increment"] / row["element_size"])
+                                for k, v in row["LLVM-MCA_raw"]["port cycles"].items()
+                            }
+                            row["LLVM-MCA_prediction"] = row["LLVM-MCA_raw"]["throughput"] / (
+                                row["pointer_increment"] / row["element_size"]
+                            )
+                            row["LLVM-MCA_throughput"] = max(row["LLVM-MCA_ports"].values())
+                            row["LLVM-MCA_cp"] = row["LLVM-MCA_raw"]["cp_latency"] / (
+                                row["pointer_increment"] / row["element_size"]
+                            )
+                            row["LLVM-MCA_lcd"] = row["LLVM-MCA_raw"]["lcd"] / (
+                                row["pointer_increment"] / row["element_size"]
+                            )
                             print(". ", end="", flush=True)
                         else:
                             print("! ", end="", flush=True)
-                    
+
                     # Analyze with Ithemal, if not running local and configured
-                    if ainfo['Ithemal'] is not None and not islocal:
+                    if ainfo["Ithemal"] is not None and not islocal:
                         print("Ithemal", end="", flush=True)
-                        if not row.get('Ithemal_prediction'):
+                        if not row.get("Ithemal_prediction"):
                             with open(marked_asmfile) as f:
-                                parsed_code = parse_asm(f.read(), ainfo['isa'])
-                            kernel = reduce_to_section(parsed_code, ainfo['isa'])
-                            row['Ithemal_prediction'] = get_ithemal_prediction(
-                                get_intel_style_code(marked_objfile), model=ainfo['Ithemal'])
+                                parsed_code = parse_asm(f.read(), ainfo["isa"])
+                            kernel = reduce_to_section(parsed_code, ainfo["isa"])
+                            row["Ithemal_prediction"] = get_ithemal_prediction(
+                                get_intel_style_code(marked_objfile),
+                                model=ainfo["Ithemal"],
+                            )
                             print(". ", end="", flush=True)
                         else:
                             print("! ", end="", flush=True)
@@ -416,43 +486,45 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
                     if measurements and islocal:
                         # run measurements if on same hardware
                         print("scale", end="", flush=True)
-                        if not row.get('allruns'):
+                        if not row.get("allruns"):
                             # find best length with concurrent L2 measurement
                             scaling_runs, best = scalingrun(exec_path)
-                            row['best_length'] = best[0]
-                            row['best_runtime'] = best[2]
-                            row['L2_traffic'] = best[3]
-                            row['allruns'] = scaling_runs
+                            row["best_length"] = best[0]
+                            row["best_runtime"] = best[2]
+                            row["L2_traffic"] = best[3]
+                            row["allruns"] = scaling_runs
                             print(f"({best[0]}). ", end="", flush=True)
                         else:
-                            print(f"({row.get('best_length', None)})! ", end="", flush=True)
+                            print(
+                                f"({row.get('best_length', None)})! ",
+                                end="",
+                                flush=True,
+                            )
 
                     print()
 
                 # dump to file
                 if data != data_lastsaved:
-                    print('saving... ', end="", flush=True)
-                    with data_path.open('wb') as f:
+                    print("saving... ", end="", flush=True)
+                    with data_path.open("wb") as f:
                         try:
                             pickle.dump(data, f)
                             data_lastsaved = deepcopy(data)
-                            print('saved!')
+                            print("saved!")
                         except KeyboardInterrupt:
                             f.seek(0)
                             pickle.dump(data, f)
-                            print('saved!')
+                            print("saved!")
                             sys.exit()
 
 
-
-def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1*1024+1)):
-    #print('{:>8} {:>10} {:>10}'.format("x", "cy/it", "L2 B/it"))
-    parameters = chain(*[[total_iterations//i, i] for i in lengths])
+def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1 * 1024 + 1)):
+    # print('{:>8} {:>10} {:>10}'.format("x", "cy/it", "L2 B/it"))
+    parameters = chain(*[[total_iterations // i, i] for i in lengths])
     # TODO use arch specific events and grooup
-    r, o = perfctr(chain([kernel_exec], map(str, parameters)),
-                1, group="L2")
+    r, o = perfctr(chain([kernel_exec], map(str, parameters)), 1, group="L2")
     global_infos = {}
-    for m in [re.match(r"(:?([a-z_\-0-9]+):)?([a-z]+): ([a-z\_\-0-9]+)", l) for l in o]:
+    for m in [re.match(r"(:?([a-z_\-0-9]+):)?([a-z]+): ([a-z\_\-0-9]+)", line) for line in o]:
         if m is not None:
             try:
                 v = int(m.group(4))
@@ -464,37 +536,45 @@ def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1*1024+1
                 r[m.group(2)][m.group(3)] = v
 
     results = []
-    best = (float('inf'), None)
+    best = (float("inf"), None)
     for markername, mmetrics in r.items():
-        kernelname, repetitions, *_, xlength = markername.split('_')
+        kernelname, repetitions, *_, xlength = markername.split("_")
         repetitions = int(repetitions)
         xlength = int(xlength)
-        total_iterations = mmetrics['repetitions'] * mmetrics['iterations']
-        if 'Clock [MHz]' in mmetrics:
-            clock_hz = mmetrics['Clock [MHz]']*1e6
+        total_iterations = mmetrics["repetitions"] * mmetrics["iterations"]
+        if "Clock [MHz]" in mmetrics:
+            clock_hz = mmetrics["Clock [MHz]"] * 1e6
         else:
-            clock_hz = arch_info[get_current_arch()]['Clock [MHz]']*1e6
-        cyperit = mmetrics['Runtime (RDTSC) [s]'] * clock_hz / total_iterations
+            clock_hz = arch_info[get_current_arch()]["Clock [MHz]"] * 1e6
+        cyperit = mmetrics["Runtime (RDTSC) [s]"] * clock_hz / total_iterations
         # TODO use arch specific events and grooup
-        if 'L2D load data volume [GBytes]' in mmetrics:
-            l2perit = (mmetrics['L2D load data volume [GBytes]'] +
-                       mmetrics.get('L2D evict data volume [GBytes]', 0))*1e9 / total_iterations
+        if "L2D load data volume [GBytes]" in mmetrics:
+            l2perit = (
+                (
+                    mmetrics["L2D load data volume [GBytes]"]
+                    + mmetrics.get("L2D evict data volume [GBytes]", 0)
+                )
+                * 1e9
+                / total_iterations
+            )
         else:
-            l2perit = \
-                mmetrics[arch_info[get_current_arch()]['L2_volume_metric']]*1e9 / total_iterations
-        results.append(
-            (xlength, repetitions, cyperit, l2perit)
-        )
+            l2perit = (
+                mmetrics[arch_info[get_current_arch()]["L2_volume_metric"]]
+                * 1e9
+                / total_iterations
+            )
+        results.append((xlength, repetitions, cyperit, l2perit))
         if cyperit < best[0]:
             best = cyperit, results[-1]
     return results, best[1]
 
+
 def mark(asm_path, compiler, cflags, isa, overwrite=False):
     # Mark assembly for IACA, OSACA and LLVM-MCA
     marked_asm_path = Path(asm_path).with_suffix(".marked.s")
     if not marked_asm_path.exists() or overwrite:
         overwrite = True
-        with open(asm_path) as fa, open(marked_asm_path, 'w') as fm:
+        with open(asm_path) as fa, open(marked_asm_path, "w") as fm:
             try:
                 _, pointer_increment = asm_instrumentation(fa, fm, isa=isa)
             except KeyboardInterrupt:
@@ -505,37 +585,46 @@ def mark(asm_path, compiler, cflags, isa, overwrite=False):
         # use maked assembly and extract asm_block and pointer_increment
         with open(marked_asm_path) as f:
             marked_asm = f.read()
-        m = re.search(r'pointer_increment=([0-9]+)', marked_asm)
+        m = re.search(r"pointer_increment=([0-9]+)", marked_asm)
         if m:
             pointer_increment = int(m.group(1))
         else:
             os.unlink(marked_asm_path)
             raise ValueError(
-                "Could not find `pointer_increment=<byte increment>`. Plase place into file.")
+                "Could not find `pointer_increment=<byte increment>`. Plase place into file."
+            )
         print("! ", end="", flush=True)
 
     # Compile marked assembly to object for IACA
     marked_obj = Path(asm_path).with_suffix(".marked.o")
     if not marked_obj.exists():
-        check_call([compiler] + ['-c', str(marked_asm_path), '-o', str(marked_obj)])
-    
+        check_call([compiler] + ["-c", str(marked_asm_path), "-o", str(marked_obj)])
+
     return str(marked_asm_path), str(marked_obj), pointer_increment, overwrite
 
 
-def build_kernel(kernel, architecture, compiler, cflags, cflags_name, overwrite=False,
-                 dontbuild=False):
+def build_kernel(
+    kernel,
+    architecture,
+    compiler,
+    cflags,
+    cflags_name,
+    overwrite=False,
+    dontbuild=False,
+):
     build_path = f"build/{architecture}/{compiler}/{cflags_name}"
     kernel_assembly = f"{build_path}/{kernel}.s"
-    kernel_object= f"{build_path}/{kernel}.o"
+    kernel_object = f"{build_path}/{kernel}.o"
     executable = f"{build_path}/{kernel}"
     Path(build_path).mkdir(parents=True, exist_ok=True)
 
     if not overwrite:
         # Overwrite if any kernel specific file is missing
         overwrite = (
-            not os.path.exists(kernel_object) or 
-            not os.path.exists(kernel_assembly) or
-            not os.path.exists(executable))
+            not os.path.exists(kernel_object)
+            or not os.path.exists(kernel_assembly)
+            or not os.path.exists(executable)
+        )
 
     if dontbuild and overwrite:
         raise ValueError("Must build, but not allowed.")
@@ -545,39 +634,43 @@ def build_kernel(kernel, architecture, compiler, cflags, cflags_name, overwrite=
 
     if not Path(f"{build_path}/compiler_version").exists():
         # Document compiler version
-        with open(f"{build_path}/compiler_version", 'w') as f:
-            f.write(check_output([compiler, "-v"], encoding='utf8', stderr=STDOUT))
+        with open(f"{build_path}/compiler_version", "w") as f:
+            f.write(check_output([compiler, "-v"], encoding="utf8", stderr=STDOUT))
 
     if overwrite:
         # build object + assembly
-        check_call([compiler] +
-                   cflags +
-                   ["-c", f"kernels/{kernel}.c", "-o", kernel_object])
-        check_call([compiler] +
-                   cflags +
-                   ["-c", f"kernels/{kernel}.c", "-S", "-o", kernel_assembly])
+        check_call([compiler] + cflags + ["-c", f"kernels/{kernel}.c", "-o", kernel_object])
+        check_call(
+            [compiler] + cflags + ["-c", f"kernels/{kernel}.c", "-S", "-o", kernel_assembly]
+        )
 
         # build main and link executable
         executable_cflags = [
             os.environ["LIKWID_DEFINES"],
             os.environ["LIKWID_INC"],
-            os.environ["LIKWID_LIB"]
-        ] + ['-Ofast']
-        check_call([compiler] + executable_cflags + [
-            f"{build_path}/dummy.o",
-            kernel_object,
-            "-DMAIN",
-            f"kernels/{kernel}.c",
-            "-llikwid",
-            "-o", executable])
+            os.environ["LIKWID_LIB"],
+        ] + ["-Ofast"]
+        check_call(
+            [compiler]
+            + executable_cflags
+            + [
+                f"{build_path}/dummy.o",
+                kernel_object,
+                "-DMAIN",
+                f"kernels/{kernel}.c",
+                "-llikwid",
+                "-o",
+                executable,
+            ]
+        )
         print(". ", end="", flush=True)
     else:
         print("! ", end="", flush=True)
-    
+
     return kernel_assembly, executable, overwrite
 
 
-def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
+def perfctr(cmd, cores, group="MEM", code_markers=True, verbose=0):
     """
     Run *cmd* with likwid-perfctr and returns result as dict.
 
@@ -586,30 +679,32 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
     if CLI argument cores > 1, running with multi-core, otherwise single-core
     """
     # Making sure likwid-perfctr is available:
-    if benchmark.find_executable('likwid-perfctr') is None:
-        print("likwid-perfctr was not found. Make sure likwid is installed and found in PATH.",
-                file=sys.stderr)
+    if benchmark.find_executable("likwid-perfctr") is None:
+        print(
+            "likwid-perfctr was not found. Make sure likwid is installed and found in PATH.",
+            file=sys.stderr,
+        )
         sys.exit(1)
 
     # FIXME currently only single core measurements support!
-    perf_cmd = ['likwid-perfctr', '-f', '-O', '-g', group]
+    perf_cmd = ["likwid-perfctr", "-f", "-O", "-g", group]
 
-    cpu = 'S0:0'
+    cpu = "S0:0"
     if cores > 1:
-        cpu += '-'+str(cores-1)
+        cpu += "-" + str(cores - 1)
 
     # Pinned and measured on cpu
-    perf_cmd += ['-C', cpu]
+    perf_cmd += ["-C", cpu]
 
     # code must be marked using likwid markers
-    perf_cmd.append('-m')
+    perf_cmd.append("-m")
 
     perf_cmd += cmd
     if verbose > 1:
-        print(' '.join(perf_cmd))
+        print(" ".join(perf_cmd))
     try:
-        with benchmark.fix_env_variable('OMP_NUM_THREADS', None):
-            output = check_output(perf_cmd).decode('utf-8').split('\n')
+        with benchmark.fix_env_variable("OMP_NUM_THREADS", None):
+            output = check_output(perf_cmd).decode("utf-8").split("\n")
     except CalledProcessError as e:
         print("Executing benchmark failed: {!s}".format(e), file=sys.stderr)
         sys.exit(1)
@@ -626,7 +721,7 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
         m = re.match(r"TABLE,Region ([a-z\-0-9_]+),", line)
         if m:
             cur_region_name = m.group(1)
-        line = line.split(',')
+        line = line.split(",")
         try:
             # Metrics
             cur_region_data[line[0]] = float(line[1])
@@ -639,12 +734,13 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
             continue
         try:
             # Event counters
-            if line[2] == '-' or line[2] == 'nan':
+            if line[2] == "-" or line[2] == "nan":
                 counter_value = 0
             else:
                 counter_value = int(line[2])
-            if re.fullmatch(r'[A-Z0-9_]+', line[0]) and \
-                    re.fullmatch(r'[A-Z0-9]+(:[A-Z0-9]+=[0-9A-Fa-fx]+)*', line[1]):
+            if re.fullmatch(r"[A-Z0-9_]+", line[0]) and re.fullmatch(
+                r"[A-Z0-9]+(:[A-Z0-9]+=[0-9A-Fa-fx]+)*", line[1]
+            ):
                 cur_region_data.setdefault(line[0], {})
                 cur_region_data[line[0]][line[1]] = counter_value
                 continue
@@ -659,49 +755,52 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
 
 
 def remove_html_tags(text):
-    return re.sub('<.*?>', '', text)
+    return re.sub("<.*?>", "", text)
 
 
 def get_intel_style_code(marked_objfile):
     # Disassembl with Intel syntax
-    cmd = ("objdump -d --demangle --no-leading-addr --no-leading-headers --no-show-raw-insn "
-           "--x86-asm-syntax=intel").split(" ") + [marked_objfile]
+    cmd = (
+        "objdump -d --demangle --no-leading-addr --no-leading-headers --no-show-raw-insn "
+        "--x86-asm-syntax=intel"
+    ).split(" ") + [marked_objfile]
     asm_raw = check_output(cmd).decode()
-    asm_raw = '\n'.join([l.strip() for l in asm_raw.split('\n')])
+    asm_raw = "\n".join([line.strip() for line in asm_raw.split("\n")])
     kernel_raw = asm_raw[
-        asm_raw.index('mov\tebx, 111\nnop')+len('mov\tebx, 111\nnop') : 
-        asm_raw.index('mov\tebx, 222\nnop')
+        asm_raw.index("mov\tebx, 111\nnop")
+        + len("mov\tebx, 111\nnop") : asm_raw.index("mov\tebx, 222\nnop")
     ]
-    kernel_lines = kernel_raw.split('\n')
+    kernel_lines = kernel_raw.split("\n")
     # Ignore label and jump
-    return '\n'.join(kernel_lines[:-2])
+    return "\n".join(kernel_lines[:-2])
 
 
-def get_ithemal_prediction(code, model='skl'):
+def get_ithemal_prediction(code, model="skl"):
     url = "http://3.18.198.23/predict"
-    assert model in ['skl', 'hsw', 'ivb']
-    r = requests.post(url, {'code': code, 'model': model})
+    assert model in ["skl", "hsw", "ivb"]
+    r = requests.post(url, {"code": code, "model": model})
     raw_text = remove_html_tags(r.text)
     m = re.search("Could not generate a prediction: (.*)", raw_text)
     if m:
-        print(" error:", m.group(1).strip(), end=' ')
-        return float('nan')
-    m = re.search("Prediction: ([0-9\.]+) cycles per iteration", raw_text)
+        print(" error:", m.group(1).strip(), end=" ")
+        return float("nan")
+    m = re.search("Prediction: ([0-9.]+) cycles per iteration", raw_text)
     if m:
         return float(m.group(1))
     else:
-        return float('nan')
+        return float("nan")
 
 
 def main():
     # Check for correct LLVM-MCA version
     try:
-        llvm_mca = 'LLVM version 12.0.0' in check_output(['llvm-mca', '-version']).decode()
+        llvm_mca = "LLVM version 12.0.0" in check_output(["llvm-mca", "-version"]).decode()
     except FileNotFoundError:
         llvm_mca = False
-    
-    build_mark_run_all_kernels(measurements='--no-measurements' not in sys.argv, llvm_mca=llvm_mca)
+
+    build_mark_run_all_kernels(measurements="--no-measurements" not in sys.argv, llvm_mca=llvm_mca)
     sys.exit()
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()