OSACA/osaca/data/model_importer.py

#!/usr/bin/env python3
import argparse
import os.path
import sys
import xml.etree.ElementTree as ET
from distutils.version import StrictVersion

from osaca.parser import get_parser
from osaca.semantics import MachineModel

# Abbreviations of the Intel microarchitectures known to this importer.
# extract_model() checks membership in this list before adding the extra
# "2D"/"3D" port entries for instruction forms with memory operands.
intel_archs = "CON WOL NHM WSM SNB IVB HSW BDW SKL SKX KBL CFL CNL ICL".split()

# Abbreviations of the AMD microarchitectures.
amd_archs = "ZEN1 ZEN+ ZEN2".split()


def port_pressure_from_tag_attributes(attrib):
    """
    Translate the ``ports`` attribute of a tag into a port occupation list.

    Example: ``'1*p015+1*p1+1*p23+1*p4+3*p5'`` becomes
    ``[[1, '015'], [1, '1'], [1, '23'], [1, '4'], [3, '5']]``.

    :param attrib: attribute mapping of the tag; must contain ``"ports"`` and may
                   contain ``"div_cycles"``
    :returns: list of ``[cycles, ports]`` entries; ``ports`` is the port group string
              with its leading ``p`` / ``FP`` characters removed. If ``"div_cycles"``
              is present, a final ``[div_cycles, ["DIV"]]`` entry is appended.
    """

    def parse_term(term):
        # Each '+'-separated term has the shape '<cycles>*<port group>'
        count, group = term.split("*")
        # Two separate strips on purpose: first any leading 'p', then any leading 'F'/'P'
        group = group.lstrip("p").lstrip("FP")
        return [int(count), group]

    occupation = [parse_term(term) for term in attrib["ports"].split("+")]

    # Also consider div on DIV pipeline; kept as a list so "DIV" stays a single port name
    if "div_cycles" in attrib:
        occupation.append([int(attrib["div_cycles"]), ["DIV"]])

    return occupation


def extract_paramters(instruction_tag, parser, isa):
    """
    Build the list of operand descriptions for one instruction tag.

    The ``<operand>`` children of *instruction_tag* are visited in ascending ``idx``
    order; operands flagged ``suppressed`` are ignored. Each remaining operand is turned
    into a dict whose ``"class"`` entry is ``"immediate"``, ``"memory"``, ``"register"``
    or ``"identifier"``.

    :param instruction_tag: XML element holding the ``<operand>`` children
    :param parser: object providing ``parse_register(str)`` and ``is_vector_register(reg)``;
                   only used for operands of type ``reg``
    :param isa: ISA name; ``"x86"`` and ``"aarch64"`` get special register handling
    :returns: list of operand dicts in ``idx`` order
    :raises ValueError: if the first listed register of a ``reg`` operand cannot be parsed
                        or if the operand type is unknown
    """
    parameters = []
    parameter_tags = sorted(instruction_tag.findall("operand"), key=lambda p: int(p.attrib["idx"]))
    for parameter_tag in parameter_tags:
        # Ignore parameters with suppressed=1
        if int(parameter_tag.attrib.get("suppressed", "0")):
            continue

        p_type = parameter_tag.attrib["type"]
        if p_type == "imm":
            parameter = {"class": "immediate", "imd": "int"}
        elif p_type in ("mem", "agen"):
            # Memory and address-generation operands are both stored as a fully
            # wildcarded memory reference
            parameter = {
                "class": "memory",
                "base": "*",
                "offset": "*",
                "index": "*",
                "scale": "*",
            }
        elif p_type == "reg":
            parameter = {"class": "register"}
            possible_regs = [parser.parse_register("%" + r) for r in parameter_tag.text.split(",")]
            if possible_regs[0] is None:
                raise ValueError(
                    "Unknown register type for {} with {}.".format(
                        parameter_tag.attrib, parameter_tag.text
                    )
                )
            # Only the first of the listed registers is used to describe the operand
            register = possible_regs[0]["register"]
            if isa == "x86":
                if parser.is_vector_register(register):
                    # Reduce the register name to its lower-cased three-letter prefix
                    register["name"] = register["name"].lower()[:3]
                    if "mask" in register:
                        register["mask"] = True
                else:
                    register["name"] = "gpr"
            elif isa == "aarch64":
                # Drop the concrete register name. The original code indexed the list
                # itself (possible_regs["register"]), which always raised TypeError.
                if "name" in register:
                    del register["name"]
            for key in register:
                parameter[key] = register[key]
        elif p_type == "relbr":
            parameter = {"class": "identifier"}
        else:
            raise ValueError("Unknown paramter type {}".format(parameter_tag.attrib))
        parameters.append(parameter)
    return parameters


def extract_model(tree, arch, skip_mem=True):
    """
    Build a machine model for one architecture from a parsed instruction XML tree.

    For every ``<instruction>`` tag that has an ``<architecture>`` child whose ``name``
    equals the upper-cased *arch*, the port occupation, latency and uop count are
    gathered from the ``<measurement>`` and ``<IACA>`` tags and stored in the model via
    ``set_instruction``. Instruction forms are skipped when their operands cannot be
    translated, when no measurement carries a ``ports`` attribute, or (if *skip_mem* is
    set) when they have an operand of type ``mem``.

    :param tree: parsed XML document; only ``findall`` is called on it
    :param arch: architecture abbreviation
    :param skip_mem: skip instruction forms with a ``mem`` operand (default: True)
    :returns: the populated ``MachineModel``, or ``None`` if looking up the ISA for
              *arch* raised an exception
    """
    try:
        isa = MachineModel.get_isa_for_arch(arch)
    except Exception:
        print("Skipping...", file=sys.stderr)
        return None
    mm = MachineModel(isa=isa)
    parser = get_parser(isa)
    arch_name = arch.upper()

    for instruction_tag in tree.findall(".//instruction"):
        mnemonic = instruction_tag.attrib["asm"]
        iform = instruction_tag.attrib["iform"]
        # reduce to second part if mnemonic contain space (e.g., "REX CRC32")
        if " " in mnemonic:
            mnemonic = mnemonic.split(" ", 1)[1]

        # Extract parameter components
        try:
            parameters = extract_paramters(instruction_tag, parser, isa)
        except ValueError as e:
            print(e, file=sys.stderr)
            # Without this skip, `parameters` would be unbound (first instruction) or
            # still hold the operands of the previously processed instruction.
            continue
        if isa == "x86":
            parameters.reverse()

        # Extract port occupation, throughput and latency
        port_pressure, throughput, latency, uops = [], None, None, None
        arch_tag = instruction_tag.find('architecture[@name="' + arch_name + '"]')
        if arch_tag is None:
            continue
        measurements = arch_tag.findall("measurement")
        # skip any instructions without port utilization
        if not any("ports" in m.attrib for m in measurements):
            print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
            continue
        # Warn (without skipping) if the port-derived TP of the first measurement exceeds
        # the measured one; the check is omitted if any of the attributes is missing
        first_attrib = measurements[0].attrib
        if all(key in first_attrib for key in ("TP_ports", "TP_loop", "TP_unrolled")):
            if float(first_attrib["TP_ports"]) > min(
                float(first_attrib["TP_loop"]), float(first_attrib["TP_unrolled"])
            ):
                print(
                    "Calculated TP is greater than measured TP.",
                    iform,
                    file=sys.stderr,
                )
        # skip if instruction contains memory operand
        if skip_mem and any(
            x.attrib["type"] == "mem" for x in instruction_tag.findall("operand")
        ):
            print("Contains memory operand, skip: ", iform, file=sys.stderr)
            continue
        # We collect all measurement and IACA information and compare them later
        for measurement_tag in arch_tag.iter("measurement"):
            m_attrib = measurement_tag.attrib
            if "TP_ports" in m_attrib:
                throughput = float(m_attrib["TP_ports"])
            else:
                # Attribute values are strings and must be converted before comparing.
                # "TP_unroll" is still accepted because the original code read that key.
                measured = [
                    float(m_attrib[key])
                    for key in ("TP_loop", "TP_unrolled", "TP_unroll", "TP")
                    if key in m_attrib
                ]
                throughput = min(measured) if measured else None
            uops = int(m_attrib["uops"]) if "uops" in m_attrib else None
            if "ports" in m_attrib:
                port_pressure.append(port_pressure_from_tag_attributes(m_attrib))
            latencies = [
                int(l_tag.attrib["cycles"])
                for l_tag in measurement_tag.iter("latency")
                if "cycles" in l_tag.attrib
            ]
            if not latencies:
                latencies = [
                    int(l_tag.attrib["max_cycles"])
                    for l_tag in measurement_tag.iter("latency")
                    if "max_cycles" in l_tag.attrib
                ]
            # Shifted-slice comparison is True iff not all entries are equal
            if latencies[1:] != latencies[:-1]:
                print(
                    "Contradicting latencies found, using smallest:",
                    iform,
                    latencies,
                    file=sys.stderr,
                )
            if latencies:
                latency = min(latencies)

        # Ordered by IACA version (newest last)
        for iaca_tag in sorted(
            arch_tag.iter("IACA"), key=lambda i: StrictVersion(i.attrib["version"])
        ):
            if "ports" in iaca_tag.attrib:
                port_pressure.append(port_pressure_from_tag_attributes(iaca_tag.attrib))

        # Check if all are equal
        if port_pressure:
            if port_pressure[1:] != port_pressure[:-1]:
                print(
                    "Contradicting port occupancies, using latest IACA:",
                    iform,
                    file=sys.stderr,
                )
            # Keep only the entry collected last
            port_pressure = port_pressure[-1]
        else:
            # No port data available for this architecture
            continue

        # Adding Intel's 2D and 3D pipelines on Intel µarchs, without Ice Lake:
        if arch_name in intel_archs and arch_name != "ICL":
            if any(p["class"] == "memory" for p in parameters):
                # We have a memory parameter, if ports 2 & 3 are present, also add 2D & 3D
                # TODO remove port7 on 'hsw' onward and split entries depending on addressing mode
                port_23 = any("2" in pp[1] and "3" in pp[1] for pp in port_pressure)
                port_4 = any("4" in pp[1] for pp in port_pressure)
                # Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
                if port_23 and not port_4:
                    if (
                        arch_name in ["SNB", "IVB"]
                        and any(p.get("name", "") == "ymm" for p in parameters)
                        and "128" not in mnemonic
                    ):
                        # x = 2 if SNB or IVB and ymm register in any operand and not '128' in
                        # instruction name
                        port2D3D_pressure = 2
                    else:
                        # otherwise x = 1
                        port2D3D_pressure = 1
                    port_pressure.append((port2D3D_pressure, ["2D", "3D"]))

        # Add missing ports:
        for ports in [pp[1] for pp in port_pressure]:
            for p in ports:
                mm.add_port(p)

        # The stored throughput is derived from the port pressure; it replaces the
        # value read from the measurement tags above
        throughput = max(mm.average_port_pressure(port_pressure))
        mm.set_instruction(mnemonic, parameters, latency, port_pressure, throughput, uops)
    # TODO eliminate entries which could be covered by automatic load / store expansion
    return mm


def rhs_comment(uncommented_string, comment):
    """
    Append the same right-hand-side comment to every line of a string.

    All lines are padded with spaces to the length of the longest line, so the
    ``  # comment`` suffixes line up in one column. Every output line, including the
    last one, ends with a newline.

    :param uncommented_string: text to annotate, lines separated by ``"\\n"``
    :param comment: comment text placed after ``#`` on each line
    :returns: the annotated text
    """
    rows = uncommented_string.split("\n")
    width = max(map(len, rows))
    suffix = "  # {}\n".format(comment)
    return "".join(row.ljust(width) + suffix for row in rows)


def architectures(tree):
    """Return the set of ``name`` attributes of all ``<architecture>`` tags in *tree*."""
    return {arch_tag.attrib["name"] for arch_tag in tree.findall(".//architecture")}


def main():
    """
    Command line entry point.

    Parses the XML file given on the command line and lists the architectures found in
    it. With an architecture argument, the model for that architecture is printed to
    stdout; without one, a model is extracted for every architecture in the file and
    written to ``<arch>.yml`` (lower-cased name) in the current working directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("xml", help="path of instructions.xml from http://uops.info")
    parser.add_argument(
        "arch",
        nargs="?",
        help="architecture to extract, use IACA abbreviations (e.g., SNB). "
        "if not given, all will be extracted and saved to file in CWD.",
    )
    parser.add_argument(
        "--mem",
        dest="skip_mem",
        action="store_false",
        help="add instruction forms including memory addressing operands, which are "
        "skipped by default",
    )
    args = parser.parse_args()
    basename = os.path.basename(__file__)

    tree = ET.parse(args.xml)
    # architectures() returns a set; sort once so listing and processing order are
    # reproducible between runs
    available_archs = sorted(architectures(tree))
    print("# Available architectures:", ", ".join(available_archs))
    if args.arch:
        print("# Chosen architecture: {}".format(args.arch))
        model = extract_model(tree, args.arch, args.skip_mem)
        if model is not None:
            print(rhs_comment(model.dump(), "uops.info import"))
    else:
        for arch in available_archs:
            # Progress output: architecture name, completed below on the same line
            print(arch, end="")
            model = extract_model(tree, arch.lower(), args.skip_mem)
            if model:
                model_string = rhs_comment(model.dump(), basename + " " + arch)

                with open("{}.yml".format(arch.lower()), "w") as f:
                    f.write(model_string)
                print(".")
            else:
                # Terminate the progress line so the next name starts on its own line
                print(" skipped")


# Run the importer only when this file is executed as a script, not when imported
if __name__ == "__main__":
    main()