mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-07-21 20:51:04 +02:00
310 lines
12 KiB
Python
Executable File
310 lines
12 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import argparse
|
|
import os.path
|
|
import sys
|
|
import xml.etree.ElementTree as ET
|
|
from distutils.version import StrictVersion
|
|
|
|
from osaca.parser import get_parser
|
|
from osaca.semantics import MachineModel
|
|
|
|
# Intel microarchitecture abbreviations. extract_model() compares the upper-cased
# architecture name against this list when deciding whether to add 2D/3D ports.
intel_archs = [
    "CON", "WOL", "NHM", "WSM", "SNB", "IVB", "HSW",
    "BDW", "SKL", "SKX", "KBL", "CFL", "CNL", "ICL",
]  # fmt: skip

# AMD architecture names.
amd_archs = [
    "ZEN1",
    "ZEN+",
    "ZEN2",
]
|
|
|
|
|
|
def port_pressure_from_tag_attributes(attrib):
    """
    Convert the port attributes of a tag into a list of ``[cycles, ports]`` entries.

    Example: ``'1*p015+1*p1+1*p23+1*p4+3*p5'`` is turned into
    ``[[1, '015'], [1, '1'], [1, '23'], [1, '4'], [3, '5']]``.

    :param attrib: attribute mapping; must contain ``"ports"`` (terms of the form
        ``<cycles>*<ports>`` joined by ``+``) and may contain ``"div_cycles"``
    :returns: list of ``[int, ports]`` entries; ``ports`` is the port string with
        its prefix removed, or ``["DIV"]`` for the ``div_cycles`` entry
    """
    terms = (term.split("*") for term in attrib["ports"].split("+"))
    occupation = [
        # Remove leading "p" characters first, then leading "F"/"P" characters.
        [int(cycle_count), port_names.lstrip("p").lstrip("FP")]
        for cycle_count, port_names in terms
    ]

    # The divider pipeline is reported separately and added as its own entry.
    if "div_cycles" in attrib:
        occupation.append([int(attrib["div_cycles"]), ["DIV"]])

    return occupation
|
|
|
|
|
|
def extract_paramters(instruction_tag, parser, isa):
    """
    Build the operand descriptions for one ``instruction`` tag.

    The ``operand`` children are processed in ascending order of their ``idx``
    attribute. Operands marked ``suppressed="1"`` are left out.

    :param instruction_tag: XML element whose ``operand`` children are evaluated
    :param parser: object providing ``parse_register()`` and ``is_vector_register()``
    :param isa: ISA name; ``"x86"`` and ``"aarch64"`` get ISA-specific register handling
    :returns: list of dicts, one per non-suppressed operand, each with a ``"class"`` key
    :raises ValueError: if the first register alternative cannot be parsed or the
        operand type is unknown
    """
    parameters = []  # used to store string representations
    parameter_tags = sorted(instruction_tag.findall("operand"), key=lambda p: int(p.attrib["idx"]))
    for parameter_tag in parameter_tags:
        # Ignore parameters with suppressed=1
        if int(parameter_tag.attrib.get("suppressed", "0")):
            continue

        p_type = parameter_tag.attrib["type"]
        if p_type == "imm":
            parameter = {"class": "immediate", "imd": "int"}
        elif p_type in ("mem", "agen"):
            # Both operand types are described as a fully wildcarded memory reference.
            parameter = {
                "class": "memory",
                "base": "*",
                "offset": "*",
                "index": "*",
                "scale": "*",
            }
        elif p_type == "reg":
            parameter = {"class": "register"}
            # The tag text is a comma-separated list of alternatives; only the
            # first one determines the register description.
            possible_regs = [parser.parse_register("%" + r) for r in parameter_tag.text.split(",")]
            if possible_regs[0] is None:
                raise ValueError(
                    "Unknown register type for {} with {}.".format(
                        parameter_tag.attrib, parameter_tag.text
                    )
                )
            register = possible_regs[0]["register"]
            if isa == "x86":
                if parser.is_vector_register(register):
                    # Reduce e.g. "XMM0" to the register family "xmm".
                    register["name"] = register["name"].lower()[:3]
                    if "mask" in register:
                        register["mask"] = True
                else:
                    register["name"] = "gpr"
            elif isa == "aarch64":
                # Fixed: the list itself was indexed with "register" here, which
                # always raised a TypeError. The first parsed register is meant.
                del register["name"]
            for key in register:
                parameter[key] = register[key]
        elif p_type == "relbr":
            parameter = {"class": "identifier"}
        else:
            raise ValueError("Unknown paramter type {}".format(parameter_tag.attrib))
        parameters.append(parameter)
    return parameters
|
|
|
|
|
|
def _throughput_from_measurement(attrib):
    """Return the throughput given by a measurement's attributes, or None if there is none."""
    if "TP_ports" in attrib:
        return float(attrib["TP_ports"])
    # Attribute values are strings: convert them before taking the minimum, otherwise
    # they would be compared lexicographically or fail against a float fallback.
    # "TP_unrolled" is the key read by the throughput sanity check in extract_model();
    # "TP_unroll" is still accepted because it was the key queried here before.
    candidates = [
        float(attrib[key]) for key in ("TP_loop", "TP_unrolled", "TP_unroll", "TP") if key in attrib
    ]
    throughput = min(candidates, default=float("inf"))
    return None if throughput == float("inf") else throughput


def _latencies_from_measurement(measurement_tag):
    """Return all ``cycles`` latencies below a measurement, else all ``max_cycles`` values."""
    latency_tags = list(measurement_tag.iter("latency"))
    latencies = [int(tag.attrib["cycles"]) for tag in latency_tags if "cycles" in tag.attrib]
    if not latencies:
        latencies = [
            int(tag.attrib["max_cycles"]) for tag in latency_tags if "max_cycles" in tag.attrib
        ]
    return latencies


def extract_model(tree, arch, skip_mem=True):
    """
    Build a MachineModel for ``arch`` from the ``instruction`` tags found in ``tree``.

    An instruction is skipped if its operands cannot be extracted, if it has no
    ``architecture`` tag named ``arch.upper()``, if none of that tag's measurements
    carries a ``ports`` attribute, or (with ``skip_mem``) if it has a ``mem`` operand.

    :param tree: parsed XML tree that is searched for ``instruction`` elements
    :param arch: architecture name, matched upper-cased against ``architecture`` tags
    :param skip_mem: if true, instruction forms with a ``mem`` operand are skipped
    :returns: the filled MachineModel, or None if no ISA could be determined for ``arch``
    """
    try:
        isa = MachineModel.get_isa_for_arch(arch)
    except Exception:
        print("Skipping...", file=sys.stderr)
        return None
    mm = MachineModel(isa=isa)
    parser = get_parser(isa)
    arch_upper = arch.upper()

    for instruction_tag in tree.findall(".//instruction"):
        mnemonic = instruction_tag.attrib["asm"]
        iform = instruction_tag.attrib["iform"]
        # reduce to second part if mnemonic contain space (e.g., "REX CRC32")
        if " " in mnemonic:
            mnemonic = mnemonic.split(" ", 1)[1]

        # Extract parameter components
        try:
            parameters = extract_paramters(instruction_tag, parser, isa)
        except ValueError as e:
            # Fixed: processing used to continue here, which stored the entry with
            # the operands of the previously handled instruction (or failed with a
            # NameError on the very first one). Skip the instruction instead.
            print(e, file=sys.stderr)
            continue
        if isa == "x86":
            # NOTE(review): presumably converts to the operand order expected by the
            # x86 parser - confirm.
            parameters.reverse()

        # Extract port occupation, throughput and latency
        port_pressure, throughput, latency, uops = [], None, None, None
        arch_tag = instruction_tag.find('architecture[@name="' + arch_upper + '"]')
        if arch_tag is None:
            continue
        # skip any instructions without port utilization
        measurements = arch_tag.findall("measurement")
        if not any("ports" in m.attrib for m in measurements):
            print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
            continue
        # Warn (the instruction is still imported) if the port-based throughput of
        # the first measurement is greater than its measured throughput. The check
        # is only possible when all three attributes are present.
        first_attrib = measurements[0].attrib
        if all(key in first_attrib for key in ("TP_ports", "TP_loop", "TP_unrolled")):
            measured_tp = min(float(first_attrib["TP_loop"]), float(first_attrib["TP_unrolled"]))
            if float(first_attrib["TP_ports"]) > measured_tp:
                print(
                    "Calculated TP is greater than measured TP.",
                    iform,
                    file=sys.stderr,
                )
        # skip if instruction contains memory operand
        if skip_mem and any(x.attrib["type"] == "mem" for x in instruction_tag.findall("operand")):
            print("Contains memory operand, skip: ", iform, file=sys.stderr)
            continue
        # We collect all measurement and IACA information and compare them later
        for measurement_tag in arch_tag.iter("measurement"):
            attrib = measurement_tag.attrib
            # Note: this value is replaced by the port-pressure based throughput
            # before the instruction is stored (see end of loop body).
            throughput = _throughput_from_measurement(attrib)
            uops = int(attrib["uops"]) if "uops" in attrib else None
            if "ports" in attrib:
                port_pressure.append(port_pressure_from_tag_attributes(attrib))
            latencies = _latencies_from_measurement(measurement_tag)
            # Shifted-slice comparison is true iff not all entries are equal.
            if latencies[1:] != latencies[:-1]:
                print(
                    "Contradicting latencies found, using smallest:",
                    iform,
                    latencies,
                    file=sys.stderr,
                )
            if latencies:
                latency = min(latencies)

        # Ordered by IACA version (newest last)
        for iaca_tag in sorted(
            arch_tag.iter("IACA"), key=lambda i: StrictVersion(i.attrib["version"])
        ):
            if "ports" in iaca_tag.attrib:
                port_pressure.append(port_pressure_from_tag_attributes(iaca_tag.attrib))

        # Check if all are equal
        if not port_pressure:
            # No data available for this architecture
            continue
        if port_pressure[1:] != port_pressure[:-1]:
            print(
                "Contradicting port occupancies, using latest IACA:",
                iform,
                file=sys.stderr,
            )
        # The last entry wins: IACA data (newest version last) if present,
        # otherwise the last measurement.
        port_pressure = port_pressure[-1]

        # Adding Intel's 2D and 3D pipelines on Intel µarchs, without Ice Lake:
        if arch_upper in intel_archs and arch_upper != "ICL":
            if any(p["class"] == "memory" for p in parameters):
                # We have a memory parameter, if ports 2 & 3 are present, also add 2D & 3D
                # TODO remove port7 on 'hsw' onward and split entries depending on addressing mode
                port_23 = any("2" in pp[1] and "3" in pp[1] for pp in port_pressure)
                port_4 = any("4" in pp[1] for pp in port_pressure)
                # Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
                if port_23 and not port_4:
                    if (
                        arch_upper in ["SNB", "IVB"]
                        and any(p.get("name", "") == "ymm" for p in parameters)
                        and "128" not in mnemonic
                    ):
                        # x = 2 if SNB or IVB and ymm register in any operand and not '128' in
                        # instruction name
                        port2D3D_pressure = 2
                    else:
                        # otherwise x = 1
                        port2D3D_pressure = 1
                    port_pressure.append((port2D3D_pressure, ["2D", "3D"]))

        # Add missing ports:
        for pp in port_pressure:
            for p in pp[1]:
                mm.add_port(p)

        throughput = max(mm.average_port_pressure(port_pressure))
        mm.set_instruction(mnemonic, parameters, latency, port_pressure, throughput, uops)
    # TODO eliminate entries which could be covered by automatic load / store expansion
    return mm
|
|
|
|
|
|
def rhs_comment(uncommented_string, comment):
    """
    Append ``comment`` as a right-hand-side ``#`` comment to every line of a string.

    Each line is left-justified to the length of the longest line, so that all
    comment markers are aligned. Every line of the result ends with a newline.

    :param uncommented_string: text to annotate, lines separated by ``"\\n"``
    :param comment: comment text placed after ``" # "`` on each line
    :returns: the annotated text
    """
    lines = uncommented_string.split("\n")
    width = max(map(len, lines))
    # Build the per-line template once; only the padding width varies per call.
    line_template = "{:<" + str(width) + "} # {}\n"
    return "".join(line_template.format(line, comment) for line in lines)
|
|
|
|
|
|
def architectures(tree):
    """Return the set of ``name`` attributes of all ``architecture`` tags below ``tree``."""
    return {arch_tag.attrib["name"] for arch_tag in tree.findall(".//architecture")}
|
|
|
|
|
|
def main():
    """
    Command line entry point.

    Parses the XML file given on the command line and prints the architectures it
    contains. If an architecture is given, its model is extracted and printed to
    stdout; otherwise a model is extracted for every architecture and each
    non-empty result is written to ``<arch>.yml`` in the current working directory.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("xml", help="path of instructions.xml from http://uops.info")
    cli.add_argument(
        "arch",
        nargs="?",
        help="architecture to extract, use IACA abbreviations (e.g., SNB). "
        "if not given, all will be extracted and saved to file in CWD.",
    )
    cli.add_argument(
        "--mem",
        dest="skip_mem",
        action="store_false",
        help="add instruction forms including memory addressing operands, which are "
        "skipped by default",
    )
    options = cli.parse_args()
    script_name = os.path.basename(__file__)

    xml_tree = ET.parse(options.xml)
    print("# Available architectures:", ", ".join(architectures(xml_tree)))

    # Single architecture requested: dump its model to stdout and finish.
    if options.arch:
        print("# Chosen architecture: {}".format(options.arch))
        single_model = extract_model(xml_tree, options.arch, options.skip_mem)
        if single_model is not None:
            print(rhs_comment(single_model.dump(), "uops.info import"))
        return

    # No architecture requested: write one YAML file per architecture.
    for arch_name in architectures(xml_tree):
        # Progress output: the name now, a terminating "." once it is processed.
        print(arch_name, end="")
        arch_model = extract_model(xml_tree, arch_name.lower(), options.skip_mem)
        if arch_model:
            annotated = rhs_comment(arch_model.dump(), script_name + " " + arch_name)

            with open("{}.yml".format(arch_name.lower()), "w") as out_file:
                out_file.write(annotated)
        print(".")
|
|
|
|
|
|
# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|