mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-07-20 20:21:05 +02:00
applied flake8 and black rules
This commit is contained in:
@@ -7,7 +7,8 @@ import re
|
||||
def __read(*names, **kwargs):
|
||||
"""Reads in file"""
|
||||
with io.open(
|
||||
os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
|
||||
os.path.join(os.path.dirname(__file__), *names),
|
||||
encoding=kwargs.get("encoding", "utf8"),
|
||||
) as fp:
|
||||
return fp.read()
|
||||
|
||||
|
@@ -88,7 +88,7 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
|
||||
|
||||
comment = None
|
||||
if load:
|
||||
if 'ymm' in operand_types:
|
||||
if "ymm" in operand_types:
|
||||
port2D3D_pressure = 2
|
||||
else:
|
||||
port2D3D_pressure = 1
|
||||
@@ -96,7 +96,7 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
|
||||
latency += 4
|
||||
comment = "with load"
|
||||
if store:
|
||||
if 'ymm' in operand_types:
|
||||
if "ymm" in operand_types:
|
||||
port4_pressure = 2
|
||||
else:
|
||||
port4_pressure = 1
|
||||
@@ -716,14 +716,14 @@ skx_mov_instructions = list(
|
||||
# ('movapd xmm xmm', ('1*p5', 1)),
|
||||
# ('vmovapd xmm xmm', ('1*p5', 1)),
|
||||
# ('vmovapd ymm ymm', ('1*p5', 1)),
|
||||
('vmovapd zmm zmm', ('', 0)),
|
||||
("vmovapd zmm zmm", ("", 0)),
|
||||
# https://www.felixcloutier.com/x86/movaps
|
||||
# TODO with masking!
|
||||
# TODO the following may eliminate or be bound to 1*p0156:
|
||||
# ('movaps xmm xmm', ('1*p5', 1)),
|
||||
# ('vmovaps xmm xmm', ('1*p5', 1)),
|
||||
# ('vmovaps ymm ymm', ('1*p5', 1)),
|
||||
('vmovaps zmm zmm', ('', 0)),
|
||||
("vmovaps zmm zmm", ("", 0)),
|
||||
# https://www.felixcloutier.com/x86/movbe
|
||||
("movbe gpr mem", ("1*p15", 4)),
|
||||
("movbe mem gpr", ("1*p15", 4)),
|
||||
|
@@ -140,9 +140,11 @@ def extract_model(tree, arch, skip_mem=True):
|
||||
print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
|
||||
continue
|
||||
# skip if measured TP is smaller than computed
|
||||
if [float(x.attrib["TP_ports"]) > min(float(x.attrib["TP_loop"]),
|
||||
float(x.attrib["TP_unrolled"]))
|
||||
for x in arch_tag.findall("measurement")][0]:
|
||||
if [
|
||||
float(x.attrib["TP_ports"])
|
||||
> min(float(x.attrib["TP_loop"]), float(x.attrib["TP_unrolled"]))
|
||||
for x in arch_tag.findall("measurement")
|
||||
][0]:
|
||||
print(
|
||||
"Calculated TP is greater than measured TP.",
|
||||
iform,
|
||||
@@ -160,13 +162,15 @@ def extract_model(tree, arch, skip_mem=True):
|
||||
throughput = float(measurement_tag.attrib["TP_ports"])
|
||||
else:
|
||||
throughput = min(
|
||||
measurement_tag.attrib.get("TP_loop", float('inf')),
|
||||
measurement_tag.attrib.get("TP_unroll", float('inf')),
|
||||
measurement_tag.attrib.get("TP", float('inf')),
|
||||
measurement_tag.attrib.get("TP_loop", float("inf")),
|
||||
measurement_tag.attrib.get("TP_unroll", float("inf")),
|
||||
measurement_tag.attrib.get("TP", float("inf")),
|
||||
)
|
||||
if throughput == float('inf'):
|
||||
if throughput == float("inf"):
|
||||
throughput = None
|
||||
uops = int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None
|
||||
uops = (
|
||||
int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None
|
||||
)
|
||||
if "ports" in measurement_tag.attrib:
|
||||
port_pressure.append(port_pressure_from_tag_attributes(measurement_tag.attrib))
|
||||
latencies = [
|
||||
@@ -202,7 +206,11 @@ def extract_model(tree, arch, skip_mem=True):
|
||||
# Check if all are equal
|
||||
if port_pressure:
|
||||
if port_pressure[1:] != port_pressure[:-1]:
|
||||
print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr)
|
||||
print(
|
||||
"Contradicting port occupancies, using latest IACA:",
|
||||
iform,
|
||||
file=sys.stderr,
|
||||
)
|
||||
port_pressure = port_pressure[-1]
|
||||
else:
|
||||
# print("No data available for this architecture:", mnemonic, file=sys.stderr)
|
||||
@@ -222,10 +230,12 @@ def extract_model(tree, arch, skip_mem=True):
|
||||
port_4 = True
|
||||
# Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
|
||||
if port_23 and not port_4:
|
||||
if arch.upper() in ["SNB", "IVB"] and any(
|
||||
[p.get('name', '') == 'ymm' for p in parameters]) and \
|
||||
not '128' in mnemonic:
|
||||
# x = 2 if SNB or IVB and ymm register in any operand and not '128' in
|
||||
if (
|
||||
arch.upper() in ["SNB", "IVB"]
|
||||
and any([p.get("name", "") == "ymm" for p in parameters])
|
||||
and not ("128" in mnemonic)
|
||||
):
|
||||
# x = 2 if SNB or IVB and ymm register in any operand and not '128' in
|
||||
# instruction name
|
||||
port2D3D_pressure = 2
|
||||
else:
|
||||
|
@@ -125,7 +125,10 @@ def _get_asmbench_output(input_data, isa):
|
||||
db_entries = {}
|
||||
for i in range(0, len(input_data), 4):
|
||||
if input_data[i + 3].strip() != "":
|
||||
print("asmbench output not in the correct format! Format must be: ", file=sys.stderr)
|
||||
print(
|
||||
"asmbench output not in the correct format! Format must be: ",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(
|
||||
"-------------\nMNEMONIC[-OP1[_OP2][...]]\nLatency: X cycles\n"
|
||||
"Throughput: Y cycles\n\n-------------",
|
||||
@@ -540,7 +543,16 @@ def _get_sanity_report(
|
||||
|
||||
|
||||
def _get_sanity_report_verbose(
|
||||
total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa, bad_operands, colors=False
|
||||
total,
|
||||
m_tp,
|
||||
m_l,
|
||||
m_pp,
|
||||
suspic_instr,
|
||||
dup_arch,
|
||||
dup_isa,
|
||||
only_isa,
|
||||
bad_operands,
|
||||
colors=False,
|
||||
):
|
||||
"""Get the verbose part of the sanity report with all missing instruction forms."""
|
||||
BRIGHT_CYAN = "\033[1;36;1m" if colors else ""
|
||||
|
@@ -202,7 +202,12 @@ class Frontend(object):
|
||||
)
|
||||
|
||||
def combined_view(
|
||||
self, kernel, cp_kernel: KernelDG, dep_dict, ignore_unknown=False, show_cmnts=True
|
||||
self,
|
||||
kernel,
|
||||
cp_kernel: KernelDG,
|
||||
dep_dict,
|
||||
ignore_unknown=False,
|
||||
show_cmnts=True,
|
||||
):
|
||||
"""
|
||||
Build combined view of kernel including port pressure (TP), a CP column and a
|
||||
@@ -238,8 +243,8 @@ class Frontend(object):
|
||||
lcd_sum = 0.0
|
||||
lcd_lines = {}
|
||||
if dep_dict:
|
||||
longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]['latency'])
|
||||
lcd_sum = dep_dict[longest_lcd]['latency']
|
||||
longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]["latency"])
|
||||
lcd_sum = dep_dict[longest_lcd]["latency"]
|
||||
lcd_lines = {
|
||||
instr["line_number"]: lat for instr, lat in dep_dict[longest_lcd]["dependencies"]
|
||||
}
|
||||
|
@@ -10,7 +10,13 @@ from functools import lru_cache
|
||||
from osaca.db_interface import import_benchmark_output, sanity_check
|
||||
from osaca.frontend import Frontend
|
||||
from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT
|
||||
from osaca.semantics import INSTR_FLAGS, ArchSemantics, KernelDG, MachineModel, reduce_to_section
|
||||
from osaca.semantics import (
|
||||
INSTR_FLAGS,
|
||||
ArchSemantics,
|
||||
KernelDG,
|
||||
MachineModel,
|
||||
reduce_to_section,
|
||||
)
|
||||
|
||||
|
||||
SUPPORTED_ARCHS = [
|
||||
@@ -37,7 +43,8 @@ DEFAULT_ARCHS = {
|
||||
def __read(*names, **kwargs):
|
||||
"""Reads in file"""
|
||||
with io.open(
|
||||
os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
|
||||
os.path.join(os.path.dirname(__file__), *names),
|
||||
encoding=kwargs.get("encoding", "utf8"),
|
||||
) as fp:
|
||||
return fp.read()
|
||||
|
||||
@@ -79,7 +86,10 @@ def create_parser(parser=None):
|
||||
|
||||
# Add arguments
|
||||
parser.add_argument(
|
||||
"-V", "--version", action="version", version="%(prog)s " + __find_version("__init__.py")
|
||||
"-V",
|
||||
"--version",
|
||||
action="version",
|
||||
version="%(prog)s " + __find_version("__init__.py"),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--arch",
|
||||
@@ -167,7 +177,9 @@ def create_parser(parser=None):
|
||||
help="Write analysis to this file (default to stdout).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"file", type=argparse.FileType("r"), help="Path to object (ASM or instruction file)."
|
||||
"file",
|
||||
type=argparse.FileType("r"),
|
||||
help="Path to object (ASM or instruction file).",
|
||||
)
|
||||
|
||||
return parser
|
||||
@@ -347,7 +359,10 @@ def run(args, output_file=sys.stdout):
|
||||
# Sanity check on DB
|
||||
verbose = True if args.verbose > 0 else False
|
||||
sanity_check(
|
||||
args.arch, verbose=verbose, internet_check=args.internet_check, output_file=output_file
|
||||
args.arch,
|
||||
verbose=verbose,
|
||||
internet_check=args.internet_check,
|
||||
output_file=output_file,
|
||||
)
|
||||
elif "import_data" in args:
|
||||
# Import microbench output file into DB
|
||||
|
@@ -26,9 +26,9 @@ class ParserAArch64(BaseParser):
|
||||
pp.ZeroOrMore(pp.Word(pp.printables))
|
||||
).setResultsName(self.COMMENT_ID)
|
||||
# Define ARM assembly identifier
|
||||
decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)).setResultsName(
|
||||
"value"
|
||||
)
|
||||
decimal_number = pp.Combine(
|
||||
pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)
|
||||
).setResultsName("value")
|
||||
hex_number = pp.Combine(pp.Literal("0x") + pp.Word(pp.hexnums)).setResultsName("value")
|
||||
relocation = pp.Combine(pp.Literal(":") + pp.Word(pp.alphanums + "_") + pp.Literal(":"))
|
||||
first = pp.Word(pp.alphas + "_.", exact=1)
|
||||
@@ -152,7 +152,9 @@ class ParserAArch64(BaseParser):
|
||||
pp.Literal("{")
|
||||
+ (
|
||||
pp.delimitedList(pp.Combine(self.list_element), delim=",").setResultsName("list")
|
||||
^ pp.delimitedList(pp.Combine(self.list_element), delim="-").setResultsName("range")
|
||||
^ pp.delimitedList(pp.Combine(self.list_element), delim="-").setResultsName(
|
||||
"range"
|
||||
)
|
||||
)
|
||||
+ pp.Literal("}")
|
||||
+ pp.Optional(index)
|
||||
@@ -256,9 +258,7 @@ class ParserAArch64(BaseParser):
|
||||
# 2. Parse label
|
||||
if result is None:
|
||||
try:
|
||||
result = self.process_operand(
|
||||
self.label.parseString(line, parseAll=True).asDict()
|
||||
)
|
||||
result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
|
||||
result = AttrDict.convert_dict(result)
|
||||
instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name
|
||||
if self.COMMENT_ID in result[self.LABEL_ID]:
|
||||
@@ -293,7 +293,9 @@ class ParserAArch64(BaseParser):
|
||||
try:
|
||||
result = self.parse_instruction(line)
|
||||
except (pp.ParseException, KeyError) as e:
|
||||
raise ValueError("Unable to parse {!r} on line {}".format(line, line_number)) from e
|
||||
raise ValueError(
|
||||
"Unable to parse {!r} on line {}".format(line, line_number)
|
||||
) from e
|
||||
instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID]
|
||||
instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID]
|
||||
instruction_form[self.COMMENT_ID] = result[self.COMMENT_ID]
|
||||
@@ -390,9 +392,9 @@ class ParserAArch64(BaseParser):
|
||||
new_dict["pre_indexed"] = True
|
||||
if "post_indexed" in memory_address:
|
||||
if "value" in memory_address["post_indexed"]:
|
||||
new_dict["post_indexed"] = {"value": int(
|
||||
memory_address["post_indexed"]["value"], 0
|
||||
)}
|
||||
new_dict["post_indexed"] = {
|
||||
"value": int(memory_address["post_indexed"]["value"], 0)
|
||||
}
|
||||
else:
|
||||
new_dict["post_indexed"] = memory_address["post_indexed"]
|
||||
return AttrDict({self.MEMORY_ID: new_dict})
|
||||
@@ -408,27 +410,27 @@ class ParserAArch64(BaseParser):
|
||||
Resolve range or list register operand to list of registers.
|
||||
Returns None if neither list nor range
|
||||
"""
|
||||
if 'register' in operand:
|
||||
if 'list' in operand.register:
|
||||
index = operand.register.get('index')
|
||||
if "register" in operand:
|
||||
if "list" in operand.register:
|
||||
index = operand.register.get("index")
|
||||
range_list = []
|
||||
for reg in operand.register.list:
|
||||
reg = deepcopy(reg)
|
||||
if index is not None:
|
||||
reg['index'] = int(index, 0)
|
||||
reg["index"] = int(index, 0)
|
||||
range_list.append(AttrDict({self.REGISTER_ID: reg}))
|
||||
return range_list
|
||||
elif 'range' in operand.register:
|
||||
elif "range" in operand.register:
|
||||
base_register = operand.register.range[0]
|
||||
index = operand.register.get('index')
|
||||
index = operand.register.get("index")
|
||||
range_list = []
|
||||
start_name = base_register.name
|
||||
end_name = operand.register.range[1].name
|
||||
for name in range(int(start_name), int(end_name) + 1):
|
||||
reg = deepcopy(base_register)
|
||||
if index is not None:
|
||||
reg['index'] = int(index, 0)
|
||||
reg['name'] = str(name)
|
||||
reg["index"] = int(index, 0)
|
||||
reg["name"] = str(name)
|
||||
range_list.append(AttrDict({self.REGISTER_ID: reg}))
|
||||
return range_list
|
||||
# neither register list nor range, return unmodified
|
||||
@@ -482,10 +484,12 @@ class ParserAArch64(BaseParser):
|
||||
return AttrDict({self.IMMEDIATE_ID: immediate})
|
||||
else:
|
||||
# change 'mantissa' key to 'value'
|
||||
return AttrDict({
|
||||
self.IMMEDIATE_ID: AttrDict({
|
||||
"value": immediate[dict_name]["mantissa"],
|
||||
"type": dict_name})}
|
||||
return AttrDict(
|
||||
{
|
||||
self.IMMEDIATE_ID: AttrDict(
|
||||
{"value": immediate[dict_name]["mantissa"], "type": dict_name}
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
def process_label(self, label):
|
||||
|
@@ -23,9 +23,9 @@ class ParserX86ATT(BaseParser):
|
||||
|
||||
def construct_parser(self):
|
||||
"""Create parser for x86 AT&T ISA."""
|
||||
decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)).setResultsName(
|
||||
"value"
|
||||
)
|
||||
decimal_number = pp.Combine(
|
||||
pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)
|
||||
).setResultsName("value")
|
||||
hex_number = pp.Combine(
|
||||
pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums)
|
||||
).setResultsName("value")
|
||||
@@ -41,7 +41,8 @@ class ParserX86ATT(BaseParser):
|
||||
identifier = pp.Group(
|
||||
pp.Optional(id_offset).setResultsName("offset")
|
||||
+ pp.Combine(
|
||||
pp.delimitedList(pp.Combine(first + pp.Optional(rest)), delim="::"), joinString="::"
|
||||
pp.delimitedList(pp.Combine(first + pp.Optional(rest)), delim="::"),
|
||||
joinString="::",
|
||||
).setResultsName("name")
|
||||
+ pp.Optional(relocation).setResultsName("relocation")
|
||||
).setResultsName("identifier")
|
||||
@@ -443,7 +444,12 @@ class ParserX86ATT(BaseParser):
|
||||
"""Check if register is a vector register"""
|
||||
if register is None:
|
||||
return False
|
||||
if register["name"].rstrip(string.digits).lower() in ["mm", "xmm", "ymm", "zmm"]:
|
||||
if register["name"].rstrip(string.digits).lower() in [
|
||||
"mm",
|
||||
"xmm",
|
||||
"ymm",
|
||||
"zmm",
|
||||
]:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
@@ -47,7 +47,9 @@ class ArchSemantics(ISASemantics):
|
||||
indices = [port_list.index(p) for p in ports]
|
||||
# check if port sum of used ports for uop are unbalanced
|
||||
port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel)))
|
||||
instr_ports = self._to_list(itemgetter(*indices)(instruction_form["port_pressure"]))
|
||||
instr_ports = self._to_list(
|
||||
itemgetter(*indices)(instruction_form["port_pressure"])
|
||||
)
|
||||
if len(set(port_sums)) > 1:
|
||||
# balance ports
|
||||
# init list for keeping track of the current change
|
||||
@@ -270,7 +272,8 @@ class ArchSemantics(ISASemantics):
|
||||
reg_type
|
||||
]
|
||||
st_data_port_pressure = [
|
||||
pp * multiplier for pp in st_data_port_pressure]
|
||||
pp * multiplier for pp in st_data_port_pressure
|
||||
]
|
||||
data_port_pressure = [
|
||||
sum(x) for x in zip(data_port_pressure, st_data_port_pressure)
|
||||
]
|
||||
@@ -343,7 +346,9 @@ class ArchSemantics(ISASemantics):
|
||||
def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags):
|
||||
"""Apply performance data to instruction if it was found in the archDB"""
|
||||
throughput = instruction_data["throughput"]
|
||||
port_pressure = self._machine_model.average_port_pressure(instruction_data["port_pressure"])
|
||||
port_pressure = self._machine_model.average_port_pressure(
|
||||
instruction_data["port_pressure"]
|
||||
)
|
||||
instruction_form["port_uops"] = instruction_data["port_pressure"]
|
||||
try:
|
||||
assert isinstance(port_pressure, list)
|
||||
|
@@ -1,20 +1,19 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
import pickle
|
||||
import re
|
||||
import string
|
||||
from collections import defaultdict
|
||||
from copy import deepcopy
|
||||
from itertools import product
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
import ruamel.yaml
|
||||
from ruamel.yaml.compat import StringIO
|
||||
|
||||
from osaca import __version__, utils
|
||||
from osaca.parser import ParserX86ATT
|
||||
from ruamel.yaml.compat import StringIO
|
||||
|
||||
|
||||
class MachineModel(object):
|
||||
@@ -37,7 +36,13 @@ class MachineModel(object):
|
||||
"hidden_loads": None,
|
||||
"load_latency": {},
|
||||
"load_throughput": [
|
||||
{"base": b, "index": i, "offset": o, "scale": s, "port_pressure": []}
|
||||
{
|
||||
"base": b,
|
||||
"index": i,
|
||||
"offset": o,
|
||||
"scale": s,
|
||||
"port_pressure": [],
|
||||
}
|
||||
for b, i, o, s in product(["gpr"], ["gpr", None], ["imd", None], [1, 8])
|
||||
],
|
||||
"load_throughput_default": [],
|
||||
@@ -128,7 +133,8 @@ class MachineModel(object):
|
||||
instruction_form
|
||||
for instruction_form in name_matched_iforms
|
||||
if self._match_operands(
|
||||
instruction_form["operands"] if "operands" in instruction_form else [], operands
|
||||
instruction_form["operands"] if "operands" in instruction_form else [],
|
||||
operands,
|
||||
)
|
||||
)
|
||||
except StopIteration:
|
||||
@@ -150,7 +156,13 @@ class MachineModel(object):
|
||||
return average_pressure
|
||||
|
||||
def set_instruction(
|
||||
self, name, operands=None, latency=None, port_pressure=None, throughput=None, uops=None
|
||||
self,
|
||||
name,
|
||||
operands=None,
|
||||
latency=None,
|
||||
port_pressure=None,
|
||||
throughput=None,
|
||||
uops=None,
|
||||
):
|
||||
"""Import instruction form information."""
|
||||
# If it already exists. Overwrite information.
|
||||
@@ -500,7 +512,11 @@ class MachineModel(object):
|
||||
"""Check if the types of operand ``i_operand`` and ``operand`` match."""
|
||||
# check for wildcard
|
||||
if self.WILDCARD in operand:
|
||||
if "class" in i_operand and i_operand["class"] == "register" or "register" in i_operand:
|
||||
if (
|
||||
"class" in i_operand
|
||||
and i_operand["class"] == "register"
|
||||
or "register" in i_operand
|
||||
):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
@@ -527,20 +543,27 @@ class MachineModel(object):
|
||||
return self._is_AArch64_mem_type(i_operand, operand["memory"])
|
||||
# immediate
|
||||
if i_operand["class"] == "immediate" and i_operand["imd"] == self.WILDCARD:
|
||||
return "value" in operand or \
|
||||
("immediate" in operand and "value" in operand["immediate"])
|
||||
return "value" in operand or (
|
||||
"immediate" in operand and "value" in operand["immediate"]
|
||||
)
|
||||
if i_operand["class"] == "immediate" and i_operand["imd"] == "int":
|
||||
return ("value" in operand and operand.get("type", None) == "int") or \
|
||||
("immediate" in operand and "value" in operand["immediate"] and
|
||||
operand["immediate"].get("type", None) == "int")
|
||||
return ("value" in operand and operand.get("type", None) == "int") or (
|
||||
"immediate" in operand
|
||||
and "value" in operand["immediate"]
|
||||
and operand["immediate"].get("type", None) == "int"
|
||||
)
|
||||
if i_operand["class"] == "immediate" and i_operand["imd"] == "float":
|
||||
return ("float" in operand and operand.get("type", None) == "float") or \
|
||||
("immediate" in operand and "float" in operand["immediate"] and
|
||||
operand["immediate"].get("type", None) == "float")
|
||||
return ("float" in operand and operand.get("type", None) == "float") or (
|
||||
"immediate" in operand
|
||||
and "float" in operand["immediate"]
|
||||
and operand["immediate"].get("type", None) == "float"
|
||||
)
|
||||
if i_operand["class"] == "immediate" and i_operand["imd"] == "double":
|
||||
return ("double" in operand and operand.get("type", None) == "double") or \
|
||||
("immediate" in operand and "double" in operand["immediate"] and
|
||||
operand["immediate"].get("type", None) == "double")
|
||||
return ("double" in operand and operand.get("type", None) == "double") or (
|
||||
"immediate" in operand
|
||||
and "double" in operand["immediate"]
|
||||
and operand["immediate"].get("type", None) == "double"
|
||||
)
|
||||
# identifier
|
||||
if "identifier" in operand or (
|
||||
"immediate" in operand and "identifier" in operand["immediate"]
|
||||
@@ -577,7 +600,10 @@ class MachineModel(object):
|
||||
def _compare_db_entries(self, operand_1, operand_2):
|
||||
"""Check if operand types in DB format (i.e., not parsed) match."""
|
||||
operand_attributes = list(
|
||||
filter(lambda x: True if x != "source" and x != "destination" else False, operand_1)
|
||||
filter(
|
||||
lambda x: True if x != "source" and x != "destination" else False,
|
||||
operand_1,
|
||||
)
|
||||
)
|
||||
for key in operand_attributes:
|
||||
try:
|
||||
|
@@ -1,6 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
from itertools import chain
|
||||
from copy import deepcopy
|
||||
|
||||
from osaca import utils
|
||||
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
|
||||
@@ -100,53 +99,68 @@ class ISASemantics(object):
|
||||
# post-process pre- and post-indexing for aarch64 memory operands
|
||||
if self._isa == "aarch64":
|
||||
for operand in [op for op in op_dict["source"] if "memory" in op]:
|
||||
post_indexed = ("post_indexed" in operand["memory"] and
|
||||
operand["memory"]["post_indexed"])
|
||||
pre_indexed = ("pre_indexed" in operand["memory"] and
|
||||
operand["memory"]["pre_indexed"])
|
||||
post_indexed = (
|
||||
"post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
|
||||
)
|
||||
pre_indexed = (
|
||||
"pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
|
||||
)
|
||||
if post_indexed or pre_indexed:
|
||||
op_dict["src_dst"].append(
|
||||
AttrDict.convert_dict({
|
||||
"register": operand["memory"]["base"],
|
||||
"pre_indexed": pre_indexed,
|
||||
"post_indexed": post_indexed})
|
||||
AttrDict.convert_dict(
|
||||
{
|
||||
"register": operand["memory"]["base"],
|
||||
"pre_indexed": pre_indexed,
|
||||
"post_indexed": post_indexed,
|
||||
}
|
||||
)
|
||||
)
|
||||
for operand in [op for op in op_dict["destination"] if "memory" in op]:
|
||||
post_indexed = ("post_indexed" in operand["memory"] and
|
||||
operand["memory"]["post_indexed"])
|
||||
pre_indexed = ("pre_indexed" in operand["memory"] and
|
||||
operand["memory"]["pre_indexed"])
|
||||
post_indexed = (
|
||||
"post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
|
||||
)
|
||||
pre_indexed = (
|
||||
"pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
|
||||
)
|
||||
if post_indexed or pre_indexed:
|
||||
op_dict["src_dst"].append(
|
||||
AttrDict.convert_dict({
|
||||
"register": operand["memory"]["base"],
|
||||
"pre_indexed": pre_indexed,
|
||||
"post_indexed": post_indexed})
|
||||
AttrDict.convert_dict(
|
||||
{
|
||||
"register": operand["memory"]["base"],
|
||||
"pre_indexed": pre_indexed,
|
||||
"post_indexed": post_indexed,
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
# store operand list in dict and reassign operand key/value pair
|
||||
instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict)
|
||||
# assign LD/ST flags
|
||||
instruction_form["flags"] = instruction_form["flags"] if "flags" in instruction_form else []
|
||||
instruction_form["flags"] = (
|
||||
instruction_form["flags"] if "flags" in instruction_form else []
|
||||
)
|
||||
if self._has_load(instruction_form):
|
||||
instruction_form["flags"] += [INSTR_FLAGS.HAS_LD]
|
||||
if self._has_store(instruction_form):
|
||||
instruction_form["flags"] += [INSTR_FLAGS.HAS_ST]
|
||||
|
||||
|
||||
def get_reg_changes(self, instruction_form, only_postindexed=False):
|
||||
"""
|
||||
Returns register changes, as dict, for instruction_form, based on operation defined in isa.
|
||||
|
||||
|
||||
Empty dict if no changes of registers occurred. None for registers with unknown changes.
|
||||
If only_postindexed is True, only considers changes due to post_indexed memory references.
|
||||
"""
|
||||
if instruction_form.get('instruction') is None:
|
||||
if instruction_form.get("instruction") is None:
|
||||
return {}
|
||||
dest_reg_names = [op.register.get('prefix', '') + op.register.name
|
||||
for op in chain(instruction_form.semantic_operands.destination,
|
||||
instruction_form.semantic_operands.src_dst)
|
||||
if 'register' in op]
|
||||
dest_reg_names = [
|
||||
op.register.get("prefix", "") + op.register.name
|
||||
for op in chain(
|
||||
instruction_form.semantic_operands.destination,
|
||||
instruction_form.semantic_operands.src_dst,
|
||||
)
|
||||
if "register" in op
|
||||
]
|
||||
isa_data = self._isa_model.get_instruction(
|
||||
instruction_form["instruction"], instruction_form["operands"]
|
||||
)
|
||||
@@ -162,50 +176,50 @@ class ISASemantics(object):
|
||||
|
||||
if only_postindexed:
|
||||
for o in instruction_form.operands:
|
||||
if 'post_indexed' in o.get('memory', {}):
|
||||
base_name = o.memory.base.get('prefix', '') + o.memory.base.name
|
||||
return {base_name: {
|
||||
'name': o.memory.base.get('prefix', '') + o.memory.base.name,
|
||||
'value': o.memory.post_indexed.value
|
||||
}}
|
||||
if "post_indexed" in o.get("memory", {}):
|
||||
base_name = o.memory.base.get("prefix", "") + o.memory.base.name
|
||||
return {
|
||||
base_name: {
|
||||
"name": o.memory.base.get("prefix", "") + o.memory.base.name,
|
||||
"value": o.memory.post_indexed.value,
|
||||
}
|
||||
}
|
||||
return {}
|
||||
|
||||
reg_operand_names = {} # e.g., {'rax': 'op1'}
|
||||
operand_state = {} # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged
|
||||
|
||||
for o in instruction_form.operands:
|
||||
if 'pre_indexed' in o.get('memory', {}):
|
||||
if "pre_indexed" in o.get("memory", {}):
|
||||
# Assuming no isa_data.operation
|
||||
if isa_data.get("operation", None) is not None:
|
||||
raise ValueError(
|
||||
"ISA information for pre-indexed instruction {!r} has operation set."
|
||||
"This is currently not supprted.".format(instruction_form.line))
|
||||
base_name = o.memory.base.get('prefix', '') + o.memory.base.name
|
||||
reg_operand_names = {base_name: 'op1'}
|
||||
operand_state = {'op1': {
|
||||
'name': base_name,
|
||||
'value': o.memory.offset.value
|
||||
}}
|
||||
"This is currently not supprted.".format(instruction_form.line)
|
||||
)
|
||||
base_name = o.memory.base.get("prefix", "") + o.memory.base.name
|
||||
reg_operand_names = {base_name: "op1"}
|
||||
operand_state = {"op1": {"name": base_name, "value": o.memory.offset.value}}
|
||||
|
||||
if isa_data is not None and 'operation' in isa_data:
|
||||
if isa_data is not None and "operation" in isa_data:
|
||||
for i, o in enumerate(instruction_form.operands):
|
||||
operand_name = "op{}".format(i + 1)
|
||||
if "register" in o:
|
||||
o_reg_name = o["register"].get('prefix', '') + o["register"]["name"]
|
||||
o_reg_name = o["register"].get("prefix", "") + o["register"]["name"]
|
||||
reg_operand_names[o_reg_name] = operand_name
|
||||
operand_state[operand_name] = {
|
||||
'name': o_reg_name,
|
||||
'value': 0}
|
||||
operand_state[operand_name] = {"name": o_reg_name, "value": 0}
|
||||
elif "immediate" in o:
|
||||
operand_state[operand_name] = {'value': o["immediate"]["value"]}
|
||||
operand_state[operand_name] = {"value": o["immediate"]["value"]}
|
||||
elif "memory" in o:
|
||||
# TODO lea needs some thinking about
|
||||
pass
|
||||
|
||||
operand_changes = exec(isa_data['operation'], {}, operand_state)
|
||||
exec(isa_data["operation"], {}, operand_state)
|
||||
|
||||
change_dict = {reg_name: operand_state.get(reg_operand_names.get(reg_name))
|
||||
for reg_name in dest_reg_names}
|
||||
change_dict = {
|
||||
reg_name: operand_state.get(reg_operand_names.get(reg_name))
|
||||
for reg_name in dest_reg_names
|
||||
}
|
||||
return change_dict
|
||||
|
||||
def _apply_found_ISA_data(self, isa_data, operands):
|
||||
@@ -231,8 +245,10 @@ class ISASemantics(object):
|
||||
if "hidden_operands" in isa_data:
|
||||
op_dict["destination"] += [
|
||||
AttrDict.convert_dict(
|
||||
{hop["class"]: {k: hop[k] for k in ["class", "source", "destination"]}})
|
||||
for hop in isa_data["hidden_operands"]]
|
||||
{hop["class"]: {k: hop[k] for k in ["class", "source", "destination"]}}
|
||||
)
|
||||
for hop in isa_data["hidden_operands"]
|
||||
]
|
||||
return op_dict
|
||||
|
||||
for i, op in enumerate(isa_data["operands"]):
|
||||
|
@@ -16,7 +16,12 @@ class KernelDG(nx.DiGraph):
|
||||
INSTRUCTION_THRESHOLD = 50
|
||||
|
||||
def __init__(
|
||||
self, parsed_kernel, parser, hw_model: MachineModel, semantics: ArchSemantics, timeout=10
|
||||
self,
|
||||
parsed_kernel,
|
||||
parser,
|
||||
hw_model: MachineModel,
|
||||
semantics: ArchSemantics,
|
||||
timeout=10,
|
||||
):
|
||||
self.timed_out = False
|
||||
self.kernel = parsed_kernel
|
||||
@@ -73,7 +78,7 @@ class KernelDG(nx.DiGraph):
|
||||
else instruction_form["latency_wo_load"]
|
||||
)
|
||||
if "storeload_dep" in dep_flags:
|
||||
edge_weight += self.model.get('store_to_load_forward_latency', 0)
|
||||
edge_weight += self.model.get("store_to_load_forward_latency", 0)
|
||||
dg.add_edge(
|
||||
instruction_form["line_number"],
|
||||
dep["line_number"],
|
||||
@@ -98,7 +103,7 @@ class KernelDG(nx.DiGraph):
|
||||
tmp_kernel = [] + kernel
|
||||
for orig_iform in kernel:
|
||||
temp_iform = copy.copy(orig_iform)
|
||||
temp_iform['line_number'] += offset
|
||||
temp_iform["line_number"] += offset
|
||||
tmp_kernel.append(temp_iform)
|
||||
# get dependency graph
|
||||
dg = self.create_DG(tmp_kernel)
|
||||
@@ -118,12 +123,15 @@ class KernelDG(nx.DiGraph):
|
||||
with Manager() as manager:
|
||||
all_paths = manager.list()
|
||||
processes = [
|
||||
Process(target=self._extend_path, args=(all_paths, instr_section, dg, offset))
|
||||
Process(
|
||||
target=self._extend_path,
|
||||
args=(all_paths, instr_section, dg, offset),
|
||||
)
|
||||
for instr_section in instrs
|
||||
]
|
||||
for p in processes:
|
||||
p.start()
|
||||
if (timeout == -1):
|
||||
if timeout == -1:
|
||||
# no timeout
|
||||
for p in processes:
|
||||
p.join()
|
||||
@@ -162,7 +170,7 @@ class KernelDG(nx.DiGraph):
|
||||
# extend path by edge bound latencies (e.g., store-to-load latency)
|
||||
lat_path = []
|
||||
for s, d in nx.utils.pairwise(path):
|
||||
edge_lat = dg.edges[s, d]['latency']
|
||||
edge_lat = dg.edges[s, d]["latency"]
|
||||
# map source node back to original line numbers
|
||||
if s >= offset:
|
||||
s -= offset
|
||||
@@ -310,17 +318,17 @@ class KernelDG(nx.DiGraph):
|
||||
if change is None or reg_state.get(reg, {}) is None:
|
||||
reg_state[reg] = None
|
||||
else:
|
||||
reg_state.setdefault(reg, {'name': reg, 'value': 0})
|
||||
if change['name'] != reg:
|
||||
reg_state.setdefault(reg, {"name": reg, "value": 0})
|
||||
if change["name"] != reg:
|
||||
# renaming occurred, overwrite value with up-to-now change of source register
|
||||
reg_state[reg]['name'] = change['name']
|
||||
src_reg_state = reg_state.get(change['name'], {'value': 0})
|
||||
reg_state[reg]["name"] = change["name"]
|
||||
src_reg_state = reg_state.get(change["name"], {"value": 0})
|
||||
if src_reg_state is None:
|
||||
# original register's state was changed beyond reconstruction
|
||||
reg_state[reg] = None
|
||||
continue
|
||||
reg_state[reg]['value'] = src_reg_state['value']
|
||||
reg_state[reg]['value'] += change['value']
|
||||
reg_state[reg]["value"] = src_reg_state["value"]
|
||||
reg_state[reg]["value"] += change["value"]
|
||||
return reg_state
|
||||
|
||||
def get_dependent_instruction_forms(self, instr_form=None, line_number=None):
|
||||
@@ -340,7 +348,8 @@ class KernelDG(nx.DiGraph):
|
||||
if instruction_form.semantic_operands is None:
|
||||
return is_read
|
||||
for src in chain(
|
||||
instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst
|
||||
instruction_form.semantic_operands.source,
|
||||
instruction_form.semantic_operands.src_dst,
|
||||
):
|
||||
if "register" in src:
|
||||
is_read = self.parser.is_reg_dependend_of(register, src.register) or is_read
|
||||
@@ -372,7 +381,8 @@ class KernelDG(nx.DiGraph):
|
||||
if instruction_form.semantic_operands is None:
|
||||
return False
|
||||
for src in chain(
|
||||
instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst
|
||||
instruction_form.semantic_operands.source,
|
||||
instruction_form.semantic_operands.src_dst,
|
||||
):
|
||||
# Here we check for mem dependencies only
|
||||
if "memory" not in src:
|
||||
@@ -387,23 +397,23 @@ class KernelDG(nx.DiGraph):
|
||||
addr_change -= mem.offset.value
|
||||
if mem.base and src.base:
|
||||
base_change = register_changes.get(
|
||||
src.base.get('prefix', '') + src.base.name,
|
||||
{'name': src.base.get('prefix', '') + src.base.name, 'value': 0},
|
||||
src.base.get("prefix", "") + src.base.name,
|
||||
{"name": src.base.get("prefix", "") + src.base.name, "value": 0},
|
||||
)
|
||||
if base_change is None:
|
||||
# Unknown change occurred
|
||||
continue
|
||||
if mem.base.get('prefix', '') + mem.base['name'] != base_change['name']:
|
||||
if mem.base.get("prefix", "") + mem.base["name"] != base_change["name"]:
|
||||
# base registers do not match
|
||||
continue
|
||||
addr_change += base_change['value']
|
||||
addr_change += base_change["value"]
|
||||
elif mem.base or src.base:
|
||||
# base registers do not match
|
||||
continue
|
||||
if mem.index and src.index:
|
||||
index_change = register_changes.get(
|
||||
src.index.get('prefix', '') + src.index.name,
|
||||
{'name': src.index.get('prefix', '') + src.index.name, 'value': 0},
|
||||
src.index.get("prefix", "") + src.index.name,
|
||||
{"name": src.index.get("prefix", "") + src.index.name, "value": 0},
|
||||
)
|
||||
if index_change is None:
|
||||
# Unknown change occurred
|
||||
@@ -411,10 +421,10 @@ class KernelDG(nx.DiGraph):
|
||||
if mem.scale != src.scale:
|
||||
# scale factors do not match
|
||||
continue
|
||||
if mem.index.get('prefix', '') + mem.index['name'] != index_change['name']:
|
||||
if mem.index.get("prefix", "") + mem.index["name"] != index_change["name"]:
|
||||
# index registers do not match
|
||||
continue
|
||||
addr_change += index_change['value'] * src.scale
|
||||
addr_change += index_change["value"] * src.scale
|
||||
elif mem.index or src.index:
|
||||
# index registers do not match
|
||||
continue
|
||||
@@ -443,7 +453,8 @@ class KernelDG(nx.DiGraph):
|
||||
)
|
||||
# Check also for possible pre- or post-indexing in memory addresses
|
||||
for src in chain(
|
||||
instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst
|
||||
instruction_form.semantic_operands.source,
|
||||
instruction_form.semantic_operands.src_dst,
|
||||
):
|
||||
if "memory" in src:
|
||||
if "pre_indexed" in src.memory or "post_indexed" in src.memory:
|
||||
|
@@ -1,7 +1,10 @@
|
||||
#!/usr/bin/env python3
|
||||
import os.path
|
||||
|
||||
DATA_DIRS = [os.path.expanduser("~/.osaca/data"), os.path.join(os.path.dirname(__file__), "data")]
|
||||
DATA_DIRS = [
|
||||
os.path.expanduser("~/.osaca/data"),
|
||||
os.path.join(os.path.dirname(__file__), "data"),
|
||||
]
|
||||
CACHE_DIR = os.path.expanduser("~/.osaca/cache")
|
||||
|
||||
|
||||
|
14
setup.py
14
setup.py
@@ -18,7 +18,8 @@ here = os.path.abspath(os.path.dirname(__file__))
|
||||
# Stolen from pip
|
||||
def read(*names, **kwargs):
|
||||
with io.open(
|
||||
os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
|
||||
os.path.join(os.path.dirname(__file__), *names),
|
||||
encoding=kwargs.get("encoding", "utf8"),
|
||||
) as fp:
|
||||
return fp.read()
|
||||
|
||||
@@ -38,13 +39,20 @@ def _run_build_cache(dir):
|
||||
# This is run inside the install staging directory (that had no .pyc files)
|
||||
# We don't want to generate any.
|
||||
# https://github.com/eliben/pycparser/pull/135
|
||||
check_call([sys.executable, "-B", "_build_cache.py"], cwd=os.path.join(dir, "osaca", "data"))
|
||||
check_call(
|
||||
[sys.executable, "-B", "_build_cache.py"],
|
||||
cwd=os.path.join(dir, "osaca", "data"),
|
||||
)
|
||||
|
||||
|
||||
class install(_install):
|
||||
def run(self):
|
||||
_install.run(self)
|
||||
self.execute(_run_build_cache, (self.install_lib,), msg="Build ISA and architecture cache")
|
||||
self.execute(
|
||||
_run_build_cache,
|
||||
(self.install_lib,),
|
||||
msg="Build ISA and architecture cache",
|
||||
)
|
||||
|
||||
|
||||
class sdist(_sdist):
|
||||
|
@@ -33,7 +33,13 @@ class TestCLI(unittest.TestCase):
|
||||
with self.assertRaises(ValueError):
|
||||
osaca.check_arguments(args, parser)
|
||||
args = parser.parse_args(
|
||||
["--arch", "csx", "--import", "WRONG_BENCH", self._find_file("gs", "csx", "gcc")]
|
||||
[
|
||||
"--arch",
|
||||
"csx",
|
||||
"--import",
|
||||
"WRONG_BENCH",
|
||||
self._find_file("gs", "csx", "gcc"),
|
||||
]
|
||||
)
|
||||
with self.assertRaises(ValueError):
|
||||
osaca.check_arguments(args, parser)
|
||||
@@ -65,7 +71,13 @@ class TestCLI(unittest.TestCase):
|
||||
def test_check_db(self):
|
||||
parser = osaca.create_parser(parser=ErrorRaisingArgumentParser())
|
||||
args = parser.parse_args(
|
||||
["--arch", "tx2", "--db-check", "--verbose", self._find_test_file("triad_x86_iaca.s")]
|
||||
[
|
||||
"--arch",
|
||||
"tx2",
|
||||
"--db-check",
|
||||
"--verbose",
|
||||
self._find_test_file("triad_x86_iaca.s"),
|
||||
]
|
||||
)
|
||||
output = StringIO()
|
||||
osaca.run(args, output_file=output)
|
||||
@@ -134,7 +146,13 @@ class TestCLI(unittest.TestCase):
|
||||
for c in comps[a]:
|
||||
with self.subTest(kernel=k, arch=a, comp=c):
|
||||
args = parser.parse_args(
|
||||
["--arch", a, self._find_file(k, a, c), "--export-graph", "/dev/null"]
|
||||
[
|
||||
"--arch",
|
||||
a,
|
||||
self._find_file(k, a, c),
|
||||
"--export-graph",
|
||||
"/dev/null",
|
||||
]
|
||||
)
|
||||
output = StringIO()
|
||||
osaca.run(args, output_file=output)
|
||||
@@ -204,17 +222,13 @@ class TestCLI(unittest.TestCase):
|
||||
)
|
||||
output = StringIO()
|
||||
osaca.run(args, output_file=output)
|
||||
self.assertTrue(
|
||||
output.getvalue().count("WARNING: LCD analysis timed out") == 1
|
||||
)
|
||||
self.assertTrue(output.getvalue().count("WARNING: LCD analysis timed out") == 1)
|
||||
args = parser.parse_args(
|
||||
["--ignore-unknown", "--lcd-timeout", "-1", self._find_test_file(kernel)]
|
||||
)
|
||||
output = StringIO()
|
||||
osaca.run(args, output_file=output)
|
||||
self.assertTrue(
|
||||
output.getvalue().count("WARNING: LCD analysis timed out") == 0
|
||||
)
|
||||
self.assertTrue(output.getvalue().count("WARNING: LCD analysis timed out") == 0)
|
||||
|
||||
def test_lines_arg(self):
|
||||
# Run tests with --lines option
|
||||
@@ -227,12 +241,24 @@ class TestCLI(unittest.TestCase):
|
||||
args = []
|
||||
args.append(
|
||||
parser.parse_args(
|
||||
["--lines", "146-154", "--arch", "csx", self._find_test_file(kernel_x86)]
|
||||
[
|
||||
"--lines",
|
||||
"146-154",
|
||||
"--arch",
|
||||
"csx",
|
||||
self._find_test_file(kernel_x86),
|
||||
]
|
||||
)
|
||||
)
|
||||
args.append(
|
||||
parser.parse_args(
|
||||
["--lines", "146:154", "--arch", "csx", self._find_test_file(kernel_x86)]
|
||||
[
|
||||
"--lines",
|
||||
"146:154",
|
||||
"--arch",
|
||||
"csx",
|
||||
self._find_test_file(kernel_x86),
|
||||
]
|
||||
)
|
||||
)
|
||||
args.append(
|
||||
|
@@ -17,7 +17,13 @@ class TestDBInterface(unittest.TestCase):
|
||||
sample_entry = {
|
||||
"name": "DoItRightAndDoItFast",
|
||||
"operands": [
|
||||
{"class": "memory", "offset": "imd", "base": "gpr", "index": "gpr", "scale": 8},
|
||||
{
|
||||
"class": "memory",
|
||||
"offset": "imd",
|
||||
"base": "gpr",
|
||||
"index": "gpr",
|
||||
"scale": 8,
|
||||
},
|
||||
{"class": "register", "name": "xmm"},
|
||||
],
|
||||
"throughput": 1.25,
|
||||
@@ -35,7 +41,12 @@ class TestDBInterface(unittest.TestCase):
|
||||
del self.entry_tx2["operands"][1]["name"]
|
||||
self.entry_tx2["operands"][1]["prefix"] = "x"
|
||||
# self.entry_zen1['port_pressure'] = [1, 1, 1, 1, 0, 1, 0, 0, 0, 0.5, 1, 0.5, 1]
|
||||
self.entry_zen1["port_pressure"] = [[4, "0123"], [1, "4"], [1, "89"], [2, ["8D", "9D"]]]
|
||||
self.entry_zen1["port_pressure"] = [
|
||||
[4, "0123"],
|
||||
[1, "4"],
|
||||
[1, "89"],
|
||||
[2, ["8D", "9D"]],
|
||||
]
|
||||
|
||||
###########
|
||||
# Tests
|
||||
|
@@ -1,15 +1,15 @@
|
||||
# OSACA-BEGIN
|
||||
.L4:
|
||||
vmovsd %xmm0, 8(%rax)
|
||||
addq $8, %rax
|
||||
vmovsd %xmm0, 8(%rax,%rcx,8)
|
||||
vaddsd (%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == (%rax+8)
|
||||
subq $-8, %rax
|
||||
vaddsd -8(%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == -8(%rax+16)
|
||||
dec %rcx
|
||||
vaddsd 8(%rax,%rcx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8)
|
||||
movq %rcx, %rdx
|
||||
vaddsd 8(%rax,%rdx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8)
|
||||
vmovsd %xmm0, 8(%rax) # line 3 <----------------------------------+
|
||||
addq $8, %rax # |
|
||||
vmovsd %xmm0, 8(%rax,%rcx,8) # line 5 <-----------------------------------------------+
|
||||
vaddsd (%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == (%rax+8) ---+ |
|
||||
subq $-8, %rax # | |
|
||||
vaddsd -8(%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == -8(%rax+16) ---+ |
|
||||
dec %rcx # |
|
||||
vaddsd 8(%rax,%rcx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) --+
|
||||
movq %rcx, %rdx # |
|
||||
vaddsd 8(%rax,%rdx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) --+
|
||||
vmulsd %xmm1, %xmm0, %xmm0
|
||||
addq $8, %rax
|
||||
cmpq %rsi, %rax
|
||||
|
@@ -34,7 +34,8 @@ class TestFrontend(unittest.TestCase):
|
||||
)
|
||||
self.machine_model_tx2 = MachineModel(arch="tx2")
|
||||
self.semantics_csx = ArchSemantics(
|
||||
self.machine_model_csx, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/x86.yml")
|
||||
self.machine_model_csx,
|
||||
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/x86.yml"),
|
||||
)
|
||||
self.semantics_tx2 = ArchSemantics(
|
||||
self.machine_model_tx2,
|
||||
@@ -71,7 +72,11 @@ class TestFrontend(unittest.TestCase):
|
||||
|
||||
def test_frontend_AArch64(self):
|
||||
dg = KernelDG(
|
||||
self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2, self.semantics_tx2)
|
||||
self.kernel_AArch64,
|
||||
self.parser_AArch64,
|
||||
self.machine_model_tx2,
|
||||
self.semantics_tx2,
|
||||
)
|
||||
fe = Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "tx2.yml"))
|
||||
fe.full_analysis(self.kernel_AArch64, dg, verbose=True)
|
||||
# TODO compare output with checked string
|
||||
|
@@ -109,7 +109,8 @@ class TestMarkerUtils(unittest.TestCase):
|
||||
kernel_start = len(
|
||||
list(
|
||||
filter(
|
||||
None, (prologue + mov_start_var + bytes_var_1).split("\n")
|
||||
None,
|
||||
(prologue + mov_start_var + bytes_var_1).split("\n"),
|
||||
)
|
||||
)
|
||||
)
|
||||
@@ -142,7 +143,12 @@ class TestMarkerUtils(unittest.TestCase):
|
||||
epilogue = ".LE9:\t\t#12.2\n" "call dummy\n"
|
||||
kernel_length = len(list(filter(None, kernel.split("\n"))))
|
||||
|
||||
bytes_variations = [bytes_1_line, bytes_2_lines_1, bytes_2_lines_2, bytes_3_lines]
|
||||
bytes_variations = [
|
||||
bytes_1_line,
|
||||
bytes_2_lines_1,
|
||||
bytes_2_lines_2,
|
||||
bytes_3_lines,
|
||||
]
|
||||
mov_start_variations = [mov_start_1, mov_start_2]
|
||||
mov_end_variations = [mov_end_1, mov_end_2]
|
||||
# actual tests
|
||||
@@ -171,7 +177,8 @@ class TestMarkerUtils(unittest.TestCase):
|
||||
kernel_start = len(
|
||||
list(
|
||||
filter(
|
||||
None, (prologue + mov_start_var + bytes_var_1).split("\n")
|
||||
None,
|
||||
(prologue + mov_start_var + bytes_var_1).split("\n"),
|
||||
)
|
||||
)
|
||||
)
|
||||
|
@@ -24,7 +24,9 @@ class TestParserAArch64(unittest.TestCase):
|
||||
|
||||
def test_comment_parser(self):
|
||||
self.assertEqual(self._get_comment(self.parser, "// some comments"), "some comments")
|
||||
self.assertEqual(self._get_comment(self.parser, "\t\t//AA BB CC \t end \t"), "AA BB CC end")
|
||||
self.assertEqual(
|
||||
self._get_comment(self.parser, "\t\t//AA BB CC \t end \t"), "AA BB CC end"
|
||||
)
|
||||
self.assertEqual(
|
||||
self._get_comment(self.parser, "\t//// comment //// comment"),
|
||||
"// comment //// comment",
|
||||
@@ -36,7 +38,8 @@ class TestParserAArch64(unittest.TestCase):
|
||||
self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3")
|
||||
self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t//label1").name, ".L1")
|
||||
self.assertEqual(
|
||||
" ".join(self._get_label(self.parser, ".L1:\t\t\t//label1").comment), "label1"
|
||||
" ".join(self._get_label(self.parser, ".L1:\t\t\t//label1").comment),
|
||||
"label1",
|
||||
)
|
||||
with self.assertRaises(ParseException):
|
||||
self._get_label(self.parser, "\t.cfi_startproc")
|
||||
@@ -316,7 +319,8 @@ class TestParserAArch64(unittest.TestCase):
|
||||
value1 = self.parser.normalize_imd(imd_decimal_1)
|
||||
self.assertEqual(value1, self.parser.normalize_imd(imd_hex_1))
|
||||
self.assertEqual(
|
||||
self.parser.normalize_imd(imd_decimal_2), self.parser.normalize_imd(imd_hex_2)
|
||||
self.parser.normalize_imd(imd_decimal_2),
|
||||
self.parser.normalize_imd(imd_hex_2),
|
||||
)
|
||||
self.assertEqual(self.parser.normalize_imd(imd_float_11), value1)
|
||||
self.assertEqual(self.parser.normalize_imd(imd_float_12), value1)
|
||||
|
@@ -26,7 +26,8 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
self.assertEqual(self._get_comment(self.parser, "# some comments"), "some comments")
|
||||
self.assertEqual(self._get_comment(self.parser, "\t\t#AA BB CC \t end \t"), "AA BB CC end")
|
||||
self.assertEqual(
|
||||
self._get_comment(self.parser, "\t## comment ## comment"), "# comment ## comment"
|
||||
self._get_comment(self.parser, "\t## comment ## comment"),
|
||||
"# comment ## comment",
|
||||
)
|
||||
|
||||
def test_label_parser(self):
|
||||
@@ -35,7 +36,8 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3")
|
||||
self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t#label1").name, ".L1")
|
||||
self.assertEqual(
|
||||
" ".join(self._get_label(self.parser, ".L1:\t\t\t#label1").comment), "label1"
|
||||
" ".join(self._get_label(self.parser, ".L1:\t\t\t#label1").comment),
|
||||
"label1",
|
||||
)
|
||||
with self.assertRaises(ParseException):
|
||||
self._get_label(self.parser, "\t.cfi_startproc")
|
||||
@@ -47,7 +49,8 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
self.assertEqual(len(self._get_directive(self.parser, "\t.align\t16,0x90").parameters), 2)
|
||||
self.assertEqual(len(self._get_directive(self.parser, ".text").parameters), 0)
|
||||
self.assertEqual(
|
||||
len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters), 2
|
||||
len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters),
|
||||
2,
|
||||
)
|
||||
self.assertEqual(
|
||||
self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters[1],
|
||||
@@ -62,7 +65,12 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
self.parser,
|
||||
"\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support",
|
||||
).parameters,
|
||||
["__TEXT", "__eh_frame", "coalesced", "no_toc+strip_static_syms+live_support"],
|
||||
[
|
||||
"__TEXT",
|
||||
"__eh_frame",
|
||||
"coalesced",
|
||||
"no_toc+strip_static_syms+live_support",
|
||||
],
|
||||
)
|
||||
self.assertEqual(
|
||||
self._get_directive(
|
||||
@@ -74,7 +82,9 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
self._get_directive(self.parser, "\t.align\t16,0x90").parameters[1], "0x90"
|
||||
)
|
||||
self.assertEqual(
|
||||
self._get_directive(self.parser, " .byte 100,103,144 #IACA START")["name"],
|
||||
self._get_directive(self.parser, " .byte 100,103,144 #IACA START")[
|
||||
"name"
|
||||
],
|
||||
"byte",
|
||||
)
|
||||
self.assertEqual(
|
||||
@@ -242,10 +252,12 @@ class TestParserX86ATT(unittest.TestCase):
|
||||
imd_decimal_2 = {"value": "8"}
|
||||
imd_hex_2 = {"value": "8"}
|
||||
self.assertEqual(
|
||||
self.parser.normalize_imd(imd_decimal_1), self.parser.normalize_imd(imd_hex_1)
|
||||
self.parser.normalize_imd(imd_decimal_1),
|
||||
self.parser.normalize_imd(imd_hex_1),
|
||||
)
|
||||
self.assertEqual(
|
||||
self.parser.normalize_imd(imd_decimal_2), self.parser.normalize_imd(imd_hex_2)
|
||||
self.parser.normalize_imd(imd_decimal_2),
|
||||
self.parser.normalize_imd(imd_hex_2),
|
||||
)
|
||||
|
||||
def test_reg_dependency(self):
|
||||
|
@@ -11,8 +11,14 @@ from copy import deepcopy
|
||||
import networkx as nx
|
||||
from osaca.osaca import get_unmatched_instruction_ratio
|
||||
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
|
||||
from osaca.semantics import (INSTR_FLAGS, ArchSemantics, ISASemantics,
|
||||
KernelDG, MachineModel, reduce_to_section)
|
||||
from osaca.semantics import (
|
||||
INSTR_FLAGS,
|
||||
ArchSemantics,
|
||||
ISASemantics,
|
||||
KernelDG,
|
||||
MachineModel,
|
||||
reduce_to_section,
|
||||
)
|
||||
|
||||
|
||||
class TestSemanticTools(unittest.TestCase):
|
||||
@@ -66,7 +72,8 @@ class TestSemanticTools(unittest.TestCase):
|
||||
)
|
||||
cls.semantics_x86 = ISASemantics("x86")
|
||||
cls.semantics_csx = ArchSemantics(
|
||||
cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml")
|
||||
cls.machine_model_csx,
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml"),
|
||||
)
|
||||
cls.semantics_aarch64 = ISASemantics("aarch64")
|
||||
cls.semantics_tx2 = ArchSemantics(
|
||||
@@ -173,7 +180,12 @@ class TestSemanticTools(unittest.TestCase):
|
||||
)
|
||||
self.assertEqual(
|
||||
test_mm_x86.get_store_throughput(
|
||||
{"base": {"prefix": "NOT_IN_DB"}, "offset": None, "index": "NOT_NONE", "scale": 1}
|
||||
{
|
||||
"base": {"prefix": "NOT_IN_DB"},
|
||||
"offset": None,
|
||||
"index": "NOT_NONE",
|
||||
"scale": 1,
|
||||
}
|
||||
),
|
||||
[[1, "23"], [1, "4"]],
|
||||
)
|
||||
@@ -185,7 +197,12 @@ class TestSemanticTools(unittest.TestCase):
|
||||
)
|
||||
self.assertEqual(
|
||||
test_mm_arm.get_store_throughput(
|
||||
{"base": {"prefix": "NOT_IN_DB"}, "offset": None, "index": None, "scale": 1}
|
||||
{
|
||||
"base": {"prefix": "NOT_IN_DB"},
|
||||
"offset": None,
|
||||
"index": None,
|
||||
"scale": 1,
|
||||
}
|
||||
),
|
||||
[[1, "34"], [1, "5"]],
|
||||
)
|
||||
@@ -310,7 +327,10 @@ class TestSemanticTools(unittest.TestCase):
|
||||
|
||||
def test_memdependency_x86(self):
|
||||
dg = KernelDG(
|
||||
self.kernel_x86_memdep, self.parser_x86, self.machine_model_csx, self.semantics_csx
|
||||
self.kernel_x86_memdep,
|
||||
self.parser_x86,
|
||||
self.machine_model_csx,
|
||||
self.semantics_csx,
|
||||
)
|
||||
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {6, 8})
|
||||
@@ -322,7 +342,10 @@ class TestSemanticTools(unittest.TestCase):
|
||||
|
||||
def test_kernelDG_AArch64(self):
|
||||
dg = KernelDG(
|
||||
self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2, self.semantics_tx2
|
||||
self.kernel_AArch64,
|
||||
self.parser_AArch64,
|
||||
self.machine_model_tx2,
|
||||
self.semantics_tx2,
|
||||
)
|
||||
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {7, 8})
|
||||
@@ -400,7 +423,7 @@ class TestSemanticTools(unittest.TestCase):
|
||||
# based on line 6
|
||||
self.assertEqual(lc_deps[6]["latency"], 28.0)
|
||||
self.assertEqual(
|
||||
[(iform.line_number, lat) for iform, lat in lc_deps[6]['dependencies']],
|
||||
[(iform.line_number, lat) for iform, lat in lc_deps[6]["dependencies"]],
|
||||
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)],
|
||||
)
|
||||
|
||||
@@ -423,7 +446,8 @@ class TestSemanticTools(unittest.TestCase):
|
||||
# w/o flag dependencies: ID 5 w/ len=1
|
||||
# TODO discuss
|
||||
self.assertEqual(
|
||||
lc_deps[lcd_id2]["root"], dg.dg.nodes(data=True)[lcd_id2]["instruction_form"]
|
||||
lc_deps[lcd_id2]["root"],
|
||||
dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
|
||||
)
|
||||
self.assertEqual(len(lc_deps[lcd_id2]["dependencies"]), 1)
|
||||
self.assertEqual(
|
||||
@@ -438,7 +462,7 @@ class TestSemanticTools(unittest.TestCase):
|
||||
self.parser_x86,
|
||||
self.machine_model_csx,
|
||||
self.semantics_x86,
|
||||
timeout=10
|
||||
timeout=10,
|
||||
)
|
||||
end_time = time.perf_counter()
|
||||
time_10 = end_time - start_time
|
||||
@@ -448,7 +472,7 @@ class TestSemanticTools(unittest.TestCase):
|
||||
self.parser_x86,
|
||||
self.machine_model_csx,
|
||||
self.semantics_x86,
|
||||
timeout=2
|
||||
timeout=2,
|
||||
)
|
||||
end_time = time.perf_counter()
|
||||
time_2 = end_time - start_time
|
||||
|
@@ -1,33 +1,26 @@
|
||||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
from subprocess import check_call, check_output, CalledProcessError, STDOUT
|
||||
from itertools import chain
|
||||
import shutil
|
||||
from functools import lru_cache
|
||||
from glob import glob
|
||||
from pathlib import Path
|
||||
from pprint import pprint
|
||||
import socket
|
||||
import pickle
|
||||
import re
|
||||
import shutil
|
||||
import socket
|
||||
import sys
|
||||
from copy import deepcopy
|
||||
from glob import glob
|
||||
from itertools import chain
|
||||
from pathlib import Path
|
||||
from subprocess import STDOUT, CalledProcessError, check_call, check_output
|
||||
|
||||
import requests
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from osaca.osaca import reduce_to_section
|
||||
|
||||
from kerncraft.models import benchmark
|
||||
from kerncraft.incore_model import (
|
||||
parse_asm,
|
||||
asm_instrumentation,
|
||||
iaca_analyse_instrumented_binary,
|
||||
llvm_mca_analyse_instrumented_assembly,
|
||||
osaca_analyse_instrumented_assembly,
|
||||
llvm_mca_analyse_instrumented_assembly
|
||||
parse_asm,
|
||||
)
|
||||
|
||||
from kerncraft.models import benchmark
|
||||
from osaca.osaca import reduce_to_section
|
||||
|
||||
# Scaling of inner dimension for 1D, 2D and 3D kernels
|
||||
# * consider kernels to be compiled with multiple compilers and different options
|
||||
@@ -39,37 +32,50 @@ from kerncraft.incore_model import (
|
||||
# Collect inner loop body assembly for each kernel/compiler/options combination
|
||||
# * analyze with OSACA, IACA and LLVM-MCA
|
||||
|
||||
hosts_arch_map = {r"skylakesp2": "SKX",
|
||||
r"ivyep1": "IVB",
|
||||
r"naples1": "ZEN",
|
||||
r"rome1": "ZEN2",
|
||||
r"warmup": "TX2",
|
||||
r"qp4-node-[0-9]+": "A64FX"}
|
||||
hosts_arch_map = {
|
||||
r"skylakesp2": "SKX",
|
||||
r"ivyep1": "IVB",
|
||||
r"naples1": "ZEN",
|
||||
r"rome1": "ZEN2",
|
||||
r"warmup": "TX2",
|
||||
r"qp4-node-[0-9]+": "A64FX",
|
||||
}
|
||||
|
||||
arch_info = {
|
||||
'SKX': {
|
||||
'prepare': ['likwid-setFrequencies -f 2.4 -t 0'.split()],
|
||||
'IACA': 'SKX',
|
||||
'OSACA': 'SKX',
|
||||
'LLVM-MCA': '-mcpu=skylake-avx512',
|
||||
'Ithemal': 'skl',
|
||||
'isa': 'x86',
|
||||
'perfevents': [],
|
||||
"SKX": {
|
||||
"prepare": ["likwid-setFrequencies -f 2.4 -t 0".split()],
|
||||
"IACA": "SKX",
|
||||
"OSACA": "SKX",
|
||||
"LLVM-MCA": "-mcpu=skylake-avx512",
|
||||
"Ithemal": "skl",
|
||||
"isa": "x86",
|
||||
"perfevents": [],
|
||||
"cflags": {
|
||||
'icc': {
|
||||
"Ofast": "-Ofast -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O3": "-O3 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O2": "-O2 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O1": "-O1 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"icc": {
|
||||
"Ofast": (
|
||||
"-Ofast -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
|
||||
"-ffreestanding -falign-loops"
|
||||
).split(),
|
||||
"O3": (
|
||||
"-O3 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
|
||||
"-ffreestanding -falign-loops"
|
||||
).split(),
|
||||
"O2": (
|
||||
"-O2 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
|
||||
"-ffreestanding -falign-loops"
|
||||
).split(),
|
||||
"O1": (
|
||||
"-O1 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
|
||||
"-ffreestanding -falign-loops"
|
||||
).split(),
|
||||
},
|
||||
'clang': {
|
||||
"clang": {
|
||||
"Ofast": "-Ofast -march=skylake-avx512 -ffreestanding".split(),
|
||||
"O3": "-O3 -march=skylake-avx512 -ffreestanding".split(),
|
||||
"O2": "-O2 -march=skylake-avx512 -ffreestanding".split(),
|
||||
"O1": "-O1 -march=skylake-avx512 -ffreestanding".split(),
|
||||
|
||||
},
|
||||
'gcc': {
|
||||
"gcc": {
|
||||
"Ofast": "-Ofast -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
|
||||
"O3": "-O3 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
|
||||
"O2": "-O2 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
|
||||
@@ -77,17 +83,19 @@ arch_info = {
|
||||
},
|
||||
},
|
||||
},
|
||||
'IVB': {
|
||||
'prepare': ['likwid-setFrequencies -f 3.0 -t 0'.split()],
|
||||
'IACA': 'IVB',
|
||||
'OSACA': 'IVB',
|
||||
'LLVM-MCA': '-mcpu=ivybridge',
|
||||
'Ithemal': 'ivb',
|
||||
'isa': 'x86',
|
||||
'perfevents': [],
|
||||
"IVB": {
|
||||
"prepare": ["likwid-setFrequencies -f 3.0 -t 0".split()],
|
||||
"IACA": "IVB",
|
||||
"OSACA": "IVB",
|
||||
"LLVM-MCA": "-mcpu=ivybridge",
|
||||
"Ithemal": "ivb",
|
||||
"isa": "x86",
|
||||
"perfevents": [],
|
||||
"cflags": {
|
||||
"icc": {
|
||||
"Ofast": "-Ofast -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"Ofast": (
|
||||
"-Ofast -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops"
|
||||
).split(),
|
||||
"O3": "-O3 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O2": "-O2 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O1": "-O1 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
@@ -106,14 +114,14 @@ arch_info = {
|
||||
},
|
||||
},
|
||||
},
|
||||
'ZEN': {
|
||||
'prepare': ['likwid-setFrequencies -f 2.3 -t 0'.split()],
|
||||
'IACA': None,
|
||||
'OSACA': 'ZEN1',
|
||||
'LLVM-MCA': '-mcpu=znver1',
|
||||
'Ithemal': None,
|
||||
'isa': 'x86',
|
||||
'perfevents': [],
|
||||
"ZEN": {
|
||||
"prepare": ["likwid-setFrequencies -f 2.3 -t 0".split()],
|
||||
"IACA": None,
|
||||
"OSACA": "ZEN1",
|
||||
"LLVM-MCA": "-mcpu=znver1",
|
||||
"Ithemal": None,
|
||||
"isa": "x86",
|
||||
"perfevents": [],
|
||||
"cflags": {
|
||||
"clang": {
|
||||
"Ofast": "-Ofast -march=znver1 -ffreestanding".split(),
|
||||
@@ -128,21 +136,23 @@ arch_info = {
|
||||
"O1": "-O1 -march=znver1 -ffreestanding -falign-loops=16".split(),
|
||||
},
|
||||
"icc": {
|
||||
"Ofast": "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"Ofast": (
|
||||
"-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops"
|
||||
).split(),
|
||||
"O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
},
|
||||
},
|
||||
},
|
||||
'ZEN2': {
|
||||
'prepare': ['likwid-setFrequencies -f 2.35 -t 0'.split()],
|
||||
'IACA': None,
|
||||
'OSACA': 'ZEN2',
|
||||
'LLVM-MCA': '-mcpu=znver2',
|
||||
'Ithemal': None,
|
||||
'isa': 'x86',
|
||||
'perfevents': [],
|
||||
"ZEN2": {
|
||||
"prepare": ["likwid-setFrequencies -f 2.35 -t 0".split()],
|
||||
"IACA": None,
|
||||
"OSACA": "ZEN2",
|
||||
"LLVM-MCA": "-mcpu=znver2",
|
||||
"Ithemal": None,
|
||||
"isa": "x86",
|
||||
"perfevents": [],
|
||||
"cflags": {
|
||||
"clang": {
|
||||
"Ofast": "-Ofast -march=znver2 -ffreestanding".split(),
|
||||
@@ -157,22 +167,24 @@ arch_info = {
|
||||
"O1": "-O1 -march=znver2 -ffreestanding -falign-loops=16".split(),
|
||||
},
|
||||
"icc": {
|
||||
"Ofast": "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"Ofast": (
|
||||
"-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops"
|
||||
).split(),
|
||||
"O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
"O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||
},
|
||||
},
|
||||
},
|
||||
'TX2': {
|
||||
'Clock [MHz]': 2200, # reading out via perf. counters is not supported
|
||||
'IACA': None,
|
||||
'OSACA': 'TX2',
|
||||
'assign_optimal_throughput': True,
|
||||
'LLVM-MCA': '-mcpu=thunderx2t99 -march=aarch64',
|
||||
'Ithemal': None,
|
||||
'isa': 'aarch64',
|
||||
'perfevents': [],
|
||||
"TX2": {
|
||||
"Clock [MHz]": 2200, # reading out via perf. counters is not supported
|
||||
"IACA": None,
|
||||
"OSACA": "TX2",
|
||||
"assign_optimal_throughput": True,
|
||||
"LLVM-MCA": "-mcpu=thunderx2t99 -march=aarch64",
|
||||
"Ithemal": None,
|
||||
"isa": "aarch64",
|
||||
"perfevents": [],
|
||||
"cflags": {
|
||||
"clang": {
|
||||
"Ofast": "-Ofast -target aarch64-unknown-linux-gnu -ffreestanding".split(),
|
||||
@@ -188,16 +200,16 @@ arch_info = {
|
||||
},
|
||||
},
|
||||
},
|
||||
'A64FX': {
|
||||
'Clock [MHz]': 1800, # reading out via perf. counters is not supported
|
||||
'L2_volume_metric': 'L1<->L2 data volume [GBytes]',
|
||||
'IACA': None,
|
||||
'OSACA': 'A64FX',
|
||||
'assign_optimal_throughput': False,
|
||||
'LLVM-MCA': '-mcpu=a64fx -march=aarch64',
|
||||
'Ithemal': None,
|
||||
'isa': 'aarch64',
|
||||
'perfevents': [],
|
||||
"A64FX": {
|
||||
"Clock [MHz]": 1800, # reading out via perf. counters is not supported
|
||||
"L2_volume_metric": "L1<->L2 data volume [GBytes]",
|
||||
"IACA": None,
|
||||
"OSACA": "A64FX",
|
||||
"assign_optimal_throughput": False,
|
||||
"LLVM-MCA": "-mcpu=a64fx -march=aarch64",
|
||||
"Ithemal": None,
|
||||
"isa": "aarch64",
|
||||
"perfevents": [],
|
||||
"cflags": {
|
||||
"gcc": {
|
||||
"Ofast": "-Ofast -msve-vector-bits=512 -march=armv8.2-a+sve -ffreestanding".split(),
|
||||
@@ -211,7 +223,7 @@ arch_info = {
|
||||
"O2": "-O2 -target aarch64-unknown-linux-gnu -ffreestanding".split(),
|
||||
"O1": "-O1 -target aarch64-unknown-linux-gnu -ffreestanding".split(),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -231,12 +243,13 @@ def get_kernels(kernels=None):
|
||||
if kernels is None:
|
||||
kernels = []
|
||||
for f in glob("kernels/*.c"):
|
||||
f = f.rsplit('.', 1)[0].split('/', 1)[1]
|
||||
f = f.rsplit(".", 1)[0].split("/", 1)[1]
|
||||
if f == "dummy":
|
||||
continue
|
||||
kernels.append(f)
|
||||
return kernels
|
||||
|
||||
|
||||
# Columns:
|
||||
# arch
|
||||
# kernel
|
||||
@@ -259,6 +272,7 @@ def get_kernels(kernels=None):
|
||||
# allruns [list (length, repetitions, cy/it, L2 B/it)]
|
||||
# perfevents [dict event: counter/it]
|
||||
|
||||
|
||||
def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mca=True):
|
||||
arch = get_current_arch()
|
||||
if arch is None:
|
||||
@@ -268,90 +282,132 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
|
||||
islocal = True
|
||||
arches = [arch]
|
||||
ainfo = arch_info.get(arch)
|
||||
if 'prepare' in ainfo:
|
||||
for cmd in ainfo['prepare']:
|
||||
if "prepare" in ainfo:
|
||||
for cmd in ainfo["prepare"]:
|
||||
check_call(cmd)
|
||||
for arch in arches:
|
||||
ainfo = arch_info.get(arch)
|
||||
print(arch)
|
||||
data_path = Path(f"build/{arch}/data.pkl")
|
||||
if data_path.exists():
|
||||
with data_path.open('rb') as f:
|
||||
with data_path.open("rb") as f:
|
||||
data = pickle.load(f)
|
||||
else:
|
||||
data = []
|
||||
data_lastsaved = deepcopy(data)
|
||||
for compiler, compiler_cflags in ainfo['cflags'].items():
|
||||
for compiler, compiler_cflags in ainfo["cflags"].items():
|
||||
if not shutil.which(compiler) and islocal:
|
||||
print(compiler, "not found in path! Skipping...")
|
||||
continue
|
||||
for cflags_name, cflags in compiler_cflags.items():
|
||||
for kernel in get_kernels():
|
||||
print(f"{kernel:<15} {arch:>5} {compiler:>5} {cflags_name:>6}",
|
||||
end=": ", flush=True)
|
||||
row = list([r for r in data
|
||||
if r['arch'] == arch and r['kernel'] == kernel and
|
||||
r['compiler'] == compiler and r['cflags_name'] == cflags_name])
|
||||
print(
|
||||
f"{kernel:<15} {arch:>5} {compiler:>5} {cflags_name:>6}",
|
||||
end=": ",
|
||||
flush=True,
|
||||
)
|
||||
row = list(
|
||||
[
|
||||
r
|
||||
for r in data
|
||||
if r["arch"] == arch
|
||||
and r["kernel"] == kernel
|
||||
and r["compiler"] == compiler
|
||||
and r["cflags_name"] == cflags_name
|
||||
]
|
||||
)
|
||||
if row:
|
||||
row = row[0]
|
||||
else:
|
||||
orig_row = None
|
||||
row = {
|
||||
'arch': arch,
|
||||
'kernel': kernel,
|
||||
'compiler': compiler,
|
||||
'cflags_name': cflags_name,
|
||||
'element_size': 8,
|
||||
"arch": arch,
|
||||
"kernel": kernel,
|
||||
"compiler": compiler,
|
||||
"cflags_name": cflags_name,
|
||||
"element_size": 8,
|
||||
}
|
||||
data.append(row)
|
||||
|
||||
# Build
|
||||
print("build", end="", flush=True)
|
||||
asm_path, exec_path, overwrite = build_kernel(
|
||||
kernel, arch, compiler, cflags, cflags_name, dontbuild=not islocal)
|
||||
kernel,
|
||||
arch,
|
||||
compiler,
|
||||
cflags,
|
||||
cflags_name,
|
||||
dontbuild=not islocal,
|
||||
)
|
||||
|
||||
if overwrite:
|
||||
# clear all measurement information
|
||||
row['best_length'] = None
|
||||
row['best_runtime'] = None
|
||||
row['L2_traffic'] = None
|
||||
row['allruns'] = None
|
||||
row['perfevents'] = None
|
||||
row["best_length"] = None
|
||||
row["best_runtime"] = None
|
||||
row["L2_traffic"] = None
|
||||
row["allruns"] = None
|
||||
row["perfevents"] = None
|
||||
|
||||
# Mark for IACA, OSACA and LLVM-MCA
|
||||
print("mark", end="", flush=True)
|
||||
try:
|
||||
marked_asmfile, marked_objfile, row['pointer_increment'], overwrite = mark(
|
||||
asm_path, compiler, cflags, isa=ainfo['isa'], overwrite=overwrite)
|
||||
row['marking_error'] = None
|
||||
(
|
||||
marked_asmfile,
|
||||
marked_objfile,
|
||||
row["pointer_increment"],
|
||||
overwrite,
|
||||
) = mark(
|
||||
asm_path,
|
||||
compiler,
|
||||
cflags,
|
||||
isa=ainfo["isa"],
|
||||
overwrite=overwrite,
|
||||
)
|
||||
row["marking_error"] = None
|
||||
except ValueError as e:
|
||||
row['marking_error'] = str(e)
|
||||
row["marking_error"] = str(e)
|
||||
print(":", e)
|
||||
continue
|
||||
|
||||
if overwrite:
|
||||
# clear all model generated information
|
||||
for model in ['IACA', 'OSACA', 'LLVM-MCA', 'Ithemal']:
|
||||
for k in ['ports', 'prediction', 'throughput', 'cp', 'lcd', 'raw']:
|
||||
row[model+'_'+k] = None
|
||||
|
||||
for model in ['IACA', 'OSACA', 'LLVM-MCA', 'Ithemal']:
|
||||
for k in ['ports', 'prediction', 'throughput', 'cp', 'lcd', 'raw']:
|
||||
if model+'_'+k not in row:
|
||||
row[model+'_'+k] = None
|
||||
for model in ["IACA", "OSACA", "LLVM-MCA", "Ithemal"]:
|
||||
for k in [
|
||||
"ports",
|
||||
"prediction",
|
||||
"throughput",
|
||||
"cp",
|
||||
"lcd",
|
||||
"raw",
|
||||
]:
|
||||
row[model + "_" + k] = None
|
||||
|
||||
for model in ["IACA", "OSACA", "LLVM-MCA", "Ithemal"]:
|
||||
for k in [
|
||||
"ports",
|
||||
"prediction",
|
||||
"throughput",
|
||||
"cp",
|
||||
"lcd",
|
||||
"raw",
|
||||
]:
|
||||
if model + "_" + k not in row:
|
||||
row[model + "_" + k] = None
|
||||
|
||||
# Analyze with IACA, if requested and configured
|
||||
if iaca and ainfo['IACA'] is not None:
|
||||
if iaca and ainfo["IACA"] is not None:
|
||||
print("IACA", end="", flush=True)
|
||||
if not row.get('IACA_ports'):
|
||||
row['IACA_raw'] = iaca_analyse_instrumented_binary(
|
||||
marked_objfile, micro_architecture=ainfo['IACA'])
|
||||
row['IACA_ports'] = \
|
||||
{k: v/(row['pointer_increment']/row['element_size'])
|
||||
for k,v in row['IACA_raw']['port cycles'].items()}
|
||||
row['IACA_prediction'] = row['IACA_raw']['throughput']/(
|
||||
row['pointer_increment']/row['element_size'])
|
||||
row['IACA_throughput'] = max(row['IACA_ports'].values())
|
||||
if not row.get("IACA_ports"):
|
||||
row["IACA_raw"] = iaca_analyse_instrumented_binary(
|
||||
marked_objfile, micro_architecture=ainfo["IACA"]
|
||||
)
|
||||
row["IACA_ports"] = {
|
||||
k: v / (row["pointer_increment"] / row["element_size"])
|
||||
for k, v in row["IACA_raw"]["port cycles"].items()
|
||||
}
|
||||
row["IACA_prediction"] = row["IACA_raw"]["throughput"] / (
|
||||
row["pointer_increment"] / row["element_size"]
|
||||
)
|
||||
row["IACA_throughput"] = max(row["IACA_ports"].values())
|
||||
print(". ", end="", flush=True)
|
||||
else:
|
||||
print("! ", end="", flush=True)
|
||||
@@ -359,56 +415,70 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
|
||||
# Analyze with OSACA, if requested
|
||||
if osaca:
|
||||
print("OSACA", end="", flush=True)
|
||||
if not row.get('OSACA_ports'):
|
||||
row['OSACA_raw'] = osaca_analyse_instrumented_assembly(
|
||||
marked_asmfile, micro_architecture=ainfo['OSACA'],
|
||||
assign_optimal_throughput=ainfo.get('assign_optimal_throughput',
|
||||
True))
|
||||
row['OSACA_ports'] = \
|
||||
{k: v/(row['pointer_increment']/row['element_size'])
|
||||
for k,v in row['OSACA_raw']['port cycles'].items()}
|
||||
row['OSACA_prediction'] = row['OSACA_raw']['throughput']/(
|
||||
row['pointer_increment']/row['element_size'])
|
||||
row['OSACA_throughput'] = max(row['OSACA_ports'].values())
|
||||
row['OSACA_cp'] = row['OSACA_raw']['cp_latency']/(
|
||||
row['pointer_increment']/row['element_size'])
|
||||
row['OSACA_lcd'] = row['OSACA_raw']['lcd']/(
|
||||
row['pointer_increment']/row['element_size'])
|
||||
if not row.get("OSACA_ports"):
|
||||
row["OSACA_raw"] = osaca_analyse_instrumented_assembly(
|
||||
marked_asmfile,
|
||||
micro_architecture=ainfo["OSACA"],
|
||||
assign_optimal_throughput=ainfo.get(
|
||||
"assign_optimal_throughput", True
|
||||
),
|
||||
)
|
||||
row["OSACA_ports"] = {
|
||||
k: v / (row["pointer_increment"] / row["element_size"])
|
||||
for k, v in row["OSACA_raw"]["port cycles"].items()
|
||||
}
|
||||
row["OSACA_prediction"] = row["OSACA_raw"]["throughput"] / (
|
||||
row["pointer_increment"] / row["element_size"]
|
||||
)
|
||||
row["OSACA_throughput"] = max(row["OSACA_ports"].values())
|
||||
row["OSACA_cp"] = row["OSACA_raw"]["cp_latency"] / (
|
||||
row["pointer_increment"] / row["element_size"]
|
||||
)
|
||||
row["OSACA_lcd"] = row["OSACA_raw"]["lcd"] / (
|
||||
row["pointer_increment"] / row["element_size"]
|
||||
)
|
||||
print(". ", end="", flush=True)
|
||||
else:
|
||||
print("! ", end="", flush=True)
|
||||
|
||||
# Analyze with LLVM-MCA, if requested and configured
|
||||
if llvm_mca and ainfo['LLVM-MCA'] is not None:
|
||||
if llvm_mca and ainfo["LLVM-MCA"] is not None:
|
||||
print("LLVM-MCA", end="", flush=True)
|
||||
if not row.get('LLVM-MCA_ports'):
|
||||
row['LLVM-MCA_raw'] = llvm_mca_analyse_instrumented_assembly(
|
||||
if not row.get("LLVM-MCA_ports"):
|
||||
row["LLVM-MCA_raw"] = llvm_mca_analyse_instrumented_assembly(
|
||||
marked_asmfile,
|
||||
micro_architecture=ainfo['LLVM-MCA'],
|
||||
isa=ainfo['isa'])
|
||||
row['LLVM-MCA_ports'] = \
|
||||
{k: v/(row['pointer_increment']/row['element_size'])
|
||||
for k,v in row['LLVM-MCA_raw']['port cycles'].items()}
|
||||
row['LLVM-MCA_prediction'] =row['LLVM-MCA_raw']['throughput']/(
|
||||
row['pointer_increment']/row['element_size'])
|
||||
row['LLVM-MCA_throughput'] = max(row['LLVM-MCA_ports'].values())
|
||||
row['LLVM-MCA_cp'] = row['LLVM-MCA_raw']['cp_latency']/(
|
||||
row['pointer_increment']/row['element_size'])
|
||||
row['LLVM-MCA_lcd'] = row['LLVM-MCA_raw']['lcd']/(
|
||||
row['pointer_increment']/row['element_size'])
|
||||
micro_architecture=ainfo["LLVM-MCA"],
|
||||
isa=ainfo["isa"],
|
||||
)
|
||||
row["LLVM-MCA_ports"] = {
|
||||
k: v / (row["pointer_increment"] / row["element_size"])
|
||||
for k, v in row["LLVM-MCA_raw"]["port cycles"].items()
|
||||
}
|
||||
row["LLVM-MCA_prediction"] = row["LLVM-MCA_raw"]["throughput"] / (
|
||||
row["pointer_increment"] / row["element_size"]
|
||||
)
|
||||
row["LLVM-MCA_throughput"] = max(row["LLVM-MCA_ports"].values())
|
||||
row["LLVM-MCA_cp"] = row["LLVM-MCA_raw"]["cp_latency"] / (
|
||||
row["pointer_increment"] / row["element_size"]
|
||||
)
|
||||
row["LLVM-MCA_lcd"] = row["LLVM-MCA_raw"]["lcd"] / (
|
||||
row["pointer_increment"] / row["element_size"]
|
||||
)
|
||||
print(". ", end="", flush=True)
|
||||
else:
|
||||
print("! ", end="", flush=True)
|
||||
|
||||
|
||||
# Analyze with Ithemal, if not running local and configured
|
||||
if ainfo['Ithemal'] is not None and not islocal:
|
||||
if ainfo["Ithemal"] is not None and not islocal:
|
||||
print("Ithemal", end="", flush=True)
|
||||
if not row.get('Ithemal_prediction'):
|
||||
if not row.get("Ithemal_prediction"):
|
||||
with open(marked_asmfile) as f:
|
||||
parsed_code = parse_asm(f.read(), ainfo['isa'])
|
||||
kernel = reduce_to_section(parsed_code, ainfo['isa'])
|
||||
row['Ithemal_prediction'] = get_ithemal_prediction(
|
||||
get_intel_style_code(marked_objfile), model=ainfo['Ithemal'])
|
||||
parsed_code = parse_asm(f.read(), ainfo["isa"])
|
||||
kernel = reduce_to_section(parsed_code, ainfo["isa"])
|
||||
row["Ithemal_prediction"] = get_ithemal_prediction(
|
||||
get_intel_style_code(marked_objfile),
|
||||
model=ainfo["Ithemal"],
|
||||
)
|
||||
print(". ", end="", flush=True)
|
||||
else:
|
||||
print("! ", end="", flush=True)
|
||||
@@ -416,43 +486,45 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
|
||||
if measurements and islocal:
|
||||
# run measurements if on same hardware
|
||||
print("scale", end="", flush=True)
|
||||
if not row.get('allruns'):
|
||||
if not row.get("allruns"):
|
||||
# find best length with concurrent L2 measurement
|
||||
scaling_runs, best = scalingrun(exec_path)
|
||||
row['best_length'] = best[0]
|
||||
row['best_runtime'] = best[2]
|
||||
row['L2_traffic'] = best[3]
|
||||
row['allruns'] = scaling_runs
|
||||
row["best_length"] = best[0]
|
||||
row["best_runtime"] = best[2]
|
||||
row["L2_traffic"] = best[3]
|
||||
row["allruns"] = scaling_runs
|
||||
print(f"({best[0]}). ", end="", flush=True)
|
||||
else:
|
||||
print(f"({row.get('best_length', None)})! ", end="", flush=True)
|
||||
print(
|
||||
f"({row.get('best_length', None)})! ",
|
||||
end="",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
print()
|
||||
|
||||
# dump to file
|
||||
if data != data_lastsaved:
|
||||
print('saving... ', end="", flush=True)
|
||||
with data_path.open('wb') as f:
|
||||
print("saving... ", end="", flush=True)
|
||||
with data_path.open("wb") as f:
|
||||
try:
|
||||
pickle.dump(data, f)
|
||||
data_lastsaved = deepcopy(data)
|
||||
print('saved!')
|
||||
print("saved!")
|
||||
except KeyboardInterrupt:
|
||||
f.seek(0)
|
||||
pickle.dump(data, f)
|
||||
print('saved!')
|
||||
print("saved!")
|
||||
sys.exit()
|
||||
|
||||
|
||||
|
||||
def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1*1024+1)):
|
||||
#print('{:>8} {:>10} {:>10}'.format("x", "cy/it", "L2 B/it"))
|
||||
parameters = chain(*[[total_iterations//i, i] for i in lengths])
|
||||
def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1 * 1024 + 1)):
|
||||
# print('{:>8} {:>10} {:>10}'.format("x", "cy/it", "L2 B/it"))
|
||||
parameters = chain(*[[total_iterations // i, i] for i in lengths])
|
||||
# TODO use arch specific events and group
|
||||
r, o = perfctr(chain([kernel_exec], map(str, parameters)),
|
||||
1, group="L2")
|
||||
r, o = perfctr(chain([kernel_exec], map(str, parameters)), 1, group="L2")
|
||||
global_infos = {}
|
||||
for m in [re.match(r"(:?([a-z_\-0-9]+):)?([a-z]+): ([a-z\_\-0-9]+)", l) for l in o]:
|
||||
for m in [re.match(r"(:?([a-z_\-0-9]+):)?([a-z]+): ([a-z\_\-0-9]+)", line) for line in o]:
|
||||
if m is not None:
|
||||
try:
|
||||
v = int(m.group(4))
|
||||
@@ -464,37 +536,45 @@ def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1*1024+1
|
||||
r[m.group(2)][m.group(3)] = v
|
||||
|
||||
results = []
|
||||
best = (float('inf'), None)
|
||||
best = (float("inf"), None)
|
||||
for markername, mmetrics in r.items():
|
||||
kernelname, repetitions, *_, xlength = markername.split('_')
|
||||
kernelname, repetitions, *_, xlength = markername.split("_")
|
||||
repetitions = int(repetitions)
|
||||
xlength = int(xlength)
|
||||
total_iterations = mmetrics['repetitions'] * mmetrics['iterations']
|
||||
if 'Clock [MHz]' in mmetrics:
|
||||
clock_hz = mmetrics['Clock [MHz]']*1e6
|
||||
total_iterations = mmetrics["repetitions"] * mmetrics["iterations"]
|
||||
if "Clock [MHz]" in mmetrics:
|
||||
clock_hz = mmetrics["Clock [MHz]"] * 1e6
|
||||
else:
|
||||
clock_hz = arch_info[get_current_arch()]['Clock [MHz]']*1e6
|
||||
cyperit = mmetrics['Runtime (RDTSC) [s]'] * clock_hz / total_iterations
|
||||
clock_hz = arch_info[get_current_arch()]["Clock [MHz]"] * 1e6
|
||||
cyperit = mmetrics["Runtime (RDTSC) [s]"] * clock_hz / total_iterations
|
||||
# TODO use arch specific events and group
|
||||
if 'L2D load data volume [GBytes]' in mmetrics:
|
||||
l2perit = (mmetrics['L2D load data volume [GBytes]'] +
|
||||
mmetrics.get('L2D evict data volume [GBytes]', 0))*1e9 / total_iterations
|
||||
if "L2D load data volume [GBytes]" in mmetrics:
|
||||
l2perit = (
|
||||
(
|
||||
mmetrics["L2D load data volume [GBytes]"]
|
||||
+ mmetrics.get("L2D evict data volume [GBytes]", 0)
|
||||
)
|
||||
* 1e9
|
||||
/ total_iterations
|
||||
)
|
||||
else:
|
||||
l2perit = \
|
||||
mmetrics[arch_info[get_current_arch()]['L2_volume_metric']]*1e9 / total_iterations
|
||||
results.append(
|
||||
(xlength, repetitions, cyperit, l2perit)
|
||||
)
|
||||
l2perit = (
|
||||
mmetrics[arch_info[get_current_arch()]["L2_volume_metric"]]
|
||||
* 1e9
|
||||
/ total_iterations
|
||||
)
|
||||
results.append((xlength, repetitions, cyperit, l2perit))
|
||||
if cyperit < best[0]:
|
||||
best = cyperit, results[-1]
|
||||
return results, best[1]
|
||||
|
||||
|
||||
def mark(asm_path, compiler, cflags, isa, overwrite=False):
|
||||
# Mark assembly for IACA, OSACA and LLVM-MCA
|
||||
marked_asm_path = Path(asm_path).with_suffix(".marked.s")
|
||||
if not marked_asm_path.exists() or overwrite:
|
||||
overwrite = True
|
||||
with open(asm_path) as fa, open(marked_asm_path, 'w') as fm:
|
||||
with open(asm_path) as fa, open(marked_asm_path, "w") as fm:
|
||||
try:
|
||||
_, pointer_increment = asm_instrumentation(fa, fm, isa=isa)
|
||||
except KeyboardInterrupt:
|
||||
@@ -505,37 +585,46 @@ def mark(asm_path, compiler, cflags, isa, overwrite=False):
|
||||
# use marked assembly and extract asm_block and pointer_increment
|
||||
with open(marked_asm_path) as f:
|
||||
marked_asm = f.read()
|
||||
m = re.search(r'pointer_increment=([0-9]+)', marked_asm)
|
||||
m = re.search(r"pointer_increment=([0-9]+)", marked_asm)
|
||||
if m:
|
||||
pointer_increment = int(m.group(1))
|
||||
else:
|
||||
os.unlink(marked_asm_path)
|
||||
raise ValueError(
|
||||
"Could not find `pointer_increment=<byte increment>`. Plase place into file.")
|
||||
"Could not find `pointer_increment=<byte increment>`. Plase place into file."
|
||||
)
|
||||
print("! ", end="", flush=True)
|
||||
|
||||
# Compile marked assembly to object for IACA
|
||||
marked_obj = Path(asm_path).with_suffix(".marked.o")
|
||||
if not marked_obj.exists():
|
||||
check_call([compiler] + ['-c', str(marked_asm_path), '-o', str(marked_obj)])
|
||||
|
||||
check_call([compiler] + ["-c", str(marked_asm_path), "-o", str(marked_obj)])
|
||||
|
||||
return str(marked_asm_path), str(marked_obj), pointer_increment, overwrite
|
||||
|
||||
|
||||
def build_kernel(kernel, architecture, compiler, cflags, cflags_name, overwrite=False,
|
||||
dontbuild=False):
|
||||
def build_kernel(
|
||||
kernel,
|
||||
architecture,
|
||||
compiler,
|
||||
cflags,
|
||||
cflags_name,
|
||||
overwrite=False,
|
||||
dontbuild=False,
|
||||
):
|
||||
build_path = f"build/{architecture}/{compiler}/{cflags_name}"
|
||||
kernel_assembly = f"{build_path}/{kernel}.s"
|
||||
kernel_object= f"{build_path}/{kernel}.o"
|
||||
kernel_object = f"{build_path}/{kernel}.o"
|
||||
executable = f"{build_path}/{kernel}"
|
||||
Path(build_path).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if not overwrite:
|
||||
# Overwrite if any kernel specific file is missing
|
||||
overwrite = (
|
||||
not os.path.exists(kernel_object) or
|
||||
not os.path.exists(kernel_assembly) or
|
||||
not os.path.exists(executable))
|
||||
not os.path.exists(kernel_object)
|
||||
or not os.path.exists(kernel_assembly)
|
||||
or not os.path.exists(executable)
|
||||
)
|
||||
|
||||
if dontbuild and overwrite:
|
||||
raise ValueError("Must build, but not allowed.")
|
||||
@@ -545,39 +634,43 @@ def build_kernel(kernel, architecture, compiler, cflags, cflags_name, overwrite=
|
||||
|
||||
if not Path(f"{build_path}/compiler_version").exists():
|
||||
# Document compiler version
|
||||
with open(f"{build_path}/compiler_version", 'w') as f:
|
||||
f.write(check_output([compiler, "-v"], encoding='utf8', stderr=STDOUT))
|
||||
with open(f"{build_path}/compiler_version", "w") as f:
|
||||
f.write(check_output([compiler, "-v"], encoding="utf8", stderr=STDOUT))
|
||||
|
||||
if overwrite:
|
||||
# build object + assembly
|
||||
check_call([compiler] +
|
||||
cflags +
|
||||
["-c", f"kernels/{kernel}.c", "-o", kernel_object])
|
||||
check_call([compiler] +
|
||||
cflags +
|
||||
["-c", f"kernels/{kernel}.c", "-S", "-o", kernel_assembly])
|
||||
check_call([compiler] + cflags + ["-c", f"kernels/{kernel}.c", "-o", kernel_object])
|
||||
check_call(
|
||||
[compiler] + cflags + ["-c", f"kernels/{kernel}.c", "-S", "-o", kernel_assembly]
|
||||
)
|
||||
|
||||
# build main and link executable
|
||||
executable_cflags = [
|
||||
os.environ["LIKWID_DEFINES"],
|
||||
os.environ["LIKWID_INC"],
|
||||
os.environ["LIKWID_LIB"]
|
||||
] + ['-Ofast']
|
||||
check_call([compiler] + executable_cflags + [
|
||||
f"{build_path}/dummy.o",
|
||||
kernel_object,
|
||||
"-DMAIN",
|
||||
f"kernels/{kernel}.c",
|
||||
"-llikwid",
|
||||
"-o", executable])
|
||||
os.environ["LIKWID_LIB"],
|
||||
] + ["-Ofast"]
|
||||
check_call(
|
||||
[compiler]
|
||||
+ executable_cflags
|
||||
+ [
|
||||
f"{build_path}/dummy.o",
|
||||
kernel_object,
|
||||
"-DMAIN",
|
||||
f"kernels/{kernel}.c",
|
||||
"-llikwid",
|
||||
"-o",
|
||||
executable,
|
||||
]
|
||||
)
|
||||
print(". ", end="", flush=True)
|
||||
else:
|
||||
print("! ", end="", flush=True)
|
||||
|
||||
|
||||
return kernel_assembly, executable, overwrite
|
||||
|
||||
|
||||
def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
|
||||
def perfctr(cmd, cores, group="MEM", code_markers=True, verbose=0):
|
||||
"""
|
||||
Run *cmd* with likwid-perfctr and returns result as dict.
|
||||
|
||||
@@ -586,30 +679,32 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
|
||||
if CLI argument cores > 1, running with multi-core, otherwise single-core
|
||||
"""
|
||||
# Making sure likwid-perfctr is available:
|
||||
if benchmark.find_executable('likwid-perfctr') is None:
|
||||
print("likwid-perfctr was not found. Make sure likwid is installed and found in PATH.",
|
||||
file=sys.stderr)
|
||||
if benchmark.find_executable("likwid-perfctr") is None:
|
||||
print(
|
||||
"likwid-perfctr was not found. Make sure likwid is installed and found in PATH.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
# FIXME currently only single-core measurements are supported!
|
||||
perf_cmd = ['likwid-perfctr', '-f', '-O', '-g', group]
|
||||
perf_cmd = ["likwid-perfctr", "-f", "-O", "-g", group]
|
||||
|
||||
cpu = 'S0:0'
|
||||
cpu = "S0:0"
|
||||
if cores > 1:
|
||||
cpu += '-'+str(cores-1)
|
||||
cpu += "-" + str(cores - 1)
|
||||
|
||||
# Pinned and measured on cpu
|
||||
perf_cmd += ['-C', cpu]
|
||||
perf_cmd += ["-C", cpu]
|
||||
|
||||
# code must be marked using likwid markers
|
||||
perf_cmd.append('-m')
|
||||
perf_cmd.append("-m")
|
||||
|
||||
perf_cmd += cmd
|
||||
if verbose > 1:
|
||||
print(' '.join(perf_cmd))
|
||||
print(" ".join(perf_cmd))
|
||||
try:
|
||||
with benchmark.fix_env_variable('OMP_NUM_THREADS', None):
|
||||
output = check_output(perf_cmd).decode('utf-8').split('\n')
|
||||
with benchmark.fix_env_variable("OMP_NUM_THREADS", None):
|
||||
output = check_output(perf_cmd).decode("utf-8").split("\n")
|
||||
except CalledProcessError as e:
|
||||
print("Executing benchmark failed: {!s}".format(e), file=sys.stderr)
|
||||
sys.exit(1)
|
||||
@@ -626,7 +721,7 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
|
||||
m = re.match(r"TABLE,Region ([a-z\-0-9_]+),", line)
|
||||
if m:
|
||||
cur_region_name = m.group(1)
|
||||
line = line.split(',')
|
||||
line = line.split(",")
|
||||
try:
|
||||
# Metrics
|
||||
cur_region_data[line[0]] = float(line[1])
|
||||
@@ -639,12 +734,13 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
|
||||
continue
|
||||
try:
|
||||
# Event counters
|
||||
if line[2] == '-' or line[2] == 'nan':
|
||||
if line[2] == "-" or line[2] == "nan":
|
||||
counter_value = 0
|
||||
else:
|
||||
counter_value = int(line[2])
|
||||
if re.fullmatch(r'[A-Z0-9_]+', line[0]) and \
|
||||
re.fullmatch(r'[A-Z0-9]+(:[A-Z0-9]+=[0-9A-Fa-fx]+)*', line[1]):
|
||||
if re.fullmatch(r"[A-Z0-9_]+", line[0]) and re.fullmatch(
|
||||
r"[A-Z0-9]+(:[A-Z0-9]+=[0-9A-Fa-fx]+)*", line[1]
|
||||
):
|
||||
cur_region_data.setdefault(line[0], {})
|
||||
cur_region_data[line[0]][line[1]] = counter_value
|
||||
continue
|
||||
@@ -659,49 +755,52 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
|
||||
|
||||
|
||||
def remove_html_tags(text):
|
||||
return re.sub('<.*?>', '', text)
|
||||
return re.sub("<.*?>", "", text)
|
||||
|
||||
|
||||
def get_intel_style_code(marked_objfile):
|
||||
# Disassemble with Intel syntax
|
||||
cmd = ("objdump -d --demangle --no-leading-addr --no-leading-headers --no-show-raw-insn "
|
||||
"--x86-asm-syntax=intel").split(" ") + [marked_objfile]
|
||||
cmd = (
|
||||
"objdump -d --demangle --no-leading-addr --no-leading-headers --no-show-raw-insn "
|
||||
"--x86-asm-syntax=intel"
|
||||
).split(" ") + [marked_objfile]
|
||||
asm_raw = check_output(cmd).decode()
|
||||
asm_raw = '\n'.join([l.strip() for l in asm_raw.split('\n')])
|
||||
asm_raw = "\n".join([line.strip() for line in asm_raw.split("\n")])
|
||||
kernel_raw = asm_raw[
|
||||
asm_raw.index('mov\tebx, 111\nnop')+len('mov\tebx, 111\nnop') :
|
||||
asm_raw.index('mov\tebx, 222\nnop')
|
||||
asm_raw.index("mov\tebx, 111\nnop")
|
||||
+ len("mov\tebx, 111\nnop") : asm_raw.index("mov\tebx, 222\nnop")
|
||||
]
|
||||
kernel_lines = kernel_raw.split('\n')
|
||||
kernel_lines = kernel_raw.split("\n")
|
||||
# Ignore label and jump
|
||||
return '\n'.join(kernel_lines[:-2])
|
||||
return "\n".join(kernel_lines[:-2])
|
||||
|
||||
|
||||
def get_ithemal_prediction(code, model='skl'):
|
||||
def get_ithemal_prediction(code, model="skl"):
|
||||
url = "http://3.18.198.23/predict"
|
||||
assert model in ['skl', 'hsw', 'ivb']
|
||||
r = requests.post(url, {'code': code, 'model': model})
|
||||
assert model in ["skl", "hsw", "ivb"]
|
||||
r = requests.post(url, {"code": code, "model": model})
|
||||
raw_text = remove_html_tags(r.text)
|
||||
m = re.search("Could not generate a prediction: (.*)", raw_text)
|
||||
if m:
|
||||
print(" error:", m.group(1).strip(), end=' ')
|
||||
return float('nan')
|
||||
m = re.search("Prediction: ([0-9\.]+) cycles per iteration", raw_text)
|
||||
print(" error:", m.group(1).strip(), end=" ")
|
||||
return float("nan")
|
||||
m = re.search("Prediction: ([0-9.]+) cycles per iteration", raw_text)
|
||||
if m:
|
||||
return float(m.group(1))
|
||||
else:
|
||||
return float('nan')
|
||||
return float("nan")
|
||||
|
||||
|
||||
def main():
|
||||
# Check for correct LLVM-MCA version
|
||||
try:
|
||||
llvm_mca = 'LLVM version 12.0.0' in check_output(['llvm-mca', '-version']).decode()
|
||||
llvm_mca = "LLVM version 12.0.0" in check_output(["llvm-mca", "-version"]).decode()
|
||||
except FileNotFoundError:
|
||||
llvm_mca = False
|
||||
|
||||
build_mark_run_all_kernels(measurements='--no-measurements' not in sys.argv, llvm_mca=llvm_mca)
|
||||
|
||||
build_mark_run_all_kernels(measurements="--no-measurements" not in sys.argv, llvm_mca=llvm_mca)
|
||||
sys.exit()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
||||
|
Reference in New Issue
Block a user