#!/usr/bin/env python3
import argparse
import os
import sys
from kernelNanoBench import *
from tools.CPUID.cpuid import CPUID, micro_arch
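

# Writes a complete HTML document with the given title, head, and body to 'filename'.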
def writeHtmlFile(filename, title, head, body, includeDOCTYPE=True):
    with open(filename, 'w') as f:
        if includeDOCTYPE:
            f.write('<!DOCTYPE html>\n')
        f.write('<html>\n'
                '<head>\n'
                '<meta charset="utf-8"/>\n'
                '<title>' + title + '</title>\n'
                + head +
                '</head>\n'
                '<body>\n'
                + body +
                '</body>\n'
                '</html>\n')


def main():
    parser = argparse.ArgumentParser(description='Cycle-by-Cycle Measurements')
    parser.add_argument('-html', help='HTML filename [Default: graph.html]', nargs='?', const='', metavar='filename')
    parser.add_argument('-csv', help='CSV filename [Default: stdout]', nargs='?', const='', metavar='filename')
    parser.add_argument('-end_to_end', action='store_true', help='Do not try to remove overhead.')
    parser.add_argument('-asm', metavar='code', help='Assembler code string (in Intel syntax) to be benchmarked.')
    parser.add_argument('-asm_init', metavar='code', help='Assembler code string (in Intel syntax) to be executed once in the beginning.')
    parser.add_argument('-asm_late_init', metavar='code', help='Assembler code string (in Intel syntax) to be executed once immediately before the code to be benchmarked.')
    parser.add_argument('-asm_one_time_init', metavar='code', help='Assembler code string (in Intel syntax) to be executed once before the first measurement.')
    parser.add_argument('-code', metavar='filename', help='Binary file containing the code to be benchmarked.')
    parser.add_argument('-code_init', metavar='filename', help='Binary file containing code to be executed once in the beginning.')
    parser.add_argument('-code_late_init', metavar='filename', help='Binary file containing code to be executed once immediately before the code to be benchmarked.')
    parser.add_argument('-code_one_time_init', metavar='filename', help='Binary file containing code to be executed once before the first measurement.')
    parser.add_argument('-cpu', metavar='n', help='Pins the measurement thread to CPU n.')
    parser.add_argument('-config', metavar='filename', help='File with performance counter event specifications.', required=True)
    parser.add_argument('-unroll_count', metavar='n', help='Number of copies of the benchmark code inside the inner loop.', default=1)
    parser.add_argument('-loop_count', metavar='n', help='Number of iterations of the inner loop.')
    parser.add_argument('-n_measurements', metavar='n', help='Number of times the measurements are repeated.')
    parser.add_argument('-warm_up_count', metavar='n', help='Number of runs before the first measurement gets recorded.')
    parser.add_argument('-initial_warm_up_count', metavar='n', help='Number of runs before any measurement is performed.')
    parser.add_argument('-alignment_offset', metavar='n', help='Alignment offset.')
    parser.add_argument('-avg', action='store_const', const='avg', help='Selects the arithmetic mean (excluding the top and bottom 20%% of the values) as the aggregate function.')
    parser.add_argument('-median', action='store_const', const='med', help='Selects the median as the aggregate function.')
    parser.add_argument('-min', action='store_const', const='min', help='Selects the minimum as the aggregate function.')
    parser.add_argument('-max', action='store_const', const='max', help='Selects the maximum as the aggregate function.')
    parser.add_argument('-range', action='store_true', help='Outputs the range of the measured values (i.e., the minimum and the maximum).')
    parser.add_argument('-no_mem', action='store_true', help='The code for reading the perf. ctrs. does not make memory accesses.')
    parser.add_argument('-remove_empty_events', action='store_true', help='Removes events from the output that did not occur.')
    parser.add_argument('-verbose', action='store_true', help='Outputs the results of all performance counter readings.')
    args = parser.parse_args()
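
    # Detect the host microarchitecture. detP23 is passed to runNanoBenchCycleByCycle on
    # Sandy Bridge through Cannon Lake; presumably it enables special handling of the
    # (symmetric) load ports 2 and 3 on these microarchitectures.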
    uArch = micro_arch(CPUID())
    detP23 = (uArch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'CLX', 'KBL', 'CFL', 'CNL'])
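
    # Cycle-by-cycle measurements use nanoBench's basic mode; drainFrontend presumably ensures
    # that the front end is drained before the benchmarked code starts executing.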
    setNanoBenchParameters(basicMode=True, drainFrontend=True)
    setNanoBenchParameters(config=readFile(args.config),
                           unrollCount=args.unroll_count,
                           loopCount=args.loop_count,
                           nMeasurements=args.n_measurements,
                           warmUpCount=args.warm_up_count,
                           initialWarmUpCount=args.initial_warm_up_count,
                           alignmentOffset=args.alignment_offset,
                           aggregateFunction=(args.avg or args.median or args.min or args.max or 'med'),
                           range=args.range,
                           noMem=args.no_mem,
                           verbose=args.verbose,
                           endToEnd=args.end_to_end)
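
    # Run the cycle-by-cycle benchmark. On success, the result maps each performance counter
    # event to a tuple (values, minValues, maxValues), each with one entry per cycle.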
    nbDict = runNanoBenchCycleByCycle(code=args.asm, codeBinFile=args.code,
                                      init=args.asm_init, initBinFile=args.code_init,
                                      lateInit=args.asm_late_init, lateInitBinFile=args.code_late_init,
                                      oneTimeInit=args.asm_one_time_init, oneTimeInitBinFile=args.code_one_time_init,
                                      cpu=args.cpu, detP23=detP23)

    if nbDict is None:
        print('Error: nanoBench did not return a valid result.', file=sys.stderr)
        if not args.end_to_end:
            print('Try using the -end_to_end option.', file=sys.stderr)
        sys.exit(1)

    if (uArch in ['TGL', 'RKL']) and (not args.end_to_end):
        # On TGL and RKL, the WRMSR instruction sometimes appears to need an extra cycle.
        print('Note: If the results look incorrect, try using the -end_to_end option.', file=sys.stderr)
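
    # Drop events that did not occur in any cycle.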
    if args.remove_empty_events:
        for k in list(nbDict.keys()):
            if max(nbDict[k][0]) == 0:
                del nbDict[k]
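
    # CSV output: one line per event, containing the event name followed by the per-cycle
    # values (interleaved with the corresponding minima and maxima if -range was specified).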
    if args.csv is not None:
        if args.range:
            csvString = '\n'.join(k + ',' + ','.join(map(str, sum(zip(v, vMin, vMax), ()))) for k, (v, vMin, vMax) in nbDict.items())
        else:
            csvString = '\n'.join(k + ',' + ','.join(map(str, v)) for k, (v, _, _) in nbDict.items())
        if args.csv:
            with open(args.csv, 'w') as f:
                f.write(csvString + '\n')
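            # When run via sudo, make the output file owned by the invoking user instead of root.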
            os.chown(args.csv, int(os.environ['SUDO_UID']), int(os.environ['SUDO_GID']))
        else:
            print(csvString)
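
    # Generate an interactive plotly graph unless only CSV output was requested.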
    if (args.html is not None) or (args.csv is None):
        from plotly.offline import plot
        import plotly.graph_objects as go

        fig = go.Figure()
        fig.update_xaxes(title_text='Cycle')
        for name, (values, minValues, maxValues) in nbDict.items():
            e = None
            if args.range:
                array = [(m - v) for (v, m) in zip(values, maxValues)]
                arrayminus = [(v - m) for (v, m) in zip(values, minValues)]
                e = dict(type='data', symmetric=False, array=array, arrayminus=arrayminus)
            fig.add_trace(go.Scatter(y=values, error_y=e, mode='lines+markers', line_shape='linear', name=name, marker_size=5, hoverlabel=dict(namelength=-1)))

        config = {'displayModeBar': True,
                  'modeBarButtonsToRemove': ['autoScale2d', 'select2d', 'lasso2d'],
                  'modeBarButtonsToAdd': ['toggleSpikelines', 'hoverclosest', 'hovercompare',
                                          {'name': 'Toggle interpolation mode', 'icon': 'iconJS', 'click': 'interpolationJS'}]}
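
        # plot() serializes the config dict to JSON, so the JavaScript for the custom mode-bar
        # button is given as the placeholder strings 'iconJS' and 'interpolationJS' above and
        # patched into the generated HTML below.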
        body = plot(fig, include_plotlyjs='cdn', output_type='div', config=config)
        body = body.replace('"iconJS"', 'Plotly.Icons.drawline')
        body = body.replace('"interpolationJS"', 'function (gd) {Plotly.restyle(gd, "line.shape", gd.data[0].line.shape == "hv" ? "linear" : "hv")}')
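
        # Embed the command line that produced the graph so the measurement can be reproduced.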
        cmdLine = ' '.join(('"' + p + '"' if ((' ' in p) or (';' in p)) else p) for p in sys.argv)
        body += '<p><code>sudo ' + cmdLine + '</code></p>'

        htmlFilename = args.html or 'graph.html'
        writeHtmlFile(htmlFilename, 'Graph', '', body, includeDOCTYPE=False)  # if the DOCTYPE is included, the scaling doesn't work properly
        print('Output written to ' + htmlFilename)


if __name__ == "__main__":
    main()