mirror of
https://github.com/andreas-abel/nanoBench.git
synced 2025-07-21 07:01:04 +02:00
cpuBench support for EMR, MTL, and ZEN5
This commit is contained in:
@@ -10,13 +10,13 @@ A8.01 LSD.UOPS
|
|||||||
AE.01 UOPS_ISSUED
|
AE.01 UOPS_ISSUED
|
||||||
B1.01 UOPS_EXECUTED
|
B1.01 UOPS_EXECUTED
|
||||||
C2.02 UOPS_RETIRED.SLOTS
|
C2.02 UOPS_RETIRED.SLOTS
|
||||||
B2.01 UOPS_DISPATCHED_PORT.PORT_0
|
B2.01 UOPS_DISPATCHED.PORT_0
|
||||||
B2.02 UOPS_DISPATCHED_PORT.PORT_1
|
B2.02 UOPS_DISPATCHED.PORT_1
|
||||||
B2.04 UOPS_DISPATCHED_PORT.PORT_2_3_10
|
B2.04 UOPS_DISPATCHED.PORT_2_3_10
|
||||||
B2.10 UOPS_DISPATCHED_PORT.PORT_4_9
|
B2.10 UOPS_DISPATCHED.PORT_4_9
|
||||||
B2.20 UOPS_DISPATCHED_PORT.PORT_5_11
|
B2.20 UOPS_DISPATCHED.PORT_5_11
|
||||||
B2.40 UOPS_DISPATCHED_PORT.PORT_6
|
B2.40 UOPS_DISPATCHED.PORT_6
|
||||||
B2.80 UOPS_DISPATCHED_PORT.PORT_7_8
|
B2.80 UOPS_DISPATCHED.PORT_7_8
|
||||||
C4.00 BR_INST_RETIRED.ALL_BRANCHES
|
C4.00 BR_INST_RETIRED.ALL_BRANCHES
|
||||||
C5.00 BR_MISP_RETIRED.ALL_BRANCHES
|
C5.00 BR_MISP_RETIRED.ALL_BRANCHES
|
||||||
D1.01 MEM_LOAD_RETIRED.L1_HIT
|
D1.01 MEM_LOAD_RETIRED.L1_HIT
|
||||||
|
@@ -10,13 +10,13 @@ A8.01 LSD.UOPS
|
|||||||
AE.01 UOPS_ISSUED
|
AE.01 UOPS_ISSUED
|
||||||
B1.01 UOPS_EXECUTED
|
B1.01 UOPS_EXECUTED
|
||||||
C2.02 UOPS_RETIRED.SLOTS
|
C2.02 UOPS_RETIRED.SLOTS
|
||||||
B2.01 UOPS_DISPATCHED_PORT.PORT_0
|
B2.01 UOPS_DISPATCHED.PORT_0
|
||||||
B2.02 UOPS_DISPATCHED_PORT.PORT_1
|
B2.02 UOPS_DISPATCHED.PORT_1
|
||||||
B2.04 UOPS_DISPATCHED_PORT.PORT_2_3_10
|
B2.04 UOPS_DISPATCHED.PORT_2_3_10
|
||||||
B2.10 UOPS_DISPATCHED_PORT.PORT_4_9
|
B2.10 UOPS_DISPATCHED.PORT_4_9
|
||||||
B2.20 UOPS_DISPATCHED_PORT.PORT_5_11
|
B2.20 UOPS_DISPATCHED.PORT_5_11
|
||||||
B2.40 UOPS_DISPATCHED_PORT.PORT_6
|
B2.40 UOPS_DISPATCHED.PORT_6
|
||||||
B2.80 UOPS_DISPATCHED_PORT.PORT_7_8
|
B2.80 UOPS_DISPATCHED.PORT_7_8
|
||||||
C4.00 BR_INST_RETIRED.ALL_BRANCHES
|
C4.00 BR_INST_RETIRED.ALL_BRANCHES
|
||||||
C5.00 BR_MISP_RETIRED.ALL_BRANCHES
|
C5.00 BR_MISP_RETIRED.ALL_BRANCHES
|
||||||
D1.01 MEM_LOAD_RETIRED.L1_HIT
|
D1.01 MEM_LOAD_RETIRED.L1_HIT
|
||||||
|
@@ -57,7 +57,7 @@ serializingInstructions = {'INVD', 'INVEPT', 'INVLPG', 'INVVPID', 'LGDT', 'LIDT'
|
|||||||
'CPUID', 'IRET', 'RSM', 'SFENCE', 'LFENCE', 'MFENCE'}
|
'CPUID', 'IRET', 'RSM', 'SFENCE', 'LFENCE', 'MFENCE'}
|
||||||
|
|
||||||
def isAMDCPU():
|
def isAMDCPU():
|
||||||
return arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4']
|
return arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4', 'ZEN5']
|
||||||
|
|
||||||
def isIntelCPU():
|
def isIntelCPU():
|
||||||
return not isAMDCPU()
|
return not isAMDCPU()
|
||||||
@@ -84,7 +84,7 @@ def getIndexReg(instrNode, opNode):
|
|||||||
# registers that are not used as implicit registers should come first; RAX (and parts of it) should come last, as some instructions have special encodings for that
|
# registers that are not used as implicit registers should come first; RAX (and parts of it) should come last, as some instructions have special encodings for that
|
||||||
# prefer low registers to high registers
|
# prefer low registers to high registers
|
||||||
def sortRegs(regsList):
|
def sortRegs(regsList):
|
||||||
return sorted(regsList, key=lambda r: (not any(i.isdigit() for i in r), 'P' in r, 'I' in r, 'H' in r, 'A' in r, list(map(int, re.findall('\d+',r))), r))
|
return sorted(regsList, key=lambda r: (not any(i.isdigit() for i in r), 'P' in r, 'I' in r, 'H' in r, 'A' in r, list(map(int, re.findall(r'\d+',r))), r))
|
||||||
|
|
||||||
|
|
||||||
# Initialize registers and memory
|
# Initialize registers and memory
|
||||||
@@ -114,7 +114,7 @@ def getRegMemInit(instrNode, opRegDict, memOffset, useIndexedAddr):
|
|||||||
|
|
||||||
if opNode.attrib['type'] == 'reg':
|
if opNode.attrib['type'] == 'reg':
|
||||||
reg = opRegDict[opIdx]
|
reg = opRegDict[opIdx]
|
||||||
regPrefix = re.sub('\d', '', reg)
|
regPrefix = re.sub(r'\d', '', reg)
|
||||||
|
|
||||||
if reg in High8Regs:
|
if reg in High8Regs:
|
||||||
init += ['MOV {}, 0'.format(reg)]
|
init += ['MOV {}, 0'.format(reg)]
|
||||||
@@ -222,9 +222,9 @@ def runExperiment(instrNode, instrCode, init=None, unrollCount=500, loopCount=0,
|
|||||||
if evt == 'UOPS':
|
if evt == 'UOPS':
|
||||||
if arch in ['CON', 'WOL']: evt = 'RS_UOPS_DISPATCHED'
|
if arch in ['CON', 'WOL']: evt = 'RS_UOPS_DISPATCHED'
|
||||||
elif arch in ['NHM', 'WSM', 'BNL', 'GLM', 'GLP']: evt = 'UOPS_RETIRED.ANY'
|
elif arch in ['NHM', 'WSM', 'BNL', 'GLM', 'GLP']: evt = 'UOPS_RETIRED.ANY'
|
||||||
elif arch in ['SNB', 'SLM', 'AMT', 'ADL-E']: evt = 'UOPS_RETIRED.ALL'
|
elif arch in ['SNB', 'SLM', 'AMT', 'ADL-E', 'MTL-E']: evt = 'UOPS_RETIRED.ALL'
|
||||||
elif arch in ['HSW']: evt = 'UOPS_EXECUTED.CORE'
|
elif arch in ['HSW']: evt = 'UOPS_EXECUTED.CORE'
|
||||||
elif arch in ['IVB', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P']: evt = 'UOPS_EXECUTED.THREAD'
|
elif arch in ['IVB', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P']: evt = 'UOPS_EXECUTED.THREAD'
|
||||||
elif arch in ['TRM']: evt = 'TOPDOWN_RETIRING.ALL'
|
elif arch in ['TRM']: evt = 'TOPDOWN_RETIRING.ALL'
|
||||||
localHtmlReports.append('<li>' + evt + ': ' + str(value) + '</li>\n')
|
localHtmlReports.append('<li>' + evt + ': ' + str(value) + '</li>\n')
|
||||||
localHtmlReports.append('</ul>\n</li>')
|
localHtmlReports.append('</ul>\n</li>')
|
||||||
@@ -274,34 +274,34 @@ def getEventConfig(event):
|
|||||||
if arch in ['CON', 'WOL']: return 'A0.00' # RS_UOPS_DISPATCHED
|
if arch in ['CON', 'WOL']: return 'A0.00' # RS_UOPS_DISPATCHED
|
||||||
if arch in ['NHM', 'WSM', 'SNB' ]: return 'C2.01' # UOPS_RETIRED.ANY
|
if arch in ['NHM', 'WSM', 'SNB' ]: return 'C2.01' # UOPS_RETIRED.ANY
|
||||||
if arch in ['SNB']: return 'C2.01' # UOPS_RETIRED.ALL
|
if arch in ['SNB']: return 'C2.01' # UOPS_RETIRED.ALL
|
||||||
if arch in ['GLM', 'GLP', 'ADL-E']: return 'C2.00' # UOPS_RETIRED.ALL
|
if arch in ['GLM', 'GLP', 'ADL-E', 'MTL-E']: return 'C2.00' # UOPS_RETIRED.ALL
|
||||||
if arch in ['TRM']: return 'C2.00' # TOPDOWN_RETIRING.ALL
|
if arch in ['TRM']: return 'C2.00' # TOPDOWN_RETIRING.ALL
|
||||||
if arch in ['BNL', 'SLM', 'AMT']: return 'C2.10' # UOPS_RETIRED.ANY
|
if arch in ['BNL', 'SLM', 'AMT']: return 'C2.10' # UOPS_RETIRED.ANY
|
||||||
if arch in ['HSW']: return 'B1.02' # UOPS_EXECUTED.CORE; note: may undercount due to erratum HSD30
|
if arch in ['HSW']: return 'B1.02' # UOPS_EXECUTED.CORE; note: may undercount due to erratum HSD30
|
||||||
if arch in ['IVB', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P']: return 'B1.01' # UOPS_EXECUTED.THREAD
|
if arch in ['IVB', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P']: return 'B1.01' # UOPS_EXECUTED.THREAD
|
||||||
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4']: return '0C1.00'
|
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4', 'ZEN5']: return '0C1.00'
|
||||||
if event == 'RETIRE_SLOTS':
|
if event == 'RETIRE_SLOTS':
|
||||||
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P']: return 'C2.02'
|
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P']: return 'C2.02'
|
||||||
if event == 'UOPS_MITE':
|
if event == 'UOPS_MITE':
|
||||||
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P']: return '79.04'
|
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P']: return '79.04'
|
||||||
if event == 'UOPS_MITE>=1':
|
if event == 'UOPS_MITE>=1':
|
||||||
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P']: return '79.04.CMSK=1'
|
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P']: return '79.04.CMSK=1'
|
||||||
if event == 'UOPS_MS':
|
if event == 'UOPS_MS':
|
||||||
if arch in ['NHM', 'WSM']: return 'D1.02'
|
if arch in ['NHM', 'WSM']: return 'D1.02'
|
||||||
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL']: return '79.30'
|
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL']: return '79.30'
|
||||||
if arch in ['ADL-P']: return '79.20'
|
if arch in ['ADL-P', 'EMR', 'MTL-P']: return '79.20'
|
||||||
if arch in ['SLM', 'AMT', 'GLM', 'GLP', 'TRM', 'ADL-E']: return 'C2.01'
|
if arch in ['SLM', 'AMT', 'GLM', 'GLP', 'TRM', 'ADL-E', 'MTL-E']: return 'C2.01'
|
||||||
if arch in ['BNL']: return 'A9.01' # undocumented, but seems to work
|
if arch in ['BNL']: return 'A9.01' # undocumented, but seems to work
|
||||||
if event == 'UOPS_PORT_0':
|
if event == 'UOPS_PORT_0':
|
||||||
if arch in ['CON', 'WOL']: return 'A1.01.CTR=0'
|
if arch in ['CON', 'WOL']: return 'A1.01.CTR=0'
|
||||||
if arch in ['NHM', 'WSM']: return 'B1.01'
|
if arch in ['NHM', 'WSM']: return 'B1.01'
|
||||||
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL']: return 'A1.01'
|
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL']: return 'A1.01'
|
||||||
if arch in ['ADL-P']: return 'B2.01'
|
if arch in ['ADL-P', 'EMR', 'MTL-P']: return 'B2.01'
|
||||||
if event == 'UOPS_PORT_1':
|
if event == 'UOPS_PORT_1':
|
||||||
if arch in ['CON', 'WOL']: return 'A1.02.CTR=0'
|
if arch in ['CON', 'WOL']: return 'A1.02.CTR=0'
|
||||||
if arch in ['NHM', 'WSM']: return 'B1.02'
|
if arch in ['NHM', 'WSM']: return 'B1.02'
|
||||||
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL']: return 'A1.02'
|
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL']: return 'A1.02'
|
||||||
if arch in ['ADL-P']: return 'B2.02'
|
if arch in ['ADL-P', 'EMR', 'MTL-P']: return 'B2.02'
|
||||||
if event == 'UOPS_PORT_2':
|
if event == 'UOPS_PORT_2':
|
||||||
if arch in ['CON', 'WOL']: return 'A1.04.CTR=0'
|
if arch in ['CON', 'WOL']: return 'A1.04.CTR=0'
|
||||||
if arch in ['NHM', 'WSM']: return 'B1.04'
|
if arch in ['NHM', 'WSM']: return 'B1.04'
|
||||||
@@ -324,45 +324,45 @@ def getEventConfig(event):
|
|||||||
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL']: return 'A1.20'
|
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL']: return 'A1.20'
|
||||||
if event == 'UOPS_PORT_6':
|
if event == 'UOPS_PORT_6':
|
||||||
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL']: return 'A1.40'
|
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL']: return 'A1.40'
|
||||||
if arch in ['ADL-P']: return 'B2.40'
|
if arch in ['ADL-P', 'EMR', 'MTL-P']: return 'B2.40'
|
||||||
if event == 'UOPS_PORT_7':
|
if event == 'UOPS_PORT_7':
|
||||||
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'CLX']: return 'A1.80'
|
if arch in ['HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'CLX']: return 'A1.80'
|
||||||
if event == 'UOPS_PORT_23':
|
if event == 'UOPS_PORT_23':
|
||||||
if arch in ['ICL', 'TGL', 'RKL']: return 'A1.04'
|
if arch in ['ICL', 'TGL', 'RKL']: return 'A1.04'
|
||||||
if event == 'UOPS_PORT_49':
|
if event == 'UOPS_PORT_49':
|
||||||
if arch in ['ICL', 'TGL', 'RKL']: return 'A1.10'
|
if arch in ['ICL', 'TGL', 'RKL']: return 'A1.10'
|
||||||
if arch in ['ADL-P']: return 'B2.10'
|
if arch in ['ADL-P', 'EMR', 'MTL-P']: return 'B2.10'
|
||||||
if event == 'UOPS_PORT_78':
|
if event == 'UOPS_PORT_78':
|
||||||
if arch in ['ICL', 'TGL', 'RKL']: return 'A1.80'
|
if arch in ['ICL', 'TGL', 'RKL']: return 'A1.80'
|
||||||
if arch in ['ADL-P']: return 'B2.80'
|
if arch in ['ADL-P', 'EMR', 'MTL-P']: return 'B2.80'
|
||||||
if event == 'UOPS_PORT_5B':
|
if event == 'UOPS_PORT_5B':
|
||||||
if arch in ['ADL-P']: return 'B2.20'
|
if arch in ['ADL-P', 'EMR', 'MTL-P']: return 'B2.20'
|
||||||
if event == 'UOPS_PORT_5B>=2':
|
if event == 'UOPS_PORT_5B>=2':
|
||||||
if arch in ['ADL-P']: return 'B2.20.CMSK=2'
|
if arch in ['ADL-P', 'EMR', 'MTL-P']: return 'B2.20.CMSK=2'
|
||||||
if event == 'UOPS_PORT_23A':
|
if event == 'UOPS_PORT_23A':
|
||||||
if arch in ['ADL-P']: return 'B2.04'
|
if arch in ['ADL-P', 'EMR', 'MTL-P']: return 'B2.04'
|
||||||
if event == 'DIV_CYCLES':
|
if event == 'DIV_CYCLES':
|
||||||
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'CLX']: return '14.01' # undocumented on HSW, but seems to work
|
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'CLX']: return '14.01' # undocumented on HSW, but seems to work
|
||||||
if arch in ['ICL', 'TGL', 'RKL']: return '14.09'
|
if arch in ['ICL', 'TGL', 'RKL']: return '14.09'
|
||||||
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4']: return '0D3.00'
|
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4', 'ZEN5']: return '0D3.00'
|
||||||
if arch in ['ADL-P']: return 'B0.09.CMSK=1'
|
if arch in ['ADL-P', 'EMR', 'MTL-P']: return 'B0.09.CMSK=1'
|
||||||
if event == 'ILD_STALL.LCP':
|
if event == 'ILD_STALL.LCP':
|
||||||
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P']: return '87.01'
|
if arch in ['NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P']: return '87.01'
|
||||||
if event == 'INST_DECODED.DEC0':
|
if event == 'INST_DECODED.DEC0':
|
||||||
if arch in ['NHM', 'WSM']: return '18.01'
|
if arch in ['NHM', 'WSM']: return '18.01'
|
||||||
if event == 'FpuPipeAssignment.Total0':
|
if event == 'FpuPipeAssignment.Total0':
|
||||||
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4']: return '000.01'
|
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4', 'ZEN5']: return '000.01'
|
||||||
if event == 'FpuPipeAssignment.Total1':
|
if event == 'FpuPipeAssignment.Total1':
|
||||||
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4']: return '000.02'
|
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4', 'ZEN5']: return '000.02'
|
||||||
if event == 'FpuPipeAssignment.Total2':
|
if event == 'FpuPipeAssignment.Total2':
|
||||||
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4']: return '000.04'
|
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4', 'ZEN5']: return '000.04'
|
||||||
if event == 'FpuPipeAssignment.Total3':
|
if event == 'FpuPipeAssignment.Total3':
|
||||||
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4']: return '000.08'
|
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4', 'ZEN5']: return '000.08'
|
||||||
# the following two counters are undocumented so far, but seem to work
|
# the following two counters are undocumented so far, but seem to work
|
||||||
if event == 'FpuPipeAssignment.Total4':
|
if event == 'FpuPipeAssignment.Total4':
|
||||||
if arch in ['ZEN3', 'ZEN4']: return '000.10'
|
if arch in ['ZEN3', 'ZEN4', 'ZEN5']: return '000.10'
|
||||||
if event == 'FpuPipeAssignment.Total5':
|
if event == 'FpuPipeAssignment.Total5':
|
||||||
if arch in ['ZEN3', 'ZEN4']: return '000.20'
|
if arch in ['ZEN3', 'ZEN4', 'ZEN5']: return '000.20'
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@@ -575,7 +575,7 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
|
|||||||
print('IACA error')
|
print('IACA error')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
allPortsLine = re.search('\| Cycles \|.*', iacaOut).group(0)
|
allPortsLine = re.search(r'\| Cycles \|.*', iacaOut).group(0)
|
||||||
instrPortsLine = iacaOut.split('\n')[-3]
|
instrPortsLine = iacaOut.split('\n')[-3]
|
||||||
|
|
||||||
allUopsOnBlockedPorts = 0.0
|
allUopsOnBlockedPorts = 0.0
|
||||||
@@ -603,14 +603,14 @@ def getUopsOnBlockedPorts(instrNode, useDistinctRegs, blockInstrNode, blockInstr
|
|||||||
else:
|
else:
|
||||||
events = ['UOPS_PORT_'+str(p) for p in blockedPorts]
|
events = ['UOPS_PORT_'+str(p) for p in blockedPorts]
|
||||||
|
|
||||||
if (arch in ['ADL-P']) and ('5' in blockedPorts): # note that any port combination that contains B also contains 5
|
if (arch in ['ADL-P', 'EMR', 'MTL-P']) and ('5' in blockedPorts): # note that any port combination that contains B also contains 5
|
||||||
events += ['UOPS_PORT_5B']
|
events += ['UOPS_PORT_5B']
|
||||||
if 'B' not in blockedPorts:
|
if 'B' not in blockedPorts:
|
||||||
events += ['UOPS_PORT_5B>=2']
|
events += ['UOPS_PORT_5B>=2']
|
||||||
else:
|
else:
|
||||||
if arch in ['ZEN+', 'ZEN2']:
|
if arch in ['ZEN+', 'ZEN2']:
|
||||||
events = ['FpuPipeAssignment.Total'+str(p) for p in range(0,4)]
|
events = ['FpuPipeAssignment.Total'+str(p) for p in range(0,4)]
|
||||||
elif arch in ['ZEN3', 'ZEN4']:
|
elif arch in ['ZEN3', 'ZEN4', 'ZEN5']:
|
||||||
events = ['FpuPipeAssignment.Total'+str(p) for p in range(0,6)]
|
events = ['FpuPipeAssignment.Total'+str(p) for p in range(0,6)]
|
||||||
|
|
||||||
configurePFCs(events)
|
configurePFCs(events)
|
||||||
@@ -1120,7 +1120,7 @@ def getThroughputAndUops(instrNode, useDistinctRegs, useIndexedAddr, htmlReports
|
|||||||
ports_line = iaca_out.split('\n')[-3]
|
ports_line = iaca_out.split('\n')[-3]
|
||||||
fused_uops = '^' in ports_line.split()[1]
|
fused_uops = '^' in ports_line.split()[1]
|
||||||
|
|
||||||
num_ports = re.search('\| Port \|.*', iaca_out).group(0).count('|')-2
|
num_ports = re.search(r'\| Port \|.*', iaca_out).group(0).count('|')-2
|
||||||
|
|
||||||
for p in range(0, num_ports):
|
for p in range(0, num_ports):
|
||||||
portCol = ports_line.split('|')[p+2].split()
|
portCol = ports_line.split('|')[p+2].split()
|
||||||
@@ -1391,7 +1391,7 @@ def getBasicLatencies(instrNodeList):
|
|||||||
result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, I8)'], instr + ' XMM1, XMM1, 0')
|
result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, I8)'], instr + ' XMM1, XMM1, 0')
|
||||||
basicLatency[instr] = int(result['Core cycles'] + .2)
|
basicLatency[instr] = int(result['Core cycles'] + .2)
|
||||||
|
|
||||||
if any(x for x in instrNodeList if x.findall('[@iclass="VANDPS"]')):
|
if any(x.findall('[@iclass="VANDPS"]') for x in instrNodeList):
|
||||||
for instr in ['VANDPS', 'VANDPD', 'VORPS', 'VORPD', 'VPAND', 'VPOR']:
|
for instr in ['VANDPS', 'VANDPD', 'VORPS', 'VORPD', 'VPAND', 'VPOR']:
|
||||||
result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, XMM)'], instr + ' XMM1, XMM1, XMM1')
|
result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, XMM)'], instr + ' XMM1, XMM1, XMM1')
|
||||||
basicLatency[instr] = int(result['Core cycles'] + .2)
|
basicLatency[instr] = int(result['Core cycles'] + .2)
|
||||||
@@ -1404,8 +1404,8 @@ def getBasicLatencies(instrNodeList):
|
|||||||
result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, I8)'], instr + ' XMM1, XMM1, 0')
|
result = runExperiment(instrNodeDict[instr + ' (XMM, XMM, I8)'], instr + ' XMM1, XMM1, 0')
|
||||||
basicLatency[instr] = int(result['Core cycles'] + .2)
|
basicLatency[instr] = int(result['Core cycles'] + .2)
|
||||||
|
|
||||||
if any(x for x in instrNodeList if x.findall('[@extension="AVX512EVEX"]')):
|
if any(x.findall('[@extension="AVX512EVEX"]') for x in instrNodeList):
|
||||||
kmovq_result = runExperiment(instrNodeDict['KMOVQ (K, K)'], 'KMOVQ K1, K1')
|
kmovq_result = runExperiment(instrNodeDict['KMOVQ_VEX (K, K)'], 'KMOVQ K1, K1')
|
||||||
basicLatency['KMOVQ'] = int(kmovq_result['Core cycles'] + .2)
|
basicLatency['KMOVQ'] = int(kmovq_result['Core cycles'] + .2)
|
||||||
|
|
||||||
vpandd_result = runExperiment(instrNodeDict['VPANDD (ZMM, ZMM, ZMM)'], 'VPANDD ZMM0, ZMM0, ZMM0')
|
vpandd_result = runExperiment(instrNodeDict['VPANDD (ZMM, ZMM, ZMM)'], 'VPANDD ZMM0, ZMM0, ZMM0')
|
||||||
@@ -1449,7 +1449,7 @@ def getDependencyBreakingInstrs(instrNode, opRegDict, ignoreOperand = None):
|
|||||||
reg = opRegDict[opI]
|
reg = opRegDict[opI]
|
||||||
elif opNode.attrib.get('suppressed', '0') == '1':
|
elif opNode.attrib.get('suppressed', '0') == '1':
|
||||||
reg = opNode.text
|
reg = opNode.text
|
||||||
regPrefix = re.sub('\d', '', reg)
|
regPrefix = re.sub(r'\d', '', reg)
|
||||||
if reg in GPRegs:
|
if reg in GPRegs:
|
||||||
if reg not in globalDoNotWriteRegs|memRegs:
|
if reg not in globalDoNotWriteRegs|memRegs:
|
||||||
depBreakingInstrs[opNode] = 'MOV ' + reg + ', 0' # don't use XOR as this would also break flag dependencies
|
depBreakingInstrs[opNode] = 'MOV ' + reg + ', 0' # don't use XOR as this would also break flag dependencies
|
||||||
@@ -1943,7 +1943,7 @@ def getChainInstrForVectorRegs(instrNode, startReg, targetReg, cRep, cType):
|
|||||||
if cType == 'FP':
|
if cType == 'FP':
|
||||||
# We use (V)SHUFPD instead of (V)MOV*PD because the latter is a 0-latency operation on some CPUs in some cases
|
# We use (V)SHUFPD instead of (V)MOV*PD because the latter is a 0-latency operation on some CPUs in some cases
|
||||||
if isAVXInstr(instrNode):
|
if isAVXInstr(instrNode):
|
||||||
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4']:
|
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4', 'ZEN5']:
|
||||||
# on ZEN, all shuffles are integer operations
|
# on ZEN, all shuffles are integer operations
|
||||||
chainInstrFP = 'VANDPD {0}, {1}, {1};'.format(targetReg, startReg)
|
chainInstrFP = 'VANDPD {0}, {1}, {1};'.format(targetReg, startReg)
|
||||||
chainInstrFP += 'VANDPD {0}, {0}, {0};'.format(targetReg) * cRep
|
chainInstrFP += 'VANDPD {0}, {0}, {0};'.format(targetReg) * cRep
|
||||||
@@ -1953,7 +1953,7 @@ def getChainInstrForVectorRegs(instrNode, startReg, targetReg, cRep, cType):
|
|||||||
chainInstrFP += 'VSHUFPD {0}, {0}, {0}, 0;'.format(targetReg) * cRep
|
chainInstrFP += 'VSHUFPD {0}, {0}, {0}, 0;'.format(targetReg) * cRep
|
||||||
chainLatencyFP = basicLatency['VSHUFPD'] * (cRep+1)
|
chainLatencyFP = basicLatency['VSHUFPD'] * (cRep+1)
|
||||||
else:
|
else:
|
||||||
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4']:
|
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4', 'ZEN5']:
|
||||||
# on ZEN, all shuffles are integer operations
|
# on ZEN, all shuffles are integer operations
|
||||||
chainInstrFP = 'VANDPD {0}, {1}, {1};'.format(targetReg, startReg)
|
chainInstrFP = 'VANDPD {0}, {1}, {1};'.format(targetReg, startReg)
|
||||||
chainInstrFP += 'VANDPD {0}, {0}, {0};'.format(targetReg) * cRep
|
chainInstrFP += 'VANDPD {0}, {0}, {0};'.format(targetReg) * cRep
|
||||||
@@ -1968,7 +1968,7 @@ def getChainInstrForVectorRegs(instrNode, startReg, targetReg, cRep, cType):
|
|||||||
if isAVXInstr(instrNode):
|
if isAVXInstr(instrNode):
|
||||||
instr = 'VPANDD' if ('ZMM' in targetReg) else 'VPAND'
|
instr = 'VPANDD' if ('ZMM' in targetReg) else 'VPAND'
|
||||||
chainInstrInt = '{0} {1}, {2}, {2};'.format(instr, targetReg, startReg)
|
chainInstrInt = '{0} {1}, {2}, {2};'.format(instr, targetReg, startReg)
|
||||||
chainInstrInt += '{0} {1}, {1}, {1};'.format(instr, targetReg) * cRep
|
chainInstrInt += '{0} {1}, {1}, {1};'.format(instr, targetReg) * cRep
|
||||||
chainLatencyInt = basicLatency[instr] * (cRep+1)
|
chainLatencyInt = basicLatency[instr] * (cRep+1)
|
||||||
else:
|
else:
|
||||||
# we use one shuffle to avoid a read dependency on the target register
|
# we use one shuffle to avoid a read dependency on the target register
|
||||||
@@ -2108,8 +2108,8 @@ def getLatConfigLists(instrNode, startNode, targetNode, useDistinctRegs, addrMem
|
|||||||
if reg1 == reg2:
|
if reg1 == reg2:
|
||||||
configList.append(LatConfig(instrI))
|
configList.append(LatConfig(instrI))
|
||||||
|
|
||||||
reg1Prefix = re.sub('\d', '', reg1)
|
reg1Prefix = re.sub(r'\d', '', reg1)
|
||||||
reg2Prefix = re.sub('\d', '', reg2)
|
reg2Prefix = re.sub(r'\d', '', reg2)
|
||||||
|
|
||||||
if reg1 in GPRegs and reg2 in GPRegs:
|
if reg1 in GPRegs and reg2 in GPRegs:
|
||||||
if reg1 in High8Regs:
|
if reg1 in High8Regs:
|
||||||
@@ -2614,7 +2614,7 @@ def getLatencies(instrNode, instrNodeList, tpDict, tpDictSameReg, htmlReports):
|
|||||||
regInit = []
|
regInit = []
|
||||||
for opNode in instrNode.findall('./operand[@r="1"][@type="reg"]'):
|
for opNode in instrNode.findall('./operand[@r="1"][@type="reg"]'):
|
||||||
reg = latConfig.instrI.opRegDict[int(opNode.attrib['idx'])]
|
reg = latConfig.instrI.opRegDict[int(opNode.attrib['idx'])]
|
||||||
regPrefix = re.sub('\d', '', reg)
|
regPrefix = re.sub(r'\d', '', reg)
|
||||||
if (regPrefix in ['XMM', 'YMM', 'ZMM']) and (reg not in globalDoNotWriteRegs|memRegs):
|
if (regPrefix in ['XMM', 'YMM', 'ZMM']) and (reg not in globalDoNotWriteRegs|memRegs):
|
||||||
for initOp in instrNode.findall('./operand[@w="1"][@type="reg"]'):
|
for initOp in instrNode.findall('./operand[@w="1"][@type="reg"]'):
|
||||||
if initOp.text != opNode.text: continue
|
if initOp.text != opNode.text: continue
|
||||||
@@ -2823,7 +2823,7 @@ def filterInstructions(XMLRoot):
|
|||||||
isaSet = XMLInstr.attrib['isa-set']
|
isaSet = XMLInstr.attrib['isa-set']
|
||||||
|
|
||||||
# Future instruction set extensions
|
# Future instruction set extensions
|
||||||
if extension in ['AMD_INVLPGB', 'CET', 'KEYLOCKER', 'KEYLOCKER_WIDE', 'RDPRU', 'TDX', 'TSX_LDTRK']: instrSet.discard(XMLInstr)
|
if extension in ['AMD_INVLPGB', 'CET', 'RDPRU', 'TDX', 'TSX_LDTRK']: instrSet.discard(XMLInstr)
|
||||||
|
|
||||||
# Not supported by assembler
|
# Not supported by assembler
|
||||||
if XMLInstr.attrib['iclass'] == 'NOP' and len(XMLInstr.findall('operand')) > 1:
|
if XMLInstr.attrib['iclass'] == 'NOP' and len(XMLInstr.findall('operand')) > 1:
|
||||||
@@ -2858,8 +2858,10 @@ def filterInstructions(XMLRoot):
|
|||||||
|
|
||||||
_, _, ecx1, edx1 = cpu(0x01)
|
_, _, ecx1, edx1 = cpu(0x01)
|
||||||
_, ebx7, ecx7, edx7 = cpu(0x07)
|
_, ebx7, ecx7, edx7 = cpu(0x07)
|
||||||
eax7_1, _, _, edx7_1 = cpu(0x07, 0x01)
|
eax7_1, ebx7_1, ecx7_1, edx7_1 = cpu(0x07, 0x01)
|
||||||
eaxD_1, _, _, _ = cpu(0x0D, 0x01)
|
eaxD_1, _, _, _ = cpu(0x0D, 0x01)
|
||||||
|
_, ebx19, _, _ = cpu(0x19)
|
||||||
|
eax1E_1, _, _, _ = cpu(0x1E, 0x01)
|
||||||
_, _, ecx8_1, edx8_1 = cpu(0x80000001)
|
_, _, ecx8_1, edx8_1 = cpu(0x80000001)
|
||||||
_, ebx8_8, _, _ = cpu(0x80000008)
|
_, ebx8_8, _, _ = cpu(0x80000008)
|
||||||
|
|
||||||
@@ -2934,6 +2936,9 @@ def filterInstructions(XMLRoot):
|
|||||||
if isaSet.startswith('AVX512_BITALG') and not cpuid.get_bit(ecx7, 12): instrSet.discard(XMLInstr)
|
if isaSet.startswith('AVX512_BITALG') and not cpuid.get_bit(ecx7, 12): instrSet.discard(XMLInstr)
|
||||||
if isaSet.startswith('AVX512_VPOPCNTDQ') and not cpuid.get_bit(ecx7, 14): instrSet.discard(XMLInstr)
|
if isaSet.startswith('AVX512_VPOPCNTDQ') and not cpuid.get_bit(ecx7, 14): instrSet.discard(XMLInstr)
|
||||||
if extension == 'RDPID' and not cpuid.get_bit(ecx7, 22): instrSet.discard(XMLInstr)
|
if extension == 'RDPID' and not cpuid.get_bit(ecx7, 22): instrSet.discard(XMLInstr)
|
||||||
|
if extension.startswith('KEYLOCKER'):
|
||||||
|
if not cpuid.get_bit(ecx7, 23) or not cpuid.get_bit(ebx19, 0): instrSet.discard(XMLInstr)
|
||||||
|
if 'WIDE' in extension and not cpuid.get_bit(ebx19, 2): instrSet.discard(XMLInstr)
|
||||||
if extension == 'CLDEMOTE' and not cpuid.get_bit(ecx7, 25): instrSet.discard(XMLInstr)
|
if extension == 'CLDEMOTE' and not cpuid.get_bit(ecx7, 25): instrSet.discard(XMLInstr)
|
||||||
if iclass == 'MOVDIRI' and not cpuid.get_bit(ecx7, 27): instrSet.discard(XMLInstr)
|
if iclass == 'MOVDIRI' and not cpuid.get_bit(ecx7, 27): instrSet.discard(XMLInstr)
|
||||||
if iclass == 'MOVDIR64B' and not cpuid.get_bit(ecx7, 28): instrSet.discard(XMLInstr)
|
if iclass == 'MOVDIR64B' and not cpuid.get_bit(ecx7, 28): instrSet.discard(XMLInstr)
|
||||||
@@ -2948,21 +2953,41 @@ def filterInstructions(XMLRoot):
|
|||||||
if isaSet.startswith('AVX512_FP16') and not cpuid.get_bit(edx7, 23): instrSet.discard(XMLInstr)
|
if isaSet.startswith('AVX512_FP16') and not cpuid.get_bit(edx7, 23): instrSet.discard(XMLInstr)
|
||||||
if extension == 'AMX_TILE' and not cpuid.get_bit(edx7, 24): instrSet.discard(XMLInstr)
|
if extension == 'AMX_TILE' and not cpuid.get_bit(edx7, 24): instrSet.discard(XMLInstr)
|
||||||
if extension == 'AMX_INT8' and not cpuid.get_bit(edx7, 25): instrSet.discard(XMLInstr)
|
if extension == 'AMX_INT8' and not cpuid.get_bit(edx7, 25): instrSet.discard(XMLInstr)
|
||||||
|
if extension == 'SHA512' and not cpuid.get_bit(eax7_1, 0): instrSet.discard(XMLInstr)
|
||||||
|
if extension == 'SM3' and not cpuid.get_bit(eax7_1, 1): instrSet.discard(XMLInstr)
|
||||||
|
if extension == 'SM4' and not cpuid.get_bit(eax7_1, 2): instrSet.discard(XMLInstr)
|
||||||
if extension == 'RAO_INT' and not cpuid.get_bit(eax7_1, 3): instrSet.discard(XMLInstr)
|
if extension == 'RAO_INT' and not cpuid.get_bit(eax7_1, 3): instrSet.discard(XMLInstr)
|
||||||
if extension == 'AVX_VNNI' and not cpuid.get_bit(eax7_1, 4): instrSet.discard(XMLInstr)
|
if extension == 'AVX_VNNI' and not cpuid.get_bit(eax7_1, 4): instrSet.discard(XMLInstr)
|
||||||
if isaSet.startswith('AVX512_BF16') and not cpuid.get_bit(eax7_1, 5): instrSet.discard(XMLInstr)
|
if isaSet.startswith('AVX512_BF16') and not cpuid.get_bit(eax7_1, 5): instrSet.discard(XMLInstr)
|
||||||
if extension == 'CMPCCXADD' and not cpuid.get_bit(eax7_1, 7): instrSet.discard(XMLInstr)
|
if extension == 'CMPCCXADD' and not cpuid.get_bit(eax7_1, 7): instrSet.discard(XMLInstr)
|
||||||
|
if extension == 'FRED' and not cpuid.get_bit(eax7_1, 17): instrSet.discard(XMLInstr)
|
||||||
|
if extension == 'LKGS' and not cpuid.get_bit(eax7_1, 18): instrSet.discard(XMLInstr)
|
||||||
if extension == 'WRMSRNS' and not cpuid.get_bit(eax7_1, 19): instrSet.discard(XMLInstr)
|
if extension == 'WRMSRNS' and not cpuid.get_bit(eax7_1, 19): instrSet.discard(XMLInstr)
|
||||||
if extension == 'AMX_FP16' and not cpuid.get_bit(eax7_1, 21): instrSet.discard(XMLInstr)
|
if extension == 'AMX_FP16' and not cpuid.get_bit(eax7_1, 21): instrSet.discard(XMLInstr)
|
||||||
if extension == 'HRESET' and not cpuid.get_bit(eax7_1, 22): instrSet.discard(XMLInstr)
|
if extension == 'HRESET' and not cpuid.get_bit(eax7_1, 22): instrSet.discard(XMLInstr)
|
||||||
if extension == 'AVX_IFMA' and not cpuid.get_bit(eax7_1, 23): instrSet.discard(XMLInstr)
|
if extension == 'AVX_IFMA' and not cpuid.get_bit(eax7_1, 23): instrSet.discard(XMLInstr)
|
||||||
if extension == 'MSRLIST' and not cpuid.get_bit(eax7_1, 27): instrSet.discard(XMLInstr)
|
if extension == 'MSRLIST' and not cpuid.get_bit(eax7_1, 27): instrSet.discard(XMLInstr)
|
||||||
|
if ('MOVRS' in isaSet) and ('AMX' not in isaSet) and not cpuid.get_bit(eax7_1, 31): instrSet.discard(XMLInstr)
|
||||||
|
if extension == 'PBNDKB' and not cpuid.get_bit(ebx7_1, 1): instrSet.discard(XMLInstr)
|
||||||
|
if extension == 'MSR_IMM' and not cpuid.get_bit(ecx7_1, 5): instrSet.discard(XMLInstr)
|
||||||
if extension == 'AVX_VNNI_INT8' and not cpuid.get_bit(edx7_1, 4): instrSet.discard(XMLInstr)
|
if extension == 'AVX_VNNI_INT8' and not cpuid.get_bit(edx7_1, 4): instrSet.discard(XMLInstr)
|
||||||
if extension == 'AVX_NE_CONVERT' and not cpuid.get_bit(edx7_1, 5): instrSet.discard(XMLInstr)
|
if extension == 'AVX_NE_CONVERT' and not cpuid.get_bit(edx7_1, 5): instrSet.discard(XMLInstr)
|
||||||
|
if isaSet == 'AMX_COMPLEX' and not cpuid.get_bit(edx7_1, 8): instrSet.discard(XMLInstr)
|
||||||
|
if extension == 'AVX_VNNI_INT16' and not cpuid.get_bit(edx7_1, 10): instrSet.discard(XMLInstr)
|
||||||
if extension == 'ICACHE_PREFETCH' and not cpuid.get_bit(edx7_1, 14): instrSet.discard(XMLInstr)
|
if extension == 'ICACHE_PREFETCH' and not cpuid.get_bit(edx7_1, 14): instrSet.discard(XMLInstr)
|
||||||
|
if extension == 'USER_MSR' and not cpuid.get_bit(edx7_1, 15): instrSet.discard(XMLInstr)
|
||||||
|
if (extension in ['APXEVEX', 'APXLEGACY']) and not cpuid.get_bit(edx7_1, 21): instrSet.discard(XMLInstr)
|
||||||
if extension == 'XSAVEOPT' and not cpuid.get_bit(eaxD_1, 0): instrSet.discard(XMLInstr)
|
if extension == 'XSAVEOPT' and not cpuid.get_bit(eaxD_1, 0): instrSet.discard(XMLInstr)
|
||||||
if extension == 'XSAVEC' and not cpuid.get_bit(eaxD_1, 1): instrSet.discard(XMLInstr)
|
if extension == 'XSAVEC' and not cpuid.get_bit(eaxD_1, 1): instrSet.discard(XMLInstr)
|
||||||
if extension == 'XSAVES' and not cpuid.get_bit(eaxD_1, 3): instrSet.discard(XMLInstr)
|
if extension == 'XSAVES' and not cpuid.get_bit(eaxD_1, 3): instrSet.discard(XMLInstr)
|
||||||
|
if isaSet.startswith('AMX_'):
|
||||||
|
if '_FP8' in isaSet and not cpuid.get_bit(eax1E_1, 4): instrSet.discard(XMLInstr)
|
||||||
|
if '_TRANSPOSE' in isaSet and not cpuid.get_bit(eax1E_1, 5): instrSet.discard(XMLInstr)
|
||||||
|
if '_TF32' in isaSet and not cpuid.get_bit(eax1E_1, 6): instrSet.discard(XMLInstr)
|
||||||
|
if '_MOVRS' in isaSet and not cpuid.get_bit(eax1E_1, 8): instrSet.discard(XMLInstr)
|
||||||
|
if '_TRANSPOSE_COMPLEX' in isaSet and not cpuid.get_bit(eax1E_1, 2): instrSet.discard(XMLInstr)
|
||||||
|
if '_TRANSPOSE_BF16' in isaSet and not cpuid.get_bit(eax1E_1, 1): instrSet.discard(XMLInstr)
|
||||||
|
if '_TRANSPOSE_FP16' in isaSet and not cpuid.get_bit(eax1E_1, 3): instrSet.discard(XMLInstr)
|
||||||
if extension == 'SSE4a' and not cpuid.get_bit(ecx8_1, 6): instrSet.discard(XMLInstr)
|
if extension == 'SSE4a' and not cpuid.get_bit(ecx8_1, 6): instrSet.discard(XMLInstr)
|
||||||
if extension == 'XOP' and not cpuid.get_bit(ecx8_1, 11): instrSet.discard(XMLInstr)
|
if extension == 'XOP' and not cpuid.get_bit(ecx8_1, 11): instrSet.discard(XMLInstr)
|
||||||
if extension == 'FMA4' and not cpuid.get_bit(ecx8_1, 16): instrSet.discard(XMLInstr)
|
if extension == 'FMA4' and not cpuid.get_bit(ecx8_1, 16): instrSet.discard(XMLInstr)
|
||||||
@@ -2972,6 +2997,35 @@ def filterInstructions(XMLRoot):
|
|||||||
if extension == 'CLZERO' and not cpuid.get_bit(ebx8_8, 0): instrSet.discard(XMLInstr)
|
if extension == 'CLZERO' and not cpuid.get_bit(ebx8_8, 0): instrSet.discard(XMLInstr)
|
||||||
#if extension == 'MCOMMIT' and not cpuid.get_bit(ebx8_8, 8): instrSet.discard(XMLInstr)
|
#if extension == 'MCOMMIT' and not cpuid.get_bit(ebx8_8, 8): instrSet.discard(XMLInstr)
|
||||||
|
|
||||||
|
# AVX10
|
||||||
|
avx10Enabled = cpuid.get_bit(edx7_1, 19)
|
||||||
|
avx10Version = -1
|
||||||
|
avx10VectorLengths = set()
|
||||||
|
if avx10Enabled:
|
||||||
|
_, ebx24, _, _ = cpu(0x24)
|
||||||
|
avx10Version = cpuid.get_bytes(ebx24)[0]
|
||||||
|
if cpuid.get_bit(ebx24, 16):
|
||||||
|
avx10VectorLengths.add(128)
|
||||||
|
if cpuid.get_bit(ebx24, 17):
|
||||||
|
avx10VectorLengths.add(256)
|
||||||
|
if cpuid.get_bit(ebx24, 18):
|
||||||
|
avx10VectorLengths.add(512)
|
||||||
|
|
||||||
|
if isaSet.endswith('_128') and 128 not in avx10VectorLengths:
|
||||||
|
instrSet.discard(XMLInstr)
|
||||||
|
if isaSet.endswith('_256') and 256 not in avx10VectorLengths:
|
||||||
|
instrSet.discard(XMLInstr)
|
||||||
|
if isaSet.endswith('_512') and 512 not in avx10VectorLengths:
|
||||||
|
instrSet.discard(XMLInstr)
|
||||||
|
|
||||||
|
if avx10Version < 2 and (isaSet in ['AVX512_COM_EF_SCALAR']
|
||||||
|
or any(isaSet.startswith(x) for x in ['AVX10_2_', 'AVX512_FP8_CONVERT_', 'AVX512_FP16_CONVERT_', 'AVX512_MEDIAX_', 'AVX512_MINMAX_',
|
||||||
|
'AVX512_MOVZXC_', 'AVX512_SAT_CVT_', 'AVX512_VNNI_FP16', 'AVX512_VNNI_INT8_', 'AVX512_VNNI_INT16', 'SM4_'])):
|
||||||
|
instrSet.discard(XMLInstr)
|
||||||
|
|
||||||
|
if isaSet == 'AMX_AVX512' and ((avx10Version < 2) or (512 not in avx10VectorLengths) or not cpuid.get_bit(eax1E_1, 7)):
|
||||||
|
instrSet.discard(XMLInstr)
|
||||||
|
|
||||||
# Virtualization instructions
|
# Virtualization instructions
|
||||||
if extension in ['SVM', 'VMFUNC', 'VTX']: instrSet.discard(XMLInstr)
|
if extension in ['SVM', 'VMFUNC', 'VTX']: instrSet.discard(XMLInstr)
|
||||||
|
|
||||||
@@ -2992,7 +3046,7 @@ def filterInstructions(XMLRoot):
|
|||||||
if XMLInstr.attrib['category'] in ['X87_ALU']: instrSet.discard(XMLInstr)
|
if XMLInstr.attrib['category'] in ['X87_ALU']: instrSet.discard(XMLInstr)
|
||||||
|
|
||||||
# System instructions
|
# System instructions
|
||||||
if extension in ['HRESET', 'INVPCID', 'MONITOR', 'MONITORX', 'PCONFIG', 'RDWRFSGS', 'SMAP', 'SNP']: instrSet.discard(XMLInstr)
|
if extension in ['HRESET', 'INVPCID', 'MONITOR', 'MONITORX', 'PCONFIG', 'RDWRFSGS', 'SMAP', 'SNP', 'UINTR']: instrSet.discard(XMLInstr)
|
||||||
if XMLInstr.attrib['category'] in ['INTERRUPT', 'SEGOP', 'SYSCALL', 'SYSRET']: instrSet.discard(XMLInstr)
|
if XMLInstr.attrib['category'] in ['INTERRUPT', 'SEGOP', 'SYSCALL', 'SYSRET']: instrSet.discard(XMLInstr)
|
||||||
if XMLInstr.attrib['iclass'] in ['CALL_FAR', 'HLT', 'INVD', 'IRET', 'IRETD', 'IRETQ', 'JMP_FAR', 'LTR', 'RET_FAR', 'UD2']: instrSet.discard(XMLInstr)
|
if XMLInstr.attrib['iclass'] in ['CALL_FAR', 'HLT', 'INVD', 'IRET', 'IRETD', 'IRETQ', 'JMP_FAR', 'LTR', 'RET_FAR', 'UD2']: instrSet.discard(XMLInstr)
|
||||||
if 'XRSTOR' in XMLInstr.attrib['iclass']: instrSet.discard(XMLInstr)
|
if 'XRSTOR' in XMLInstr.attrib['iclass']: instrSet.discard(XMLInstr)
|
||||||
@@ -3040,7 +3094,7 @@ def main():
|
|||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
versionString = e.output
|
versionString = e.output
|
||||||
global iacaVersion
|
global iacaVersion
|
||||||
iacaVersion = re.search('\d\.\d', versionString.decode()).group(0)
|
iacaVersion = re.search(r'\d\.\d', versionString.decode()).group(0)
|
||||||
global iacaCMDLine
|
global iacaCMDLine
|
||||||
iacaCMDLine = [args.iaca, '-reduceout', '-arch', arch]
|
iacaCMDLine = [args.iaca, '-reduceout', '-arch', arch]
|
||||||
if iacaVersion == '2.1':
|
if iacaVersion == '2.1':
|
||||||
@@ -3050,7 +3104,7 @@ def main():
|
|||||||
|
|
||||||
resetNanoBench()
|
resetNanoBench()
|
||||||
|
|
||||||
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4']:
|
if arch in ['ZEN+', 'ZEN2', 'ZEN3', 'ZEN4', 'ZEN5']:
|
||||||
configurePFCs(['UOPS','FpuPipeAssignment.Total0', 'FpuPipeAssignment.Total1', 'FpuPipeAssignment.Total2', 'FpuPipeAssignment.Total3',
|
configurePFCs(['UOPS','FpuPipeAssignment.Total0', 'FpuPipeAssignment.Total1', 'FpuPipeAssignment.Total2', 'FpuPipeAssignment.Total3',
|
||||||
'FpuPipeAssignment.Total4', 'FpuPipeAssignment.Total5', 'DIV_CYCLES'])
|
'FpuPipeAssignment.Total4', 'FpuPipeAssignment.Total5', 'DIV_CYCLES'])
|
||||||
else:
|
else:
|
||||||
@@ -3247,7 +3301,7 @@ def main():
|
|||||||
# combining FP with non-FP instr. can lead to wrong port counts
|
# combining FP with non-FP instr. can lead to wrong port counts
|
||||||
#disallowedBlockingInstrs |= set(instr for instr in instrNodeList if instr.attrib['category'] in ['LOGICAL_FP'] or
|
#disallowedBlockingInstrs |= set(instr for instr in instrNodeList if instr.attrib['category'] in ['LOGICAL_FP'] or
|
||||||
# any(not 'f' in o.attrib.get('xtype','f') for o in instr.findall('./operand')))
|
# any(not 'f' in o.attrib.get('xtype','f') for o in instr.findall('./operand')))
|
||||||
if arch in ['ZEN3', 'ZEN4']:
|
if arch in ['ZEN3', 'ZEN4', 'ZEN5']:
|
||||||
# we need one instruction with 1*FP45;
|
# we need one instruction with 1*FP45;
|
||||||
# their throughput is limited to 1 per cycle; thus, they are disallowed by the TP_noDepBreaking_noLoop check above
|
# their throughput is limited to 1 per cycle; thus, they are disallowed by the TP_noDepBreaking_noLoop check above
|
||||||
disallowedBlockingInstrs.remove(instrNodeDict['MOVD (R32, XMM)'])
|
disallowedBlockingInstrs.remove(instrNodeDict['MOVD (R32, XMM)'])
|
||||||
@@ -3300,7 +3354,7 @@ def main():
|
|||||||
# mov to mem has always two uops: store address and store data; there is no instruction that uses just one of them
|
# mov to mem has always two uops: store address and store data; there is no instruction that uses just one of them
|
||||||
movMemInstrNode = instrNodeDict['MOV (M64, R64)']
|
movMemInstrNode = instrNodeDict['MOV (M64, R64)']
|
||||||
|
|
||||||
if arch in ['ICL', 'TGL', 'RKL', 'ADL-P']:
|
if arch in ['ICL', 'TGL', 'RKL', 'ADL-P', 'EMR', 'MTL-P']:
|
||||||
storeDataPort = '49'
|
storeDataPort = '49'
|
||||||
else:
|
else:
|
||||||
storeDataPort = '4'
|
storeDataPort = '4'
|
||||||
|
@@ -129,7 +129,7 @@ def getMemAddr(memAddrAsm):
|
|||||||
base = index = None
|
base = index = None
|
||||||
displacement = 0
|
displacement = 0
|
||||||
scale = 1
|
scale = 1
|
||||||
for c in re.split('\+|-', re.search('\[(.*)\]', memAddrAsm).group(1)):
|
for c in re.split(r'\+|-', re.search(r'\[(.*)\]', memAddrAsm).group(1)):
|
||||||
if '0x' in c:
|
if '0x' in c:
|
||||||
displacement = int(c, 0)
|
displacement = int(c, 0)
|
||||||
if '-0x' in memAddrAsm:
|
if '-0x' in memAddrAsm:
|
||||||
|
Reference in New Issue
Block a user