mirror of
https://github.com/andreas-abel/nanoBench.git
synced 2025-07-21 07:01:04 +02:00
689 lines
27 KiB
Python
Executable File
689 lines
27 KiB
Python
Executable File
from itertools import count
|
|
from collections import namedtuple
|
|
|
|
import math
|
|
import random
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
|
|
sys.path.append('../..')
|
|
from kernelNanoBench import *
|
|
|
|
sys.path.append('../CPUID')
|
|
import cpuid
|
|
|
|
import logging
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
def getEventConfig(event):
    """Return the performance-counter configuration line for a cache event.

    event is expected to be one of 'L1_HIT', 'L1_MISS', 'L2_HIT', 'L2_MISS',
    'L3_HIT' or 'L3_MISS'. The result consists of the event code listed for
    the microarchitecture reported by getArch(), a space, and the event name.
    An empty string is returned if the event is unknown or if no code is
    listed for the current microarchitecture.
    """
    coreArchs = ('Core', 'EnhancedCore')
    nehalemArchs = ('NHM', 'WSM')
    ivbAndLater = ('IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'TGL', 'ADL-P')
    snbAndLater = ('SNB',) + ivbAndLater
    goldmontArchs = ('GLM', 'GLP')
    zenArchs = ('ZEN+',)

    # (event name, [(microarchitectures, event code), ...])
    eventTable = (
        ('L1_HIT', [(coreArchs, '40.0E'),  # L1D_CACHE_LD.MES
                    (nehalemArchs, 'CB.01'),
                    (snbAndLater + goldmontArchs, 'D1.01')]),
        ('L1_MISS', [(coreArchs, 'CB.01.CTR=0'),
                     (ivbAndLater + goldmontArchs, 'D1.08'),
                     (zenArchs, '064.70')]),
        ('L2_HIT', [(coreArchs, '29.7E'),  # L2_LD.THIS_CORE.ALL_INCL.MES
                    (nehalemArchs, 'CB.02'),
                    (snbAndLater + goldmontArchs, 'D1.02'),
                    (zenArchs, '064.70')]),
        ('L2_MISS', [(coreArchs, 'CB.04.CTR=0'),
                     (ivbAndLater + goldmontArchs, 'D1.10'),
                     (zenArchs, '064.08')]),
        ('L3_HIT', [(nehalemArchs, 'CB.04'),
                    (snbAndLater, 'D1.04')]),
        ('L3_MISS', [(nehalemArchs, 'CB.10'),
                     (snbAndLater, 'D1.20')]),
    )

    arch = getArch()
    for eventName, candidates in eventTable:
        if not event == eventName:
            continue
        for archs, eventCode in candidates:
            if arch in archs:
                return eventCode + ' ' + event
    return ''
|
|
|
|
def getDefaultCacheConfig():
    """Return the counter configuration for all cache hit/miss events.

    Collects the getEventConfig() lines for the hit and miss events of
    levels 1 to 3 (in that order), drops the events for which no
    configuration is available, and joins the rest with newlines.
    """
    configLines = []
    for cacheLevel in (1, 2, 3):
        for outcome in ('HIT', 'MISS'):
            line = getEventConfig('L' + str(cacheLevel) + '_' + outcome)
            if line:
                configLines.append(line)
    return '\n'.join(configLines)
|
|
|
|
|
|
def getDefaultCacheMSRConfig():
    """Return the MSR configuration for counting lookups in each CBox.

    A configuration is only produced if the vendor string contains 'Intel'
    and the CPUID cache information has an L3 entry whose 'complex' flag is
    set; otherwise an empty string is returned. A missing 'complex' key is
    treated like a cleared flag, which matches the check in getCacheInfo()
    (previously this function raised a KeyError in that case).

    The result has one line per CBox (as counted by getNCBoxUnits()) of the
    form
        msr_<ctrl>=0x20000000.msr_<evtsel>=0x408F34 msr_<ctr> CACHE_LOOKUP_CBO_<n>
    where the register addresses depend on the microarchitecture.
    """
    if 'Intel' not in getCPUVendor():
        return ''

    cacheInfo = getCpuidCacheInfo()
    if 'L3' not in cacheInfo:
        return ''

    l3Info = cacheInfo['L3']
    if 'complex' not in l3Info or not l3Info['complex']:
        return ''

    # Register addresses of the first CBox; 'dist' is the address distance
    # between the registers of consecutive CBoxes.
    if getArch() in ['ADL-P']:
        MSR_UNC_PERF_GLOBAL_CTRL = 0x2FF0
        MSR_UNC_CBO_0_PERFEVTSEL0 = 0x2000
        MSR_UNC_CBO_0_PERFCTR0 = 0x2002
        dist = 8
    elif getArch() in ['CNL', 'ICL', 'TGL']:
        MSR_UNC_PERF_GLOBAL_CTRL = 0xE01
        MSR_UNC_CBO_0_PERFEVTSEL0 = 0x700
        MSR_UNC_CBO_0_PERFCTR0 = 0x702
        dist = 8
    else:
        MSR_UNC_PERF_GLOBAL_CTRL = 0xE01
        MSR_UNC_CBO_0_PERFEVTSEL0 = 0x700
        MSR_UNC_CBO_0_PERFCTR0 = 0x706
        dist = 16

    return '\n'.join('msr_' + format(MSR_UNC_PERF_GLOBAL_CTRL, '#x') + '=0x20000000' +
                     '.msr_' + format(MSR_UNC_CBO_0_PERFEVTSEL0 + dist*cbo, '#x') + '=0x408F34' +
                     ' msr_' + format(MSR_UNC_CBO_0_PERFCTR0 + dist*cbo, '#x') +
                     ' CACHE_LOOKUP_CBO_' + str(cbo)
                     for cbo in range(0, getNCBoxUnits()))
|
|
|
|
|
|
def isClose(a, b, rel_tol=1e-09, abs_tol=0.0):
    """Return True if a and b are approximately equal.

    The values count as close if their absolute difference does not exceed
    the larger of abs_tol and rel_tol times the larger magnitude of a and b.
    """
    difference = abs(a - b)
    largestMagnitude = max(abs(a), abs(b))
    tolerance = max(rel_tol * largestMagnitude, abs_tol)
    return difference <= tolerance
|
|
|
|
|
|
class CacheInfo:
    """Geometry of one cache level.

    Attributes:
        level, assoc, lineSize, nSets: as passed to the constructor.
        waySize: lineSize * nSets.
        size: waySize * assoc, multiplied by nSlices if nSlices is given.
        nSlices, nCboxes: as passed to the constructor (None if not given).
    """

    def __init__(self, level, assoc, lineSize, nSets, nSlices=None, nCboxes=None):
        sliceFactor = 1 if nSlices is None else nSlices
        bytesPerWay = lineSize * nSets

        self.level = level
        self.assoc = assoc
        self.lineSize = lineSize
        self.nSets = nSets
        self.waySize = bytesPerWay
        self.size = bytesPerWay * assoc * sliceFactor
        self.nSlices = nSlices
        self.nCboxes = nCboxes

    def __str__(self):
        """Return a multi-line, human-readable summary of the cache.

        The CBox and slice lines are empty if the respective value is None.
        """
        perSlice = '' if self.nSlices is None else ' (per slice)'

        rows = ['L' + str(self.level) + ':']
        rows.append(' Size: ' + str(self.size//1024) + ' kB')
        rows.append(' Associativity: ' + str(self.assoc))
        rows.append(' Line Size: ' + str(self.lineSize) + ' B')
        rows.append(' Number of sets' + perSlice + ': ' + str(self.nSets))
        rows.append(' Way size' + perSlice + ': ' + str(self.waySize//1024) + ' kB')
        rows.append('' if self.nCboxes is None else ' Number of CBoxes: ' + str(self.nCboxes))
        rows.append('' if self.nSlices is None else ' Number of slices: ' + str(self.nSlices))
        return '\n'.join(rows)
|
|
|
|
|
|
def getArch():
    """Return the microarchitecture name reported by cpuid.micro_arch().

    The value is determined on the first call and stored as an attribute of
    this function; later calls return the stored value.
    """
    try:
        return getArch.arch
    except AttributeError:
        pass

    getArch.arch = cpuid.micro_arch(cpuid.CPUID())
    return getArch.arch
|
|
|
|
def getCPUVendor():
    """Return the CPU vendor reported by cpuid.cpu_vendor().

    The value is determined on the first call and stored as an attribute of
    this function; later calls return the stored value.
    """
    try:
        return getCPUVendor.vendor
    except AttributeError:
        pass

    getCPUVendor.vendor = cpuid.cpu_vendor(cpuid.CPUID())
    return getCPUVendor.vendor
|
|
|
|
def getCpuidCacheInfo():
    """Return the cache information obtained from cpuid.get_cache_info().

    The result is used as a mapping from cache names (e.g. 'L1D', 'L2',
    'L3') to per-cache mappings that contain at least a 'lineSize' entry.
    It is determined on the first successful call and stored as an
    attribute of this function.

    Raises:
        ValueError: if the caches do not all report the same line size. The
            information is validated before it is stored, so the error is
            raised on every call instead of only on the first one.
    """
    if not hasattr(getCpuidCacheInfo, 'cpuidCacheInfo'):
        cpu = cpuid.CPUID()
        log.debug(cpuid.get_basic_info(cpu))
        cacheInfo = cpuid.get_cache_info(cpu)

        # The rest of this module uses a single line size for all levels.
        if len(set(c['lineSize'] for c in cacheInfo.values())) != 1:
            raise ValueError('All line sizes must be the same')

        # Only store information that passed the check above.
        getCpuidCacheInfo.cpuidCacheInfo = cacheInfo
    return getCpuidCacheInfo.cpuidCacheInfo
|
|
|
|
|
|
def getCacheInfo(level):
    """Return the CacheInfo object for cache level 1, 2 or 3.

    The objects are created on first use and stored as attributes of this
    function. Levels 1 and 2 are built directly from the CPUID entries
    'L1D' and 'L2'. The same holds for level 3 unless its CPUID entry has
    the 'complex' flag set; in that case, the number of slices is derived
    from the size of the set returned by findMaximalNonEvictingL3SetInCBox()
    and the number of CBoxes.

    Raises:
        ValueError: if level is not 1, 2 or 3, or if level is 3 and the
            CPUID information has no 'L3' entry.
    """
    def simpleCacheInfo(cacheLevel, attrName, cpuidKey):
        # Return the stored object, or build it from the CPUID entry.
        try:
            return getattr(getCacheInfo, attrName)
        except AttributeError:
            pass
        raw = getCpuidCacheInfo()[cpuidKey]
        info = CacheInfo(cacheLevel, raw['assoc'], raw['lineSize'], raw['nSets'])
        setattr(getCacheInfo, attrName, info)
        return info

    if level == 1:
        return simpleCacheInfo(1, 'L1CacheInfo', 'L1D')
    if level == 2:
        return simpleCacheInfo(2, 'L2CacheInfo', 'L2')
    if not level == 3:
        raise ValueError('invalid level')

    if hasattr(getCacheInfo, 'L3CacheInfo'):
        return getCacheInfo.L3CacheInfo

    allCaches = getCpuidCacheInfo()
    if 'L3' not in allCaches:
        raise ValueError('invalid level')
    raw = allCaches['L3']

    if 'complex' in raw and raw['complex']:
        lineSize = raw['lineSize']
        assoc = raw['assoc']
        nSets = raw['nSets']

        # smallest power of two that is not smaller than lineSize*nSets//nCBoxUnits
        bytesPerCBox = lineSize*nSets//getNCBoxUnits()
        exponent = (bytesPerCBox - 1).bit_length()
        stride = 2**exponent

        ms = findMaximalNonEvictingL3SetInCBox(0, stride, assoc, 0)
        log.debug('Maximal non-evicting L3 set: ' + str(len(ms)) + ' ' + str(ms))

        nCboxes = getNCBoxUnits()
        slicesPerCBox = int(math.ceil(float(len(ms))/assoc))
        nSlices = nCboxes * slicesPerCBox

        info = CacheInfo(3, assoc, lineSize, nSets//nSlices, nSlices, nCboxes)
    else:
        info = CacheInfo(3, raw['assoc'], raw['lineSize'], raw['nSets'])

    getCacheInfo.L3CacheInfo = info
    return info
|
|
|
|
|
|
def getNCBoxUnits():
    """Return the number of CBox units.

    The number is read once from bits 3:0 of MSR 0x396 via the external
    'rdmsr' tool (after loading the 'msr' kernel module with 'modprobe')
    and is then stored as an attribute of this function. On CNL, ICL, TGL
    and ADL-P the field value is used as is; on all other
    microarchitectures, one is subtracted from it.

    If one of the external commands fails or cannot be started, the error
    is logged and the process exits with a non-zero status.
    """
    if not hasattr(getNCBoxUnits, 'nCBoxUnits'):
        try:
            subprocess.check_output(['modprobe', 'msr'])
            cbo_config = subprocess.check_output(['rdmsr', '0x396', '-f', '3:0'])
            # rdmsr prints hexadecimal digits unless told otherwise, so the
            # field has to be parsed with base 16 (values >= 10 are printed
            # as letters; for smaller values this equals a base-10 parse).
            fieldValue = int(cbo_config, 16)
            if getArch() in ['CNL', 'ICL', 'TGL', 'ADL-P']:
                getNCBoxUnits.nCBoxUnits = fieldValue
            else:
                getNCBoxUnits.nCBoxUnits = fieldValue - 1
            log.debug('Number of CBox Units: ' + str(getNCBoxUnits.nCBoxUnits))
        except subprocess.CalledProcessError as e:
            # e.output is a bytes object on Python 3 and may be None, so it
            # must not be concatenated directly with a str.
            output = e.output
            if isinstance(output, bytes) and not isinstance(output, str):
                output = output.decode(errors='replace')
            log.critical('Error: %s', output)
            sys.exit(1)
        except OSError:
            log.critical("rdmsr not found. Try 'sudo apt install msr-tools'")
            sys.exit(1)
    return getNCBoxUnits.nCBoxUnits
|
|
|
|
|
|
def getCBoxOfAddress(address):
    """Return the index of the CBox that the given address maps to.

    The CBox is determined with a nanoBench run that executes a clflush of
    the address while the CACHE_LOOKUP_CBO_<n> counters from
    getDefaultCacheMSRConfig() are active; the CBox with the highest count
    is selected (the lowest index in case of a tie). Results are stored in
    a dict attached to this function, so each address is measured once.
    """
    try:
        knownCBoxes = getCBoxOfAddress.cBoxMap
    except AttributeError:
        knownCBoxes = getCBoxOfAddress.cBoxMap = dict()

    if address in knownCBoxes:
        return knownCBoxes[address]

    setNanoBenchParameters(config='', msrConfig=getDefaultCacheMSRConfig(), nMeasurements=10, unrollCount=1,
                           loopCount=10, aggregateFunction='min', basicMode=True, noMem=True)

    # AddressList fields: addresses, exclude, flush, wbinvd
    experiment = getCodeForAddressLists([AddressList([address], False, True, False)])
    result = runNanoBench(code=experiment.code, oneTimeInit=experiment.oneTimeInit)

    lookupsPerCBox = [result['CACHE_LOOKUP_CBO_'+str(cBox)] for cBox in range(0, getNCBoxUnits())]
    busiestCBox = lookupsPerCBox.index(max(lookupsPerCBox))

    knownCBoxes[address] = busiestCBox
    return busiestCBox
|
|
|
|
|
|
def getNewAddressesInCBox(n, cBox, cacheSet, prevAddresses, notInCBox=False):
    """Return new addresses that map (or do not map) to the given CBox.

    Candidates start one L3 way size after the largest address in
    prevAddresses (or after cacheSet * L3 line size if prevAddresses is
    empty) and advance in steps of the L3 way size. A candidate is
    collected if getCBoxOfAddress() reports cBox for it; if notInCBox is
    set, it is collected if a different CBox is reported instead. The
    search stops as soon as at least n addresses have been collected.
    """
    l3Info = getCacheInfo(3)
    step = l3Info.waySize

    if prevAddresses:
        base = max(prevAddresses)
    else:
        base = cacheSet * l3Info.lineSize

    found = []
    for candidate in count(base + step, step):
        if notInCBox:
            wanted = getCBoxOfAddress(candidate) != cBox
        else:
            wanted = getCBoxOfAddress(candidate) == cBox

        if wanted:
            found.append(candidate)
        if len(found) >= n:
            return found
|
|
|
|
|
|
def getNewAddressesNotInCBox(n, cBox, cacheSet, prevAddresses):
    """Return new addresses that do NOT map to the given CBox.

    Same as getNewAddressesInCBox(), but with the CBox condition inverted.
    """
    invertCBoxCondition = True
    return getNewAddressesInCBox(n, cBox, cacheSet, prevAddresses, notInCBox=invertCBoxCondition)
|
|
|
|
|
|
# Generated initialization code, indexed by the tuple of addresses it was built for.
pointerChasingInits = {}


def getPointerChasingInit(addresses):
    """Return assembler code that links the given addresses into a chain.

    The code stores, at each address (relative to R14), the absolute address
    of its successor, and 0 at the last address. Successive addresses with
    the same stride are initialized by a loop instead of unrolled code.
    Results are memoized in pointerChasingInits.

    addresses must not contain duplicates and must not be empty.
    """
    key = tuple(addresses)
    cached = pointerChasingInits.get(key)
    if cached is not None:
        return cached

    # NOTE: the addresses are linked in the given order; shuffling everything
    # but the first address was tried in the past and is disabled.

    parts = ['lea RAX, [R14+' + str(addresses[0]) + ']; ',
             'mov RBX, RAX; ']

    lastIndex = len(addresses) - 1
    pos = 0
    while pos < lastIndex:
        stride = addresses[pos+1] - addresses[pos]
        parts.append('1: add RBX, ' + str(stride) + '; ')
        parts.append('mov [RAX], RBX; ')
        parts.append('mov RAX, RBX; ')

        # Find the end of the run of addresses that share this stride.
        runEnd = pos + 1
        while runEnd < lastIndex and (addresses[runEnd+1] - addresses[runEnd]) == stride:
            runEnd += 1

        if runEnd != pos + 1:
            # More than one step with this stride: repeat the code at label 1
            # until the last address of the run has been reached.
            parts.append('lea RCX, [R14+' + str(addresses[runEnd]) + ']; ')
            parts.append('cmp RAX, RCX; ')
            parts.append('jne 1b; ')

        pos = runEnd

    parts.append('mov qword ptr [R14 + ' + str(addresses[-1]) + '], 0; ')

    init = ''.join(parts)
    pointerChasingInits[key] = init
    return init
|
|
|
|
|
|
# Assembler code of an experiment: the measured code, the code run before
# each measurement (init), and the code that is run once (oneTimeInit).
ExperimentCode = namedtuple('ExperimentCode', 'code init oneTimeInit')


def getCodeForAddressLists(codeAddressLists, initAddressLists=None, wbinvd=False, afterEveryAcc=''):
    """Generate the assembler code that accesses the given address lists.

    Args:
        codeAddressLists: list of AddressList tuples for the main code.
        initAddressLists: list of AddressList tuples for the init code;
            None (the default) is treated like an empty list.
        wbinvd: if set, the init code starts with a wbinvd instruction.
        afterEveryAcc: code that is inserted after each clflush and after
            each access of a pointer-chasing loop.

    For each address list, the following is generated:
      * wbinvd lists: a wbinvd instruction; if the list is marked as
        excluded and the counters are running, the instruction is wrapped
        in PFC_STOP_ASM/PFC_START_ASM.
      * flush lists: one clflush per address.
      * a single address: one load.
      * multiple addresses: a pointer-chasing loop; the corresponding
        initialization from getPointerChasingInit() is added once to the
        one-time init code.
    In the main code, the counters are stopped before lists that are marked
    as excluded, and restarted before lists that are not. Lists without
    addresses are skipped.

    Returns:
        An ExperimentCode tuple.

    Raises:
        ValueError: if there is more than one distinct address list and
            some address occurs in all of them.

    If an address does not fit into the memory area reported by
    getR14Size(), a message is written to stderr and the process exits
    with status 1.
    """
    # A None default avoids sharing one mutable list object between calls.
    if initAddressLists is None:
        initAddressLists = []

    # NOTE(review): this only detects addresses that are common to ALL
    # distinct lists, not addresses shared by just some of them; confirm
    # that this matches the intent of the error message.
    distinctAddrLists = set(tuple(l.addresses) for l in initAddressLists+codeAddressLists)
    if len(distinctAddrLists) > 1 and set.intersection(*list(set(l) for l in distinctAddrLists)):
        raise ValueError('same address in different lists')

    code = []
    init = (['wbinvd; '] if wbinvd else [])
    oneTimeInit = []

    r14Size = getR14Size()
    alreadyAddedOneTimeInits = set()

    for addressLists, codeList, isInit in [(initAddressLists, init, True), (codeAddressLists, code, False)]:
        # Tracks whether the performance counters are currently running.
        pfcEnabled = True
        for addressList in addressLists:
            if addressList.wbinvd:
                if addressList.exclude and pfcEnabled:
                    codeList.append(PFC_STOP_ASM + '; ')
                codeList.append('wbinvd; ')
                if addressList.exclude and pfcEnabled:
                    codeList.append(PFC_START_ASM + '; ')
                continue

            addresses = addressList.addresses
            if len(addresses) < 1:
                continue

            if any(addr >= r14Size for addr in addresses):
                sys.stderr.write('Size of memory area too small. Try increasing it with set-R14-size.sh.\n')
                # sys.exit instead of the builtin exit, which is only
                # installed by the site module.
                sys.exit(1)

            if not isInit:
                if addressList.exclude and pfcEnabled:
                    codeList.append(PFC_STOP_ASM + '; ')
                    pfcEnabled = False
                elif not addressList.exclude and not pfcEnabled:
                    codeList.append(PFC_START_ASM + '; ')
                    pfcEnabled = True

            # use multiple lfence instructions to make sure that the block is actually in the cache and not still in a fill buffer
            codeList.append('lfence; ' * 25)

            if addressList.flush:
                for address in addresses:
                    codeList.append('clflush [R14 + ' + str(address) + ']; ' + afterEveryAcc)
            elif len(addresses) == 1:
                codeList.append('mov RCX, [R14 + ' + str(addresses[0]) + ']; ')
            else:
                if tuple(addresses) not in alreadyAddedOneTimeInits:
                    oneTimeInit.append(getPointerChasingInit(addresses))
                    alreadyAddedOneTimeInits.add(tuple(addresses))

                # Follow the chain until the terminating 0 is loaded into RCX.
                codeList.append('lea RCX, [R14+' + str(addresses[0]) + ']; 1: mov RCX, [RCX]; ' + afterEveryAcc + 'jrcxz 2f; jmp 1b; 2: ')

        # Make sure that the counters are running again at the end of the main code.
        if not isInit and not pfcEnabled:
            codeList.append(PFC_START_ASM + '; ')

    return ExperimentCode(''.join(code), ''.join(init), ''.join(oneTimeInit))
|
|
|
|
|
|
def getClearHLAddresses(level, cacheSetList, cBox, doNotUseOtherCBoxes, nClearAddresses=None):
    """Return addresses for evicting blocks from the levels above 'level'.

    Args:
        level: the cache level under test (1, 2 or 3).
        cacheSetList: the sets of that level that are used by the experiment.
        cBox: the CBox used by the experiment (only relevant for a sliced L3).
        doNotUseOtherCBoxes: if set, a sliced L3 is treated like an unsliced
            cache, i.e., no addresses from other CBoxes are used.
        nClearAddresses: number of addresses per set; defaults to twice the
            sum of the associativities of the levels above 'level'.

    For level 1, the result is empty. For level 2, and for level 3 if the
    L3 is unsliced or doNotUseOtherCBoxes is set, the addresses map to the
    same sets of the next higher level as the sets in cacheSetList, but to
    sets of 'level' that are not in cacheSetList. Otherwise (sliced L3), the
    addresses are taken from getNewAddressesNotInCBox() and remembered per
    CBox and set in a dict attached to this function.

    Raises:
        ValueError: if 'level' does not have more sets than the higher
            levels, if no suitable sets are left, or if level is not 1, 2
            or 3 (previously, None was returned silently in the last case).
    """
    lineSize = getCacheInfo(1).lineSize

    if nClearAddresses is None:
        nClearAddresses = 2 * sum(getCacheInfo(hLevel).assoc for hLevel in range(1, level))

    if level == 1:
        return []

    if (level == 2) or (level == 3 and (getCacheInfo(3).nSlices is None or doNotUseOtherCBoxes)):
        nSets = getCacheInfo(level).nSets
        if not all(nSets > getCacheInfo(lLevel).nSets for lLevel in range(1, level)):
            raise ValueError('L' + str(level) + ' way size must be greater than lower level way sizes')

        nHLSets = getCacheInfo(level-1).nSets
        HLSets = set(cs % nHLSets for cs in cacheSetList)
        # Sets give constant-time membership tests in the loops below.
        usedSets = set(cacheSetList)
        addrForClearingHL = []

        for HLSet in HLSets:
            possibleSets = set(cs for cs in range(HLSet, nSets, nHLSets) if cs not in usedSets)
            if not possibleSets:
                raise ValueError("not enough cache sets available for clearing higher levels")

            addrForClearingHLSet = []

            # Walk over all set indices that map to HLSet in the higher
            # level, wrapping around the sets of 'level' as often as needed.
            for setIndex in count(HLSet, nHLSets):
                if setIndex % nSets not in possibleSets:
                    continue
                addrForClearingHLSet.append(setIndex*lineSize)
                if len(addrForClearingHLSet) >= nClearAddresses:
                    break

            addrForClearingHL += addrForClearingHLSet

        return addrForClearingHL

    if level == 3:
        if not hasattr(getClearHLAddresses, 'clearL2Map'):
            getClearHLAddresses.clearL2Map = dict()
        addressesOfCBox = getClearHLAddresses.clearL2Map.setdefault(cBox, dict())

        clearAddresses = []
        for L3Set in cacheSetList:
            # Recompute if no (or not enough) addresses are known for this set.
            if L3Set not in addressesOfCBox or len(addressesOfCBox[L3Set]) < nClearAddresses:
                addressesOfCBox[L3Set] = getNewAddressesNotInCBox(nClearAddresses, cBox, L3Set, [])
            clearAddresses += addressesOfCBox[L3Set][:nClearAddresses]

        return clearAddresses

    raise ValueError('invalid level')
|
|
|
|
# cBox -> slice -> L3 set -> way ID -> address
L3SetToWayIDMap = {}


def getAddresses(level, wayID, cacheSetList, cBox=1, cSlice=0):
    """Return one address per set in cacheSetList for the given way ID.

    For levels up to 2, and for an L3 without slices, the address for set s
    is wayID * way size + s * line size. For a sliced L3, the addresses are
    looked up in (and added to) L3SetToWayIDMap: if the number of slices
    differs from the number of CBoxes, the map of a set is initialized with
    the result of findMinimalL3EvictionSet(); a way ID that is not yet in
    the map gets a new address from getNewAddressesInCBox() (number of
    slices equals the number of CBoxes) or from findCongruentL3Addresses().

    Raises:
        ValueError: if level is not a valid cache level.
    """
    lineSize = getCacheInfo(1).lineSize

    if level <= 2 or (level == 3 and getCacheInfo(3).nSlices is None):
        wayOffset = wayID * getCacheInfo(level).waySize
        return [wayOffset + s*lineSize for s in cacheSetList]

    if not level == 3:
        raise ValueError('invalid level')

    wayMapsOfSlice = L3SetToWayIDMap.setdefault(cBox, {}).setdefault(cSlice, {})

    result = []
    for L3Set in cacheSetList:
        if L3Set in wayMapsOfSlice:
            wayMap = wayMapsOfSlice[L3Set]
        else:
            wayMap = wayMapsOfSlice[L3Set] = {}
            if getCacheInfo(3).nSlices != getNCBoxUnits():
                for i, addr in enumerate(findMinimalL3EvictionSet(L3Set, cBox, cSlice)):
                    wayMap[i] = addr

        if wayID not in wayMap:
            knownAddresses = list(wayMap.values())
            if getCacheInfo(3).nSlices == getNCBoxUnits():
                newAddresses = getNewAddressesInCBox(1, cBox, L3Set, knownAddresses)
            else:
                newAddresses = findCongruentL3Addresses(1, L3Set, cBox, knownAddresses)
            wayMap[wayID] = next(iter(newAddresses))

        result.append(wayMap[wayID])

    return result
|
|
|
|
|
|
def getBlockName(blockStr):
    """Return the part of blockStr before the first '_', without '?' and '!' characters."""
    beforeUnderscore = blockStr.split('_')[0]
    return ''.join(ch for ch in beforeUnderscore if ch not in ('?', '!'))
|
|
|
|
|
|
def getBlockSet(blockStr):
    """Return the cache set that a block element is explicitly assigned to.

    The set is the leading number of the part after the last '_'; '?' and
    '!' markers directly before the number are skipped, and anything after
    the number is ignored. Returns None if blockStr contains no '_'.

    Raises:
        ValueError: if the part after the last '_' has no leading number
            (previously this surfaced as an AttributeError on None).
    """
    if '_' not in blockStr:
        return None

    suffix = blockStr.split('_')[-1]
    # Raw string: '\d' is an invalid escape sequence in a normal string literal.
    match = re.match(r'\d+', suffix.lstrip('?!'))
    if match is None:
        raise ValueError('invalid cache set in block: ' + blockStr)
    return int(match.group())
|
|
|
|
|
|
def parseCacheSetsStr(level, clearHL, cacheSetsStr, doNotUseOtherCBoxes=False):
    """Return the list of cache sets described by cacheSetsStr.

    cacheSetsStr is a comma-separated list of set numbers and inclusive
    ranges of the form 'first-last'. If it is None, all sets of the given
    level are returned; if the higher levels are to be cleared (clearHL)
    and the level is 2, or 3 with an unsliced L3 or with
    doNotUseOtherCBoxes set, the first sets (as many as the next higher
    level has) are left out.
    """
    if cacheSetsStr is None:
        nSets = getCacheInfo(level).nSets
        skipFirstSets = (level > 1 and clearHL and
                         (not level == 3 or getCacheInfo(3).nSlices is None or doNotUseOtherCBoxes))
        if skipFirstSets:
            firstSet = getCacheInfo(level-1).nSets
        else:
            firstSet = 0
        return list(range(firstSet, nSets))

    selectedSets = []
    for part in cacheSetsStr.split(','):
        if '-' not in part:
            selectedSets.append(int(part))
            continue
        first, last = part.split('-')[:2]
        selectedSets.extend(range(int(first), int(last)+1))
    return selectedSets
|
|
|
|
|
|
def findCacheSetForCode(cacheSetList, level):
    """Return a cache set for the code that lies in the largest unused gap.

    The sets in cacheSetList are sorted, and the distance from the last set
    back to the first one (wrapping around the number of sets of the given
    level) is considered as well. The result is the set directly after the
    set that precedes the first largest gap, modulo the number of sets; it
    is 0 if no two neighboring sets are more than 1 apart.
    """
    nSets = getCacheInfo(level).nSets

    ordered = sorted(cacheSetList)
    wrapped = ordered + [ordered[0] + nSets]

    widestGap = 1
    chosenSet = 0
    for lower, upper in zip(wrapped, wrapped[1:]):
        gap = upper - lower
        if gap > widestGap:
            widestGap = gap
            chosenSet = (lower + 1) % nSets

    return chosenSet
|
|
|
|
|
|
def getAllUsedCacheSets(cacheSetList, seq, initSeq=''):
    """Return the sorted list of all cache sets used by an experiment.

    These are the sets in cacheSetList plus the sets that elements of
    initSeq or seq are explicitly assigned to (see getBlockSet()).

    Raises:
        ValueError: if an explicitly assigned set is also in cacheSetList.
    """
    overrideSets = set()
    for element in initSeq.split() + seq.split():
        blockSet = getBlockSet(element)
        if blockSet is not None:
            overrideSets.add(blockSet)

    for overrideSet in overrideSets:
        if overrideSet in cacheSetList:
            raise ValueError('overridden cache sets must not also be in cacheSetList')

    return sorted(set(cacheSetList + list(overrideSets)))
|
|
|
|
# addresses: list of addresses (relative to R14)
# exclude:   the accesses are not to be measured
# flush:     the addresses are flushed instead of loaded
# wbinvd:    the entry stands for a wbinvd instruction
AddressList = namedtuple('AddressList', 'addresses exclude flush wbinvd')


def getCodeForCacheExperiment(level, seq, initSeq, cacheSetList, cBox, cSlice, clearHL, doNotUseOtherCBoxes, wbinvd, nClearAddresses=None):
    """Generate the code for an access sequence in the given cache level.

    initSeq and seq are whitespace-separated lists of block elements. An
    element named '<wbinvd>' stands for a wbinvd instruction. For all other
    elements, the name (see getBlockName()) selects the way ID, in order of
    first occurrence; a '_<set>' suffix restricts the element to that set
    (otherwise cacheSetList is used); '!' marks the element as a flush; and
    elements without '?' are excluded from the measurement. If clearHL is
    set and level > 1, each element that is not a flush is preceded by
    (excluded) accesses to the addresses from getClearHLAddresses().

    Returns the result of getCodeForAddressLists().
    """
    allUsedSets = getAllUsedCacheSets(cacheSetList, seq, initSeq)

    if clearHL and level > 1:
        clearAddresses = getClearHLAddresses(level, allUsedSets, cBox, doNotUseOtherCBoxes, nClearAddresses)
        clearHLAddrList = AddressList(clearAddresses, True, False, False)
    else:
        clearHLAddrList = None

    initAddressLists = []
    seqAddressLists = []
    wayIDOfName = {}

    for sequence, targetLists in ((initSeq, initAddressLists), (seq, seqAddressLists)):
        for element in sequence.split():
            name = getBlockName(element)
            if name == '<wbinvd>':
                targetLists.append(AddressList([], True, False, True))
                continue

            overrideSet = getBlockSet(element)

            # Names get consecutive way IDs in order of their first occurrence.
            wayID = wayIDOfName.setdefault(name, len(wayIDOfName))
            isMeasured = '?' in element
            isFlush = '!' in element

            if overrideSet is not None:
                sets = [overrideSet]
            else:
                sets = cacheSetList
            addresses = getAddresses(level, wayID, sets, cBox=cBox, cSlice=cSlice)

            if clearHLAddrList is not None and not isFlush:
                targetLists.append(clearHLAddrList)
            targetLists.append(AddressList(addresses, not isMeasured, isFlush, False))

    log.debug('\nInitAddresses: ' + str(initAddressLists))
    log.debug('\nSeqAddresses: ' + str(seqAddressLists))

    return getCodeForAddressLists(seqAddressLists, initAddressLists, wbinvd)
|
|
|
|
|
|
def runCacheExperimentCode(code, initCode, oneTimeInitCode, loop, warmUpCount, codeOffset, nMeasurements, agg):
    """Run the given experiment code with the default cache counter configuration.

    Resets nanoBench, configures it with the results of
    getDefaultCacheConfig() and getDefaultCacheMSRConfig() and the given
    parameters, and returns the result of runNanoBench().
    """
    resetNanoBench()

    parameters = dict(config=getDefaultCacheConfig(),
                      msrConfig=getDefaultCacheMSRConfig(),
                      fixedCounters=True,
                      nMeasurements=nMeasurements,
                      unrollCount=1,
                      loopCount=loop,
                      warmUpCount=warmUpCount,
                      aggregateFunction=agg,
                      basicMode=True,
                      noMem=True,
                      codeOffset=codeOffset,
                      verbose=None)
    setNanoBenchParameters(**parameters)

    return runNanoBench(code=code, init=initCode, oneTimeInit=oneTimeInitCode)
|
|
|
|
|
|
def runCacheExperiment(level, seq, initSeq='', cacheSets=None, cBox=1, cSlice=0, clearHL=True, doNotUseOtherCBoxes=False, loop=1, wbinvd=False,
                       nMeasurements=10, warmUpCount=1, codeSet=None, agg='avg', nClearAddresses=None):
    """Run an access sequence in the given cache level and return the nanoBench results.

    cacheSets=None means that the accesses are performed in all sets; in
    this case, the first nL1Sets many sets of L2 will be reserved for
    clearing L1.
    cSlice refers to the nth slice within a given cBox; the assignment of
    numbers to slices is arbitrary.
    doNotUseOtherCBoxes determines whether accesses to clear higher levels
    will go to other CBoxes.
    If wbinvd is set, wbinvd will be called before initSeq.
    codeSet is the cache set at which the code is placed; if it is None, a
    set is chosen with findCacheSetForCode().
    """
    cacheSetList = parseCacheSetsStr(level, clearHL, cacheSets, doNotUseOtherCBoxes)
    experiment = getCodeForCacheExperiment(level, seq, initSeq=initSeq, cacheSetList=cacheSetList, cBox=cBox, cSlice=cSlice,
                                           clearHL=clearHL, doNotUseOtherCBoxes=doNotUseOtherCBoxes, wbinvd=wbinvd,
                                           nClearAddresses=nClearAddresses)

    log.debug('\nOneTimeInit: ' + experiment.oneTimeInit)
    log.debug('\nInit: ' + experiment.init)
    log.debug('\nCode: ' + experiment.code)

    lineSize = getCacheInfo(1).lineSize
    allUsedSets = getAllUsedCacheSets(cacheSetList, seq, initSeq)
    if codeSet is not None:
        setForCode = codeSet
    else:
        setForCode = findCacheSetForCode(allUsedSets, level)
    codeOffset = lineSize * setForCode

    return runCacheExperimentCode(experiment.code, experiment.init, experiment.oneTimeInit, loop, warmUpCount, codeOffset,
                                  nMeasurements, agg)
|
|
|
|
|
|
def printNB(nb_result):
    """Print each entry of a nanoBench result as '<name>: <value>'."""
    for name, value in nb_result.items():
        print(name + ': ' + str(value))
|
|
|
|
|
|
def hasL3Conflicts(addresses, clearHLAddrList, codeOffset):
    """Return True if not all of the given addresses hit in the L3.

    The init code (which starts with wbinvd) accesses the addresses; the
    measured code first accesses clearHLAddrList and then the addresses
    again. The result is True if the median number of L3 hits is lower than
    the number of addresses minus 0.9.
    """
    # AddressList fields: addresses, exclude, flush, wbinvd
    accessList = AddressList(addresses, False, False, False)
    experiment = getCodeForAddressLists([clearHLAddrList, accessList], initAddressLists=[accessList], wbinvd=True)

    setNanoBenchParameters(config=getEventConfig('L3_HIT'), msrConfig='', nMeasurements=5, unrollCount=1, loopCount=100,
                           aggregateFunction='med', basicMode=True, noMem=True, codeOffset=codeOffset)
    result = runNanoBench(code=experiment.code, init=experiment.init, oneTimeInit=experiment.oneTimeInit)

    hitThreshold = len(addresses) - .9
    return result['L3_HIT'] < hitThreshold
|
|
|
|
|
|
def findMinimalL3EvictionSet(cacheSet, cBox, cSlice):
    """Return a small list of conflicting addresses for a slice of an L3 set.

    Addresses that map to cacheSet in the given CBox are collected until
    more than the L3 associativity many addresses are in the list and
    hasL3Conflicts() reports a conflict; afterwards, addresses that are not
    needed for the conflict are removed until the list has associativity+1
    entries. Addresses that belong to, or conflict with, the eviction sets
    of the slices with a lower index (which are computed first) are
    skipped. Results are stored in a dict attached to this function.
    """
    try:
        allEvSets = findMinimalL3EvictionSet.evSetForCacheSet
    except AttributeError:
        allEvSets = findMinimalL3EvictionSet.evSetForCacheSet = dict()
    evSetsOfSlice = allEvSets.setdefault(cBox, dict()).setdefault(cSlice, dict())

    if cacheSet in evSetsOfSlice:
        return evSetsOfSlice[cacheSet]

    evSetsForOtherSlices = [findMinimalL3EvictionSet(cacheSet, cBox, s) for s in range(0, cSlice)]

    lineSize = getCacheInfo(1).lineSize
    L3Assoc = getCacheInfo(3).assoc
    L3WaySize = getCacheInfo(3).waySize

    clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox, False), True, False, False)
    codeOffset = lineSize * (cacheSet+10)

    evSet = []
    for candidate in count(cacheSet * lineSize, L3WaySize):
        if any(candidate in otherEvSet for otherEvSet in evSetsForOtherSlices):
            continue
        if not getCBoxOfAddress(candidate) == cBox:
            continue
        # Skip candidates that conflict with the eviction set of another slice.
        if any(hasL3Conflicts(otherEvSet[:-1]+[candidate], clearHLAddrList, codeOffset) for otherEvSet in evSetsForOtherSlices):
            continue

        evSet.append(candidate)
        if len(evSet) > L3Assoc and hasL3Conflicts(evSet, clearHLAddrList, codeOffset):
            break

    # Try to drop addresses, starting with the one that was added last.
    for i in range(len(evSet)-1, -1, -1):
        if len(evSet) <= L3Assoc+1:
            break
        withoutI = evSet[:i] + evSet[(i+1):]
        if hasL3Conflicts(withoutI, clearHLAddrList, codeOffset):
            evSet = withoutI

    evSetsOfSlice[cacheSet] = evSet
    return evSet
|
|
|
|
|
|
def findCongruentL3Addresses(n, cacheSet, cBox, L3EvictionSet):
    """Return new addresses that conflict with the given L3 eviction set.

    Candidates start one L3 way size after the largest address in
    L3EvictionSet and advance in steps of the L3 way size. A candidate that
    maps to the given CBox is collected if hasL3Conflicts() reports a
    conflict for the first 'associativity' many addresses of L3EvictionSet
    together with the candidate. The search stops as soon as at least n
    addresses have been collected.
    """
    clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox, False), True, False, False)
    codeOffset = getCacheInfo(1).lineSize * (cacheSet+10)
    step = getCacheInfo(3).waySize

    found = []
    for candidate in count(max(L3EvictionSet)+step, step):
        if not getCBoxOfAddress(candidate) == cBox:
            continue

        probeSet = L3EvictionSet[:getCacheInfo(3).assoc] + [candidate]
        if hasL3Conflicts(probeSet, clearHLAddrList, codeOffset):
            found.append(candidate)

        if len(found) >= n:
            break

    return found
|
|
|
|
|
|
def findMaximalNonEvictingL3SetInCBox(start, stride, L3Assoc, cBox):
    """Return a large list of addresses in a CBox that do not evict each other.

    All addresses are of the form start + k*stride. Addresses that do not
    map to cBox are used for clearing the higher cache levels (twice the
    sum of the L1 and L2 associativities many). The result starts with the
    first L3Assoc addresses that map to cBox; further addresses from cBox
    are added as long as hasL3Conflicts() reports no conflict, until
    L3Assoc candidates in a row were rejected.
    """
    nClearAddresses = 2*(getCacheInfo(1).assoc+getCacheInfo(2).assoc)
    codeOffset = start+getCacheInfo(1).lineSize

    clearHLAddresses = []
    probe = start
    while len(clearHLAddresses) < nClearAddresses:
        if getCBoxOfAddress(probe) != cBox:
            clearHLAddresses.append(probe)
        probe += stride
    clearHLAddrList = AddressList(clearHLAddresses, True, False, False)

    addresses = []
    probe = start
    while len(addresses) < L3Assoc:
        if getCBoxOfAddress(probe) == cBox:
            addresses.append(probe)
        probe += stride

    rejectedInARow = 0
    while rejectedInARow < L3Assoc:
        probe += stride

        if not getCBoxOfAddress(probe) == cBox:
            continue

        extended = addresses + [probe]
        if hasL3Conflicts(extended, clearHLAddrList, codeOffset):
            rejectedInARow += 1
        else:
            addresses = extended
            rejectedInARow = 0

    return addresses
|
|
|
|
|
|
def getUnusedBlockNames(n, usedBlockNames, prefix=''):
    """Return n block names of the form prefix + number that are not yet used.

    The numbers are tried in increasing order, starting with 0; a name is
    skipped if 'name in usedBlockNames' holds (for a string, this is a
    substring test).
    """
    candidates = (prefix + str(number) for number in count())

    freshNames = []
    while len(freshNames) < n:
        candidate = next(candidates)
        if candidate not in usedBlockNames:
            freshNames.append(candidate)
    return freshNames
|
|
|
|
|
|
def getAgesOfBlocks(blocks, level, seq, initSeq='', maxAge=None, cacheSets=None, cBox=1, cSlice=0, clearHL=True, wbinvd=False, returnNbResults=False, nMeasurements=10, agg='avg'):
    """Return a dict with the age of each block after the access sequence.

    The age of a block is the number of fresh blocks that need to be
    accessed after seq until the block is evicted. For each block, all
    numbers of fresh blocks from 0 to maxAge (default: twice the
    associativity of the level) are tried; the age is the smallest number
    for which the final access to the block results in (almost) no hits, or
    in a miss in (almost) every set if no hit counter is available. The age
    is -1 if the block was not evicted in any of the experiments.

    If returnNbResults is True, the function additionally returns all
    measurement results (as the second component of a tuple).

    Raises:
        ValueError: if a result contains neither the hit nor the miss event
            of the level.
    """
    ages = dict()
    nbResults = dict() if returnNbResults else None

    if maxAge is None:
        maxAge = 2*getCacheInfo(level).assoc

    nSets = len(parseCacheSetsStr(level, clearHL, cacheSets))

    hitEvent = 'L' + str(level) + '_HIT'
    missEvent = 'L' + str(level) + '_MISS'

    # Only the final access to the block under test is measured.
    seqPrefix = seq.replace('?', '') + ' '
    usedNames = seq+initSeq

    for block in blocks:
        if returnNbResults:
            nbResults[block] = []

        for nNewBlocks in range(0, maxAge+1):
            newBlocks = getUnusedBlockNames(nNewBlocks, usedNames, 'N')
            curSeq = seqPrefix + ' '.join(newBlocks) + ' ' + block + '?'

            nb = runCacheExperiment(level, curSeq, initSeq=initSeq, cacheSets=cacheSets, cBox=cBox, cSlice=cSlice,
                                    clearHL=clearHL, loop=0, wbinvd=wbinvd, nMeasurements=nMeasurements, agg=agg)
            if returnNbResults:
                nbResults[block].append(nb)

            if hitEvent in nb:
                evicted = isClose(nb[hitEvent], 0.0, abs_tol=0.1)
            elif missEvent in nb:
                evicted = nb[missEvent] > nSets - 0.1
            else:
                raise ValueError('no cache results available')

            # Remember only the smallest number of fresh blocks; the remaining
            # experiments are still run.
            if evicted and block not in ages:
                ages[block] = nNewBlocks

        ages.setdefault(block, -1)

    if returnNbResults:
        return (ages, nbResults)
    return ages
|