Files
nanoBench/tools/CacheAnalyzer/cacheLib.py
2022-01-13 01:14:41 +01:00

689 lines
27 KiB
Python
Executable File

from itertools import count
from collections import namedtuple
import math
import random
import re
import subprocess
import sys
sys.path.append('../..')
from kernelNanoBench import *
sys.path.append('../CPUID')
import cpuid
import logging
log = logging.getLogger(__name__)
def getEventConfig(event):
    """Return the counter configuration line for a cache hit/miss event.

    The result has the form '<event code> <event>' for the microarchitecture
    reported by getArch(). If no code is listed for the given event on that
    microarchitecture, the empty string is returned.
    """
    arch = getArch()

    core2 = ('Core', 'EnhancedCore')
    nehalem = ('NHM', 'WSM')
    sandy = ('SNB',)
    ivyAndLater = ('IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'TGL', 'ADL-P')
    goldmont = ('GLM', 'GLP')
    zenPlus = ('ZEN+',)

    # (event name, microarchitectures, event code); the first matching row wins
    rules = (
        ('L1_HIT', core2, '40.0E'),  # L1D_CACHE_LD.MES
        ('L1_HIT', nehalem, 'CB.01'),
        ('L1_HIT', sandy + ivyAndLater + goldmont, 'D1.01'),
        ('L1_MISS', core2, 'CB.01.CTR=0'),
        ('L1_MISS', ivyAndLater + goldmont, 'D1.08'),
        ('L1_MISS', zenPlus, '064.70'),
        ('L2_HIT', core2, '29.7E'),  # L2_LD.THIS_CORE.ALL_INCL.MES
        ('L2_HIT', nehalem, 'CB.02'),
        ('L2_HIT', sandy + ivyAndLater + goldmont, 'D1.02'),
        ('L2_HIT', zenPlus, '064.70'),
        ('L2_MISS', core2, 'CB.04.CTR=0'),
        ('L2_MISS', ivyAndLater + goldmont, 'D1.10'),
        ('L2_MISS', zenPlus, '064.08'),
        ('L3_HIT', nehalem, 'CB.04'),
        ('L3_HIT', sandy + ivyAndLater, 'D1.04'),
        ('L3_MISS', nehalem, 'CB.10'),
        ('L3_MISS', sandy + ivyAndLater, 'D1.20'),
    )

    for ruleEvent, ruleArchs, eventCode in rules:
        if event == ruleEvent and arch in ruleArchs:
            return eventCode + ' ' + event
    return ''
def getDefaultCacheConfig():
    """Return the newline-separated event configs for L1-L3 hits and misses.

    Events for which getEventConfig() returns an empty string are omitted.
    """
    configLines = []
    for cacheLevel in range(1, 4):
        for outcome in ('HIT', 'MISS'):
            eventConfig = getEventConfig('L' + str(cacheLevel) + '_' + outcome)
            if eventConfig:
                configLines.append(eventConfig)
    return '\n'.join(configLines)
def getDefaultCacheMSRConfig():
    """Return the MSR configuration with one CACHE_LOOKUP_CBO_<n> line per CBox.

    An empty string is returned unless the CPU vendor string contains 'Intel'
    and the CPUID cache info has an 'L3' entry whose 'complex' flag is set.
    """
    if 'Intel' not in getCPUVendor():
        return ''
    if 'L3' not in getCpuidCacheInfo():
        return ''
    if not getCpuidCacheInfo()['L3']['complex']:
        return ''

    # MSR addresses (global control, first event select, first counter) and
    # the address distance between consecutive CBoxes
    if getArch() in ['ADL-P']:
        globalCtrl, evtSel0, ctr0, dist = 0x2FF0, 0x2000, 0x2002, 8
    elif getArch() in ['CNL', 'ICL', 'TGL']:
        globalCtrl, evtSel0, ctr0, dist = 0xE01, 0x700, 0x702, 8
    else:
        globalCtrl, evtSel0, ctr0, dist = 0xE01, 0x700, 0x706, 16

    lineTemplate = 'msr_{:#x}=0x20000000.msr_{:#x}=0x408F34 msr_{:#x} CACHE_LOOKUP_CBO_{}'
    configLines = []
    for cbo in range(0, getNCBoxUnits()):
        configLines.append(lineTemplate.format(globalCtrl, evtSel0 + dist*cbo, ctr0 + dist*cbo, cbo))
    return '\n'.join(configLines)
def isClose(a, b, rel_tol=1e-09, abs_tol=0.0):
    """Return True if a and b differ by at most the larger of the two tolerances.

    rel_tol is scaled by the larger magnitude of a and b; abs_tol is an
    absolute bound that is useful for comparisons against zero.
    """
    largestMagnitude = max(abs(a), abs(b))
    allowedDifference = max(rel_tol * largestMagnitude, abs_tol)
    return abs(a - b) <= allowedDifference
class CacheInfo:
    """Geometry of one cache level.

    waySize is lineSize * nSets; size is waySize * assoc, multiplied by
    nSlices when a slice count is given.
    """

    def __init__(self, level, assoc, lineSize, nSets, nSlices=None, nCboxes=None):
        self.level = level
        self.assoc = assoc
        self.lineSize = lineSize
        self.nSets = nSets
        self.nSlices = nSlices
        self.nCboxes = nCboxes
        self.waySize = lineSize * nSets
        sliceFactor = 1 if nSlices is None else nSlices
        self.size = self.waySize * assoc * sliceFactor

    def __str__(self):
        # set count and way size are per slice when the cache is sliced
        perSlice = '' if self.nSlices is None else ' (per slice)'
        rows = [
            'L{}:'.format(self.level),
            ' Size: {} kB'.format(self.size // 1024),
            ' Associativity: {}'.format(self.assoc),
            ' Line Size: {} B'.format(self.lineSize),
            ' Number of sets{}: {}'.format(perSlice, self.nSets),
            ' Way size{}: {} kB'.format(perSlice, self.waySize // 1024),
        ]
        # the last two rows stay in the output as empty lines when unset
        if self.nCboxes is None:
            rows.append('')
        else:
            rows.append(' Number of CBoxes: {}'.format(self.nCboxes))
        if self.nSlices is None:
            rows.append('')
        else:
            rows.append(' Number of slices: {}'.format(self.nSlices))
        return '\n'.join(rows)
def getArch():
    """Return the microarchitecture reported by cpuid.micro_arch().

    The value is determined on the first call and cached in getArch.arch.
    """
    try:
        return getArch.arch
    except AttributeError:
        getArch.arch = cpuid.micro_arch(cpuid.CPUID())
        return getArch.arch
def getCPUVendor():
    """Return the vendor reported by cpuid.cpu_vendor().

    The value is determined on the first call and cached in
    getCPUVendor.vendor.
    """
    try:
        return getCPUVendor.vendor
    except AttributeError:
        getCPUVendor.vendor = cpuid.cpu_vendor(cpuid.CPUID())
        return getCPUVendor.vendor
def getCpuidCacheInfo():
    """Return the cache description obtained from cpuid.get_cache_info().

    The result is cached in getCpuidCacheInfo.cpuidCacheInfo. On the first
    call, a ValueError is raised if the caches do not all report the same
    line size.
    """
    if hasattr(getCpuidCacheInfo, 'cpuidCacheInfo'):
        return getCpuidCacheInfo.cpuidCacheInfo

    cpu = cpuid.CPUID()
    log.debug(cpuid.get_basic_info(cpu))
    cacheInfo = cpuid.get_cache_info(cpu)
    # the result is stored before it is validated
    getCpuidCacheInfo.cpuidCacheInfo = cacheInfo

    lineSizes = set()
    for cacheEntry in cacheInfo.values():
        lineSizes.add(cacheEntry['lineSize'])
    if len(lineSizes) != 1:
        raise ValueError('All line sizes must be the same')
    return getCpuidCacheInfo.cpuidCacheInfo
def getCacheInfo(level):
    """Return the CacheInfo object for cache level 1, 2 or 3.

    Results are cached as attributes of this function. For an L3 cache that
    CPUID flags as 'complex', the number of slices is determined with
    findMaximalNonEvictingL3SetInCBox(), and the number of sets is reported
    per slice.

    Raises ValueError('invalid level') for any other level, and for level 3
    if CPUID reports no L3 cache.
    """
    if level not in (1, 2, 3):
        raise ValueError('invalid level')

    if level == 1 or level == 2:
        number = 1 if level == 1 else 2
        cacheAttr = 'L1CacheInfo' if level == 1 else 'L2CacheInfo'
        cpuidKey = 'L1D' if level == 1 else 'L2'
        if not hasattr(getCacheInfo, cacheAttr):
            entry = getCpuidCacheInfo()[cpuidKey]
            setattr(getCacheInfo, cacheAttr,
                    CacheInfo(number, entry['assoc'], entry['lineSize'], entry['nSets']))
        return getattr(getCacheInfo, cacheAttr)

    if hasattr(getCacheInfo, 'L3CacheInfo'):
        return getCacheInfo.L3CacheInfo

    if 'L3' not in getCpuidCacheInfo():
        raise ValueError('invalid level')
    entry = getCpuidCacheInfo()['L3']
    lineSize = entry['lineSize']
    assoc = entry['assoc']
    nSets = entry['nSets']

    if 'complex' in entry and entry['complex']:
        # smallest power of two that is at least lineSize*nSets/nCBoxUnits
        bytesPerCBox = lineSize * nSets // getNCBoxUnits()
        stride = 1 << (bytesPerCBox - 1).bit_length()
        ms = findMaximalNonEvictingL3SetInCBox(0, stride, assoc, 0)
        log.debug('Maximal non-evicting L3 set: ' + str(len(ms)) + ' ' + str(ms))
        nCboxes = getNCBoxUnits()
        slicesPerCBox = int(math.ceil(float(len(ms)) / assoc))
        nSlices = nCboxes * slicesPerCBox
        getCacheInfo.L3CacheInfo = CacheInfo(3, assoc, lineSize, nSets // nSlices, nSlices, nCboxes)
    else:
        getCacheInfo.L3CacheInfo = CacheInfo(3, assoc, lineSize, nSets)
    return getCacheInfo.L3CacheInfo
def getNCBoxUnits():
    """Return the number of CBox units, read from MSR 0x396 (bits 3:0).

    The 'msr' kernel module is loaded with modprobe and the register is read
    with rdmsr. On CNL, ICL, TGL and ADL-P the field value is used directly;
    on all other microarchitectures one is subtracted. The result is cached
    in getNCBoxUnits.nCBoxUnits.

    If either command fails or cannot be started, a critical message is
    logged and the process exits via sys.exit().
    """
    if not hasattr(getNCBoxUnits, 'nCBoxUnits'):
        try:
            subprocess.check_output(['modprobe', 'msr'])
            cbo_config = subprocess.check_output(['rdmsr', '0x396', '-f', '3:0'])
            if getArch() in ['CNL', 'ICL', 'TGL', 'ADL-P']:
                getNCBoxUnits.nCBoxUnits = int(cbo_config)
            else:
                getNCBoxUnits.nCBoxUnits = int(cbo_config) - 1
            log.debug('Number of CBox Units: ' + str(getNCBoxUnits.nCBoxUnits))
        except subprocess.CalledProcessError as e:
            # On Python 3 the captured output is bytes (or None); concatenating
            # it to a str would raise a TypeError and hide the actual error.
            output = e.output
            if output is None:
                output = ''
            elif not isinstance(output, str):
                output = output.decode('utf-8', 'replace')
            log.critical('Error: ' + output)
            sys.exit()
        except OSError:
            log.critical("rdmsr not found. Try 'sudo apt install msr-tools'")
            sys.exit()
    return getNCBoxUnits.nCBoxUnits
def getCBoxOfAddress(address):
    """Return the index of the CBox with the most lookups for `address`.

    The address is accessed with nanoBench while the CACHE_LOOKUP_CBO_<n>
    counters are enabled; the CBox with the highest count is chosen. Results
    are cached per address in getCBoxOfAddress.cBoxMap.
    """
    if not hasattr(getCBoxOfAddress, 'cBoxMap'):
        getCBoxOfAddress.cBoxMap = dict()
    knownCBoxes = getCBoxOfAddress.cBoxMap

    if address in knownCBoxes:
        return knownCBoxes[address]

    setNanoBenchParameters(config='', msrConfig=getDefaultCacheMSRConfig(), nMeasurements=10, unrollCount=1,
                           loopCount=10, aggregateFunction='min', basicMode=True, noMem=True)
    singleAccess = AddressList([address], False, True, False)
    ec = getCodeForAddressLists([singleAccess])
    nb = runNanoBench(code=ec.code, oneTimeInit=ec.oneTimeInit)

    lookupsPerCBox = []
    for cBox in range(0, getNCBoxUnits()):
        lookupsPerCBox.append(nb['CACHE_LOOKUP_CBO_' + str(cBox)])
    knownCBoxes[address] = lookupsPerCBox.index(max(lookupsPerCBox))
    return knownCBoxes[address]
def getNewAddressesInCBox(n, cBox, cacheSet, prevAddresses, notInCBox=False):
    """Return n addresses that map to the given CBox.

    Candidates are spaced one L3 way size apart. The search starts one way
    size after the largest address in prevAddresses, or after
    cacheSet * lineSize if prevAddresses is empty. With notInCBox set, the
    addresses that do NOT map to cBox are collected instead.
    """
    l3Info = getCacheInfo(3)
    if prevAddresses:
        searchBase = max(prevAddresses)
    else:
        searchBase = cacheSet * l3Info.lineSize

    wantInCBox = not notInCBox
    collected = []
    for candidate in count(searchBase + l3Info.waySize, l3Info.waySize):
        candidateInCBox = (getCBoxOfAddress(candidate) == cBox)
        if candidateInCBox == wantInCBox:
            collected.append(candidate)
        if len(collected) >= n:
            return collected
def getNewAddressesNotInCBox(n, cBox, cacheSet, prevAddresses):
    """Return n addresses that do not map to the given CBox.

    Thin wrapper around getNewAddressesInCBox() with notInCBox enabled.
    """
    excludeCBox = True
    return getNewAddressesInCBox(n, cBox, cacheSet, prevAddresses, excludeCBox)
pointerChasingInits = {}


def getPointerChasingInit(addresses):
    """Return assembler code that links the addresses into a pointer chain.

    Each location (relative to R14) receives the address of its successor and
    the last location receives 0. Runs of equal strides are emitted as a loop
    to keep the code short. Results are cached in pointerChasingInits.

    addresses must not contain duplicates.
    """
    cacheKey = tuple(addresses)
    if cacheKey in pointerChasingInits:
        return pointerChasingInits[cacheKey]

    parts = ['lea RAX, [R14+' + str(addresses[0]) + ']; ',
             'mov RBX, RAX; ']

    lastIndex = len(addresses) - 1
    pos = 0
    while pos < lastIndex:
        step = addresses[pos+1] - addresses[pos]

        # find the end of the run of consecutive addresses with the same step
        runEnd = pos + 1
        while runEnd < lastIndex and (addresses[runEnd+1] - addresses[runEnd]) == step:
            runEnd += 1

        parts.append('1: add RBX, ' + str(step) + '; ')
        parts.append('mov [RAX], RBX; ')
        parts.append('mov RAX, RBX; ')

        if runEnd != pos + 1:
            # more than one step in this run: repeat until the run's end is reached
            parts.append('lea RCX, [R14+' + str(addresses[runEnd]) + ']; ')
            parts.append('cmp RAX, RCX; ')
            parts.append('jne 1b; ')

        pos = runEnd

    parts.append('mov qword ptr [R14 + ' + str(addresses[-1]) + '], 0; ')

    chainInit = ''.join(parts)
    pointerChasingInits[cacheKey] = chainInit
    return chainInit
ExperimentCode = namedtuple('ExperimentCode', 'code init oneTimeInit')


def getCodeForAddressLists(codeAddressLists, initAddressLists=[], wbinvd=False, afterEveryAcc=''):
    """Generate assembler code that accesses the given address lists.

    codeAddressLists and initAddressLists are lists of AddressList tuples
    (addresses, exclude, flush, wbinvd); either may be None, which is treated
    as empty. For each list:
      - if its wbinvd field is set, a 'wbinvd' instruction is emitted;
      - if flush is set, every address is flushed with clflush;
      - a single address is loaded with one mov;
      - several addresses are traversed via a pointer chain, whose setup code
        (see getPointerChasingInit) is added once to the one-time init code.
    In the measured code, lists with exclude set are wrapped in
    PFC_STOP_ASM/PFC_START_ASM (presumably pausing the performance counters;
    these names are defined outside this file).

    If wbinvd is set, the init code starts with a 'wbinvd' instruction.
    afterEveryAcc is inserted after every clflush and after every load in a
    pointer chain.

    Returns an ExperimentCode(code, init, oneTimeInit) tuple of strings.
    Raises ValueError if there is an address that occurs in all of the
    distinct address lists. Exits the process with status 1 if an address
    does not fit into the R14 memory area.
    """
    # The default list is never mutated, so sharing it between calls is harmless.
    # None is accepted here as well, consistent with the None check in the loop below.
    allAddressLists = (initAddressLists or []) + (codeAddressLists or [])
    distinctAddrLists = set(tuple(l.addresses) for l in allAddressLists)
    # NOTE: this intersects ALL distinct lists, i.e., it only triggers for an
    # address that is common to every list.
    if len(distinctAddrLists) > 1 and set.intersection(*list(set(l) for l in distinctAddrLists)):
        raise ValueError('same address in different lists')

    code = []
    init = (['wbinvd; '] if wbinvd else [])
    oneTimeInit = []

    r14Size = getR14Size()
    alreadyAddedOneTimeInits = set()

    for addressLists, codeList, isInit in [(initAddressLists, init, True), (codeAddressLists, code, False)]:
        if addressLists is None:
            continue

        # tracks whether the counters are currently running in the generated code
        pfcEnabled = True
        for addressList in addressLists:
            if addressList.wbinvd:
                if addressList.exclude and pfcEnabled:
                    codeList.append(PFC_STOP_ASM + '; ')
                codeList.append('wbinvd; ')
                if addressList.exclude and pfcEnabled:
                    codeList.append(PFC_START_ASM + '; ')
                continue

            addresses = addressList.addresses
            if len(addresses) < 1:
                continue

            if any(addr >= r14Size for addr in addresses):
                sys.stderr.write('Size of memory area too small. Try increasing it with set-R14-size.sh.\n')
                # sys.exit instead of the site-provided exit(), which is meant
                # for interactive use and is not guaranteed to exist
                sys.exit(1)

            if not isInit:
                if addressList.exclude and pfcEnabled:
                    codeList.append(PFC_STOP_ASM + '; ')
                    pfcEnabled = False
                elif not addressList.exclude and not pfcEnabled:
                    codeList.append(PFC_START_ASM + '; ')
                    pfcEnabled = True

            # use multiple lfence instructions to make sure that the block is actually in the cache and not still in a fill buffer
            codeList.append('lfence; ' * 25)

            if addressList.flush:
                for address in addresses:
                    codeList.append('clflush [R14 + ' + str(address) + ']; ' + afterEveryAcc)
            elif len(addresses) == 1:
                codeList.append('mov RCX, [R14 + ' + str(addresses[0]) + ']; ')
            else:
                if tuple(addresses) not in alreadyAddedOneTimeInits:
                    oneTimeInit.append(getPointerChasingInit(addresses))
                    alreadyAddedOneTimeInits.add(tuple(addresses))
                # follow the pointer chain until the terminating 0 is loaded
                codeList.append('lea RCX, [R14+' + str(addresses[0]) + ']; 1: mov RCX, [RCX]; ' + afterEveryAcc + 'jrcxz 2f; jmp 1b; 2: ')

        # make sure the counters are running again at the end of the measured code
        if not isInit and not pfcEnabled:
            codeList.append(PFC_START_ASM + '; ')

    return ExperimentCode(''.join(code), ''.join(init), ''.join(oneTimeInit))
def getClearHLAddresses(level, cacheSetList, cBox, doNotUseOtherCBoxes, nClearAddresses=None):
    """Return addresses whose accesses clear the cache levels above `level`.

    nClearAddresses is the number of addresses per higher-level set; it
    defaults to twice the summed associativity of the levels above `level`.

    For level 1 the result is empty. For level 2, and for level 3 if the L3
    has no slices or doNotUseOtherCBoxes is set, the addresses are taken from
    sets of this level that are not in cacheSetList but map to the same sets
    of the next higher level. Otherwise (sliced L3), addresses that are not
    in `cBox` are used; these are cached in getClearHLAddresses.clearL2Map.

    Raises ValueError if the set counts of the levels do not allow this, or
    if no unused set is available.
    """
    lineSize = getCacheInfo(1).lineSize

    if nClearAddresses is None:
        nClearAddresses = 2 * sum(getCacheInfo(hLevel).assoc for hLevel in range(1, level))

    if level == 1:
        return []

    if (level == 2) or (level == 3 and (getCacheInfo(3).nSlices is None or doNotUseOtherCBoxes)):
        nSets = getCacheInfo(level).nSets
        for lLevel in range(1, level):
            if not nSets > getCacheInfo(lLevel).nSets:
                raise ValueError('L' + str(level) + ' way size must be greater than lower level way sizes')

        nHLSets = getCacheInfo(level-1).nSets
        allClearAddresses = []
        for HLSet in set(cs % nHLSets for cs in cacheSetList):
            # sets of this level that map to HLSet and are not used by the experiment
            usableSets = [cs for cs in range(HLSet, nSets, nHLSets) if cs not in cacheSetList]
            if not usableSets:
                raise ValueError("not enough cache sets available for clearing higher levels")

            clearAddressesForSet = []
            for setIndex in count(HLSet, nHLSets):
                if setIndex % nSets in usableSets:
                    clearAddressesForSet.append(setIndex*lineSize)
                    if len(clearAddressesForSet) >= nClearAddresses:
                        break
            allClearAddresses.extend(clearAddressesForSet)
        return allClearAddresses

    if level == 3:
        if not hasattr(getClearHLAddresses, 'clearL2Map'):
            getClearHLAddresses.clearL2Map = dict()
        addressesPerSet = getClearHLAddresses.clearL2Map.setdefault(cBox, dict())

        clearAddresses = []
        for L3Set in cacheSetList:
            if L3Set not in addressesPerSet or len(addressesPerSet[L3Set]) < nClearAddresses:
                addressesPerSet[L3Set] = getNewAddressesNotInCBox(nClearAddresses, cBox, L3Set, [])
            clearAddresses.extend(addressesPerSet[L3Set][:nClearAddresses])
        return clearAddresses
L3SetToWayIDMap = {}


def getAddresses(level, wayID, cacheSetList, cBox=1, cSlice=0):
    """Return one address per cache set in cacheSetList for the given wayID.

    For L1, L2 and an L3 without slices, the address is computed as
    wayID*waySize + set*lineSize. For a sliced L3, addresses that map to the
    requested CBox (and slice) are searched for and remembered in
    L3SetToWayIDMap[cBox][cSlice][set][wayID].

    Raises ValueError('invalid level') for unsupported levels.
    """
    lineSize = getCacheInfo(1).lineSize

    if level <= 2 or (level == 3 and getCacheInfo(3).nSlices is None):
        wayOffset = wayID * getCacheInfo(level).waySize
        return [wayOffset + s*lineSize for s in cacheSetList]

    if level != 3:
        raise ValueError('invalid level')

    setMap = L3SetToWayIDMap.setdefault(cBox, dict()).setdefault(cSlice, dict())

    addresses = []
    for L3Set in cacheSetList:
        if L3Set not in setMap:
            setMap[L3Set] = dict()
            if getCacheInfo(3).nSlices != getNCBoxUnits():
                # several slices per CBox: start from a minimal eviction set for this slice
                for i, addr in enumerate(findMinimalL3EvictionSet(L3Set, cBox, cSlice)):
                    setMap[L3Set][i] = addr

        wayMap = setMap[L3Set]
        if wayID not in wayMap:
            knownAddresses = list(wayMap.values())
            if getCacheInfo(3).nSlices == getNCBoxUnits():
                candidates = getNewAddressesInCBox(1, cBox, L3Set, knownAddresses)
            else:
                candidates = findCongruentL3Addresses(1, L3Set, cBox, knownAddresses)
            wayMap[wayID] = next(iter(candidates))
        addresses.append(wayMap[wayID])

    return addresses
def getBlockName(blockStr):
    """Return the part of blockStr before the first '_', without any '?' or '!'."""
    namePart = blockStr.partition('_')[0]
    for marker in ('?', '!'):
        namePart = namePart.replace(marker, '')
    return namePart
def getBlockSet(blockStr):
    """Return the cache set encoded in blockStr, or None if it contains no '_'.

    The set is the leading run of digits in the part after the last '_'
    (as int), so trailing '?' and '!' markers are ignored.
    """
    if '_' not in blockStr:
        return None
    setPart = blockStr.split('_')[-1]
    # raw string: '\d' in a normal string literal is an invalid escape sequence
    return int(re.match(r'\d+', setPart).group())
def parseCacheSetsStr(level, clearHL, cacheSetsStr, doNotUseOtherCBoxes=False):
    """Return the list of cache sets described by cacheSetsStr.

    cacheSetsStr is a comma-separated list of set numbers and inclusive
    ranges of the form 'first-last'. If it is None, all sets of the given
    level are returned; when higher levels are cleared via sets of the same
    level, the first sets (as many as the next higher level has) are left out.
    """
    if cacheSetsStr is None:
        nSets = getCacheInfo(level).nSets
        reserveFirstSets = (level > 1 and clearHL and
                            not (level == 3 and getCacheInfo(3).nSlices is not None and not doNotUseOtherCBoxes))
        firstSet = getCacheInfo(level-1).nSets if reserveFirstSets else 0
        return list(range(firstSet, nSets))

    parsedSets = []
    for token in cacheSetsStr.split(','):
        if '-' in token:
            first, last = token.split('-')[:2]
            parsedSets.extend(range(int(first), int(last)+1))
        else:
            parsedSets.append(int(token))
    return parsedSets
def findCacheSetForCode(cacheSetList, level):
    """Return a cache set for the code, chosen from the largest gap between used sets.

    The used sets are sorted and treated as circular (modulo the number of
    sets of the given level). The result is the set directly after the start
    of the first largest gap, or 0 if no two neighbors are more than one
    set apart.
    """
    nSets = getCacheInfo(level).nSets
    usedSets = sorted(cacheSetList)
    # append the first set again, shifted by nSets, to cover the wrap-around gap
    circular = usedSets + [usedSets[0] + nSets]

    bestSet = 0
    largestGap = 1
    for lowerSet, upperSet in zip(circular, circular[1:]):
        gap = upperSet - lowerSet
        if gap > largestGap:
            largestGap = gap
            bestSet = (lowerSet + 1) % nSets
    return bestSet
def getAllUsedCacheSets(cacheSetList, seq, initSeq=''):
    """Return the sorted cache sets from cacheSetList plus all per-block overrides.

    Overrides are the set numbers that getBlockSet() extracts from the
    elements of initSeq and seq. Raises ValueError if an overridden set is
    also contained in cacheSetList.
    """
    overriddenSets = set()
    for element in initSeq.split() + seq.split():
        blockSet = getBlockSet(element)
        if blockSet is not None:
            overriddenSets.add(blockSet)

    for overriddenSet in overriddenSets:
        if overriddenSet in cacheSetList:
            raise ValueError('overridden cache sets must not also be in cacheSetList')

    return sorted(set(cacheSetList + list(overriddenSets)))
AddressList = namedtuple('AddressList', 'addresses exclude flush wbinvd')


def getCodeForCacheExperiment(level, seq, initSeq, cacheSetList, cBox, cSlice, clearHL, doNotUseOtherCBoxes, wbinvd, nClearAddresses=None):
    """Translate access sequences into an ExperimentCode tuple.

    seq and initSeq are whitespace-separated block names. A block name may
    carry '?' (the access is measured), '!' (the block is flushed instead of
    accessed) and a '_<set>' suffix that overrides cacheSetList for this
    element. The element '<wbinvd>' emits a wbinvd instruction. The same
    block name always maps to the same way ID.

    If clearHL is set and level > 1, the addresses returned by
    getClearHLAddresses() are accessed before every non-flush element.
    """
    allUsedSets = getAllUsedCacheSets(cacheSetList, seq, initSeq)

    clearHLAddrList = None
    if clearHL and level > 1:
        clearHLAddrList = AddressList(
            addresses=getClearHLAddresses(level, allUsedSets, cBox, doNotUseOtherCBoxes, nClearAddresses),
            exclude=True, flush=False, wbinvd=False)

    # way IDs are shared between the init sequence and the main sequence
    nameToID = dict()

    def buildAddressLists(seqString):
        addrLists = []
        for seqEl in seqString.split():
            name = getBlockName(seqEl)
            if name == '<wbinvd>':
                addrLists.append(AddressList(addresses=[], exclude=True, flush=False, wbinvd=True))
                continue

            overrideSet = getBlockSet(seqEl)
            wayID = nameToID.setdefault(name, len(nameToID))
            isExcluded = '?' not in seqEl
            isFlush = '!' in seqEl

            if overrideSet is not None:
                setsForElement = [overrideSet]
            else:
                setsForElement = cacheSetList
            addresses = getAddresses(level, wayID, setsForElement, cBox=cBox, cSlice=cSlice)

            if clearHLAddrList is not None and not isFlush:
                addrLists.append(clearHLAddrList)
            addrLists.append(AddressList(addresses=addresses, exclude=isExcluded, flush=isFlush, wbinvd=False))
        return addrLists

    initAddressLists = buildAddressLists(initSeq)
    seqAddressLists = buildAddressLists(seq)

    log.debug('\nInitAddresses: ' + str(initAddressLists))
    log.debug('\nSeqAddresses: ' + str(seqAddressLists))

    return getCodeForAddressLists(seqAddressLists, initAddressLists, wbinvd)
def runCacheExperimentCode(code, initCode, oneTimeInitCode, loop, warmUpCount, codeOffset, nMeasurements, agg):
    """Reset nanoBench, configure it for cache measurements, run the code, and return the results."""
    resetNanoBench()
    nanoBenchParameters = dict(
        config=getDefaultCacheConfig(),
        msrConfig=getDefaultCacheMSRConfig(),
        fixedCounters=True,
        nMeasurements=nMeasurements,
        unrollCount=1,
        loopCount=loop,
        warmUpCount=warmUpCount,
        aggregateFunction=agg,
        basicMode=True,
        noMem=True,
        codeOffset=codeOffset,
        verbose=None,
    )
    setNanoBenchParameters(**nanoBenchParameters)
    return runNanoBench(code=code, init=initCode, oneTimeInit=oneTimeInitCode)
def runCacheExperiment(level, seq, initSeq='', cacheSets=None, cBox=1, cSlice=0, clearHL=True, doNotUseOtherCBoxes=False, loop=1, wbinvd=False,
                       nMeasurements=10, warmUpCount=1, codeSet=None, agg='avg', nClearAddresses=None):
    """Run the access sequence `seq` on the given cache level and return the nanoBench results.

    cacheSets=None means do access in all sets; in this case, the first
    nL1Sets many sets of L2 will be reserved for clearing L1.
    cSlice refers to the nth slice within a given cBox; the assignment of
    numbers to slices is arbitrary.
    doNotUseOtherCBoxes determines whether accesses to clear higher levels
    will go to other CBoxes.
    If wbinvd is set, wbinvd will be called before initSeq.
    codeSet selects the cache set for the code; if it is None, a set is
    chosen with findCacheSetForCode().
    """
    cacheSetList = parseCacheSetsStr(level, clearHL, cacheSets, doNotUseOtherCBoxes)
    ec = getCodeForCacheExperiment(level, seq, initSeq=initSeq, cacheSetList=cacheSetList, cBox=cBox, cSlice=cSlice,
                                   clearHL=clearHL, doNotUseOtherCBoxes=doNotUseOtherCBoxes, wbinvd=wbinvd,
                                   nClearAddresses=nClearAddresses)

    for label, text in (('OneTimeInit', ec.oneTimeInit), ('Init', ec.init), ('Code', ec.code)):
        log.debug('\n' + label + ': ' + text)

    lineSize = getCacheInfo(1).lineSize
    allUsedSets = getAllUsedCacheSets(cacheSetList, seq, initSeq)
    if codeSet is not None:
        setForCode = codeSet
    else:
        setForCode = findCacheSetForCode(allUsedSets, level)
    codeOffset = lineSize * setForCode

    return runCacheExperimentCode(ec.code, ec.init, ec.oneTimeInit, loop, warmUpCount, codeOffset, nMeasurements, agg)
def printNB(nb_result):
    """Print every entry of a nanoBench result dict as '<name>: <value>'."""
    for name, value in nb_result.items():
        print(name + ': ' + str(value))
def hasL3Conflicts(addresses, clearHLAddrList, codeOffset):
    """Return True if accessing `addresses` again does not hit in L3 for all of them.

    The addresses are accessed once in the init code (after a wbinvd). The
    measured code then accesses clearHLAddrList followed by the same
    addresses. A conflict is reported if the median L3_HIT count is below
    len(addresses) - 0.9.
    """
    accessList = AddressList(addresses=addresses, exclude=False, flush=False, wbinvd=False)
    ec = getCodeForAddressLists([clearHLAddrList, accessList], initAddressLists=[accessList], wbinvd=True)

    setNanoBenchParameters(config=getEventConfig('L3_HIT'), msrConfig='', nMeasurements=5, unrollCount=1,
                           loopCount=100, aggregateFunction='med', basicMode=True, noMem=True,
                           codeOffset=codeOffset)
    nb = runNanoBench(code=ec.code, init=ec.init, oneTimeInit=ec.oneTimeInit)

    minHitsWithoutConflict = len(addresses) - .9
    return nb['L3_HIT'] < minHitsWithoutConflict
def findMinimalL3EvictionSet(cacheSet, cBox, cSlice):
    """Return a list of addresses in `cBox` that conflict with each other in L3.

    Candidate addresses start at cacheSet * lineSize and are one L3 way size
    apart. Results are cached per (cBox, cSlice, cacheSet) in
    findMinimalL3EvictionSet.evSetForCacheSet. For cSlice > 0, the eviction
    sets of all lower-numbered slices are computed first (recursively), and
    addresses that belong to or conflict with them are skipped.

    Every candidate check that is not served from a cache runs a nanoBench
    measurement (via getCBoxOfAddress and hasL3Conflicts).
    """
    # set up the nested result cache: cBox -> cSlice -> cacheSet -> addresses
    if not hasattr(findMinimalL3EvictionSet, 'evSetForCacheSet'):
        findMinimalL3EvictionSet.evSetForCacheSet = dict()
    if not cBox in findMinimalL3EvictionSet.evSetForCacheSet:
        findMinimalL3EvictionSet.evSetForCacheSet[cBox] = dict()
    if not cSlice in findMinimalL3EvictionSet.evSetForCacheSet[cBox]:
        findMinimalL3EvictionSet.evSetForCacheSet[cBox][cSlice] = dict()
    if cacheSet in findMinimalL3EvictionSet.evSetForCacheSet[cBox][cSlice]:
        return findMinimalL3EvictionSet.evSetForCacheSet[cBox][cSlice][cacheSet]

    # eviction sets of the slices with a smaller number than cSlice
    evSetsForOtherSlices = [findMinimalL3EvictionSet(cacheSet, cBox, s) for s in range(0, cSlice)]

    lineSize = getCacheInfo(1).lineSize
    L3Assoc = getCacheInfo(3).assoc
    L3WaySize = getCacheInfo(3).waySize

    clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox, False), True, False, False)
    # the code is placed 10 cache sets after the examined set
    codeOffset = lineSize * (cacheSet+10)

    # Phase 1: collect candidates until there are more than L3Assoc of them and they conflict
    addresses = []
    for curAddr in count(cacheSet * lineSize, L3WaySize):
        # skip addresses that are already part of another slice's eviction set
        if any(curAddr in otherEvSet for otherEvSet in evSetsForOtherSlices): continue
        if not getCBoxOfAddress(curAddr) == cBox: continue
        # skip addresses that conflict with another slice's eviction set when they replace its last element
        if any(hasL3Conflicts(otherEvSet[:-1]+[curAddr], clearHLAddrList, codeOffset) for otherEvSet in evSetsForOtherSlices): continue

        addresses.append(curAddr)
        if len(addresses) > L3Assoc and hasL3Conflicts(addresses, clearHLAddrList, codeOffset):
            break

    # Phase 2: going from the last index to the first, drop every address whose removal
    # still leaves a conflicting set; stop once only L3Assoc+1 addresses remain
    for i in reversed(range(0, len(addresses))):
        if len(addresses) <= L3Assoc+1:
            break

        tmpAddresses = addresses[:i] + addresses[(i+1):]

        if hasL3Conflicts(tmpAddresses, clearHLAddrList, codeOffset):
            addresses = tmpAddresses

    findMinimalL3EvictionSet.evSetForCacheSet[cBox][cSlice][cacheSet] = addresses
    return addresses
def findCongruentL3Addresses(n, cacheSet, cBox, L3EvictionSet):
    """Return n new addresses in `cBox` that conflict with the given eviction set.

    Candidates start one L3 way size after the largest address in
    L3EvictionSet and are one way size apart. A candidate is accepted if it
    causes L3 conflicts together with the first `assoc` addresses of
    L3EvictionSet.
    """
    clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox, False), True, False, False)
    codeOffset = getCacheInfo(1).lineSize * (cacheSet+10)
    L3WaySize = getCacheInfo(3).waySize

    congruent = []
    candidate = max(L3EvictionSet) + L3WaySize
    while True:
        if getCBoxOfAddress(candidate) == cBox:
            probeSet = L3EvictionSet[:getCacheInfo(3).assoc] + [candidate]
            if hasL3Conflicts(probeSet, clearHLAddrList, codeOffset):
                congruent.append(candidate)
            if len(congruent) >= n:
                break
        candidate += L3WaySize
    return congruent
def findMaximalNonEvictingL3SetInCBox(start, stride, L3Assoc, cBox):
    """Return a list of addresses in `cBox` that can be accessed without L3 conflicts.

    Addresses are taken from start, start+stride, start+2*stride, ...
    The list is seeded with the first L3Assoc addresses that map to cBox.
    Further addresses in cBox are added as long as hasL3Conflicts() reports
    no conflict; the search stops after L3Assoc consecutive rejected
    candidates.
    """
    clearHLAddresses = []
    addresses = []

    # addresses outside cBox that are used for clearing L1 and L2:
    # twice the sum of the L1 and L2 associativities
    curAddress = start
    while len(clearHLAddresses) < 2*(getCacheInfo(1).assoc+getCacheInfo(2).assoc):
        if getCBoxOfAddress(curAddress) != cBox:
            clearHLAddresses.append(curAddress)
        curAddress += stride
    clearHLAddrList = AddressList(clearHLAddresses, True, False, False)

    # initial set: the first L3Assoc addresses that map to cBox
    curAddress = start
    while len(addresses) < L3Assoc:
        if getCBoxOfAddress(curAddress) == cBox:
            addresses.append(curAddress)
        curAddress += stride

    # NOTE: curAddress already points behind the last address examined above and is
    # advanced again before the first check, so one candidate address is never examined
    notAdded = 0
    while notAdded < L3Assoc:
        curAddress += stride
        if not getCBoxOfAddress(curAddress) == cBox:
            continue

        newAddresses = addresses + [curAddress]

        # the code is placed one cache line after start
        if not hasL3Conflicts(newAddresses, clearHLAddrList, start+getCacheInfo(1).lineSize):
            addresses = newAddresses
            notAdded = 0
        else:
            notAdded += 1

    return addresses
def getUnusedBlockNames(n, usedBlockNames, prefix=''):
    """Return the first n names of the form prefix + <number> that are not in usedBlockNames.

    Numbers are tried in increasing order, starting at 0. Membership is
    tested with `in`, so if usedBlockNames is a string, a name counts as
    used whenever it occurs as a substring.
    """
    freshNames = []
    for number in count():
        if len(freshNames) >= n:
            break
        candidate = prefix + str(number)
        if candidate not in usedBlockNames:
            freshNames.append(candidate)
    return freshNames
def getAgesOfBlocks(blocks, level, seq, initSeq='', maxAge=None, cacheSets=None, cBox=1, cSlice=0, clearHL=True, wbinvd=False, returnNbResults=False, nMeasurements=10, agg='avg'):
    """Return a dict with the age of each block.

    The age of a block is the number of fresh blocks that need to be accessed
    after `seq` until the block is evicted; it is -1 if the block is still
    cached after maxAge fresh blocks. maxAge defaults to twice the
    associativity of the given level.

    If returnNbResults is True, the function additionally returns all
    measurement results (as the second component of a tuple).

    Raises ValueError if a measurement contains neither the hit nor the miss
    event of the given level.
    """
    ages = dict()
    nbResults = dict() if returnNbResults else None

    if maxAge is None:
        maxAge = 2*getCacheInfo(level).assoc

    nSets = len(parseCacheSetsStr(level, clearHL, cacheSets))

    hitEvent = 'L' + str(level) + '_HIT'
    missEvent = 'L' + str(level) + '_MISS'
    # only the final access to the examined block is measured
    seqPrefix = seq.replace('?', '') + ' '
    usedNames = seq + initSeq

    for block in blocks:
        measurements = []
        for nNewBlocks in range(0, maxAge+1):
            newBlocks = getUnusedBlockNames(nNewBlocks, usedNames, 'N')
            curSeq = seqPrefix + ' '.join(newBlocks) + ' ' + block + '?'

            nb = runCacheExperiment(level, curSeq, initSeq=initSeq, cacheSets=cacheSets, cBox=cBox, cSlice=cSlice,
                                    clearHL=clearHL, loop=0, wbinvd=wbinvd, nMeasurements=nMeasurements, agg=agg)
            measurements.append(nb)

            if hitEvent in nb:
                evicted = isClose(nb[hitEvent], 0.0, abs_tol=0.1)
            elif missEvent in nb:
                evicted = nb[missEvent] > nSets - 0.1
            else:
                raise ValueError('no cache results available')

            # record only the first eviction; the remaining ages are still measured
            if evicted and block not in ages:
                ages[block] = nNewBlocks

        if block not in ages:
            ages[block] = -1
        if returnNbResults:
            nbResults[block] = measurements

    if returnNbResults:
        return (ages, nbResults)
    return ages