nanoBench/tools/CacheAnalyzer/cacheLib.py

from itertools import count
from collections import namedtuple

import math
import random
import re
import subprocess
import sys

sys.path.append('../..')
from kernelNanoBench import *

sys.path.append('../CPUID')
import cpuid

import logging
log = logging.getLogger(__name__)


def getEventConfig(event):
   arch = getArch()
   if event == 'L1_HIT':
      if arch in ['Core', 'EnhancedCore']: return '40.0E ' + event # L1D_CACHE_LD.MES
      if arch in ['NHM', 'WSM']: return 'CB.01 ' + event
      if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'TGL', 'ADL-P', 'GLM', 'GLP']: return 'D1.01 ' + event
   if event == 'L1_MISS':
      if arch in ['Core', 'EnhancedCore']: return 'CB.01.CTR=0 ' + event
      if arch in ['IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'TGL', 'ADL-P', 'GLM', 'GLP']: return 'D1.08 ' + event
      if arch in ['ZEN+']: return '064.70 ' + event
   if event == 'L2_HIT':
      if arch in ['Core', 'EnhancedCore']: return '29.7E ' + event # L2_LD.THIS_CORE.ALL_INCL.MES
      if arch in ['NHM', 'WSM']: return 'CB.02 ' + event
      if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'TGL', 'ADL-P', 'GLM', 'GLP']: return 'D1.02 ' + event
      if arch in ['ZEN+']: return '064.70 ' + event
   if event == 'L2_MISS':
      if arch in ['Core', 'EnhancedCore']: return 'CB.04.CTR=0 ' + event
      if arch in ['IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'TGL', 'ADL-P', 'GLM', 'GLP']: return 'D1.10 ' + event
      if arch in ['ZEN+']: return '064.08 ' + event
   if event == 'L3_HIT':
      if arch in ['NHM', 'WSM']: return 'CB.04 ' + event
      if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'TGL', 'ADL-P']: return 'D1.04 ' + event
   if event == 'L3_MISS':
      if arch in ['NHM', 'WSM']: return 'CB.10 ' + event
      if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'TGL', 'ADL-P']: return 'D1.20 ' + event
   return ''

def getDefaultCacheConfig():
   return '\n'.join(filter(None, [getEventConfig('L' + str(l) + '_' + hm) for l in range(1,4) for hm in ['HIT', 'MISS']]))


def getDefaultCacheMSRConfig():
   if 'Intel' in getCPUVendor() and 'L3' in getCpuidCacheInfo() and getCpuidCacheInfo()['L3']['complex']:
      if getArch() in ['ADL-P']:
         MSR_UNC_PERF_GLOBAL_CTRL = 0x2FF0
         MSR_UNC_CBO_0_PERFEVTSEL0 = 0x2000
         MSR_UNC_CBO_0_PERFCTR0 = 0x2002
         dist = 8
      elif getArch() in ['CNL', 'ICL', 'TGL']:
         MSR_UNC_PERF_GLOBAL_CTRL = 0xE01
         MSR_UNC_CBO_0_PERFEVTSEL0 = 0x700
         MSR_UNC_CBO_0_PERFCTR0 = 0x702
         dist = 8
      else:
         MSR_UNC_PERF_GLOBAL_CTRL = 0xE01
         MSR_UNC_CBO_0_PERFEVTSEL0 = 0x700
         MSR_UNC_CBO_0_PERFCTR0 = 0x706
         dist = 16

      return '\n'.join('msr_' + format(MSR_UNC_PERF_GLOBAL_CTRL, '#x') + '=0x20000000' +
                       '.msr_' + format(MSR_UNC_CBO_0_PERFEVTSEL0 + dist*cbo, '#x') + '=0x408F34' +
                       ' msr_' + format(MSR_UNC_CBO_0_PERFCTR0 + dist*cbo, '#x') +
                       ' CACHE_LOOKUP_CBO_' + str(cbo)
                       for cbo in range(0, getNCBoxUnits()))
   return ''


def isClose(a, b, rel_tol=1e-09, abs_tol=0.0):
    return abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)


class CacheInfo:
   def __init__(self, level, assoc, lineSize, nSets, nSlices=None, nCboxes=None):
      self.level = level
      self.assoc = assoc
      self.lineSize = lineSize
      self.nSets = nSets
      self.waySize = lineSize * nSets
      self.size = self.waySize * assoc * (nSlices if nSlices is not None else 1)
      self.nSlices = nSlices
      self.nCboxes = nCboxes

   def __str__(self):
      return '\n'.join(['L' + str(self.level) + ':',
                        '  Size: ' + str(self.size//1024) + ' kB',
                        '  Associativity: ' + str(self.assoc),
                        '  Line Size: ' + str(self.lineSize) + ' B',
                        '  Number of sets' + (' (per slice)' if self.nSlices is not None else '') + ': ' + str(self.nSets),
                        '  Way size' + (' (per slice)' if self.nSlices is not None else '') + ': ' + str(self.waySize//1024) + ' kB',
                       ('  Number of CBoxes: ' + str(self.nCboxes) if self.nCboxes is not None else ''),
                       ('  Number of slices: ' + str(self.nSlices) if self.nSlices is not None else '')])


def getArch():
   if not hasattr(getArch, 'arch'):
      cpu = cpuid.CPUID()
      getArch.arch = cpuid.micro_arch(cpu)
   return getArch.arch

def getCPUVendor():
   if not hasattr(getCPUVendor, 'vendor'):
      cpu = cpuid.CPUID()
      getCPUVendor.vendor = cpuid.cpu_vendor(cpu)
   return getCPUVendor.vendor

def getCpuidCacheInfo():
   if not hasattr(getCpuidCacheInfo, 'cpuidCacheInfo'):
      cpu = cpuid.CPUID()
      log.debug(cpuid.get_basic_info(cpu))
      getCpuidCacheInfo.cpuidCacheInfo = cpuid.get_cache_info(cpu)

      if not len(set(c['lineSize'] for c in getCpuidCacheInfo.cpuidCacheInfo.values())) == 1:
         raise ValueError('All line sizes must be the same')
   return getCpuidCacheInfo.cpuidCacheInfo


def getCacheInfo(level):
   if level == 1:
      if not hasattr(getCacheInfo, 'L1CacheInfo'):
         cpuidInfo = getCpuidCacheInfo()['L1D']
         getCacheInfo.L1CacheInfo = CacheInfo(1, cpuidInfo['assoc'], cpuidInfo['lineSize'], cpuidInfo['nSets'])
      return getCacheInfo.L1CacheInfo
   elif level == 2:
      if not hasattr(getCacheInfo, 'L2CacheInfo'):
         cpuidInfo = getCpuidCacheInfo()['L2']
         getCacheInfo.L2CacheInfo = CacheInfo(2, cpuidInfo['assoc'], cpuidInfo['lineSize'], cpuidInfo['nSets'])
      return getCacheInfo.L2CacheInfo
   elif level == 3:
      if not hasattr(getCacheInfo, 'L3CacheInfo'):
         if not 'L3' in getCpuidCacheInfo():
            raise ValueError('invalid level')
         cpuidInfo = getCpuidCacheInfo()['L3']
         if not 'complex' in cpuidInfo or not cpuidInfo['complex']:
            getCacheInfo.L3CacheInfo = CacheInfo(3, cpuidInfo['assoc'], cpuidInfo['lineSize'], cpuidInfo['nSets'])
         else:
            lineSize = cpuidInfo['lineSize']
            assoc = cpuidInfo['assoc']
            nSets = cpuidInfo['nSets']

            stride = 2**((lineSize*nSets//getNCBoxUnits())-1).bit_length() # smallest power of two larger than lineSize*nSets/nCBoxUnits
            ms = findMaximalNonEvictingL3SetInCBox(0, stride, assoc, 0)
            log.debug('Maximal non-evicting L3 set: ' + str(len(ms)) + ' ' + str(ms))
            nCboxes = getNCBoxUnits()
            nSlices = nCboxes * int(math.ceil(float(len(ms))/assoc))

            getCacheInfo.L3CacheInfo = CacheInfo(3, assoc, lineSize, nSets//nSlices, nSlices, nCboxes)
      return getCacheInfo.L3CacheInfo
   else:
      raise ValueError('invalid level')


def getNCBoxUnits():
   if not hasattr(getNCBoxUnits, 'nCBoxUnits'):
      try:
         subprocess.check_output(['modprobe', 'msr'])
         cbo_config = subprocess.check_output(['rdmsr', '0x396', '-f', '3:0'])
         if getArch() in ['CNL', 'ICL', 'TGL', 'ADL-P']:
            getNCBoxUnits.nCBoxUnits = int(cbo_config)
         else:
            getNCBoxUnits.nCBoxUnits = int(cbo_config) - 1
         log.debug('Number of CBox Units: ' + str(getNCBoxUnits.nCBoxUnits))
      except subprocess.CalledProcessError as e:
         log.critical('Error: ' + e.output)
         sys.exit()
      except OSError as e:
         log.critical("rdmsr not found. Try 'sudo apt install msr-tools'")
         sys.exit()
   return getNCBoxUnits.nCBoxUnits


def getCBoxOfAddress(address):
   if not hasattr(getCBoxOfAddress, 'cBoxMap'):
      getCBoxOfAddress.cBoxMap = dict()
   cBoxMap = getCBoxOfAddress.cBoxMap

   if not address in cBoxMap:
      setNanoBenchParameters(config='', msrConfig=getDefaultCacheMSRConfig(), nMeasurements=10, unrollCount=1, loopCount=10, aggregateFunction='min',
                             basicMode=True, noMem=True)

      ec = getCodeForAddressLists([AddressList([address], False, True, False)])
      nb = runNanoBench(code=ec.code, oneTimeInit=ec.oneTimeInit)

      nCacheLookups = [nb['CACHE_LOOKUP_CBO_'+str(cBox)] for cBox in range(0, getNCBoxUnits())]
      cBoxMap[address] = nCacheLookups.index(max(nCacheLookups))

   return cBoxMap[address]


def getNewAddressesInCBox(n, cBox, cacheSet, prevAddresses, notInCBox=False):
   if not prevAddresses:
      maxPrevAddress = cacheSet * getCacheInfo(3).lineSize
   else:
      maxPrevAddress = max(prevAddresses)
   addresses = []
   for addr in count(maxPrevAddress+getCacheInfo(3).waySize, getCacheInfo(3).waySize):
      if not notInCBox and getCBoxOfAddress(addr) == cBox:
         addresses.append(addr)
      if notInCBox and getCBoxOfAddress(addr) != cBox:
         addresses.append(addr)
      if len(addresses) >= n:
         return addresses


def getNewAddressesNotInCBox(n, cBox, cacheSet, prevAddresses):
   return getNewAddressesInCBox(n, cBox, cacheSet, prevAddresses, notInCBox=True)


pointerChasingInits = dict()

#addresses must not contain duplicates
def getPointerChasingInit(addresses):
   if tuple(addresses) in pointerChasingInits:
      return pointerChasingInits[tuple(addresses)]

   #addresses_tail = addresses[1:]
   #random.shuffle(addresses_tail)
   #adresses = [addresses[0]] + addresses_tail

   init = 'lea RAX, [R14+' + str(addresses[0]) + ']; '
   init += 'mov RBX, RAX; '

   i = 0
   while i < len(addresses)-1:
      stride = addresses[i+1] - addresses[i]
      init += '1: add RBX, ' + str(stride) + '; '
      init += 'mov [RAX], RBX; '
      init += 'mov RAX, RBX; '

      i += 1
      oldI = i

      while i < len(addresses)-1 and (addresses[i+1] - addresses[i]) == stride:
         i += 1

      if oldI != i:
         init += 'lea RCX, [R14+' + str(addresses[i]) + ']; '
         init += 'cmp RAX, RCX; '
         init += 'jne 1b; '

   init += 'mov qword ptr [R14 + ' + str(addresses[-1]) + '], 0; '
   pointerChasingInits[tuple(addresses)] = init
   return init


ExperimentCode = namedtuple('ExperimentCode', 'code init oneTimeInit')

def getCodeForAddressLists(codeAddressLists, initAddressLists=[], wbinvd=False, afterEveryAcc=''):
   distinctAddrLists = set(tuple(l.addresses) for l in initAddressLists+codeAddressLists)
   if len(distinctAddrLists) > 1 and set.intersection(*list(set(l) for l in distinctAddrLists)):
      raise ValueError('same address in different lists')

   code = []
   init = (['wbinvd; '] if wbinvd else [])
   oneTimeInit = []

   r14Size = getR14Size()
   alreadyAddedOneTimeInits = set()

   for addressLists, codeList, isInit in [(initAddressLists, init, True), (codeAddressLists, code, False)]:
      if addressLists is None: continue

      pfcEnabled = True
      for addressList in addressLists:
         if addressList.wbinvd:
            if addressList.exclude and pfcEnabled:
               codeList.append(PFC_STOP_ASM + '; ')
            codeList.append('wbinvd; ')
            if addressList.exclude and pfcEnabled:
               codeList.append(PFC_START_ASM + '; ')
            continue

         addresses = addressList.addresses
         if len(addresses) < 1: continue

         if any(addr >= r14Size for addr in addresses):
            sys.stderr.write('Size of memory area too small. Try increasing it with set-R14-size.sh.\n')
            exit(1)

         if not isInit:
            if addressList.exclude and pfcEnabled:
               codeList.append(PFC_STOP_ASM + '; ')
               pfcEnabled = False
            elif not addressList.exclude and not pfcEnabled:
               codeList.append(PFC_START_ASM + '; ')
               pfcEnabled = True

         # use multiple lfence instructions to make sure that the block is actually in the cache and not still in a fill buffer
         codeList.append('lfence; ' * 25)

         if addressList.flush:
            for address in addresses:
               codeList.append('clflush [R14 + ' + str(address) + ']; ' + afterEveryAcc)
         else:
            if len(addresses) == 1:
               codeList.append('mov RCX, [R14 + ' + str(addresses[0]) + ']; ')
            else:
               if not tuple(addresses) in alreadyAddedOneTimeInits:
                  oneTimeInit.append(getPointerChasingInit(addresses))
                  alreadyAddedOneTimeInits.add(tuple(addresses))

               codeList.append('lea RCX, [R14+' + str(addresses[0]) + ']; 1: mov RCX, [RCX]; ' + afterEveryAcc + 'jrcxz 2f; jmp 1b; 2: ')

      if not isInit and not pfcEnabled:
         codeList.append(PFC_START_ASM + '; ')

   return ExperimentCode(''.join(code), ''.join(init), ''.join(oneTimeInit))


def getClearHLAddresses(level, cacheSetList, cBox, doNotUseOtherCBoxes, nClearAddresses=None):
   lineSize = getCacheInfo(1).lineSize

   if nClearAddresses is None:
      nClearAddresses = 2 * sum(getCacheInfo(hLevel).assoc for hLevel in range(1, level))

   if level == 1:
      return []
   elif (level == 2) or (level == 3 and (getCacheInfo(3).nSlices is None or doNotUseOtherCBoxes)):
      nSets = getCacheInfo(level).nSets
      if not all(nSets > getCacheInfo(lLevel).nSets for lLevel in range(1, level)):
         raise ValueError('L' + str(level) + ' way size must be greater than lower level way sizes')

      nHLSets = getCacheInfo(level-1).nSets
      HLSets = set(cs % nHLSets for cs in cacheSetList)
      addrForClearingHL = []

      for HLSet in HLSets:
         possibleSets = [cs for cs in range(HLSet, nSets, nHLSets) if cs not in cacheSetList]
         if not possibleSets:
            raise ValueError("not enough cache sets available for clearing higher levels")

         addrForClearingHLSet = []

         for setIndex in count(HLSet, nHLSets):
            if not setIndex % nSets in possibleSets:
               continue
            addrForClearingHLSet.append(setIndex*lineSize)
            if len(addrForClearingHLSet) >= nClearAddresses:
               break

         addrForClearingHL += addrForClearingHLSet

      return addrForClearingHL
   elif level == 3:
      if not hasattr(getClearHLAddresses, 'clearL2Map'):
         getClearHLAddresses.clearL2Map = dict()
      clearL2Map = getClearHLAddresses.clearL2Map

      if not cBox in clearL2Map:
         clearL2Map[cBox] = dict()

      clearAddresses = []
      for L3Set in cacheSetList:
         if not L3Set in clearL2Map[cBox] or len(clearL2Map[cBox][L3Set]) < nClearAddresses:
            clearL2Map[cBox][L3Set] = getNewAddressesNotInCBox(nClearAddresses, cBox, L3Set, [])
         clearAddresses += clearL2Map[cBox][L3Set][:nClearAddresses]

      return clearAddresses

L3SetToWayIDMap = dict()
def getAddresses(level, wayID, cacheSetList, cBox=1, cSlice=0):
   lineSize = getCacheInfo(1).lineSize

   if level <= 2 or (level == 3 and getCacheInfo(3).nSlices is None):
      nSets = getCacheInfo(level).nSets
      waySize = getCacheInfo(level).waySize
      return [(wayID*waySize) + s*lineSize for s in cacheSetList]
   elif level == 3:
      if not cBox in L3SetToWayIDMap:
         L3SetToWayIDMap[cBox] = dict()
      if not cSlice in L3SetToWayIDMap[cBox]:
         L3SetToWayIDMap[cBox][cSlice] = dict()

      addresses = []
      for L3Set in cacheSetList:
         if not L3Set in L3SetToWayIDMap[cBox][cSlice]:
            L3SetToWayIDMap[cBox][cSlice][L3Set] = dict()
            if getCacheInfo(3).nSlices != getNCBoxUnits():
               for i, addr in enumerate(findMinimalL3EvictionSet(L3Set, cBox, cSlice)):
                  L3SetToWayIDMap[cBox][cSlice][L3Set][i] = addr
         if not wayID in L3SetToWayIDMap[cBox][cSlice][L3Set]:
            if getCacheInfo(3).nSlices == getNCBoxUnits():
               L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(getNewAddressesInCBox(1, cBox, L3Set, list(L3SetToWayIDMap[cBox][cSlice][L3Set].values()))))
            else:
               L3SetToWayIDMap[cBox][cSlice][L3Set][wayID] = next(iter(findCongruentL3Addresses(1, L3Set, cBox, list(L3SetToWayIDMap[cBox][cSlice][L3Set].values()))))
         addresses.append(L3SetToWayIDMap[cBox][cSlice][L3Set][wayID])

      return addresses

   raise ValueError('invalid level')


# removes ?s and !s, and returns the part before the first '_'
def getBlockName(blockStr):
   return re.sub('[?!]', '', blockStr.split('_')[0])


# removes ?s and !s, and returns the part after the last '_' (as int); returns None if there is no '_'
def getBlockSet(blockStr):
   if not '_' in blockStr:
      return None
   return int(re.match('\d+', blockStr.split('_')[-1]).group())


def parseCacheSetsStr(level, clearHL, cacheSetsStr, doNotUseOtherCBoxes=False):
   cacheSetList = []
   if cacheSetsStr is not None:
      for s in cacheSetsStr.split(','):
         if '-' in s:
            first, last = s.split('-')[:2]
            cacheSetList += list(range(int(first), int(last)+1))
         else:
            cacheSetList.append(int(s))
   else:
      nSets = getCacheInfo(level).nSets
      if level > 1 and clearHL and not (level == 3 and getCacheInfo(3).nSlices is not None and not doNotUseOtherCBoxes):
         nHLSets = getCacheInfo(level-1).nSets
         cacheSetList = list(range(nHLSets, nSets))
      else:
         cacheSetList = list(range(0, nSets))
   return cacheSetList


def findCacheSetForCode(cacheSetList, level):
   nSets = getCacheInfo(level).nSets
   sortedCacheSetList = sorted(cacheSetList)
   sortedCacheSetList += [sortedCacheSetList[0] + nSets]

   maxDist = 1
   bestSet = 0
   for i in range(len(sortedCacheSetList)-1):
      dist = sortedCacheSetList[i+1] - sortedCacheSetList[i]
      if dist > maxDist:
         maxDist = dist
         bestSet = (sortedCacheSetList[i] + 1) % nSets

   return bestSet


def getAllUsedCacheSets(cacheSetList, seq, initSeq=''):
   cacheSetOverrideList = [s for s in set(map(getBlockSet, initSeq.split()+seq.split())) if s is not None]
   if any(s in cacheSetList for s in cacheSetOverrideList):
      raise ValueError('overridden cache sets must not also be in cacheSetList')
   return sorted(set(cacheSetList + cacheSetOverrideList))

AddressList = namedtuple('AddressList', 'addresses exclude flush wbinvd')

def getCodeForCacheExperiment(level, seq, initSeq, cacheSetList, cBox, cSlice, clearHL, doNotUseOtherCBoxes, wbinvd, nClearAddresses=None):
   allUsedSets = getAllUsedCacheSets(cacheSetList, seq, initSeq)

   clearHLAddrList = None
   if (clearHL and level > 1):
      clearHLAddrList = AddressList(getClearHLAddresses(level, allUsedSets, cBox, doNotUseOtherCBoxes, nClearAddresses), True, False, False)

   initAddressLists = []
   seqAddressLists = []
   nameToID = dict()

   for seqString, addrLists in [(initSeq, initAddressLists), (seq, seqAddressLists)]:
      for seqEl in seqString.split():
         name = getBlockName(seqEl)
         if name == '<wbinvd>':
            addrLists.append(AddressList([], True, False, True))
            continue

         overrideSet = getBlockSet(seqEl)

         wayID = nameToID.setdefault(name, len(nameToID))
         exclude = not '?' in seqEl
         flush = '!' in seqEl

         s = [overrideSet] if overrideSet is not None else cacheSetList
         addresses = getAddresses(level, wayID, s, cBox=cBox, cSlice=cSlice)

         if clearHLAddrList is not None and not flush:
            addrLists.append(clearHLAddrList)
         addrLists.append(AddressList(addresses, exclude, flush, False))

   log.debug('\nInitAddresses: ' + str(initAddressLists))
   log.debug('\nSeqAddresses: ' + str(seqAddressLists))

   return getCodeForAddressLists(seqAddressLists, initAddressLists, wbinvd)


def runCacheExperimentCode(code, initCode, oneTimeInitCode, loop, warmUpCount, codeOffset, nMeasurements, agg):
   resetNanoBench()
   setNanoBenchParameters(config=getDefaultCacheConfig(), msrConfig=getDefaultCacheMSRConfig(), fixedCounters=True, nMeasurements=nMeasurements, unrollCount=1,
                          loopCount=loop, warmUpCount=warmUpCount, aggregateFunction=agg, basicMode=True, noMem=True, codeOffset=codeOffset, verbose=None)
   return runNanoBench(code=code, init=initCode, oneTimeInit=oneTimeInitCode)


# cacheSets=None means do access in all sets
# in this case, the first nL1Sets many sets of L2 will be reserved for clearing L1
# cSlice refers to the nth slice within a given cBox; the assigment of numbers to slices is arbitrary
# doNotUseOtherCBoxes determines whether accesses to clear higher levels will go to other CBoxes
# if wbinvd is set, wbinvd will be called before initSeq
def runCacheExperiment(level, seq, initSeq='', cacheSets=None, cBox=1, cSlice=0, clearHL=True, doNotUseOtherCBoxes=False, loop=1, wbinvd=False,
                       nMeasurements=10, warmUpCount=1, codeSet=None, agg='avg', nClearAddresses=None):
   cacheSetList = parseCacheSetsStr(level, clearHL, cacheSets, doNotUseOtherCBoxes)
   ec = getCodeForCacheExperiment(level, seq, initSeq=initSeq, cacheSetList=cacheSetList, cBox=cBox, cSlice=cSlice, clearHL=clearHL,
                                  doNotUseOtherCBoxes=doNotUseOtherCBoxes, wbinvd=wbinvd, nClearAddresses=nClearAddresses)

   log.debug('\nOneTimeInit: ' + ec.oneTimeInit)
   log.debug('\nInit: ' + ec.init)
   log.debug('\nCode: ' + ec.code)

   lineSize = getCacheInfo(1).lineSize
   allUsedSets = getAllUsedCacheSets(cacheSetList, seq, initSeq)
   codeOffset = lineSize * (codeSet if codeSet is not None else findCacheSetForCode(allUsedSets, level))

   return runCacheExperimentCode(ec.code, ec.init, ec.oneTimeInit, loop, warmUpCount, codeOffset, nMeasurements, agg)


def printNB(nb_result):
   for r in nb_result.items():
      print(r[0] + ': ' + str(r[1]))


def hasL3Conflicts(addresses, clearHLAddrList, codeOffset):
   addrList = AddressList(addresses, False, False, False)
   ec = getCodeForAddressLists([clearHLAddrList, addrList], initAddressLists=[addrList], wbinvd=True)
   setNanoBenchParameters(config=getEventConfig('L3_HIT'), msrConfig='', nMeasurements=5, unrollCount=1, loopCount=100,
                          aggregateFunction='med', basicMode=True, noMem=True, codeOffset=codeOffset)
   nb = runNanoBench(code=ec.code, init=ec.init, oneTimeInit=ec.oneTimeInit)

   return (nb['L3_HIT'] < len(addresses) - .9)


def findMinimalL3EvictionSet(cacheSet, cBox, cSlice):
   if not hasattr(findMinimalL3EvictionSet, 'evSetForCacheSet'):
      findMinimalL3EvictionSet.evSetForCacheSet = dict()
   if not cBox in findMinimalL3EvictionSet.evSetForCacheSet:
      findMinimalL3EvictionSet.evSetForCacheSet[cBox] = dict()
   if not cSlice in findMinimalL3EvictionSet.evSetForCacheSet[cBox]:
      findMinimalL3EvictionSet.evSetForCacheSet[cBox][cSlice] = dict()

   if cacheSet in findMinimalL3EvictionSet.evSetForCacheSet[cBox][cSlice]:
      return findMinimalL3EvictionSet.evSetForCacheSet[cBox][cSlice][cacheSet]

   evSetsForOtherSlices = [findMinimalL3EvictionSet(cacheSet, cBox, s) for s in range(0, cSlice)]

   lineSize = getCacheInfo(1).lineSize
   L3Assoc = getCacheInfo(3).assoc
   L3WaySize = getCacheInfo(3).waySize

   clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox, False), True, False, False)
   codeOffset = lineSize * (cacheSet+10)

   addresses = []
   for curAddr in count(cacheSet * lineSize, L3WaySize):
      if any(curAddr in otherEvSet for otherEvSet in evSetsForOtherSlices): continue
      if not getCBoxOfAddress(curAddr) == cBox: continue
      if any(hasL3Conflicts(otherEvSet[:-1]+[curAddr], clearHLAddrList, codeOffset) for otherEvSet in evSetsForOtherSlices): continue

      addresses.append(curAddr)
      if len(addresses) > L3Assoc and hasL3Conflicts(addresses, clearHLAddrList, codeOffset):
         break

   for i in reversed(range(0, len(addresses))):
      if len(addresses) <= L3Assoc+1:
            break
      tmpAddresses = addresses[:i] + addresses[(i+1):]
      if hasL3Conflicts(tmpAddresses, clearHLAddrList, codeOffset):
         addresses = tmpAddresses

   findMinimalL3EvictionSet.evSetForCacheSet[cBox][cSlice][cacheSet] = addresses
   return addresses


def findCongruentL3Addresses(n, cacheSet, cBox, L3EvictionSet):
   clearHLAddrList = AddressList(getClearHLAddresses(3, [cacheSet], cBox, False), True, False, False)
   codeOffset = getCacheInfo(1).lineSize * (cacheSet+10)
   L3WaySize = getCacheInfo(3).waySize

   congrAddresses = []
   for newAddr in count(max(L3EvictionSet)+L3WaySize, L3WaySize):
      if not getCBoxOfAddress(newAddr) == cBox: continue

      tmpAddresses = L3EvictionSet[:getCacheInfo(3).assoc] + [newAddr]

      if hasL3Conflicts(tmpAddresses, clearHLAddrList, codeOffset):
         congrAddresses.append(newAddr)

      if len(congrAddresses) >= n: break

   return congrAddresses


def findMaximalNonEvictingL3SetInCBox(start, stride, L3Assoc, cBox):
   clearHLAddresses = []
   addresses = []

   curAddress = start
   while len(clearHLAddresses) < 2*(getCacheInfo(1).assoc+getCacheInfo(2).assoc):
      if getCBoxOfAddress(curAddress) != cBox:
         clearHLAddresses.append(curAddress)
      curAddress += stride
   clearHLAddrList = AddressList(clearHLAddresses, True, False, False)

   curAddress = start
   while len(addresses) < L3Assoc:
      if getCBoxOfAddress(curAddress) == cBox:
         addresses.append(curAddress)
      curAddress += stride

   notAdded = 0
   while notAdded < L3Assoc:
      curAddress += stride

      if not getCBoxOfAddress(curAddress) == cBox:
         continue

      newAddresses = addresses + [curAddress]

      if not hasL3Conflicts(newAddresses, clearHLAddrList, start+getCacheInfo(1).lineSize):
         addresses = newAddresses
         notAdded = 0
      else:
         notAdded += 1

   return addresses


def getUnusedBlockNames(n, usedBlockNames, prefix=''):
   newBlockNames = []
   i = 0
   while len(newBlockNames) < n:
      name = prefix + str(i)
      if not name in usedBlockNames: newBlockNames.append(name)
      i += 1
   return newBlockNames


# Returns a dict with the age of each block, i.e., how many fresh blocks need to be accessed until the block is evicted
# if returnNbResults is True, the function returns additionally all measurment results (as the second component of a tuple)
def getAgesOfBlocks(blocks, level, seq, initSeq='', maxAge=None, cacheSets=None, cBox=1, cSlice=0, clearHL=True, wbinvd=False, returnNbResults=False, nMeasurements=10, agg='avg'):
   ages = dict()
   if returnNbResults: nbResults = dict()

   if maxAge is None:
      maxAge = 2*getCacheInfo(level).assoc

   nSets = len(parseCacheSetsStr(level, clearHL, cacheSets))

   for block in blocks:
      if returnNbResults: nbResults[block] = []

      for nNewBlocks in range(0, maxAge+1):
         curSeq = seq.replace('?', '') + ' '
         newBlocks = getUnusedBlockNames(nNewBlocks, seq+initSeq, 'N')
         curSeq += ' '.join(newBlocks) + ' ' + block + '?'

         nb = runCacheExperiment(level, curSeq, initSeq=initSeq, cacheSets=cacheSets, cBox=cBox, cSlice=cSlice, clearHL=clearHL, loop=0, wbinvd=wbinvd,
                                 nMeasurements=nMeasurements, agg=agg)
         if returnNbResults: nbResults[block].append(nb)

         hitEvent = 'L' + str(level) + '_HIT'
         missEvent = 'L' + str(level) + '_MISS'

         if hitEvent in nb:
            if isClose(nb[hitEvent], 0.0, abs_tol=0.1):
               if not block in ages:
                  ages[block] = nNewBlocks
               #if not returnNbResults:
               #break
         elif missEvent in nb:
            if nb[missEvent] > nSets - 0.1:
               if not block in ages:
                  ages[block] = nNewBlocks
               #if not returnNbResults:
               #break
         else:
            raise ValueError('no cache results available')
      if not block in ages:
         ages[block] = -1

   if returnNbResults:
      return (ages, nbResults)
   else:
      return ages