# nanoBench/configs/cfg_Nehalem_all_core.txt

# Based on https://download.01.org/perfmon/NHM-EP/NehalemEP_core_V2.json
# Applies to processors with family-model in {6-1E, 6-1F, 6-1A}

# All Store buffer stall cycles
04.07 SB_DRAIN.ANY

# Loads delayed with at-Retirement block code
06.04 STORE_BLOCKS.AT_RET

# Cacheable loads delayed with L1D block code
06.08 STORE_BLOCKS.L1D_BLOCK

# False dependencies due to partial address aliasing
07.01 PARTIAL_ADDRESS_ALIAS

# DTLB load misses
08.01 DTLB_LOAD_MISSES.ANY

# DTLB load miss page walks complete
08.02 DTLB_LOAD_MISSES.WALK_COMPLETED

# DTLB second level hit
08.10 DTLB_LOAD_MISSES.STLB_HIT

# DTLB load miss caused by low part of address
08.20 DTLB_LOAD_MISSES.PDE_MISS

# Instructions retired which contains a load (Precise Event)
0B.01 MEM_INST_RETIRED.LOADS

# Instructions retired which contains a store (Precise Event)
0B.02 MEM_INST_RETIRED.STORES

# Memory instructions retired above 0 clocks (Precise Event)
0B.10.MSR_3F6H=0x0.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_0

# Memory instructions retired above 16 clocks (Precise Event)
0B.10.MSR_3F6H=0x10.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16

# Memory instructions retired above 256 clocks (Precise Event)
0B.10.MSR_3F6H=0x100.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_256

# Memory instructions retired above 4096 clocks (Precise Event)
0B.10.MSR_3F6H=0x1000.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4096

# Memory instructions retired above 32 clocks (Precise Event)
0B.10.MSR_3F6H=0x20.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32

# Memory instructions retired above 512 clocks (Precise Event)
0B.10.MSR_3F6H=0x200.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_512

# Memory instructions retired above 8192 clocks (Precise Event)
0B.10.MSR_3F6H=0x2000.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8192

# Memory instructions retired above 4 clocks (Precise Event)
0B.10.MSR_3F6H=0x4.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4

# Memory instructions retired above 64 clocks (Precise Event)
0B.10.MSR_3F6H=0x40.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_64

# Memory instructions retired above 1024 clocks (Precise Event)
0B.10.MSR_3F6H=0x400.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_1024

# Memory instructions retired above 16384 clocks (Precise Event)
0B.10.MSR_3F6H=0x4000.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16384

# Memory instructions retired above 8 clocks (Precise Event)
0B.10.MSR_3F6H=0x8.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8

# Memory instructions retired above 128 clocks (Precise Event)
0B.10.MSR_3F6H=0x80.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_128

# Memory instructions retired above 2048 clocks (Precise Event)
0B.10.MSR_3F6H=0x800.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_2048

# Memory instructions retired above 32768 clocks (Precise Event)
0B.10.MSR_3F6H=0x8000.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32768

# Retired stores that miss the DTLB (Precise Event)
0C.01 MEM_STORE_RETIRED.DTLB_MISS

# Uops issued
0E.01 UOPS_ISSUED.ANY

# Cycles Uops were issued on either thread
0E.01.CMSK=1.AnyT UOPS_ISSUED.CYCLES_ALL_THREADS

# Cycles no Uops were issued on any thread
0E.01.CMSK=1.AnyT.INV UOPS_ISSUED.CORE_STALL_CYCLES

# Cycles no Uops were issued
0E.01.CMSK=1.INV UOPS_ISSUED.STALL_CYCLES

# Fused Uops issued
0E.02 UOPS_ISSUED.FUSED

# Load instructions retired that HIT modified data in sibling core (Precise Event)
0F.02 MEM_UNCORE_RETIRED.OTHER_CORE_L2_HITM

# Load instructions retired remote cache HIT data source (Precise Event)
0F.08 MEM_UNCORE_RETIRED.REMOTE_CACHE_LOCAL_HOME_HIT

# Load instructions retired remote DRAM and remote home-remote cache HITM (Precise Event)
0F.10 MEM_UNCORE_RETIRED.REMOTE_DRAM

# Load instructions retired with a data source of local DRAM or locally homed remote hitm (Precise Event)
0F.20 MEM_UNCORE_RETIRED.LOCAL_DRAM

# Load instructions retired IO (Precise Event)
0F.80 MEM_UNCORE_RETIRED.UNCACHEABLE

# Computational floating-point operations executed
10.01 FP_COMP_OPS_EXE.X87

# MMX Uops
10.02 FP_COMP_OPS_EXE.MMX

# SSE and SSE2 FP Uops
10.04 FP_COMP_OPS_EXE.SSE_FP

# SSE2 integer Uops
10.08 FP_COMP_OPS_EXE.SSE2_INTEGER

# SSE FP packed Uops
10.10 FP_COMP_OPS_EXE.SSE_FP_PACKED

# SSE FP scalar Uops
10.20 FP_COMP_OPS_EXE.SSE_FP_SCALAR

# SSE* FP single precision Uops
10.40 FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION

# SSE* FP double precision Uops
10.80 FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION

# 128 bit SIMD integer multiply operations
12.01 SIMD_INT_128.PACKED_MPY

# 128 bit SIMD integer shift operations
12.02 SIMD_INT_128.PACKED_SHIFT

# 128 bit SIMD integer pack operations
12.04 SIMD_INT_128.PACK

# 128 bit SIMD integer unpack operations
12.08 SIMD_INT_128.UNPACK

# 128 bit SIMD integer logical operations
12.10 SIMD_INT_128.PACKED_LOGICAL

# 128 bit SIMD integer arithmetic operations
12.20 SIMD_INT_128.PACKED_ARITH

# 128 bit SIMD integer shuffle/move operations
12.40 SIMD_INT_128.SHUFFLE_MOVE

# Loads dispatched that bypass the MOB
13.01 LOAD_DISPATCH.RS

# Loads dispatched from stage 305
13.02 LOAD_DISPATCH.RS_DELAYED

# Loads dispatched from the MOB
13.04 LOAD_DISPATCH.MOB

# All loads dispatched
13.07 LOAD_DISPATCH.ANY

# Cycles the divider is busy
14.01 ARITH.CYCLES_DIV_BUSY

# Divide Operations executed
14.01.CMSK=1.EDG.INV ARITH.DIV

# Multiply operations executed
14.02 ARITH.MUL

# Instructions written to instruction queue.
17.01 INST_QUEUE_WRITES

# Instructions that must be decoded by decoder 0
18.01 INST_DECODED.DEC0

# Two Uop instructions decoded
19.01 TWO_UOP_INSTS_DECODED

# Cycles instructions are written to the instruction queue
1E.01 INST_QUEUE_WRITE_CYCLES

# Loops that can't stream from the instruction queue
20.01 LSD_OVERFLOW

# L2 load hits
24.01 L2_RQSTS.LD_HIT

# L2 load misses
24.02 L2_RQSTS.LD_MISS

# L2 load requests
24.03 L2_RQSTS.LOADS

# L2 RFO hits
24.04 L2_RQSTS.RFO_HIT

# L2 RFO misses
24.08 L2_RQSTS.RFO_MISS

# L2 RFO requests
24.0C L2_RQSTS.RFOS

# L2 instruction fetch hits
24.10 L2_RQSTS.IFETCH_HIT

# L2 instruction fetch misses
24.20 L2_RQSTS.IFETCH_MISS

# L2 instruction fetches
24.30 L2_RQSTS.IFETCHES

# L2 prefetch hits
24.40 L2_RQSTS.PREFETCH_HIT

# L2 prefetch misses
24.80 L2_RQSTS.PREFETCH_MISS

# All L2 misses
24.AA L2_RQSTS.MISS

# All L2 prefetches
24.C0 L2_RQSTS.PREFETCHES

# All L2 requests
24.FF L2_RQSTS.REFERENCES

# L2 data demand loads in I state (misses)
26.01 L2_DATA_RQSTS.DEMAND.I_STATE

# L2 data demand loads in S state
26.02 L2_DATA_RQSTS.DEMAND.S_STATE

# L2 data demand loads in E state
26.04 L2_DATA_RQSTS.DEMAND.E_STATE

# L2 data demand loads in M state
26.08 L2_DATA_RQSTS.DEMAND.M_STATE

# L2 data demand requests
26.0F L2_DATA_RQSTS.DEMAND.MESI

# L2 data prefetches in the I state (misses)
26.10 L2_DATA_RQSTS.PREFETCH.I_STATE

# L2 data prefetches in the S state
26.20 L2_DATA_RQSTS.PREFETCH.S_STATE

# L2 data prefetches in E state
26.40 L2_DATA_RQSTS.PREFETCH.E_STATE

# L2 data prefetches in M state
26.80 L2_DATA_RQSTS.PREFETCH.M_STATE

# All L2 data prefetches
26.F0 L2_DATA_RQSTS.PREFETCH.MESI

# All L2 data requests
26.FF L2_DATA_RQSTS.ANY

# L2 demand store RFOs in I state (misses)
27.01 L2_WRITE.RFO.I_STATE

# L2 demand store RFOs in S state
27.02 L2_WRITE.RFO.S_STATE

# L2 demand store RFOs in M state
27.08 L2_WRITE.RFO.M_STATE

# All L2 demand store RFOs that hit the cache
27.0E L2_WRITE.RFO.HIT

# All L2 demand store RFOs
27.0F L2_WRITE.RFO.MESI

# L2 demand lock RFOs in I state (misses)
27.10 L2_WRITE.LOCK.I_STATE

# L2 demand lock RFOs in S state
27.20 L2_WRITE.LOCK.S_STATE

# L2 demand lock RFOs in E state
27.40 L2_WRITE.LOCK.E_STATE

# L2 demand lock RFOs in M state
27.80 L2_WRITE.LOCK.M_STATE

# All demand L2 lock RFOs that hit the cache
27.E0 L2_WRITE.LOCK.HIT

# All demand L2 lock RFOs
27.F0 L2_WRITE.LOCK.MESI

# L1 writebacks to L2 in I state (misses)
28.01 L1D_WB_L2.I_STATE

# L1 writebacks to L2 in S state
28.02 L1D_WB_L2.S_STATE

# L1 writebacks to L2 in E state
28.04 L1D_WB_L2.E_STATE

# L1 writebacks to L2 in M state
28.08 L1D_WB_L2.M_STATE

# All L1 writebacks to L2
28.0F L1D_WB_L2.MESI

# Longest latency cache miss
2E.41 LONGEST_LAT_CACHE.MISS

# Longest latency cache reference
2E.4F LONGEST_LAT_CACHE.REFERENCE

# Cycles when thread is not halted (programmable counter)
3C.00 CPU_CLK_UNHALTED.THREAD_P

# Total CPU cycles
3C.00.CMSK=2.INV CPU_CLK_UNHALTED.TOTAL_CYCLES

# Reference base clock (133 MHz) cycles when thread is not halted (programmable counter)
3C.01 CPU_CLK_UNHALTED.REF_P

# L1 data cache read in I state (misses)
40.01.CTR=0 L1D_CACHE_LD.I_STATE

# L1 data cache read in S state
40.02.CTR=0 L1D_CACHE_LD.S_STATE

# L1 data cache read in E state
40.04.CTR=0 L1D_CACHE_LD.E_STATE

# L1 data cache read in M state
40.08.CTR=0 L1D_CACHE_LD.M_STATE

# L1 data cache reads
40.0F.CTR=0 L1D_CACHE_LD.MESI

# L1 data cache stores in S state
41.02.CTR=0 L1D_CACHE_ST.S_STATE

# L1 data cache stores in E state
41.04.CTR=0 L1D_CACHE_ST.E_STATE

# L1 data cache stores in M state
41.08.CTR=0 L1D_CACHE_ST.M_STATE

# L1 data cache load lock hits
42.01.CTR=0 L1D_CACHE_LOCK.HIT

# L1 data cache load locks in S state
42.02.CTR=0 L1D_CACHE_LOCK.S_STATE

# L1 data cache load locks in E state
42.04.CTR=0 L1D_CACHE_LOCK.E_STATE

# L1 data cache load locks in M state
42.08.CTR=0 L1D_CACHE_LOCK.M_STATE

# All references to the L1 data cache
43.01.CTR=0 L1D_ALL_REF.ANY

# L1 data cacheable reads and writes
43.02.CTR=0 L1D_ALL_REF.CACHEABLE

# DTLB misses
49.01 DTLB_MISSES.ANY

# DTLB miss page walks
49.02 DTLB_MISSES.WALK_COMPLETED

# DTLB first level misses but second level hit
49.10 DTLB_MISSES.STLB_HIT

# Load operations conflicting with software prefetches
4C.01.CTR=0 LOAD_HIT_PRE

# L1D hardware prefetch requests
4E.01.CTR=0 L1D_PREFETCH.REQUESTS

# L1D hardware prefetch misses
4E.02.CTR=0 L1D_PREFETCH.MISS

# L1D hardware prefetch requests triggered
4E.04.CTR=0 L1D_PREFETCH.TRIGGERS

# L1 data cache lines allocated
51.01.CTR=0 L1D.REPL

# L1D cache lines allocated in the M state
51.02.CTR=0 L1D.M_REPL

# L1D cache lines replaced in M state
51.04.CTR=0 L1D.M_EVICT

# L1D snoop eviction of cache lines in M state
51.08.CTR=0 L1D.M_SNOOP_EVICT

# L1D prefetch load lock accepted in fill buffer
52.01.CTR=0 L1D_CACHE_PREFETCH_LOCK_FB_HIT

# L1D load lock accepted in fill buffer
53.01.CTR=0 L1D_CACHE_LOCK_FB_HIT

# Cycles L1D and L2 locked
63.01.CTR=0 CACHE_LOCK_CYCLES.L1D_L2

# Cycles L1D locked
63.02.CTR=0 CACHE_LOCK_CYCLES.L1D

# I/O transactions
6C.01 IO_TRANSACTIONS

# L1I instruction fetch hits
80.01 L1I.HITS

# L1I instruction fetch misses
80.02 L1I.MISSES

# L1I Instruction fetches
80.03 L1I.READS

# L1I instruction fetch stall cycles
80.04 L1I.CYCLES_STALLED

# Large ITLB hit
82.01 LARGE_ITLB.HIT

# ITLB miss
85.01 ITLB_MISSES.ANY

# ITLB miss page walks
85.02 ITLB_MISSES.WALK_COMPLETED

# Length Change Prefix stall cycles
87.01 ILD_STALL.LCP

# Stall cycles due to BPU MRU bypass
87.02 ILD_STALL.MRU

# Instruction Queue full stall cycles
87.04 ILD_STALL.IQ_FULL

# Regen stall cycles
87.08 ILD_STALL.REGEN

# Any Instruction Length Decoder stall cycles
87.0F ILD_STALL.ANY

# Conditional branch instructions executed
88.01 BR_INST_EXEC.COND

# Unconditional branches executed
88.02 BR_INST_EXEC.DIRECT

# Indirect non call branches executed
88.04 BR_INST_EXEC.INDIRECT_NON_CALL

# All non call branches executed
88.07 BR_INST_EXEC.NON_CALLS

# Indirect return branches executed
88.08 BR_INST_EXEC.RETURN_NEAR

# Unconditional call branches executed
88.10 BR_INST_EXEC.DIRECT_NEAR_CALL

# Indirect call branches executed
88.20 BR_INST_EXEC.INDIRECT_NEAR_CALL

# Call branches executed
88.30 BR_INST_EXEC.NEAR_CALLS

# Taken branches executed
88.40 BR_INST_EXEC.TAKEN

# Branch instructions executed
88.7F BR_INST_EXEC.ANY

# Mispredicted conditional branches executed
89.01 BR_MISP_EXEC.COND

# Mispredicted unconditional branches executed
89.02 BR_MISP_EXEC.DIRECT

# Mispredicted indirect non call branches executed
89.04 BR_MISP_EXEC.INDIRECT_NON_CALL

# Mispredicted non call branches executed
89.07 BR_MISP_EXEC.NON_CALLS

# Mispredicted return branches executed
89.08 BR_MISP_EXEC.RETURN_NEAR

# Mispredicted unconditional call branches executed
89.10 BR_MISP_EXEC.DIRECT_NEAR_CALL

# Mispredicted indirect call branches executed
89.20 BR_MISP_EXEC.INDIRECT_NEAR_CALL

# Mispredicted call branches executed
89.30 BR_MISP_EXEC.NEAR_CALLS

# Mispredicted taken branches executed
89.40 BR_MISP_EXEC.TAKEN

# Mispredicted branches executed
89.7F BR_MISP_EXEC.ANY

# Resource related stall cycles
A2.01 RESOURCE_STALLS.ANY

# Load buffer stall cycles
A2.02 RESOURCE_STALLS.LOAD

# Reservation Station full stall cycles
A2.04 RESOURCE_STALLS.RS_FULL

# Store buffer stall cycles
A2.08 RESOURCE_STALLS.STORE

# ROB full stall cycles
A2.10 RESOURCE_STALLS.ROB_FULL

# FPU control word write stall cycles
A2.20 RESOURCE_STALLS.FPCW

# MXCSR rename stall cycles
A2.40 RESOURCE_STALLS.MXCSR

# Other Resource related stall cycles
A2.80 RESOURCE_STALLS.OTHER

# Macro-fused instructions decoded
A6.01 MACRO_INSTS.FUSIONS_DECODED

# Instruction queue forced BACLEAR
A7.01 BACLEAR_FORCE_IQ

# Cycles when uops were delivered by the LSD
A8.01.CMSK=1 LSD.ACTIVE

# Cycles no uops were delivered by the LSD
A8.01.CMSK=1.INV LSD.INACTIVE

# ITLB flushes
AE.01 ITLB_FLUSH

# Offcore L1 data cache writebacks
B0.40 OFFCORE_REQUESTS.L1D_WRITEBACK

# Uops executed on port 0
B1.01 UOPS_EXECUTED.PORT0

# Uops executed on port 1
B1.02 UOPS_EXECUTED.PORT1

# Uops executed on port 2 (core count)
B1.04.AnyT UOPS_EXECUTED.PORT2_CORE

# Uops executed on port 3 (core count)
B1.08.AnyT UOPS_EXECUTED.PORT3_CORE

# Uops executed on port 4 (core count)
B1.10.AnyT UOPS_EXECUTED.PORT4_CORE

# Cycles Uops executed on ports 0-4 (core count)
B1.1F.CMSK=1.AnyT UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5

# Number of stall periods with no Uops executed on ports 0-4 (core count)
B1.1F.CMSK=1.AnyT.EDG.INV UOPS_EXECUTED.CORE_STALL_COUNT_NO_PORT5

# Cycles no Uops issued on ports 0-4 (core count)
B1.1F.CMSK=1.AnyT.INV UOPS_EXECUTED.CORE_STALL_CYCLES_NO_PORT5

# Uops executed on port 5
B1.20 UOPS_EXECUTED.PORT5

# Cycles Uops executed on any port (core count)
B1.3F.CMSK=1.AnyT UOPS_EXECUTED.CORE_ACTIVE_CYCLES

# Number of stall periods with no Uops executed on any port (core count)
B1.3F.CMSK=1.AnyT.EDG.INV UOPS_EXECUTED.CORE_STALL_COUNT

# Cycles no Uops issued on any port (core count)
B1.3F.CMSK=1.AnyT.INV UOPS_EXECUTED.CORE_STALL_CYCLES

# Uops issued on ports 0, 1 or 5
B1.40 UOPS_EXECUTED.PORT015

# Cycles no Uops issued on ports 0, 1 or 5
B1.40.CMSK=1.INV UOPS_EXECUTED.PORT015_STALL_CYCLES

# Uops issued on ports 2, 3 or 4
B1.80.AnyT UOPS_EXECUTED.PORT234_CORE

# Offcore requests blocked due to Super Queue full
B2.01 OFFCORE_REQUESTS_SQ_FULL

# Thread responded HIT to snoop
B8.01 SNOOP_RESPONSE.HIT

# Thread responded HITE to snoop
B8.02 SNOOP_RESPONSE.HITE

# Thread responded HITM to snoop
B8.04 SNOOP_RESPONSE.HITM

# Instructions retired (Programmable counter and Precise Event)
C0.01 INST_RETIRED.ANY_P

# Total cycles (Precise Event)
C0.01.CMSK=16.INV INST_RETIRED.TOTAL_CYCLES

# Total cycles (Precise Event)
C0.01.CMSK=16.INV INST_RETIRED.TOTAL_CYCLES_PS

# Retired floating-point operations (Precise Event)
C0.02 INST_RETIRED.X87

# Retired MMX instructions (Precise Event)
C0.04 INST_RETIRED.MMX

# Uops retired (Precise Event)
C2.01 UOPS_RETIRED.ANY

# Cycles Uops are being retired
C2.01.CMSK=1 UOPS_RETIRED.ACTIVE_CYCLES

# Cycles Uops are not retiring (Precise Event)
C2.01.CMSK=1.INV UOPS_RETIRED.STALL_CYCLES

# Total cycles using precise uop retired event (Precise Event)
C2.01.CMSK=16.INV UOPS_RETIRED.TOTAL_CYCLES

# Retirement slots used (Precise Event)
C2.02 UOPS_RETIRED.RETIRE_SLOTS

# Macro-fused Uops retired (Precise Event)
C2.04 UOPS_RETIRED.MACRO_FUSED

# Cycles machine clear asserted
C3.01 MACHINE_CLEARS.CYCLES

# Execution pipeline restart due to Memory ordering conflicts
C3.02 MACHINE_CLEARS.MEM_ORDER

# Self-Modifying Code detected
C3.04 MACHINE_CLEARS.SMC

# Retired conditional branch instructions (Precise Event)
C4.01 BR_INST_RETIRED.CONDITIONAL

# Retired near call instructions (Precise Event)
C4.02 BR_INST_RETIRED.NEAR_CALL

# Retired branch instructions (Precise Event)
C4.04 BR_INST_RETIRED.ALL_BRANCHES

# Mispredicted near retired calls (Precise Event)
C5.02 BR_MISP_RETIRED.NEAR_CALL

# SIMD Packed-Single Uops retired (Precise Event)
C7.01 SSEX_UOPS_RETIRED.PACKED_SINGLE

# SIMD Scalar-Single Uops retired (Precise Event)
C7.02 SSEX_UOPS_RETIRED.SCALAR_SINGLE

# SIMD Packed-Double Uops retired (Precise Event)
C7.04 SSEX_UOPS_RETIRED.PACKED_DOUBLE

# SIMD Scalar-Double Uops retired (Precise Event)
C7.08 SSEX_UOPS_RETIRED.SCALAR_DOUBLE

# SIMD Vector Integer Uops retired (Precise Event)
C7.10 SSEX_UOPS_RETIRED.VECTOR_INTEGER

# Retired instructions that missed the ITLB (Precise Event)
C8.20 ITLB_MISS_RETIRED

# Retired loads that hit the L1 data cache (Precise Event)
CB.01 MEM_LOAD_RETIRED.L1D_HIT

# Retired loads that hit the L2 cache (Precise Event)
CB.02 MEM_LOAD_RETIRED.L2_HIT

# Retired loads that hit valid versions in the LLC cache (Precise Event)
CB.04 MEM_LOAD_RETIRED.LLC_UNSHARED_HIT

# Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event)
CB.08 MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM

# Retired loads that miss the LLC cache (Precise Event)
CB.10 MEM_LOAD_RETIRED.LLC_MISS

# Retired loads that miss L1D and hit a previously allocated LFB (Precise Event)
CB.40 MEM_LOAD_RETIRED.HIT_LFB

# Retired loads that miss the DTLB (Precise Event)
CB.80 MEM_LOAD_RETIRED.DTLB_MISS

# Transitions from MMX to Floating Point instructions
CC.01 FP_MMX_TRANS.TO_FP

# Transitions from Floating Point to MMX instructions
CC.02 FP_MMX_TRANS.TO_MMX

# All Floating Point to and from MMX transitions
CC.03 FP_MMX_TRANS.ANY

# Instructions decoded
D0.01 MACRO_INSTS.DECODED

# Cycles no Uops are decoded
D1.01.CMSK=1.INV UOPS_DECODED.STALL_CYCLES

# Cycles Uops are decoded by the Microcode Sequencer
D1.02.CMSK=1 UOPS_DECODED.MS_CYCLES_ACTIVE

# Stack pointer instructions decoded
D1.04 UOPS_DECODED.ESP_FOLDING

# Stack pointer sync operations
D1.08 UOPS_DECODED.ESP_SYNC

# Flag stall cycles
D2.01 RAT_STALLS.FLAGS

# Partial register stall cycles
D2.02 RAT_STALLS.REGISTERS

# ROB read port stalls cycles
D2.04 RAT_STALLS.ROB_READ_PORT

# Scoreboard stall cycles
D2.08 RAT_STALLS.SCOREBOARD

# All RAT stall cycles
D2.0F RAT_STALLS.ANY

# Segment rename stall cycles
D4.01 SEG_RENAME_STALLS

# ES segment renames
D5.01 ES_REG_RENAMES

# Uop unfusions due to FP exceptions
DB.01 UOP_UNFUSION

# Branch instructions decoded
E0.01 BR_INST_DECODED

# Branch prediction unit missed call or return
E5.01 BPU_MISSED_CALL_RET

# BACLEAR asserted, regardless of cause
E6.01 BACLEAR.CLEAR

# BACLEAR asserted with bad target address
E6.02 BACLEAR.BAD_TARGET

# Early Branch Prediction Unit clears
E8.01 BPU_CLEARS.EARLY

# Late Branch Prediction Unit clears
E8.02 BPU_CLEARS.LATE

# L2 Load transactions
F0.01 L2_TRANSACTIONS.LOAD

# L2 RFO transactions
F0.02 L2_TRANSACTIONS.RFO

# L2 instruction fetch transactions
F0.04 L2_TRANSACTIONS.IFETCH

# L2 prefetch transactions
F0.08 L2_TRANSACTIONS.PREFETCH

# L1D writeback to L2 transactions
F0.10 L2_TRANSACTIONS.L1D_WB

# L2 fill transactions
F0.20 L2_TRANSACTIONS.FILL

# L2 writeback to LLC transactions
F0.40 L2_TRANSACTIONS.WB

# All L2 transactions
F0.80 L2_TRANSACTIONS.ANY

# L2 lines allocated in the S state
F1.02 L2_LINES_IN.S_STATE

# L2 lines allocated in the E state
F1.04 L2_LINES_IN.E_STATE

# L2 lines allocated
F1.07 L2_LINES_IN.ANY

# L2 lines evicted by a demand request
F2.01 L2_LINES_OUT.DEMAND_CLEAN

# L2 modified lines evicted by a demand request
F2.02 L2_LINES_OUT.DEMAND_DIRTY

# L2 lines evicted by a prefetch request
F2.04 L2_LINES_OUT.PREFETCH_CLEAN

# L2 modified lines evicted by a prefetch request
F2.08 L2_LINES_OUT.PREFETCH_DIRTY

# L2 lines evicted
F2.0F L2_LINES_OUT.ANY

# Super Queue lock splits across a cache line
F4.10 SQ_MISC.SPLIT_LOCK

# Super Queue full stall cycles
F6.01 SQ_FULL_STALL_CYCLES

# X87 Floating point assists (Precise Event)
F7.01 FP_ASSIST.ALL

# X87 Floating point assists for invalid output value (Precise Event)
F7.02 FP_ASSIST.OUTPUT

# X87 Floating point assists for invalid input value (Precise Event)
F7.04 FP_ASSIST.INPUT

# SIMD integer 64 bit packed multiply operations
FD.01 SIMD_INT_64.PACKED_MPY

# SIMD integer 64 bit shift operations
FD.02 SIMD_INT_64.PACKED_SHIFT

# SIMD integer 64 bit pack operations
FD.04 SIMD_INT_64.PACK

# SIMD integer 64 bit unpack operations
FD.08 SIMD_INT_64.UNPACK

# SIMD integer 64 bit logical operations
FD.10 SIMD_INT_64.PACKED_LOGICAL

# SIMD integer 64 bit arithmetic operations
FD.20 SIMD_INT_64.PACKED_ARITH

# SIMD integer 64 bit shuffle/move operations
FD.40 SIMD_INT_64.SHUFFLE_MOVE