mirror of
https://github.com/andreas-abel/nanoBench.git
synced 2025-07-21 15:11:03 +02:00
858 lines
20 KiB
Plaintext
858 lines
20 KiB
Plaintext
# Based on https://download.01.org/perfmon/NHM-EP/NehalemEP_core_V2.json
|
|
# Applies to processors with family-model in {6-1E, 6-1F, 6-1A}
|
|
|
|
# All Store buffer stall cycles
|
|
04.07 SB_DRAIN.ANY
|
|
|
|
# Loads delayed with at-Retirement block code
|
|
06.04 STORE_BLOCKS.AT_RET
|
|
|
|
# Cacheable loads delayed with L1D block code
|
|
06.08 STORE_BLOCKS.L1D_BLOCK
|
|
|
|
# False dependencies due to partial address aliasing
|
|
07.01 PARTIAL_ADDRESS_ALIAS
|
|
|
|
# DTLB load misses
|
|
08.01 DTLB_LOAD_MISSES.ANY
|
|
|
|
# DTLB load miss page walks complete
|
|
08.02 DTLB_LOAD_MISSES.WALK_COMPLETED
|
|
|
|
# DTLB second level hit
|
|
08.10 DTLB_LOAD_MISSES.STLB_HIT
|
|
|
|
# DTLB load miss caused by low part of address
|
|
08.20 DTLB_LOAD_MISSES.PDE_MISS
|
|
|
|
# Instructions retired which contain a load (Precise Event)
|
|
0B.01 MEM_INST_RETIRED.LOADS
|
|
|
|
# Instructions retired which contain a store (Precise Event)
|
|
0B.02 MEM_INST_RETIRED.STORES
|
|
|
|
# Memory instructions retired above 0 clocks (Precise Event)
|
|
0B.10.MSR_3F6H=0x0.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_0
|
|
|
|
# Memory instructions retired above 16 clocks (Precise Event)
|
|
0B.10.MSR_3F6H=0x10.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16
|
|
|
|
# Memory instructions retired above 256 clocks (Precise Event)
|
|
0B.10.MSR_3F6H=0x100.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_256
|
|
|
|
# Memory instructions retired above 4096 clocks (Precise Event)
|
|
0B.10.MSR_3F6H=0x1000.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4096
|
|
|
|
# Memory instructions retired above 32 clocks (Precise Event)
|
|
0B.10.MSR_3F6H=0x20.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32
|
|
|
|
# Memory instructions retired above 512 clocks (Precise Event)
|
|
0B.10.MSR_3F6H=0x200.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_512
|
|
|
|
# Memory instructions retired above 8192 clocks (Precise Event)
|
|
0B.10.MSR_3F6H=0x2000.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8192
|
|
|
|
# Memory instructions retired above 4 clocks (Precise Event)
|
|
0B.10.MSR_3F6H=0x4.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4
|
|
|
|
# Memory instructions retired above 64 clocks (Precise Event)
|
|
0B.10.MSR_3F6H=0x40.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_64
|
|
|
|
# Memory instructions retired above 1024 clocks (Precise Event)
|
|
0B.10.MSR_3F6H=0x400.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_1024
|
|
|
|
# Memory instructions retired above 16384 clocks (Precise Event)
|
|
0B.10.MSR_3F6H=0x4000.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16384
|
|
|
|
# Memory instructions retired above 8 clocks (Precise Event)
|
|
0B.10.MSR_3F6H=0x8.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8
|
|
|
|
# Memory instructions retired above 128 clocks (Precise Event)
|
|
0B.10.MSR_3F6H=0x80.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_128
|
|
|
|
# Memory instructions retired above 2048 clocks (Precise Event)
|
|
0B.10.MSR_3F6H=0x800.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_2048
|
|
|
|
# Memory instructions retired above 32768 clocks (Precise Event)
|
|
0B.10.MSR_3F6H=0x8000.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32768
|
|
|
|
# Retired stores that miss the DTLB (Precise Event)
|
|
0C.01 MEM_STORE_RETIRED.DTLB_MISS
|
|
|
|
# Uops issued
|
|
0E.01 UOPS_ISSUED.ANY
|
|
|
|
# Cycles Uops were issued on either thread
|
|
0E.01.CMSK=1.AnyT UOPS_ISSUED.CYCLES_ALL_THREADS
|
|
|
|
# Cycles no Uops were issued on any thread
|
|
0E.01.CMSK=1.AnyT.INV UOPS_ISSUED.CORE_STALL_CYCLES
|
|
|
|
# Cycles no Uops were issued
|
|
0E.01.CMSK=1.INV UOPS_ISSUED.STALL_CYCLES
|
|
|
|
# Fused Uops issued
|
|
0E.02 UOPS_ISSUED.FUSED
|
|
|
|
# Load instructions retired that HIT modified data in sibling core (Precise Event)
|
|
0F.02 MEM_UNCORE_RETIRED.OTHER_CORE_L2_HITM
|
|
|
|
# Load instructions retired remote cache HIT data source (Precise Event)
|
|
0F.08 MEM_UNCORE_RETIRED.REMOTE_CACHE_LOCAL_HOME_HIT
|
|
|
|
# Load instructions retired remote DRAM and remote home-remote cache HITM (Precise Event)
|
|
0F.10 MEM_UNCORE_RETIRED.REMOTE_DRAM
|
|
|
|
# Load instructions retired with a data source of local DRAM or locally homed remote hitm (Precise Event)
|
|
0F.20 MEM_UNCORE_RETIRED.LOCAL_DRAM
|
|
|
|
# Load instructions retired IO (Precise Event)
|
|
0F.80 MEM_UNCORE_RETIRED.UNCACHEABLE
|
|
|
|
# Computational floating-point operations executed
|
|
10.01 FP_COMP_OPS_EXE.X87
|
|
|
|
# MMX Uops
|
|
10.02 FP_COMP_OPS_EXE.MMX
|
|
|
|
# SSE and SSE2 FP Uops
|
|
10.04 FP_COMP_OPS_EXE.SSE_FP
|
|
|
|
# SSE2 integer Uops
|
|
10.08 FP_COMP_OPS_EXE.SSE2_INTEGER
|
|
|
|
# SSE FP packed Uops
|
|
10.10 FP_COMP_OPS_EXE.SSE_FP_PACKED
|
|
|
|
# SSE FP scalar Uops
|
|
10.20 FP_COMP_OPS_EXE.SSE_FP_SCALAR
|
|
|
|
# SSE* FP single precision Uops
|
|
10.40 FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION
|
|
|
|
# SSE* FP double precision Uops
|
|
10.80 FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION
|
|
|
|
# 128 bit SIMD integer multiply operations
|
|
12.01 SIMD_INT_128.PACKED_MPY
|
|
|
|
# 128 bit SIMD integer shift operations
|
|
12.02 SIMD_INT_128.PACKED_SHIFT
|
|
|
|
# 128 bit SIMD integer pack operations
|
|
12.04 SIMD_INT_128.PACK
|
|
|
|
# 128 bit SIMD integer unpack operations
|
|
12.08 SIMD_INT_128.UNPACK
|
|
|
|
# 128 bit SIMD integer logical operations
|
|
12.10 SIMD_INT_128.PACKED_LOGICAL
|
|
|
|
# 128 bit SIMD integer arithmetic operations
|
|
12.20 SIMD_INT_128.PACKED_ARITH
|
|
|
|
# 128 bit SIMD integer shuffle/move operations
|
|
12.40 SIMD_INT_128.SHUFFLE_MOVE
|
|
|
|
# Loads dispatched that bypass the MOB
|
|
13.01 LOAD_DISPATCH.RS
|
|
|
|
# Loads dispatched from stage 305
|
|
13.02 LOAD_DISPATCH.RS_DELAYED
|
|
|
|
# Loads dispatched from the MOB
|
|
13.04 LOAD_DISPATCH.MOB
|
|
|
|
# All loads dispatched
|
|
13.07 LOAD_DISPATCH.ANY
|
|
|
|
# Cycles the divider is busy
|
|
14.01 ARITH.CYCLES_DIV_BUSY
|
|
|
|
# Divide Operations executed
|
|
14.01.CMSK=1.EDG.INV ARITH.DIV
|
|
|
|
# Multiply operations executed
|
|
14.02 ARITH.MUL
|
|
|
|
# Instructions written to instruction queue.
|
|
17.01 INST_QUEUE_WRITES
|
|
|
|
# Instructions that must be decoded by decoder 0
|
|
18.01 INST_DECODED.DEC0
|
|
|
|
# Two Uop instructions decoded
|
|
19.01 TWO_UOP_INSTS_DECODED
|
|
|
|
# Cycles instructions are written to the instruction queue
|
|
1E.01 INST_QUEUE_WRITE_CYCLES
|
|
|
|
# Loops that can't stream from the instruction queue
|
|
20.01 LSD_OVERFLOW
|
|
|
|
# L2 load hits
|
|
24.01 L2_RQSTS.LD_HIT
|
|
|
|
# L2 load misses
|
|
24.02 L2_RQSTS.LD_MISS
|
|
|
|
# L2 requests
|
|
24.03 L2_RQSTS.LOADS
|
|
|
|
# L2 RFO hits
|
|
24.04 L2_RQSTS.RFO_HIT
|
|
|
|
# L2 RFO misses
|
|
24.08 L2_RQSTS.RFO_MISS
|
|
|
|
# L2 RFO requests
|
|
24.0C L2_RQSTS.RFOS
|
|
|
|
# L2 instruction fetch hits
|
|
24.10 L2_RQSTS.IFETCH_HIT
|
|
|
|
# L2 instruction fetch misses
|
|
24.20 L2_RQSTS.IFETCH_MISS
|
|
|
|
# L2 instruction fetches
|
|
24.30 L2_RQSTS.IFETCHES
|
|
|
|
# L2 prefetch hits
|
|
24.40 L2_RQSTS.PREFETCH_HIT
|
|
|
|
# L2 prefetch misses
|
|
24.80 L2_RQSTS.PREFETCH_MISS
|
|
|
|
# All L2 misses
|
|
24.AA L2_RQSTS.MISS
|
|
|
|
# All L2 prefetches
|
|
24.C0 L2_RQSTS.PREFETCHES
|
|
|
|
# All L2 requests
|
|
24.FF L2_RQSTS.REFERENCES
|
|
|
|
# L2 data demand loads in I state (misses)
|
|
26.01 L2_DATA_RQSTS.DEMAND.I_STATE
|
|
|
|
# L2 data demand loads in S state
|
|
26.02 L2_DATA_RQSTS.DEMAND.S_STATE
|
|
|
|
# L2 data demand loads in E state
|
|
26.04 L2_DATA_RQSTS.DEMAND.E_STATE
|
|
|
|
# L2 data demand loads in M state
|
|
26.08 L2_DATA_RQSTS.DEMAND.M_STATE
|
|
|
|
# L2 data demand requests
|
|
26.0F L2_DATA_RQSTS.DEMAND.MESI
|
|
|
|
# L2 data prefetches in the I state (misses)
|
|
26.10 L2_DATA_RQSTS.PREFETCH.I_STATE
|
|
|
|
# L2 data prefetches in the S state
|
|
26.20 L2_DATA_RQSTS.PREFETCH.S_STATE
|
|
|
|
# L2 data prefetches in E state
|
|
26.40 L2_DATA_RQSTS.PREFETCH.E_STATE
|
|
|
|
# L2 data prefetches in M state
|
|
26.80 L2_DATA_RQSTS.PREFETCH.M_STATE
|
|
|
|
# All L2 data prefetches
|
|
26.F0 L2_DATA_RQSTS.PREFETCH.MESI
|
|
|
|
# All L2 data requests
|
|
26.FF L2_DATA_RQSTS.ANY
|
|
|
|
# L2 demand store RFOs in I state (misses)
|
|
27.01 L2_WRITE.RFO.I_STATE
|
|
|
|
# L2 demand store RFOs in S state
|
|
27.02 L2_WRITE.RFO.S_STATE
|
|
|
|
# L2 demand store RFOs in M state
|
|
27.08 L2_WRITE.RFO.M_STATE
|
|
|
|
# All L2 demand store RFOs that hit the cache
|
|
27.0E L2_WRITE.RFO.HIT
|
|
|
|
# All L2 demand store RFOs
|
|
27.0F L2_WRITE.RFO.MESI
|
|
|
|
# L2 demand lock RFOs in I state (misses)
|
|
27.10 L2_WRITE.LOCK.I_STATE
|
|
|
|
# L2 demand lock RFOs in S state
|
|
27.20 L2_WRITE.LOCK.S_STATE
|
|
|
|
# L2 demand lock RFOs in E state
|
|
27.40 L2_WRITE.LOCK.E_STATE
|
|
|
|
# L2 demand lock RFOs in M state
|
|
27.80 L2_WRITE.LOCK.M_STATE
|
|
|
|
# All demand L2 lock RFOs that hit the cache
|
|
27.E0 L2_WRITE.LOCK.HIT
|
|
|
|
# All demand L2 lock RFOs
|
|
27.F0 L2_WRITE.LOCK.MESI
|
|
|
|
# L1 writebacks to L2 in I state (misses)
|
|
28.01 L1D_WB_L2.I_STATE
|
|
|
|
# L1 writebacks to L2 in S state
|
|
28.02 L1D_WB_L2.S_STATE
|
|
|
|
# L1 writebacks to L2 in E state
|
|
28.04 L1D_WB_L2.E_STATE
|
|
|
|
# L1 writebacks to L2 in M state
|
|
28.08 L1D_WB_L2.M_STATE
|
|
|
|
# All L1 writebacks to L2
|
|
28.0F L1D_WB_L2.MESI
|
|
|
|
# Longest latency cache miss
|
|
2E.41 LONGEST_LAT_CACHE.MISS
|
|
|
|
# Longest latency cache reference
|
|
2E.4F LONGEST_LAT_CACHE.REFERENCE
|
|
|
|
# Cycles when thread is not halted (programmable counter)
|
|
3C.00 CPU_CLK_UNHALTED.THREAD_P
|
|
|
|
# Total CPU cycles
|
|
3C.00.CMSK=2.INV CPU_CLK_UNHALTED.TOTAL_CYCLES
|
|
|
|
# Reference base clock (133 MHz) cycles when thread is not halted (programmable counter)
|
|
3C.01 CPU_CLK_UNHALTED.REF_P
|
|
|
|
# L1 data cache read in I state (misses)
|
|
40.01.CTR=0 L1D_CACHE_LD.I_STATE
|
|
|
|
# L1 data cache read in S state
|
|
40.02.CTR=0 L1D_CACHE_LD.S_STATE
|
|
|
|
# L1 data cache read in E state
|
|
40.04.CTR=0 L1D_CACHE_LD.E_STATE
|
|
|
|
# L1 data cache read in M state
|
|
40.08.CTR=0 L1D_CACHE_LD.M_STATE
|
|
|
|
# L1 data cache reads
|
|
40.0F.CTR=0 L1D_CACHE_LD.MESI
|
|
|
|
# L1 data cache stores in S state
|
|
41.02.CTR=0 L1D_CACHE_ST.S_STATE
|
|
|
|
# L1 data cache stores in E state
|
|
41.04.CTR=0 L1D_CACHE_ST.E_STATE
|
|
|
|
# L1 data cache stores in M state
|
|
41.08.CTR=0 L1D_CACHE_ST.M_STATE
|
|
|
|
# L1 data cache load lock hits
|
|
42.01.CTR=0 L1D_CACHE_LOCK.HIT
|
|
|
|
# L1 data cache load locks in S state
|
|
42.02.CTR=0 L1D_CACHE_LOCK.S_STATE
|
|
|
|
# L1 data cache load locks in E state
|
|
42.04.CTR=0 L1D_CACHE_LOCK.E_STATE
|
|
|
|
# L1 data cache load locks in M state
|
|
42.08.CTR=0 L1D_CACHE_LOCK.M_STATE
|
|
|
|
# All references to the L1 data cache
|
|
43.01.CTR=0 L1D_ALL_REF.ANY
|
|
|
|
# L1 data cacheable reads and writes
|
|
43.02.CTR=0 L1D_ALL_REF.CACHEABLE
|
|
|
|
# DTLB misses
|
|
49.01 DTLB_MISSES.ANY
|
|
|
|
# DTLB miss page walks
|
|
49.02 DTLB_MISSES.WALK_COMPLETED
|
|
|
|
# DTLB first level misses but second level hit
|
|
49.10 DTLB_MISSES.STLB_HIT
|
|
|
|
# Load operations conflicting with software prefetches
|
|
4C.01.CTR=0 LOAD_HIT_PRE
|
|
|
|
# L1D hardware prefetch requests
|
|
4E.01.CTR=0 L1D_PREFETCH.REQUESTS
|
|
|
|
# L1D hardware prefetch misses
|
|
4E.02.CTR=0 L1D_PREFETCH.MISS
|
|
|
|
# L1D hardware prefetch requests triggered
|
|
4E.04.CTR=0 L1D_PREFETCH.TRIGGERS
|
|
|
|
# L1 data cache lines allocated
|
|
51.01.CTR=0 L1D.REPL
|
|
|
|
# L1D cache lines allocated in the M state
|
|
51.02.CTR=0 L1D.M_REPL
|
|
|
|
# L1D cache lines replaced in M state
|
|
51.04.CTR=0 L1D.M_EVICT
|
|
|
|
# L1D snoop eviction of cache lines in M state
|
|
51.08.CTR=0 L1D.M_SNOOP_EVICT
|
|
|
|
# L1D prefetch load lock accepted in fill buffer
|
|
52.01.CTR=0 L1D_CACHE_PREFETCH_LOCK_FB_HIT
|
|
|
|
# L1D load lock accepted in fill buffer
|
|
53.01.CTR=0 L1D_CACHE_LOCK_FB_HIT
|
|
|
|
# Cycles L1D and L2 locked
|
|
63.01.CTR=0 CACHE_LOCK_CYCLES.L1D_L2
|
|
|
|
# Cycles L1D locked
|
|
63.02.CTR=0 CACHE_LOCK_CYCLES.L1D
|
|
|
|
# I/O transactions
|
|
6C.01 IO_TRANSACTIONS
|
|
|
|
# L1I instruction fetch hits
|
|
80.01 L1I.HITS
|
|
|
|
# L1I instruction fetch misses
|
|
80.02 L1I.MISSES
|
|
|
|
# L1I Instruction fetches
|
|
80.03 L1I.READS
|
|
|
|
# L1I instruction fetch stall cycles
|
|
80.04 L1I.CYCLES_STALLED
|
|
|
|
# Large ITLB hit
|
|
82.01 LARGE_ITLB.HIT
|
|
|
|
# ITLB miss
|
|
85.01 ITLB_MISSES.ANY
|
|
|
|
# ITLB miss page walks
|
|
85.02 ITLB_MISSES.WALK_COMPLETED
|
|
|
|
# Length Change Prefix stall cycles
|
|
87.01 ILD_STALL.LCP
|
|
|
|
# Stall cycles due to BPU MRU bypass
|
|
87.02 ILD_STALL.MRU
|
|
|
|
# Instruction Queue full stall cycles
|
|
87.04 ILD_STALL.IQ_FULL
|
|
|
|
# Regen stall cycles
|
|
87.08 ILD_STALL.REGEN
|
|
|
|
# Any Instruction Length Decoder stall cycles
|
|
87.0F ILD_STALL.ANY
|
|
|
|
# Conditional branch instructions executed
|
|
88.01 BR_INST_EXEC.COND
|
|
|
|
# Unconditional branches executed
|
|
88.02 BR_INST_EXEC.DIRECT
|
|
|
|
# Indirect non call branches executed
|
|
88.04 BR_INST_EXEC.INDIRECT_NON_CALL
|
|
|
|
# All non call branches executed
|
|
88.07 BR_INST_EXEC.NON_CALLS
|
|
|
|
# Indirect return branches executed
|
|
88.08 BR_INST_EXEC.RETURN_NEAR
|
|
|
|
# Unconditional call branches executed
|
|
88.10 BR_INST_EXEC.DIRECT_NEAR_CALL
|
|
|
|
# Indirect call branches executed
|
|
88.20 BR_INST_EXEC.INDIRECT_NEAR_CALL
|
|
|
|
# Call branches executed
|
|
88.30 BR_INST_EXEC.NEAR_CALLS
|
|
|
|
# Taken branches executed
|
|
88.40 BR_INST_EXEC.TAKEN
|
|
|
|
# Branch instructions executed
|
|
88.7F BR_INST_EXEC.ANY
|
|
|
|
# Mispredicted conditional branches executed
|
|
89.01 BR_MISP_EXEC.COND
|
|
|
|
# Mispredicted unconditional branches executed
|
|
89.02 BR_MISP_EXEC.DIRECT
|
|
|
|
# Mispredicted indirect non call branches executed
|
|
89.04 BR_MISP_EXEC.INDIRECT_NON_CALL
|
|
|
|
# Mispredicted non call branches executed
|
|
89.07 BR_MISP_EXEC.NON_CALLS
|
|
|
|
# Mispredicted return branches executed
|
|
89.08 BR_MISP_EXEC.RETURN_NEAR
|
|
|
|
# Mispredicted direct near call branches executed
|
|
89.10 BR_MISP_EXEC.DIRECT_NEAR_CALL
|
|
|
|
# Mispredicted indirect call branches executed
|
|
89.20 BR_MISP_EXEC.INDIRECT_NEAR_CALL
|
|
|
|
# Mispredicted call branches executed
|
|
89.30 BR_MISP_EXEC.NEAR_CALLS
|
|
|
|
# Mispredicted taken branches executed
|
|
89.40 BR_MISP_EXEC.TAKEN
|
|
|
|
# Mispredicted branches executed
|
|
89.7F BR_MISP_EXEC.ANY
|
|
|
|
# Resource related stall cycles
|
|
A2.01 RESOURCE_STALLS.ANY
|
|
|
|
# Load buffer stall cycles
|
|
A2.02 RESOURCE_STALLS.LOAD
|
|
|
|
# Reservation Station full stall cycles
|
|
A2.04 RESOURCE_STALLS.RS_FULL
|
|
|
|
# Store buffer stall cycles
|
|
A2.08 RESOURCE_STALLS.STORE
|
|
|
|
# ROB full stall cycles
|
|
A2.10 RESOURCE_STALLS.ROB_FULL
|
|
|
|
# FPU control word write stall cycles
|
|
A2.20 RESOURCE_STALLS.FPCW
|
|
|
|
# MXCSR rename stall cycles
|
|
A2.40 RESOURCE_STALLS.MXCSR
|
|
|
|
# Other Resource related stall cycles
|
|
A2.80 RESOURCE_STALLS.OTHER
|
|
|
|
# Macro-fused instructions decoded
|
|
A6.01 MACRO_INSTS.FUSIONS_DECODED
|
|
|
|
# Instruction queue forced BACLEAR
|
|
A7.01 BACLEAR_FORCE_IQ
|
|
|
|
# Cycles when uops were delivered by the LSD
|
|
A8.01.CMSK=1 LSD.ACTIVE
|
|
|
|
# Cycles no uops were delivered by the LSD
|
|
A8.01.CMSK=1.INV LSD.INACTIVE
|
|
|
|
# ITLB flushes
|
|
AE.01 ITLB_FLUSH
|
|
|
|
# Offcore L1 data cache writebacks
|
|
B0.40 OFFCORE_REQUESTS.L1D_WRITEBACK
|
|
|
|
# Uops executed on port 0
|
|
B1.01 UOPS_EXECUTED.PORT0
|
|
|
|
# Uops executed on port 1
|
|
B1.02 UOPS_EXECUTED.PORT1
|
|
|
|
# Uops executed on port 2 (core count)
|
|
B1.04.AnyT UOPS_EXECUTED.PORT2_CORE
|
|
|
|
# Uops executed on port 3 (core count)
|
|
B1.08.AnyT UOPS_EXECUTED.PORT3_CORE
|
|
|
|
# Uops executed on port 4 (core count)
|
|
B1.10.AnyT UOPS_EXECUTED.PORT4_CORE
|
|
|
|
# Cycles Uops executed on ports 0-4 (core count)
|
|
B1.1F.CMSK=1.AnyT UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5
|
|
|
|
# Uops executed on ports 0-4 (core count)
|
|
B1.1F.CMSK=1.AnyT.EDG.INV UOPS_EXECUTED.CORE_STALL_COUNT_NO_PORT5
|
|
|
|
# Cycles no Uops issued on ports 0-4 (core count)
|
|
B1.1F.CMSK=1.AnyT.INV UOPS_EXECUTED.CORE_STALL_CYCLES_NO_PORT5
|
|
|
|
# Uops executed on port 5
|
|
B1.20 UOPS_EXECUTED.PORT5
|
|
|
|
# Cycles Uops executed on any port (core count)
|
|
B1.3F.CMSK=1.AnyT UOPS_EXECUTED.CORE_ACTIVE_CYCLES
|
|
|
|
# Uops executed on any port (core count)
|
|
B1.3F.CMSK=1.AnyT.EDG.INV UOPS_EXECUTED.CORE_STALL_COUNT
|
|
|
|
# Cycles no Uops issued on any port (core count)
|
|
B1.3F.CMSK=1.AnyT.INV UOPS_EXECUTED.CORE_STALL_CYCLES
|
|
|
|
# Uops issued on ports 0, 1 or 5
|
|
B1.40 UOPS_EXECUTED.PORT015
|
|
|
|
# Cycles no Uops issued on ports 0, 1 or 5
|
|
B1.40.CMSK=1.INV UOPS_EXECUTED.PORT015_STALL_CYCLES
|
|
|
|
# Uops issued on ports 2, 3 or 4
|
|
B1.80.AnyT UOPS_EXECUTED.PORT234_CORE
|
|
|
|
# Offcore requests blocked due to Super Queue full
|
|
B2.01 OFFCORE_REQUESTS_SQ_FULL
|
|
|
|
# Thread responded HIT to snoop
|
|
B8.01 SNOOP_RESPONSE.HIT
|
|
|
|
# Thread responded HITE to snoop
|
|
B8.02 SNOOP_RESPONSE.HITE
|
|
|
|
# Thread responded HITM to snoop
|
|
B8.04 SNOOP_RESPONSE.HITM
|
|
|
|
# Instructions retired (Programmable counter and Precise Event)
|
|
C0.01 INST_RETIRED.ANY_P
|
|
|
|
# Total cycles (Precise Event)
|
|
C0.01.CMSK=16.INV INST_RETIRED.TOTAL_CYCLES
|
|
|
|
# Total cycles (Precise Event)
|
|
C0.01.CMSK=16.INV INST_RETIRED.TOTAL_CYCLES_PS
|
|
|
|
# Retired floating-point operations (Precise Event)
|
|
C0.02 INST_RETIRED.X87
|
|
|
|
# Retired MMX instructions (Precise Event)
|
|
C0.04 INST_RETIRED.MMX
|
|
|
|
# Uops retired (Precise Event)
|
|
C2.01 UOPS_RETIRED.ANY
|
|
|
|
# Cycles Uops are being retired
|
|
C2.01.CMSK=1 UOPS_RETIRED.ACTIVE_CYCLES
|
|
|
|
# Cycles Uops are not retiring (Precise Event)
|
|
C2.01.CMSK=1.INV UOPS_RETIRED.STALL_CYCLES
|
|
|
|
# Total cycles using precise uop retired event (Precise Event)
|
|
C2.01.CMSK=16.INV UOPS_RETIRED.TOTAL_CYCLES
|
|
|
|
# Retirement slots used (Precise Event)
|
|
C2.02 UOPS_RETIRED.RETIRE_SLOTS
|
|
|
|
# Macro-fused Uops retired (Precise Event)
|
|
C2.04 UOPS_RETIRED.MACRO_FUSED
|
|
|
|
# Cycles machine clear asserted
|
|
C3.01 MACHINE_CLEARS.CYCLES
|
|
|
|
# Execution pipeline restart due to Memory ordering conflicts
|
|
C3.02 MACHINE_CLEARS.MEM_ORDER
|
|
|
|
# Self-Modifying Code detected
|
|
C3.04 MACHINE_CLEARS.SMC
|
|
|
|
# Retired conditional branch instructions (Precise Event)
|
|
C4.01 BR_INST_RETIRED.CONDITIONAL
|
|
|
|
# Retired near call instructions (Precise Event)
|
|
C4.02 BR_INST_RETIRED.NEAR_CALL
|
|
|
|
# Retired branch instructions (Precise Event)
|
|
C4.04 BR_INST_RETIRED.ALL_BRANCHES
|
|
|
|
# Mispredicted near retired calls (Precise Event)
|
|
C5.02 BR_MISP_RETIRED.NEAR_CALL
|
|
|
|
# SIMD Packed-Single Uops retired (Precise Event)
|
|
C7.01 SSEX_UOPS_RETIRED.PACKED_SINGLE
|
|
|
|
# SIMD Scalar-Single Uops retired (Precise Event)
|
|
C7.02 SSEX_UOPS_RETIRED.SCALAR_SINGLE
|
|
|
|
# SIMD Packed-Double Uops retired (Precise Event)
|
|
C7.04 SSEX_UOPS_RETIRED.PACKED_DOUBLE
|
|
|
|
# SIMD Scalar-Double Uops retired (Precise Event)
|
|
C7.08 SSEX_UOPS_RETIRED.SCALAR_DOUBLE
|
|
|
|
# SIMD Vector Integer Uops retired (Precise Event)
|
|
C7.10 SSEX_UOPS_RETIRED.VECTOR_INTEGER
|
|
|
|
# Retired instructions that missed the ITLB (Precise Event)
|
|
C8.20 ITLB_MISS_RETIRED
|
|
|
|
# Retired loads that hit the L1 data cache (Precise Event)
|
|
CB.01 MEM_LOAD_RETIRED.L1D_HIT
|
|
|
|
# Retired loads that hit the L2 cache (Precise Event)
|
|
CB.02 MEM_LOAD_RETIRED.L2_HIT
|
|
|
|
# Retired loads that hit valid versions in the LLC cache (Precise Event)
|
|
CB.04 MEM_LOAD_RETIRED.LLC_UNSHARED_HIT
|
|
|
|
# Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event)
|
|
CB.08 MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM
|
|
|
|
# Retired loads that miss the LLC cache (Precise Event)
|
|
CB.10 MEM_LOAD_RETIRED.LLC_MISS
|
|
|
|
# Retired loads that miss L1D and hit a previously allocated LFB (Precise Event)
|
|
CB.40 MEM_LOAD_RETIRED.HIT_LFB
|
|
|
|
# Retired loads that miss the DTLB (Precise Event)
|
|
CB.80 MEM_LOAD_RETIRED.DTLB_MISS
|
|
|
|
# Transitions from MMX to Floating Point instructions
|
|
CC.01 FP_MMX_TRANS.TO_FP
|
|
|
|
# Transitions from Floating Point to MMX instructions
|
|
CC.02 FP_MMX_TRANS.TO_MMX
|
|
|
|
# All Floating Point to and from MMX transitions
|
|
CC.03 FP_MMX_TRANS.ANY
|
|
|
|
# Instructions decoded
|
|
D0.01 MACRO_INSTS.DECODED
|
|
|
|
# Cycles no Uops are decoded
|
|
D1.01.CMSK=1.INV UOPS_DECODED.STALL_CYCLES
|
|
|
|
# Uops decoded by Microcode Sequencer
|
|
D1.02.CMSK=1 UOPS_DECODED.MS_CYCLES_ACTIVE
|
|
|
|
# Stack pointer instructions decoded
|
|
D1.04 UOPS_DECODED.ESP_FOLDING
|
|
|
|
# Stack pointer sync operations
|
|
D1.08 UOPS_DECODED.ESP_SYNC
|
|
|
|
# Flag stall cycles
|
|
D2.01 RAT_STALLS.FLAGS
|
|
|
|
# Partial register stall cycles
|
|
D2.02 RAT_STALLS.REGISTERS
|
|
|
|
# ROB read port stall cycles
|
|
D2.04 RAT_STALLS.ROB_READ_PORT
|
|
|
|
# Scoreboard stall cycles
|
|
D2.08 RAT_STALLS.SCOREBOARD
|
|
|
|
# All RAT stall cycles
|
|
D2.0F RAT_STALLS.ANY
|
|
|
|
# Segment rename stall cycles
|
|
D4.01 SEG_RENAME_STALLS
|
|
|
|
# ES segment renames
|
|
D5.01 ES_REG_RENAMES
|
|
|
|
# Uop unfusions due to FP exceptions
|
|
DB.01 UOP_UNFUSION
|
|
|
|
# Branch instructions decoded
|
|
E0.01 BR_INST_DECODED
|
|
|
|
# Branch prediction unit missed call or return
|
|
E5.01 BPU_MISSED_CALL_RET
|
|
|
|
# BACLEAR asserted, regardless of cause
|
|
E6.01 BACLEAR.CLEAR
|
|
|
|
# BACLEAR asserted with bad target address
|
|
E6.02 BACLEAR.BAD_TARGET
|
|
|
|
# Early Branch Prediction Unit clears
|
|
E8.01 BPU_CLEARS.EARLY
|
|
|
|
# Late Branch Prediction Unit clears
|
|
E8.02 BPU_CLEARS.LATE
|
|
|
|
# L2 Load transactions
|
|
F0.01 L2_TRANSACTIONS.LOAD
|
|
|
|
# L2 RFO transactions
|
|
F0.02 L2_TRANSACTIONS.RFO
|
|
|
|
# L2 instruction fetch transactions
|
|
F0.04 L2_TRANSACTIONS.IFETCH
|
|
|
|
# L2 prefetch transactions
|
|
F0.08 L2_TRANSACTIONS.PREFETCH
|
|
|
|
# L1D writeback to L2 transactions
|
|
F0.10 L2_TRANSACTIONS.L1D_WB
|
|
|
|
# L2 fill transactions
|
|
F0.20 L2_TRANSACTIONS.FILL
|
|
|
|
# L2 writeback to LLC transactions
|
|
F0.40 L2_TRANSACTIONS.WB
|
|
|
|
# All L2 transactions
|
|
F0.80 L2_TRANSACTIONS.ANY
|
|
|
|
# L2 lines allocated in the S state
|
|
F1.02 L2_LINES_IN.S_STATE
|
|
|
|
# L2 lines allocated in the E state
|
|
F1.04 L2_LINES_IN.E_STATE
|
|
|
|
# L2 lines allocated
|
|
F1.07 L2_LINES_IN.ANY
|
|
|
|
# L2 lines evicted by a demand request
|
|
F2.01 L2_LINES_OUT.DEMAND_CLEAN
|
|
|
|
# L2 modified lines evicted by a demand request
|
|
F2.02 L2_LINES_OUT.DEMAND_DIRTY
|
|
|
|
# L2 lines evicted by a prefetch request
|
|
F2.04 L2_LINES_OUT.PREFETCH_CLEAN
|
|
|
|
# L2 modified lines evicted by a prefetch request
|
|
F2.08 L2_LINES_OUT.PREFETCH_DIRTY
|
|
|
|
# L2 lines evicted
|
|
F2.0F L2_LINES_OUT.ANY
|
|
|
|
# Super Queue lock splits across a cache line
|
|
F4.10 SQ_MISC.SPLIT_LOCK
|
|
|
|
# Super Queue full stall cycles
|
|
F6.01 SQ_FULL_STALL_CYCLES
|
|
|
|
# X87 Floating point assists (Precise Event)
|
|
F7.01 FP_ASSIST.ALL
|
|
|
|
# X87 Floating point assists for invalid output value (Precise Event)
|
|
F7.02 FP_ASSIST.OUTPUT
|
|
|
|
# X87 Floating point assists for invalid input value (Precise Event)
|
|
F7.04 FP_ASSIST.INPUT
|
|
|
|
# SIMD integer 64 bit packed multiply operations
|
|
FD.01 SIMD_INT_64.PACKED_MPY
|
|
|
|
# SIMD integer 64 bit shift operations
|
|
FD.02 SIMD_INT_64.PACKED_SHIFT
|
|
|
|
# SIMD integer 64 bit pack operations
|
|
FD.04 SIMD_INT_64.PACK
|
|
|
|
# SIMD integer 64 bit unpack operations
|
|
FD.08 SIMD_INT_64.UNPACK
|
|
|
|
# SIMD integer 64 bit logical operations
|
|
FD.10 SIMD_INT_64.PACKED_LOGICAL
|
|
|
|
# SIMD integer 64 bit arithmetic operations
|
|
FD.20 SIMD_INT_64.PACKED_ARITH
|
|
|
|
# SIMD integer 64 bit shuffle/move operations
|
|
FD.40 SIMD_INT_64.SHUFFLE_MOVE
|