   1/*
   2 * Per core/cpu state
   3 *
   4 * Used to coordinate shared registers between HT threads or
   5 * among events on a single PMU.
   6 */
   7
   8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   9
  10#include <linux/stddef.h>
  11#include <linux/types.h>
  12#include <linux/init.h>
  13#include <linux/slab.h>
  14#include <linux/export.h>
  15
  16#include <asm/cpufeature.h>
  17#include <asm/hardirq.h>
  18#include <asm/apic.h>
  19
  20#include "perf_event.h"
  21
  22/*
  23 * Intel PerfMon, used on Core and later.
  24 */
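    /*
     * Each entry is a raw PERFEVTSEL encoding: event select in bits 0-7,
     * unit mask in bits 8-15. For example, PERF_COUNT_HW_CPU_CYCLES maps
     * to 0x003c, i.e. event 0x3c (unhalted core cycles) with umask 0x00.
     */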
  25static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
  26{
  27	[PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
  28	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
  29	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
  30	[PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
  31	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
  32	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
  33	[PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
  34	[PERF_COUNT_HW_REF_CPU_CYCLES]		= 0x0300, /* pseudo-encoding */
  35};
  36
  37static struct event_constraint intel_core_event_constraints[] __read_mostly =
  38{
  39	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
  40	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
  41	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
  42	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
  43	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
  44	INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
  45	EVENT_CONSTRAINT_END
  46};
  47
  48static struct event_constraint intel_core2_event_constraints[] __read_mostly =
  49{
  50	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
  51	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
  52	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
  53	INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
  54	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
  55	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
  56	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
  57	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
  58	INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
  59	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
  60	INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
  61	INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
  62	INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
  63	EVENT_CONSTRAINT_END
  64};
  65
  66static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
  67{
  68	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
  69	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
  70	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
  71	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
  72	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
  73	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
  74	INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
  75	INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
  76	INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
  77	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
  78	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
  79	EVENT_CONSTRAINT_END
  80};
  81
  82static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
  83{
  84	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
  85	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
  86	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
  87	EVENT_EXTRA_END
  88};
  89
  90static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
  91{
  92	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
  93	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
  94	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
  95	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
  96	INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
  97	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
  98	INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
  99	EVENT_CONSTRAINT_END
 100};
 101
 102static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 103{
 104	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 105	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
 106	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
 107	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
 108	INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
 109	INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
 110	INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
 111	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
 112	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
 113	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
 114	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
 115	INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
 116	EVENT_CONSTRAINT_END
 117};
 118
 119static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
 120{
 121	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 122	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
 123	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
 124	INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
 125	INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */
 126	INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
 127	INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
 128	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
 129	INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
 130	INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
 131	INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
 132	INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
 133	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
 134	/*
 135	 * Errata BV98 -- MEM_*_RETIRED events can leak between counters of SMT
 136	 * siblings; disable these events because they can corrupt unrelated
 137	 * counters.
 138	 */
 139	INTEL_EVENT_CONSTRAINT(0xd0, 0x0), /* MEM_UOPS_RETIRED.* */
 140	INTEL_EVENT_CONSTRAINT(0xd1, 0x0), /* MEM_LOAD_UOPS_RETIRED.* */
 141	INTEL_EVENT_CONSTRAINT(0xd2, 0x0), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
 142	INTEL_EVENT_CONSTRAINT(0xd3, 0x0), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
 143	EVENT_CONSTRAINT_END
 144};
 145
 146static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 147{
 148	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
 149	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
 150	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
 151	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 152	EVENT_EXTRA_END
 153};
 154
 155static struct event_constraint intel_v1_event_constraints[] __read_mostly =
 156{
 157	EVENT_CONSTRAINT_END
 158};
 159
 160static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 161{
 162	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 163	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
 164	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
 165	EVENT_CONSTRAINT_END
 166};
 167
 168static struct event_constraint intel_slm_event_constraints[] __read_mostly =
 169{
 170	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 171	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
 172	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
 173	EVENT_CONSTRAINT_END
 174};
 175
 176static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
 177	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
 178	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
 179	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
 180	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 181	EVENT_EXTRA_END
 182};
 183
 184static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
 185	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
 186	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
 187	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
 188	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 189	EVENT_EXTRA_END
 190};
 191
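    /*
     * The EVENT_ATTR_STR() definitions below provide the "mem-loads" and
     * "mem-stores" event aliases exported through the PMU's sysfs events
     * directory; each string encodes the model specific event, umask and
     * (for loads) the load-latency threshold.
     */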
 192EVENT_ATTR_STR(mem-loads,	mem_ld_nhm,	"event=0x0b,umask=0x10,ldlat=3");
 193EVENT_ATTR_STR(mem-loads,	mem_ld_snb,	"event=0xcd,umask=0x1,ldlat=3");
 194EVENT_ATTR_STR(mem-stores,	mem_st_snb,	"event=0xcd,umask=0x2");
 195
 196struct attribute *nhm_events_attrs[] = {
 197	EVENT_PTR(mem_ld_nhm),
 198	NULL,
 199};
 200
 201struct attribute *snb_events_attrs[] = {
 202	EVENT_PTR(mem_ld_snb),
 203	EVENT_PTR(mem_st_snb),
 204	NULL,
 205};
 206
 207static struct event_constraint intel_hsw_event_constraints[] = {
 208	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 209	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
 210	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
 211	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */
 212	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
 213	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
 214	/* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
 215	INTEL_EVENT_CONSTRAINT(0x08a3, 0x4),
 216	/* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
 217	INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4),
 218	/* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
 219	INTEL_EVENT_CONSTRAINT(0x04a3, 0xf),
 220	EVENT_CONSTRAINT_END
 221};
 222
 223static u64 intel_pmu_event_map(int hw_event)
 224{
 225	return intel_perfmon_event_map[hw_event];
 226}
 227
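    /*
     * Sandy Bridge MSR_OFFCORE_RESPONSE bits, used to build the OFFCORE_RSP
     * extra-register encodings below. The Nehalem/Westmere layout follows
     * further down.
     */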
 228#define SNB_DMND_DATA_RD	(1ULL << 0)
 229#define SNB_DMND_RFO		(1ULL << 1)
 230#define SNB_DMND_IFETCH		(1ULL << 2)
 231#define SNB_DMND_WB		(1ULL << 3)
 232#define SNB_PF_DATA_RD		(1ULL << 4)
 233#define SNB_PF_RFO		(1ULL << 5)
 234#define SNB_PF_IFETCH		(1ULL << 6)
 235#define SNB_LLC_DATA_RD		(1ULL << 7)
 236#define SNB_LLC_RFO		(1ULL << 8)
 237#define SNB_LLC_IFETCH		(1ULL << 9)
 238#define SNB_BUS_LOCKS		(1ULL << 10)
 239#define SNB_STRM_ST		(1ULL << 11)
 240#define SNB_OTHER		(1ULL << 15)
 241#define SNB_RESP_ANY		(1ULL << 16)
 242#define SNB_NO_SUPP		(1ULL << 17)
 243#define SNB_LLC_HITM		(1ULL << 18)
 244#define SNB_LLC_HITE		(1ULL << 19)
 245#define SNB_LLC_HITS		(1ULL << 20)
 246#define SNB_LLC_HITF		(1ULL << 21)
 247#define SNB_LOCAL		(1ULL << 22)
 248#define SNB_REMOTE		(0xffULL << 23)
 249#define SNB_SNP_NONE		(1ULL << 31)
 250#define SNB_SNP_NOT_NEEDED	(1ULL << 32)
 251#define SNB_SNP_MISS		(1ULL << 33)
 252#define SNB_NO_FWD		(1ULL << 34)
 253#define SNB_SNP_FWD		(1ULL << 35)
 254#define SNB_HITM		(1ULL << 36)
 255#define SNB_NON_DRAM		(1ULL << 37)
 256
 257#define SNB_DMND_READ		(SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
 258#define SNB_DMND_WRITE		(SNB_DMND_RFO|SNB_LLC_RFO)
 259#define SNB_DMND_PREFETCH	(SNB_PF_DATA_RD|SNB_PF_RFO)
 260
 261#define SNB_SNP_ANY		(SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
 262				 SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
 263				 SNB_HITM)
 264
 265#define SNB_DRAM_ANY		(SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
 266#define SNB_DRAM_REMOTE		(SNB_REMOTE|SNB_SNP_ANY)
 267
 268#define SNB_L3_ACCESS		SNB_RESP_ANY
 269#define SNB_L3_MISS		(SNB_DRAM_ANY|SNB_NON_DRAM)
 270
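    /*
     * The composite masks above are written to MSR_OFFCORE_RSP_x through the
     * extra-reg mechanism to synthesize the generic LL and NODE cache events.
     */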
 271static __initconst const u64 snb_hw_cache_extra_regs
 272				[PERF_COUNT_HW_CACHE_MAX]
 273				[PERF_COUNT_HW_CACHE_OP_MAX]
 274				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 275{
 276 [ C(LL  ) ] = {
 277	[ C(OP_READ) ] = {
 278		[ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
 279		[ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_L3_MISS,
 280	},
 281	[ C(OP_WRITE) ] = {
 282		[ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
 283		[ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_L3_MISS,
 284	},
 285	[ C(OP_PREFETCH) ] = {
 286		[ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
 287		[ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
 288	},
 289 },
 290 [ C(NODE) ] = {
 291	[ C(OP_READ) ] = {
 292		[ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
 293		[ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
 294	},
 295	[ C(OP_WRITE) ] = {
 296		[ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
 297		[ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
 298	},
 299	[ C(OP_PREFETCH) ] = {
 300		[ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
 301		[ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
 302	},
 303 },
 304};
 305
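    /*
     * Generic cache event table: each cell holds a raw event|umask encoding,
     * 0 when the combination is not supported, or -1 when it makes no sense.
     * Cells set to 0x01b7 select OFFCORE_RESPONSE and additionally rely on
     * the extra-register configuration above.
     */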
 306static __initconst const u64 snb_hw_cache_event_ids
 307				[PERF_COUNT_HW_CACHE_MAX]
 308				[PERF_COUNT_HW_CACHE_OP_MAX]
 309				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 310{
 311 [ C(L1D) ] = {
 312	[ C(OP_READ) ] = {
 313		[ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS        */
 314		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPLACEMENT              */
 315	},
 316	[ C(OP_WRITE) ] = {
 317		[ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES       */
 318		[ C(RESULT_MISS)   ] = 0x0851, /* L1D.ALL_M_REPLACEMENT        */
 319	},
 320	[ C(OP_PREFETCH) ] = {
 321		[ C(RESULT_ACCESS) ] = 0x0,
 322		[ C(RESULT_MISS)   ] = 0x024e, /* HW_PRE_REQ.DL1_MISS          */
 323	},
 324 },
 325 [ C(L1I ) ] = {
 326	[ C(OP_READ) ] = {
 327		[ C(RESULT_ACCESS) ] = 0x0,
 328		[ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
 329	},
 330	[ C(OP_WRITE) ] = {
 331		[ C(RESULT_ACCESS) ] = -1,
 332		[ C(RESULT_MISS)   ] = -1,
 333	},
 334	[ C(OP_PREFETCH) ] = {
 335		[ C(RESULT_ACCESS) ] = 0x0,
 336		[ C(RESULT_MISS)   ] = 0x0,
 337	},
 338 },
 339 [ C(LL  ) ] = {
 340	[ C(OP_READ) ] = {
 341		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 342		[ C(RESULT_ACCESS) ] = 0x01b7,
 343		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
 344		[ C(RESULT_MISS)   ] = 0x01b7,
 345	},
 346	[ C(OP_WRITE) ] = {
 347		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
 348		[ C(RESULT_ACCESS) ] = 0x01b7,
 349		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
 350		[ C(RESULT_MISS)   ] = 0x01b7,
 351	},
 352	[ C(OP_PREFETCH) ] = {
 353		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
 354		[ C(RESULT_ACCESS) ] = 0x01b7,
 355		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
 356		[ C(RESULT_MISS)   ] = 0x01b7,
 357	},
 358 },
 359 [ C(DTLB) ] = {
 360	[ C(OP_READ) ] = {
 361		[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
 362		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
 363	},
 364	[ C(OP_WRITE) ] = {
 365		[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
 366		[ C(RESULT_MISS)   ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
 367	},
 368	[ C(OP_PREFETCH) ] = {
 369		[ C(RESULT_ACCESS) ] = 0x0,
 370		[ C(RESULT_MISS)   ] = 0x0,
 371	},
 372 },
 373 [ C(ITLB) ] = {
 374	[ C(OP_READ) ] = {
 375		[ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT         */
 376		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK    */
 377	},
 378	[ C(OP_WRITE) ] = {
 379		[ C(RESULT_ACCESS) ] = -1,
 380		[ C(RESULT_MISS)   ] = -1,
 381	},
 382	[ C(OP_PREFETCH) ] = {
 383		[ C(RESULT_ACCESS) ] = -1,
 384		[ C(RESULT_MISS)   ] = -1,
 385	},
 386 },
 387 [ C(BPU ) ] = {
 388	[ C(OP_READ) ] = {
 389		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
 390		[ C(RESULT_MISS)   ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
 391	},
 392	[ C(OP_WRITE) ] = {
 393		[ C(RESULT_ACCESS) ] = -1,
 394		[ C(RESULT_MISS)   ] = -1,
 395	},
 396	[ C(OP_PREFETCH) ] = {
 397		[ C(RESULT_ACCESS) ] = -1,
 398		[ C(RESULT_MISS)   ] = -1,
 399	},
 400 },
 401 [ C(NODE) ] = {
 402	[ C(OP_READ) ] = {
 403		[ C(RESULT_ACCESS) ] = 0x01b7,
 404		[ C(RESULT_MISS)   ] = 0x01b7,
 405	},
 406	[ C(OP_WRITE) ] = {
 407		[ C(RESULT_ACCESS) ] = 0x01b7,
 408		[ C(RESULT_MISS)   ] = 0x01b7,
 409	},
 410	[ C(OP_PREFETCH) ] = {
 411		[ C(RESULT_ACCESS) ] = 0x01b7,
 412		[ C(RESULT_MISS)   ] = 0x01b7,
 413	},
 414 },
 415
 416};
 417
 418static __initconst const u64 westmere_hw_cache_event_ids
 419				[PERF_COUNT_HW_CACHE_MAX]
 420				[PERF_COUNT_HW_CACHE_OP_MAX]
 421				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 422{
 423 [ C(L1D) ] = {
 424	[ C(OP_READ) ] = {
 425		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
 426		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
 427	},
 428	[ C(OP_WRITE) ] = {
 429		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
 430		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
 431	},
 432	[ C(OP_PREFETCH) ] = {
 433		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
 434		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
 435	},
 436 },
 437 [ C(L1I ) ] = {
 438	[ C(OP_READ) ] = {
 439		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
 440		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
 441	},
 442	[ C(OP_WRITE) ] = {
 443		[ C(RESULT_ACCESS) ] = -1,
 444		[ C(RESULT_MISS)   ] = -1,
 445	},
 446	[ C(OP_PREFETCH) ] = {
 447		[ C(RESULT_ACCESS) ] = 0x0,
 448		[ C(RESULT_MISS)   ] = 0x0,
 449	},
 450 },
 451 [ C(LL  ) ] = {
 452	[ C(OP_READ) ] = {
 453		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 454		[ C(RESULT_ACCESS) ] = 0x01b7,
 455		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
 456		[ C(RESULT_MISS)   ] = 0x01b7,
 457	},
 458	/*
 459	 * Use RFO, not WRITEBACK, because a write miss would typically occur
 460	 * on RFO.
 461	 */
 462	[ C(OP_WRITE) ] = {
 463		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
 464		[ C(RESULT_ACCESS) ] = 0x01b7,
 465		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
 466		[ C(RESULT_MISS)   ] = 0x01b7,
 467	},
 468	[ C(OP_PREFETCH) ] = {
 469		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
 470		[ C(RESULT_ACCESS) ] = 0x01b7,
 471		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
 472		[ C(RESULT_MISS)   ] = 0x01b7,
 473	},
 474 },
 475 [ C(DTLB) ] = {
 476	[ C(OP_READ) ] = {
 477		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
 478		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
 479	},
 480	[ C(OP_WRITE) ] = {
 481		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
 482		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
 483	},
 484	[ C(OP_PREFETCH) ] = {
 485		[ C(RESULT_ACCESS) ] = 0x0,
 486		[ C(RESULT_MISS)   ] = 0x0,
 487	},
 488 },
 489 [ C(ITLB) ] = {
 490	[ C(OP_READ) ] = {
 491		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
 492		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
 493	},
 494	[ C(OP_WRITE) ] = {
 495		[ C(RESULT_ACCESS) ] = -1,
 496		[ C(RESULT_MISS)   ] = -1,
 497	},
 498	[ C(OP_PREFETCH) ] = {
 499		[ C(RESULT_ACCESS) ] = -1,
 500		[ C(RESULT_MISS)   ] = -1,
 501	},
 502 },
 503 [ C(BPU ) ] = {
 504	[ C(OP_READ) ] = {
 505		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
 506		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
 507	},
 508	[ C(OP_WRITE) ] = {
 509		[ C(RESULT_ACCESS) ] = -1,
 510		[ C(RESULT_MISS)   ] = -1,
 511	},
 512	[ C(OP_PREFETCH) ] = {
 513		[ C(RESULT_ACCESS) ] = -1,
 514		[ C(RESULT_MISS)   ] = -1,
 515	},
 516 },
 517 [ C(NODE) ] = {
 518	[ C(OP_READ) ] = {
 519		[ C(RESULT_ACCESS) ] = 0x01b7,
 520		[ C(RESULT_MISS)   ] = 0x01b7,
 521	},
 522	[ C(OP_WRITE) ] = {
 523		[ C(RESULT_ACCESS) ] = 0x01b7,
 524		[ C(RESULT_MISS)   ] = 0x01b7,
 525	},
 526	[ C(OP_PREFETCH) ] = {
 527		[ C(RESULT_ACCESS) ] = 0x01b7,
 528		[ C(RESULT_MISS)   ] = 0x01b7,
 529	},
 530 },
 531};
 532
 533/*
 534 * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
 535 * See IA32 SDM Vol 3B 30.6.1.3
 536 */
 537
 538#define NHM_DMND_DATA_RD	(1 << 0)
 539#define NHM_DMND_RFO		(1 << 1)
 540#define NHM_DMND_IFETCH		(1 << 2)
 541#define NHM_DMND_WB		(1 << 3)
 542#define NHM_PF_DATA_RD		(1 << 4)
 543#define NHM_PF_DATA_RFO		(1 << 5)
 544#define NHM_PF_IFETCH		(1 << 6)
 545#define NHM_OFFCORE_OTHER	(1 << 7)
 546#define NHM_UNCORE_HIT		(1 << 8)
 547#define NHM_OTHER_CORE_HIT_SNP	(1 << 9)
 548#define NHM_OTHER_CORE_HITM	(1 << 10)
 549        			/* reserved */
 550#define NHM_REMOTE_CACHE_FWD	(1 << 12)
 551#define NHM_REMOTE_DRAM		(1 << 13)
 552#define NHM_LOCAL_DRAM		(1 << 14)
 553#define NHM_NON_DRAM		(1 << 15)
 554
 555#define NHM_LOCAL		(NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
 556#define NHM_REMOTE		(NHM_REMOTE_DRAM)
 557
 558#define NHM_DMND_READ		(NHM_DMND_DATA_RD)
 559#define NHM_DMND_WRITE		(NHM_DMND_RFO|NHM_DMND_WB)
 560#define NHM_DMND_PREFETCH	(NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
 561
 562#define NHM_L3_HIT	(NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
 563#define NHM_L3_MISS	(NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
 564#define NHM_L3_ACCESS	(NHM_L3_HIT|NHM_L3_MISS)
 565
 566static __initconst const u64 nehalem_hw_cache_extra_regs
 567				[PERF_COUNT_HW_CACHE_MAX]
 568				[PERF_COUNT_HW_CACHE_OP_MAX]
 569				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 570{
 571 [ C(LL  ) ] = {
 572	[ C(OP_READ) ] = {
 573		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
 574		[ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_L3_MISS,
 575	},
 576	[ C(OP_WRITE) ] = {
 577		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
 578		[ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_L3_MISS,
 579	},
 580	[ C(OP_PREFETCH) ] = {
 581		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
 582		[ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
 583	},
 584 },
 585 [ C(NODE) ] = {
 586	[ C(OP_READ) ] = {
 587		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
 588		[ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE,
 589	},
 590	[ C(OP_WRITE) ] = {
 591		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
 592		[ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE,
 593	},
 594	[ C(OP_PREFETCH) ] = {
 595		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
 596		[ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE,
 597	},
 598 },
 599};
 600
 601static __initconst const u64 nehalem_hw_cache_event_ids
 602				[PERF_COUNT_HW_CACHE_MAX]
 603				[PERF_COUNT_HW_CACHE_OP_MAX]
 604				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 605{
 606 [ C(L1D) ] = {
 607	[ C(OP_READ) ] = {
 608		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
 609		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
 610	},
 611	[ C(OP_WRITE) ] = {
 612		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
 613		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
 614	},
 615	[ C(OP_PREFETCH) ] = {
 616		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
 617		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
 618	},
 619 },
 620 [ C(L1I ) ] = {
 621	[ C(OP_READ) ] = {
 622		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
 623		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
 624	},
 625	[ C(OP_WRITE) ] = {
 626		[ C(RESULT_ACCESS) ] = -1,
 627		[ C(RESULT_MISS)   ] = -1,
 628	},
 629	[ C(OP_PREFETCH) ] = {
 630		[ C(RESULT_ACCESS) ] = 0x0,
 631		[ C(RESULT_MISS)   ] = 0x0,
 632	},
 633 },
 634 [ C(LL  ) ] = {
 635	[ C(OP_READ) ] = {
 636		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 637		[ C(RESULT_ACCESS) ] = 0x01b7,
 638		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
 639		[ C(RESULT_MISS)   ] = 0x01b7,
 640	},
 641	/*
 642	 * Use RFO, not WRITEBACK, because a write miss would typically occur
 643	 * on RFO.
 644	 */
 645	[ C(OP_WRITE) ] = {
 646		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
 647		[ C(RESULT_ACCESS) ] = 0x01b7,
 648		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
 649		[ C(RESULT_MISS)   ] = 0x01b7,
 650	},
 651	[ C(OP_PREFETCH) ] = {
 652		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
 653		[ C(RESULT_ACCESS) ] = 0x01b7,
 654		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
 655		[ C(RESULT_MISS)   ] = 0x01b7,
 656	},
 657 },
 658 [ C(DTLB) ] = {
 659	[ C(OP_READ) ] = {
 660		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
 661		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
 662	},
 663	[ C(OP_WRITE) ] = {
 664		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
 665		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
 666	},
 667	[ C(OP_PREFETCH) ] = {
 668		[ C(RESULT_ACCESS) ] = 0x0,
 669		[ C(RESULT_MISS)   ] = 0x0,
 670	},
 671 },
 672 [ C(ITLB) ] = {
 673	[ C(OP_READ) ] = {
 674		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
 675		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
 676	},
 677	[ C(OP_WRITE) ] = {
 678		[ C(RESULT_ACCESS) ] = -1,
 679		[ C(RESULT_MISS)   ] = -1,
 680	},
 681	[ C(OP_PREFETCH) ] = {
 682		[ C(RESULT_ACCESS) ] = -1,
 683		[ C(RESULT_MISS)   ] = -1,
 684	},
 685 },
 686 [ C(BPU ) ] = {
 687	[ C(OP_READ) ] = {
 688		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
 689		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
 690	},
 691	[ C(OP_WRITE) ] = {
 692		[ C(RESULT_ACCESS) ] = -1,
 693		[ C(RESULT_MISS)   ] = -1,
 694	},
 695	[ C(OP_PREFETCH) ] = {
 696		[ C(RESULT_ACCESS) ] = -1,
 697		[ C(RESULT_MISS)   ] = -1,
 698	},
 699 },
 700 [ C(NODE) ] = {
 701	[ C(OP_READ) ] = {
 702		[ C(RESULT_ACCESS) ] = 0x01b7,
 703		[ C(RESULT_MISS)   ] = 0x01b7,
 704	},
 705	[ C(OP_WRITE) ] = {
 706		[ C(RESULT_ACCESS) ] = 0x01b7,
 707		[ C(RESULT_MISS)   ] = 0x01b7,
 708	},
 709	[ C(OP_PREFETCH) ] = {
 710		[ C(RESULT_ACCESS) ] = 0x01b7,
 711		[ C(RESULT_MISS)   ] = 0x01b7,
 712	},
 713 },
 714};
 715
 716static __initconst const u64 core2_hw_cache_event_ids
 717				[PERF_COUNT_HW_CACHE_MAX]
 718				[PERF_COUNT_HW_CACHE_OP_MAX]
 719				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 720{
 721 [ C(L1D) ] = {
 722	[ C(OP_READ) ] = {
 723		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
 724		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
 725	},
 726	[ C(OP_WRITE) ] = {
 727		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
 728		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
 729	},
 730	[ C(OP_PREFETCH) ] = {
 731		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
 732		[ C(RESULT_MISS)   ] = 0,
 733	},
 734 },
 735 [ C(L1I ) ] = {
 736	[ C(OP_READ) ] = {
 737		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
 738		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
 739	},
 740	[ C(OP_WRITE) ] = {
 741		[ C(RESULT_ACCESS) ] = -1,
 742		[ C(RESULT_MISS)   ] = -1,
 743	},
 744	[ C(OP_PREFETCH) ] = {
 745		[ C(RESULT_ACCESS) ] = 0,
 746		[ C(RESULT_MISS)   ] = 0,
 747	},
 748 },
 749 [ C(LL  ) ] = {
 750	[ C(OP_READ) ] = {
 751		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
 752		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
 753	},
 754	[ C(OP_WRITE) ] = {
 755		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
 756		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
 757	},
 758	[ C(OP_PREFETCH) ] = {
 759		[ C(RESULT_ACCESS) ] = 0,
 760		[ C(RESULT_MISS)   ] = 0,
 761	},
 762 },
 763 [ C(DTLB) ] = {
 764	[ C(OP_READ) ] = {
 765		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
 766		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
 767	},
 768	[ C(OP_WRITE) ] = {
 769		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
 770		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
 771	},
 772	[ C(OP_PREFETCH) ] = {
 773		[ C(RESULT_ACCESS) ] = 0,
 774		[ C(RESULT_MISS)   ] = 0,
 775	},
 776 },
 777 [ C(ITLB) ] = {
 778	[ C(OP_READ) ] = {
 779		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
 780		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
 781	},
 782	[ C(OP_WRITE) ] = {
 783		[ C(RESULT_ACCESS) ] = -1,
 784		[ C(RESULT_MISS)   ] = -1,
 785	},
 786	[ C(OP_PREFETCH) ] = {
 787		[ C(RESULT_ACCESS) ] = -1,
 788		[ C(RESULT_MISS)   ] = -1,
 789	},
 790 },
 791 [ C(BPU ) ] = {
 792	[ C(OP_READ) ] = {
 793		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
 794		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
 795	},
 796	[ C(OP_WRITE) ] = {
 797		[ C(RESULT_ACCESS) ] = -1,
 798		[ C(RESULT_MISS)   ] = -1,
 799	},
 800	[ C(OP_PREFETCH) ] = {
 801		[ C(RESULT_ACCESS) ] = -1,
 802		[ C(RESULT_MISS)   ] = -1,
 803	},
 804 },
 805};
 806
 807static __initconst const u64 atom_hw_cache_event_ids
 808				[PERF_COUNT_HW_CACHE_MAX]
 809				[PERF_COUNT_HW_CACHE_OP_MAX]
 810				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 811{
 812 [ C(L1D) ] = {
 813	[ C(OP_READ) ] = {
 814		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
 815		[ C(RESULT_MISS)   ] = 0,
 816	},
 817	[ C(OP_WRITE) ] = {
 818		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
 819		[ C(RESULT_MISS)   ] = 0,
 820	},
 821	[ C(OP_PREFETCH) ] = {
 822		[ C(RESULT_ACCESS) ] = 0x0,
 823		[ C(RESULT_MISS)   ] = 0,
 824	},
 825 },
 826 [ C(L1I ) ] = {
 827	[ C(OP_READ) ] = {
 828		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
 829		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
 830	},
 831	[ C(OP_WRITE) ] = {
 832		[ C(RESULT_ACCESS) ] = -1,
 833		[ C(RESULT_MISS)   ] = -1,
 834	},
 835	[ C(OP_PREFETCH) ] = {
 836		[ C(RESULT_ACCESS) ] = 0,
 837		[ C(RESULT_MISS)   ] = 0,
 838	},
 839 },
 840 [ C(LL  ) ] = {
 841	[ C(OP_READ) ] = {
 842		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
 843		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
 844	},
 845	[ C(OP_WRITE) ] = {
 846		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
 847		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
 848	},
 849	[ C(OP_PREFETCH) ] = {
 850		[ C(RESULT_ACCESS) ] = 0,
 851		[ C(RESULT_MISS)   ] = 0,
 852	},
 853 },
 854 [ C(DTLB) ] = {
 855	[ C(OP_READ) ] = {
 856		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
 857		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
 858	},
 859	[ C(OP_WRITE) ] = {
 860		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
 861		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
 862	},
 863	[ C(OP_PREFETCH) ] = {
 864		[ C(RESULT_ACCESS) ] = 0,
 865		[ C(RESULT_MISS)   ] = 0,
 866	},
 867 },
 868 [ C(ITLB) ] = {
 869	[ C(OP_READ) ] = {
 870		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
 871		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
 872	},
 873	[ C(OP_WRITE) ] = {
 874		[ C(RESULT_ACCESS) ] = -1,
 875		[ C(RESULT_MISS)   ] = -1,
 876	},
 877	[ C(OP_PREFETCH) ] = {
 878		[ C(RESULT_ACCESS) ] = -1,
 879		[ C(RESULT_MISS)   ] = -1,
 880	},
 881 },
 882 [ C(BPU ) ] = {
 883	[ C(OP_READ) ] = {
 884		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
 885		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
 886	},
 887	[ C(OP_WRITE) ] = {
 888		[ C(RESULT_ACCESS) ] = -1,
 889		[ C(RESULT_MISS)   ] = -1,
 890	},
 891	[ C(OP_PREFETCH) ] = {
 892		[ C(RESULT_ACCESS) ] = -1,
 893		[ C(RESULT_MISS)   ] = -1,
 894	},
 895 },
 896};
 897
 898static struct extra_reg intel_slm_extra_regs[] __read_mostly =
 899{
 900	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
 901	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
 902	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffffull, RSP_1),
 903	EVENT_EXTRA_END
 904};
 905
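    /*
     * Silvermont reuses the Sandy Bridge OFFCORE_RESPONSE bit layout, so the
     * SLM_* request and response masks are aliases of the SNB_* bits.
     */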
 906#define SLM_DMND_READ		SNB_DMND_DATA_RD
 907#define SLM_DMND_WRITE		SNB_DMND_RFO
 908#define SLM_DMND_PREFETCH	(SNB_PF_DATA_RD|SNB_PF_RFO)
 909
 910#define SLM_SNP_ANY		(SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
 911#define SLM_LLC_ACCESS		SNB_RESP_ANY
 912#define SLM_LLC_MISS		(SLM_SNP_ANY|SNB_NON_DRAM)
 913
 914static __initconst const u64 slm_hw_cache_extra_regs
 915				[PERF_COUNT_HW_CACHE_MAX]
 916				[PERF_COUNT_HW_CACHE_OP_MAX]
 917				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 918{
 919 [ C(LL  ) ] = {
 920	[ C(OP_READ) ] = {
 921		[ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
 922		[ C(RESULT_MISS)   ] = SLM_DMND_READ|SLM_LLC_MISS,
 923	},
 924	[ C(OP_WRITE) ] = {
 925		[ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
 926		[ C(RESULT_MISS)   ] = SLM_DMND_WRITE|SLM_LLC_MISS,
 927	},
 928	[ C(OP_PREFETCH) ] = {
 929		[ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
 930		[ C(RESULT_MISS)   ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
 931	},
 932 },
 933};
 934
 935static __initconst const u64 slm_hw_cache_event_ids
 936				[PERF_COUNT_HW_CACHE_MAX]
 937				[PERF_COUNT_HW_CACHE_OP_MAX]
 938				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 939{
 940 [ C(L1D) ] = {
 941	[ C(OP_READ) ] = {
 942		[ C(RESULT_ACCESS) ] = 0,
 943		[ C(RESULT_MISS)   ] = 0x0104, /* LD_DCU_MISS */
 944	},
 945	[ C(OP_WRITE) ] = {
 946		[ C(RESULT_ACCESS) ] = 0,
 947		[ C(RESULT_MISS)   ] = 0,
 948	},
 949	[ C(OP_PREFETCH) ] = {
 950		[ C(RESULT_ACCESS) ] = 0,
 951		[ C(RESULT_MISS)   ] = 0,
 952	},
 953 },
 954 [ C(L1I ) ] = {
 955	[ C(OP_READ) ] = {
 956		[ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
 957		[ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
 958	},
 959	[ C(OP_WRITE) ] = {
 960		[ C(RESULT_ACCESS) ] = -1,
 961		[ C(RESULT_MISS)   ] = -1,
 962	},
 963	[ C(OP_PREFETCH) ] = {
 964		[ C(RESULT_ACCESS) ] = 0,
 965		[ C(RESULT_MISS)   ] = 0,
 966	},
 967 },
 968 [ C(LL  ) ] = {
 969	[ C(OP_READ) ] = {
 970		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 971		[ C(RESULT_ACCESS) ] = 0x01b7,
 972		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
 973		[ C(RESULT_MISS)   ] = 0x01b7,
 974	},
 975	[ C(OP_WRITE) ] = {
 976		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
 977		[ C(RESULT_ACCESS) ] = 0x01b7,
 978		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
 979		[ C(RESULT_MISS)   ] = 0x01b7,
 980	},
 981	[ C(OP_PREFETCH) ] = {
 982		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
 983		[ C(RESULT_ACCESS) ] = 0x01b7,
 984		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
 985		[ C(RESULT_MISS)   ] = 0x01b7,
 986	},
 987 },
 988 [ C(DTLB) ] = {
 989	[ C(OP_READ) ] = {
 990		[ C(RESULT_ACCESS) ] = 0,
 991		[ C(RESULT_MISS)   ] = 0x0804, /* LD_DTLB_MISS */
 992	},
 993	[ C(OP_WRITE) ] = {
 994		[ C(RESULT_ACCESS) ] = 0,
 995		[ C(RESULT_MISS)   ] = 0,
 996	},
 997	[ C(OP_PREFETCH) ] = {
 998		[ C(RESULT_ACCESS) ] = 0,
 999		[ C(RESULT_MISS)   ] = 0,
1000	},
1001 },
1002 [ C(ITLB) ] = {
1003	[ C(OP_READ) ] = {
1004		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
1005		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES */
1006	},
1007	[ C(OP_WRITE) ] = {
1008		[ C(RESULT_ACCESS) ] = -1,
1009		[ C(RESULT_MISS)   ] = -1,
1010	},
1011	[ C(OP_PREFETCH) ] = {
1012		[ C(RESULT_ACCESS) ] = -1,
1013		[ C(RESULT_MISS)   ] = -1,
1014	},
1015 },
1016 [ C(BPU ) ] = {
1017	[ C(OP_READ) ] = {
1018		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
1019		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
1020	},
1021	[ C(OP_WRITE) ] = {
1022		[ C(RESULT_ACCESS) ] = -1,
1023		[ C(RESULT_MISS)   ] = -1,
1024	},
1025	[ C(OP_PREFETCH) ] = {
1026		[ C(RESULT_ACCESS) ] = -1,
1027		[ C(RESULT_MISS)   ] = -1,
1028	},
1029 },
1030};
1031
1032static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
1033{
1034	/* user explicitly requested branch sampling */
1035	if (has_branch_stack(event))
1036		return true;
1037
1038	/* implicit branch sampling to correct PEBS skid */
1039	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
1040	    x86_pmu.intel_cap.pebs_format < 2)
1041		return true;
1042
1043	return false;
1044}
1045
1046static void intel_pmu_disable_all(void)
1047{
1048	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1049
1050	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
1051
1052	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
1053		intel_pmu_disable_bts();
1054
1055	intel_pmu_pebs_disable_all();
1056	intel_pmu_lbr_disable_all();
1057}
1058
1059static void intel_pmu_enable_all(int added)
1060{
1061	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1062
1063	intel_pmu_pebs_enable_all();
1064	intel_pmu_lbr_enable_all();
1065	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
1066			x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
1067
1068	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
1069		struct perf_event *event =
1070			cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
1071
1072		if (WARN_ON_ONCE(!event))
1073			return;
1074
1075		intel_pmu_enable_bts(event->hw.config);
1076	}
1077}
1078
1079/*
1080 * Workaround for:
1081 *   Intel Errata AAK100 (model 26)
1082 *   Intel Errata AAP53  (model 30)
1083 *   Intel Errata BD53   (model 44)
1084 *
1085 * The official story:
1086 *   These chips need to be 'reset' when adding counters by programming the
1087 *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
1088 *   in sequence on the same PMC or on different PMCs.
1089 *
1090 * In practice it appears some of these events do in fact count, and
1091 * we need to program all 4 events.
1092 */
1093static void intel_pmu_nhm_workaround(void)
1094{
1095	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1096	static const unsigned long nhm_magic[4] = {
1097		0x4300B5,
1098		0x4300D2,
1099		0x4300B1,
1100		0x4300B1
1101	};
1102	struct perf_event *event;
1103	int i;
1104
1105	/*
1106	 * The errata requires the following steps:
1107	 * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
1108	 * 2) Configure 4 PERFEVTSELx with the magic events and clear
1109	 *    the corresponding PMCx;
1110	 * 3) Set bits 0-3 of MSR_CORE_PERF_GLOBAL_CTRL;
1111	 * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
1112	 * 5) Clear the 4 pairs of PERFEVTSELx and PMCx;
1113	 */
1114
1115	/*
1116	 * The steps we actually take differ slightly from the above:
1117	 * A) To reduce MSR operations, we skip step 1) because the relevant
1118	 *    MSRs are already cleared before this function is called;
1119	 * B) We call x86_perf_event_update() to save PMCx before configuring
1120	 *    PERFEVTSELx with the magic numbers;
1121	 * C) For step 5), we clear PERFEVTSELx only when it is not
1122	 *    currently in use;
1123	 * D) We call x86_perf_event_set_period() to restore PMCx.
1124	 */
1125
1126	/* We always operate on 4 pairs of performance counters */
1127	for (i = 0; i < 4; i++) {
1128		event = cpuc->events[i];
1129		if (event)
1130			x86_perf_event_update(event);
1131	}
1132
1133	for (i = 0; i < 4; i++) {
1134		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
1135		wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
1136	}
1137
1138	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
1139	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
1140
1141	for (i = 0; i < 4; i++) {
1142		event = cpuc->events[i];
1143
1144		if (event) {
1145			x86_perf_event_set_period(event);
1146			__x86_pmu_enable_event(&event->hw,
1147					ARCH_PERFMON_EVENTSEL_ENABLE);
1148		} else
1149			wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
1150	}
1151}
1152
1153static void intel_pmu_nhm_enable_all(int added)
1154{
1155	if (added)
1156		intel_pmu_nhm_workaround();
1157	intel_pmu_enable_all(added);
1158}
1159
1160static inline u64 intel_pmu_get_status(void)
1161{
1162	u64 status;
1163
1164	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1165
1166	return status;
1167}
1168
1169static inline void intel_pmu_ack_status(u64 ack)
1170{
1171	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
1172}
1173
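    /*
     * Each fixed counter owns a 4-bit control field in
     * MSR_ARCH_PERFMON_FIXED_CTR_CTRL; clearing that nibble stops the counter.
     */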
1174static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
1175{
1176	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
1177	u64 ctrl_val, mask;
1178
1179	mask = 0xfULL << (idx * 4);
1180
1181	rdmsrl(hwc->config_base, ctrl_val);
1182	ctrl_val &= ~mask;
1183	wrmsrl(hwc->config_base, ctrl_val);
1184}
1185
1186static inline bool event_is_checkpointed(struct perf_event *event)
1187{
1188	return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
1189}
1190
1191static void intel_pmu_disable_event(struct perf_event *event)
1192{
1193	struct hw_perf_event *hwc = &event->hw;
1194	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1195
1196	if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
1197		intel_pmu_disable_bts();
1198		intel_pmu_drain_bts_buffer();
1199		return;
1200	}
1201
1202	cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
1203	cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
1204	cpuc->intel_cp_status &= ~(1ull << hwc->idx);
1205
1206	/*
1207	 * must be disabled before any actual event
1208	 * because any event may be combined with LBR
1209	 */
1210	if (intel_pmu_needs_lbr_smpl(event))
1211		intel_pmu_lbr_disable(event);
1212
1213	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1214		intel_pmu_disable_fixed(hwc);
1215		return;
1216	}
1217
1218	x86_pmu_disable_event(event);
1219
1220	if (unlikely(event->attr.precise_ip))
1221		intel_pmu_pebs_disable(event);
1222}
1223
1224static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
1225{
1226	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
1227	u64 ctrl_val, bits, mask;
1228
1229	/*
1230	 * Enable IRQ generation (0x8),
1231	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
1232	 * if requested:
1233	 */
1234	bits = 0x8ULL;
1235	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
1236		bits |= 0x2;
1237	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1238		bits |= 0x1;
1239
1240	/*
1241	 * ANY bit is supported in v3 and up
1242	 */
1243	if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
1244		bits |= 0x4;
1245
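    	/*
    	 * Example: a fixed counter with idx == 1 counting in both rings
    	 * with PMI enabled ends up with bits == 0xb, which lands in bits
    	 * 4-7 of the control MSR after the shift below.
    	 */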
1246	bits <<= (idx * 4);
1247	mask = 0xfULL << (idx * 4);
1248
1249	rdmsrl(hwc->config_base, ctrl_val);
1250	ctrl_val &= ~mask;
1251	ctrl_val |= bits;
1252	wrmsrl(hwc->config_base, ctrl_val);
1253}
1254
1255static void intel_pmu_enable_event(struct perf_event *event)
1256{
1257	struct hw_perf_event *hwc = &event->hw;
1258	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1259
1260	if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
1261		if (!__this_cpu_read(cpu_hw_events.enabled))
1262			return;
1263
1264		intel_pmu_enable_bts(hwc->config);
1265		return;
1266	}
1267	/*
1268	 * must be enabled before any actual event
1269	 * because any event may be combined with LBR
1270	 */
1271	if (intel_pmu_needs_lbr_smpl(event))
1272		intel_pmu_lbr_enable(event);
1273
1274	if (event->attr.exclude_host)
1275		cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
1276	if (event->attr.exclude_guest)
1277		cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
1278
1279	if (unlikely(event_is_checkpointed(event)))
1280		cpuc->intel_cp_status |= (1ull << hwc->idx);
1281
1282	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1283		intel_pmu_enable_fixed(hwc);
1284		return;
1285	}
1286
1287	if (unlikely(event->attr.precise_ip))
1288		intel_pmu_pebs_enable(event);
1289
1290	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
1291}
1292
1293/*
1294 * Save and restart an expired event. Called from NMI context,
1295 * so it has to be careful about preempting normal event ops:
1296 */
1297int intel_pmu_save_and_restart(struct perf_event *event)
1298{
1299	x86_perf_event_update(event);
1300	/*
1301	 * For a checkpointed counter always reset back to 0.  This
1302	 * avoids a situation where the counter overflows, aborts the
1303	 * transaction and is then rolled back to a value shortly before
1304	 * the overflow, only to overflow and abort again.
1305	 */
1306	if (unlikely(event_is_checkpointed(event))) {
1307		/* No race with NMIs because the counter should not be armed */
1308		wrmsrl(event->hw.event_base, 0);
1309		local64_set(&event->hw.prev_count, 0);
1310	}
1311	return x86_perf_event_set_period(event);
1312}
1313
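    /*
     * Last-resort recovery used by the PMI handler: clear every general
     * purpose and fixed counter (and reset the BTS index) on this CPU.
     */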
1314static void intel_pmu_reset(void)
1315{
1316	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
1317	unsigned long flags;
1318	int idx;
1319
1320	if (!x86_pmu.num_counters)
1321		return;
1322
1323	local_irq_save(flags);
1324
1325	pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
1326
1327	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1328		wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
1329		wrmsrl_safe(x86_pmu_event_addr(idx),  0ull);
1330	}
1331	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
1332		wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1333
1334	if (ds)
1335		ds->bts_index = ds->bts_buffer_base;
1336
1337	local_irq_restore(flags);
1338}
1339
1340/*
1341 * This handler is triggered by the local APIC, so the APIC IRQ handling
1342 * rules apply:
1343 */
1344static int intel_pmu_handle_irq(struct pt_regs *regs)
1345{
1346	struct perf_sample_data data;
1347	struct cpu_hw_events *cpuc;
1348	int bit, loops;
1349	u64 status;
1350	int handled;
1351
1352	cpuc = &__get_cpu_var(cpu_hw_events);
1353
1354	/*
1355	 * There is no known reason not to always do the late ACK,
1356	 * but keep it opt-in just in case.
1357	 */
1358	if (!x86_pmu.late_ack)
1359		apic_write(APIC_LVTPC, APIC_DM_NMI);
1360	intel_pmu_disable_all();
1361	handled = intel_pmu_drain_bts_buffer();
1362	status = intel_pmu_get_status();
1363	if (!status)
1364		goto done;
1365
1366	loops = 0;
1367again:
1368	intel_pmu_ack_status(status);
1369	if (++loops > 100) {
1370		static bool warned = false;
1371		if (!warned) {
1372			WARN(1, "perfevents: irq loop stuck!\n");
1373			perf_event_print_debug();
1374			warned = true;
1375		}
1376		intel_pmu_reset();
1377		goto done;
1378	}
1379
1380	inc_irq_stat(apic_perf_irqs);
1381
1382	intel_pmu_lbr_read();
1383
1384	/*
1385	 * PEBS overflow sets bit 62 in the global status register
1386	 */
1387	if (__test_and_clear_bit(62, (unsigned long *)&status)) {
1388		handled++;
1389		x86_pmu.drain_pebs(regs);
1390	}
1391
1392	/*
1393	 * Checkpointed counters can lead to 'spurious' PMIs because the
1394	 * rollback caused by the PMI will have cleared the overflow status
1395	 * bit. Therefore always force probe these counters.
1396	 */
1397	status |= cpuc->intel_cp_status;
1398
1399	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
1400		struct perf_event *event = cpuc->events[bit];
1401
1402		handled++;
1403
1404		if (!test_bit(bit, cpuc->active_mask))
1405			continue;
1406
1407		if (!intel_pmu_save_and_restart(event))
1408			continue;
1409
1410		perf_sample_data_init(&data, 0, event->hw.last_period);
1411
1412		if (has_branch_stack(event))
1413			data.br_stack = &cpuc->lbr_stack;
1414
1415		if (perf_event_overflow(event, &data, regs))
1416			x86_pmu_stop(event, 0);
1417	}
1418
1419	/*
1420	 * Repeat if there is more work to be done:
1421	 */
1422	status = intel_pmu_get_status();
1423	if (status)
1424		goto again;
1425
1426done:
1427	intel_pmu_enable_all(0);
1428	/*
1429	 * Only unmask the NMI after the overflow counters
1430	 * have been reset. This avoids spurious NMIs on
1431	 * Haswell CPUs.
1432	 */
1433	if (x86_pmu.late_ack)
1434		apic_write(APIC_LVTPC, APIC_DM_NMI);
1435	return handled;
1436}
1437
1438static struct event_constraint *
1439intel_bts_constraints(struct perf_event *event)
1440{
1441	struct hw_perf_event *hwc = &event->hw;
1442	unsigned int hw_event, bts_event;
1443
1444	if (event->attr.freq)
1445		return NULL;
1446
1447	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
1448	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
1449
1450	if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
1451		return &bts_constraint;
1452
1453	return NULL;
1454}
1455
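    /*
     * When the PMU has both OFFCORE_RSP MSRs (ERF_HAS_RSP_1), an event that
     * cannot get its first-choice extra register may retry with the sibling
     * one; this returns that alternate index.
     */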
1456static int intel_alt_er(int idx)
1457{
1458	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
1459		return idx;
1460
1461	if (idx == EXTRA_REG_RSP_0)
1462		return EXTRA_REG_RSP_1;
1463
1464	if (idx == EXTRA_REG_RSP_1)
1465		return EXTRA_REG_RSP_0;
1466
1467	return idx;
1468}
1469
1470static void intel_fixup_er(struct perf_event *event, int idx)
1471{
1472	event->hw.extra_reg.idx = idx;
1473
1474	if (idx == EXTRA_REG_RSP_0) {
1475		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
1476		event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event;
1477		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
1478	} else if (idx == EXTRA_REG_RSP_1) {
1479		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
1480		event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event;
1481		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
1482	}
1483}
1484
1485/*
1486 * manage allocation of shared extra msr for certain events
1487 *
1488 * sharing can be:
1489 * per-cpu: to be shared between the various events on a single PMU
1490 * per-core: per-cpu + shared by HT threads
1491 */
1492static struct event_constraint *
1493__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
1494				   struct perf_event *event,
1495				   struct hw_perf_event_extra *reg)
1496{
1497	struct event_constraint *c = &emptyconstraint;
1498	struct er_account *era;
1499	unsigned long flags;
1500	int idx = reg->idx;
1501
1502	/*
1503	 * reg->alloc can be set due to existing state, so for fake cpuc we
1504	 * need to ignore this, otherwise we might fail to allocate proper fake
1505	 * state for this extra reg constraint. Also see the comment below.
1506	 */
1507	if (reg->alloc && !cpuc->is_fake)
1508		return NULL; /* call x86_get_event_constraints() */
1509
1510again:
1511	era = &cpuc->shared_regs->regs[idx];
1512	/*
1513	 * we use raw_spin_lock_irqsave() to avoid lockdep issues when
1514	 * passing a fake cpuc
1515	 */
1516	raw_spin_lock_irqsave(&era->lock, flags);
1517
1518	if (!atomic_read(&era->ref) || era->config == reg->config) {
1519
1520		/*
1521		 * If it's a fake cpuc -- as per validate_{group,event}() we
1522		 * shouldn't touch event state and we can avoid doing so
1523		 * since both will only call get_event_constraints() once
1524		 * on each event, this avoids the need for reg->alloc.
1525		 *
1526		 * Not doing the ER fixup will only result in era->reg being
1527		 * wrong, but since we won't actually try and program hardware
1528		 * this isn't a problem either.
1529		 */
1530		if (!cpuc->is_fake) {
1531			if (idx != reg->idx)
1532				intel_fixup_er(event, idx);
1533
1534			/*
1535			 * x86_schedule_events() can call get_event_constraints()
1536			 * multiple times on events in the case of incremental
1537			 * scheduling. reg->alloc ensures we only do the ER
1538			 * allocation once.
1539			 */
1540			reg->alloc = 1;
1541		}
1542
1543		/* lock in msr value */
1544		era->config = reg->config;
1545		era->reg = reg->reg;
1546
1547		/* one more user */
1548		atomic_inc(&era->ref);
1549
1550		/*
1551		 * need to call x86_get_event_constraints()
1552		 * to check whether the associated event has constraints
1553		 */
1554		c = NULL;
1555	} else {
1556		idx = intel_alt_er(idx);
1557		if (idx != reg->idx) {
1558			raw_spin_unlock_irqrestore(&era->lock, flags);
1559			goto again;
1560		}
1561	}
1562	raw_spin_unlock_irqrestore(&era->lock, flags);
1563
1564	return c;
1565}
1566
1567static void
1568__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
1569				   struct hw_perf_event_extra *reg)
1570{
1571	struct er_account *era;
1572
1573	/*
1574	 * Only put the constraint if the extra reg was actually allocated. This
1575	 * also takes care of events which do not use an extra shared reg.
1576	 *
1577	 * Also, if this is a fake cpuc we shouldn't touch any event state
1578	 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
1579	 * either since it'll be thrown out.
1580	 */
1581	if (!reg->alloc || cpuc->is_fake)
1582		return;
1583
1584	era = &cpuc->shared_regs->regs[reg->idx];
1585
1586	/* one fewer user */
1587	atomic_dec(&era->ref);
1588
1589	/* allocate again next time */
1590	reg->alloc = 0;
1591}
1592
1593static struct event_constraint *
1594intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
1595			      struct perf_event *event)
1596{
1597	struct event_constraint *c = NULL, *d;
1598	struct hw_perf_event_extra *xreg, *breg;
1599
1600	xreg = &event->hw.extra_reg;
1601	if (xreg->idx != EXTRA_REG_NONE) {
1602		c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
1603		if (c == &emptyconstraint)
1604			return c;
1605	}
1606	breg = &event->hw.branch_reg;
1607	if (breg->idx != EXTRA_REG_NONE) {
1608		d = __intel_shared_reg_get_constraints(cpuc, event, breg);
1609		if (d == &emptyconstraint) {
1610			__intel_shared_reg_put_constraints(cpuc, xreg);
1611			c = d;
1612		}
1613	}
1614	return c;
1615}
1616
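    /*
     * Walk the PMU's static constraint table and return the first matching
     * constraint, or the catch-all "unconstrained" if none applies.
     */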
1617struct event_constraint *
1618x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1619{
1620	struct event_constraint *c;
1621
1622	if (x86_pmu.event_constraints) {
1623		for_each_event_constraint(c, x86_pmu.event_constraints) {
1624			if ((event->hw.config & c->cmask) == c->code) {
1625				event->hw.flags |= c->flags;
1626				return c;
1627			}
1628		}
1629	}
1630
1631	return &unconstrained;
1632}
1633
1634static struct event_constraint *
1635intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1636{
1637	struct event_constraint *c;
1638
1639	c = intel_bts_constraints(event);
1640	if (c)
1641		return c;
1642
1643	c = intel_pebs_constraints(event);
1644	if (c)
1645		return c;
1646
1647	c = intel_shared_regs_constraints(cpuc, event);
1648	if (c)
1649		return c;
1650
1651	return x86_get_event_constraints(cpuc, event);
1652}
1653
1654static void
1655intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
1656					struct perf_event *event)
1657{
1658	struct hw_perf_event_extra *reg;
1659
1660	reg = &event->hw.extra_reg;
1661	if (reg->idx != EXTRA_REG_NONE)
1662		__intel_shared_reg_put_constraints(cpuc, reg);
1663
1664	reg = &event->hw.branch_reg;
1665	if (reg->idx != EXTRA_REG_NONE)
1666		__intel_shared_reg_put_constraints(cpuc, reg);
1667}
1668
1669static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
1670					struct perf_event *event)
1671{
1672	intel_put_shared_regs_event_constraints(cpuc, event);
1673}
1674
1675static void intel_pebs_aliases_core2(struct perf_event *event)
1676{
1677	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
1678		/*
1679		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
1680		 * (0x003c) so that we can use it with PEBS.
1681		 *
1682		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
1683		 * PEBS capable. However we can use INST_RETIRED.ANY_P
1684		 * (0x00c0), which is a PEBS capable event, to get the same
1685		 * count.
1686		 *
1687		 * With a CNTMASK threshold, INST_RETIRED.ANY_P counts the cycles
1688		 * in which at least CNTMASK instructions retire. By setting
1689		 * CNTMASK to a value (16) larger than the maximum number of
1690		 * instructions that can retire per cycle (4) and then inverting
1691		 * the condition, we count all cycles that retire 16 or fewer
1692		 * instructions, which is every cycle.
1693		 *
1694		 * Thereby we gain a PEBS capable cycle counter.
1695		 */
1696		u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
1697
1698		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
1699		event->hw.config = alt_config;
1700	}
1701}
1702
1703static void intel_pebs_aliases_snb(struct perf_event *event)
1704{
1705	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
1706		/*
1707		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
1708		 * (0x003c) so that we can use it with PEBS.
1709		 *
1710		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
1711		 * PEBS capable. However we can use UOPS_RETIRED.ALL
1712		 * (0x01c2), which is a PEBS capable event, to get the same
1713		 * count.
1714		 *
1715		 * With a CNTMASK threshold, UOPS_RETIRED.ALL counts the cycles
1716		 * in which at least CNTMASK micro-ops retire. By setting
1717		 * CNTMASK to a value (16) larger than the maximum number of
1718		 * micro-ops that can retire per cycle (4) and then inverting
1719		 * the condition, we count all cycles that retire 16 or fewer
1720		 * micro-ops, which is every cycle.
1721		 *
1722		 * Thereby we gain a PEBS capable cycle counter.
1723		 */
1724		u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
1725
1726		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
1727		event->hw.config = alt_config;
1728	}
1729}
1730
1731static int intel_pmu_hw_config(struct perf_event *event)
1732{
1733	int ret = x86_pmu_hw_config(event);
1734
1735	if (ret)
1736		return ret;
1737
1738	if (event->attr.precise_ip && x86_pmu.pebs_aliases)
1739		x86_pmu.pebs_aliases(event);
1740
1741	if (intel_pmu_needs_lbr_smpl(event)) {
1742		ret = intel_pmu_setup_lbr_filter(event);
1743		if (ret)
1744			return ret;
1745	}
1746
1747	if (event->attr.type != PERF_TYPE_RAW)
1748		return 0;
1749
1750	if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
1751		return 0;
1752
1753	if (x86_pmu.version < 3)
1754		return -EINVAL;
1755
1756	if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
1757		return -EACCES;
1758
1759	event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
1760
1761	return 0;
1762}
1763
1764struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
1765{
1766	if (x86_pmu.guest_get_msrs)
1767		return x86_pmu.guest_get_msrs(nr);
1768	*nr = 0;
1769	return NULL;
1770}
1771EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
1772
1773static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
1774{
1775	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1776	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
1777
1778	arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
1779	arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
1780	arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
1781	/*
1782	 * If a PMU counter has PEBS enabled, it is not enough to disable the
1783	 * counter on guest entry, since a PEBS memory write can overshoot the
1784	 * guest entry and corrupt guest memory. Disabling PEBS solves the problem.
1785	 */
1786	arr[1].msr = MSR_IA32_PEBS_ENABLE;
1787	arr[1].host = cpuc->pebs_enabled;
1788	arr[1].guest = 0;
1789
1790	*nr = 2;
1791	return arr;
1792}
1793
1794static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
1795{
1796	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1797	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
1798	int idx;
1799
1800	for (idx = 0; idx < x86_pmu.num_counters; idx++)  {
1801		struct perf_event *event = cpuc->events[idx];
1802
1803		arr[idx].msr = x86_pmu_config_addr(idx);
1804		arr[idx].host = arr[idx].guest = 0;
1805
1806		if (!test_bit(idx, cpuc->active_mask))
1807			continue;
1808
1809		arr[idx].host = arr[idx].guest =
1810			event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
1811
1812		if (event->attr.exclude_host)
1813			arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
1814		else if (event->attr.exclude_guest)
1815			arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
1816	}
1817
1818	*nr = x86_pmu.num_counters;
1819	return arr;
1820}
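
/*
 * Editor's illustration, not part of the original file: a hedged sketch
 * of how a hypervisor-side caller might consume perf_guest_get_msrs().
 * Real users (KVM, for instance) typically feed the host/guest pairs
 * into their own atomic MSR-switching machinery instead of writing the
 * MSRs directly; the helper names here are hypothetical.
 */
static __maybe_unused void example_switch_pmu_to_guest(void)
{
	struct perf_guest_switch_msr *msrs;
	int i, nr;

	msrs = perf_guest_get_msrs(&nr);
	for (i = 0; i < nr; i++)
		wrmsrl(msrs[i].msr, msrs[i].guest);	/* before VM entry */
}

static __maybe_unused void example_switch_pmu_to_host(void)
{
	struct perf_guest_switch_msr *msrs;
	int i, nr;

	msrs = perf_guest_get_msrs(&nr);
	for (i = 0; i < nr; i++)
		wrmsrl(msrs[i].msr, msrs[i].host);	/* after VM exit */
}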
1821
1822static void core_pmu_enable_event(struct perf_event *event)
1823{
1824	if (!event->attr.exclude_host)
1825		x86_pmu_enable_event(event);
1826}
1827
1828static void core_pmu_enable_all(int added)
1829{
1830	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1831	int idx;
1832
1833	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1834		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
1835
1836		if (!test_bit(idx, cpuc->active_mask) ||
1837				cpuc->events[idx]->attr.exclude_host)
1838			continue;
1839
1840		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
1841	}
1842}
1843
1844static int hsw_hw_config(struct perf_event *event)
1845{
1846	int ret = intel_pmu_hw_config(event);
1847
1848	if (ret)
1849		return ret;
1850	if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
1851		return 0;
1852	event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
1853
1854	/*
1855	 * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
1856	 * PEBS or in ANY thread mode. Since the results are nonsensical, forbid
1857	 * this combination.
1858	 */
1859	if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
1860	     ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
1861	      event->attr.precise_ip > 0))
1862		return -EOPNOTSUPP;
1863
1864	if (event_is_checkpointed(event)) {
1865		/*
1866		 * Sampling of checkpointed events can cause situations where
1867		 * the CPU constantly aborts because of an overflow, which is
1868		 * then checkpointed back and ignored. Forbid checkpointing
1869		 * for sampling.
1870		 *
1871		 * But still allow a long sampling period, so that perf stat
1872		 * from KVM works.
1873		 */
1874		if (event->attr.sample_period > 0 &&
1875		    event->attr.sample_period < 0x7fffffff)
1876			return -EOPNOTSUPP;
1877	}
1878	return 0;
1879}
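
/*
 * Editor's illustration, not part of the original file: a minimal sketch
 * of an attribute combination hsw_hw_config() rejects with -EOPNOTSUPP,
 * assuming the in_tx filter sits in config bit 32 as the format
 * attributes below declare.  Combining the IN_TX filter with PEBS
 * (precise_ip > 0) is one of the forbidden cases; the variable name is
 * hypothetical.
 */
static struct perf_event_attr example_rejected_hsw_attr __maybe_unused = {
	.size		= sizeof(struct perf_event_attr),
	.type		= PERF_TYPE_RAW,
	.config		= 0x3c | (1ULL << 32),	/* cycles with in_tx=1 */
	.precise_ip	= 1,			/* PEBS request: rejected */
};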
1880
1881static struct event_constraint counter2_constraint =
1882			EVENT_CONSTRAINT(0, 0x4, 0);
1883
1884static struct event_constraint *
1885hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1886{
1887	struct event_constraint *c = intel_get_event_constraints(cpuc, event);
1888
1889	/* Handle the special quirk: in_tx_checkpointed only works on counter 2 */
1890	if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
1891		if (c->idxmsk64 & (1U << 2))
1892			return &counter2_constraint;
1893		return &emptyconstraint;
1894	}
1895
1896	return c;
1897}
1898
1899PMU_FORMAT_ATTR(event,	"config:0-7"	);
1900PMU_FORMAT_ATTR(umask,	"config:8-15"	);
1901PMU_FORMAT_ATTR(edge,	"config:18"	);
1902PMU_FORMAT_ATTR(pc,	"config:19"	);
1903PMU_FORMAT_ATTR(any,	"config:21"	); /* v3 + */
1904PMU_FORMAT_ATTR(inv,	"config:23"	);
1905PMU_FORMAT_ATTR(cmask,	"config:24-31"	);
1906PMU_FORMAT_ATTR(in_tx,  "config:32");
1907PMU_FORMAT_ATTR(in_tx_cp, "config:33");
1908
1909static struct attribute *intel_arch_formats_attr[] = {
1910	&format_attr_event.attr,
1911	&format_attr_umask.attr,
1912	&format_attr_edge.attr,
1913	&format_attr_pc.attr,
1914	&format_attr_inv.attr,
1915	&format_attr_cmask.attr,
1916	NULL,
1917};
1918
1919ssize_t intel_event_sysfs_show(char *page, u64 config)
1920{
1921	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
1922
1923	return x86_event_sysfs_show(page, config, event);
1924}
1925
1926static __initconst const struct x86_pmu core_pmu = {
1927	.name			= "core",
1928	.handle_irq		= x86_pmu_handle_irq,
1929	.disable_all		= x86_pmu_disable_all,
1930	.enable_all		= core_pmu_enable_all,
1931	.enable			= core_pmu_enable_event,
1932	.disable		= x86_pmu_disable_event,
1933	.hw_config		= x86_pmu_hw_config,
1934	.schedule_events	= x86_schedule_events,
1935	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
1936	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
1937	.event_map		= intel_pmu_event_map,
1938	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
1939	.apic			= 1,
1940	/*
1941	 * Intel PMCs cannot be accessed sanely above 32 bit width,
1942	 * so we install an artificial 1<<31 period regardless of
1943	 * the generic event period:
1944	 */
1945	.max_period		= (1ULL << 31) - 1,
1946	.get_event_constraints	= intel_get_event_constraints,
1947	.put_event_constraints	= intel_put_event_constraints,
1948	.event_constraints	= intel_core_event_constraints,
1949	.guest_get_msrs		= core_guest_get_msrs,
1950	.format_attrs		= intel_arch_formats_attr,
1951	.events_sysfs_show	= intel_event_sysfs_show,
1952};
1953
1954struct intel_shared_regs *allocate_shared_regs(int cpu)
1955{
1956	struct intel_shared_regs *regs;
1957	int i;
1958
1959	regs = kzalloc_node(sizeof(struct intel_shared_regs),
1960			    GFP_KERNEL, cpu_to_node(cpu));
1961	if (regs) {
1962		/*
1963		 * initialize the locks to keep lockdep happy
1964		 */
1965		for (i = 0; i < EXTRA_REG_MAX; i++)
1966			raw_spin_lock_init(&regs->regs[i].lock);
1967
1968		regs->core_id = -1;
1969	}
1970	return regs;
1971}
1972
1973static int intel_pmu_cpu_prepare(int cpu)
1974{
1975	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1976
1977	if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
1978		return NOTIFY_OK;
1979
1980	cpuc->shared_regs = allocate_shared_regs(cpu);
1981	if (!cpuc->shared_regs)
1982		return NOTIFY_BAD;
1983
1984	return NOTIFY_OK;
1985}
1986
1987static void intel_pmu_cpu_starting(int cpu)
1988{
1989	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1990	int core_id = topology_core_id(cpu);
1991	int i;
1992
1993	init_debug_store_on_cpu(cpu);
1994	/*
1995	 * Deal with CPUs that don't clear their LBRs on power-up.
1996	 */
1997	intel_pmu_lbr_reset();
1998
1999	cpuc->lbr_sel = NULL;
2000
2001	if (!cpuc->shared_regs)
2002		return;
2003
2004	if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
2005		for_each_cpu(i, topology_thread_cpumask(cpu)) {
2006			struct intel_shared_regs *pc;
2007
2008			pc = per_cpu(cpu_hw_events, i).shared_regs;
2009			if (pc && pc->core_id == core_id) {
2010				cpuc->kfree_on_online = cpuc->shared_regs;
2011				cpuc->shared_regs = pc;
2012				break;
2013			}
2014		}
2015		cpuc->shared_regs->core_id = core_id;
2016		cpuc->shared_regs->refcnt++;
2017	}
2018
2019	if (x86_pmu.lbr_sel_map)
2020		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
2021}
2022
2023static void intel_pmu_cpu_dying(int cpu)
2024{
2025	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
2026	struct intel_shared_regs *pc;
2027
2028	pc = cpuc->shared_regs;
2029	if (pc) {
2030		if (pc->core_id == -1 || --pc->refcnt == 0)
2031			kfree(pc);
2032		cpuc->shared_regs = NULL;
2033	}
2034
2035	fini_debug_store_on_cpu(cpu);
2036}
2037
2038static void intel_pmu_flush_branch_stack(void)
2039{
2040	/*
2041	 * Intel LBR does not tag entries with the
2042	 * PID of the current task, so we need to
2043	 * flush it on a context switch.
2044	 * For now, we simply reset it.
2045	 */
2046	if (x86_pmu.lbr_nr)
2047		intel_pmu_lbr_reset();
2048}
2049
2050PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
2051
2052PMU_FORMAT_ATTR(ldlat, "config1:0-15");
2053
2054static struct attribute *intel_arch3_formats_attr[] = {
2055	&format_attr_event.attr,
2056	&format_attr_umask.attr,
2057	&format_attr_edge.attr,
2058	&format_attr_pc.attr,
2059	&format_attr_any.attr,
2060	&format_attr_inv.attr,
2061	&format_attr_cmask.attr,
2062	&format_attr_in_tx.attr,
2063	&format_attr_in_tx_cp.attr,
2064
2065	&format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
2066	&format_attr_ldlat.attr, /* PEBS load latency */
2067	NULL,
2068};
2069
2070static __initconst const struct x86_pmu intel_pmu = {
2071	.name			= "Intel",
2072	.handle_irq		= intel_pmu_handle_irq,
2073	.disable_all		= intel_pmu_disable_all,
2074	.enable_all		= intel_pmu_enable_all,
2075	.enable			= intel_pmu_enable_event,
2076	.disable		= intel_pmu_disable_event,
2077	.hw_config		= intel_pmu_hw_config,
2078	.schedule_events	= x86_schedule_events,
2079	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
2080	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
2081	.event_map		= intel_pmu_event_map,
2082	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
2083	.apic			= 1,
2084	/*
2085	 * Intel PMCs cannot be accessed sanely above 32 bit width,
2086	 * so we install an artificial 1<<31 period regardless of
2087	 * the generic event period:
2088	 */
2089	.max_period		= (1ULL << 31) - 1,
2090	.get_event_constraints	= intel_get_event_constraints,
2091	.put_event_constraints	= intel_put_event_constraints,
2092	.pebs_aliases		= intel_pebs_aliases_core2,
2093
2094	.format_attrs		= intel_arch3_formats_attr,
2095	.events_sysfs_show	= intel_event_sysfs_show,
2096
2097	.cpu_prepare		= intel_pmu_cpu_prepare,
2098	.cpu_starting		= intel_pmu_cpu_starting,
2099	.cpu_dying		= intel_pmu_cpu_dying,
2100	.guest_get_msrs		= intel_guest_get_msrs,
2101	.flush_branch_stack	= intel_pmu_flush_branch_stack,
2102};
2103
2104static __init void intel_clovertown_quirk(void)
2105{
2106	/*
2107	 * PEBS is unreliable due to:
2108	 *
2109	 *   AJ67  - PEBS may experience CPL leaks
2110	 *   AJ68  - PEBS PMI may be delayed by one event
2111	 *   AJ69  - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
2112	 *   AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
2113	 *
2114	 * AJ67 could be worked around by restricting the OS/USR flags.
2115	 * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
2116	 *
2117	 * AJ106 could possibly be worked around by not allowing LBR
2118	 *       usage from PEBS, including the fixup.
2119	 * AJ68  could possibly be worked around by always programming
2120	 *	 a pebs_event_reset[0] value and coping with the lost events.
2121	 *
2122	 * But taken together it might just make sense to not enable PEBS on
2123	 * these chips.
2124	 */
2125	pr_warn("PEBS disabled due to CPU errata\n");
2126	x86_pmu.pebs = 0;
2127	x86_pmu.pebs_constraints = NULL;
2128}
2129
2130static int intel_snb_pebs_broken(int cpu)
2131{
2132	u32 rev = UINT_MAX; /* default to broken for unknown models */
2133
2134	switch (cpu_data(cpu).x86_model) {
2135	case 42: /* SNB */
2136		rev = 0x28;
2137		break;
2138
2139	case 45: /* SNB-EP */
2140		switch (cpu_data(cpu).x86_mask) {
2141		case 6: rev = 0x618; break;
2142		case 7: rev = 0x70c; break;
2143		}
2144	}
2145
2146	return (cpu_data(cpu).microcode < rev);
2147}
2148
2149static void intel_snb_check_microcode(void)
2150{
2151	int pebs_broken = 0;
2152	int cpu;
2153
2154	get_online_cpus();
2155	for_each_online_cpu(cpu) {
2156		if ((pebs_broken = intel_snb_pebs_broken(cpu)))
2157			break;
2158	}
2159	put_online_cpus();
2160
2161	if (pebs_broken == x86_pmu.pebs_broken)
2162		return;
2163
2164	/*
2165	 * Serialized by the microcode lock.
2166	 */
2167	if (x86_pmu.pebs_broken) {
2168		pr_info("PEBS enabled due to microcode update\n");
2169		x86_pmu.pebs_broken = 0;
2170	} else {
2171		pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n");
2172		x86_pmu.pebs_broken = 1;
2173	}
2174}
2175
2176static __init void intel_sandybridge_quirk(void)
2177{
2178	x86_pmu.check_microcode = intel_snb_check_microcode;
2179	intel_snb_check_microcode();
2180}
2181
2182static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
2183	{ PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
2184	{ PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
2185	{ PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
2186	{ PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
2187	{ PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
2188	{ PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
2189	{ PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
2190};
2191
2192static __init void intel_arch_events_quirk(void)
2193{
2194	int bit;
2195
2196	/* disable events reported as not present by cpuid */
2197	for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
2198		intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
2199		pr_warn("CPUID marked event: \'%s\' unavailable\n",
2200			intel_arch_events_map[bit].name);
2201	}
2202}
2203
2204static __init void intel_nehalem_quirk(void)
2205{
2206	union cpuid10_ebx ebx;
2207
2208	ebx.full = x86_pmu.events_maskl;
2209	if (ebx.split.no_branch_misses_retired) {
2210		/*
2211		 * Erratum AAJ80 detected; we work around it by using
2212		 * the BR_MISP_EXEC.ANY event. This will over-count
2213		 * branch misses, but it's still much better than the
2214		 * architectural event, which is often completely bogus:
2215		 */
2216		intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
2217		ebx.split.no_branch_misses_retired = 0;
2218		x86_pmu.events_maskl = ebx.full;
2219		pr_info("CPU erratum AAJ80 worked around\n");
2220	}
2221}
2222
2223EVENT_ATTR_STR(mem-loads,	mem_ld_hsw,	"event=0xcd,umask=0x1,ldlat=3");
2224	EVENT_ATTR_STR(mem-stores,	mem_st_hsw,	"event=0xd0,umask=0x82");
2225
2226/* Haswell special events */
2227EVENT_ATTR_STR(tx-start,	tx_start,	"event=0xc9,umask=0x1");
2228EVENT_ATTR_STR(tx-commit,	tx_commit,	"event=0xc9,umask=0x2");
2229EVENT_ATTR_STR(tx-abort,	tx_abort,	"event=0xc9,umask=0x4");
2230EVENT_ATTR_STR(tx-capacity,	tx_capacity,	"event=0x54,umask=0x2");
2231EVENT_ATTR_STR(tx-conflict,	tx_conflict,	"event=0x54,umask=0x1");
2232EVENT_ATTR_STR(el-start,	el_start,	"event=0xc8,umask=0x1");
2233EVENT_ATTR_STR(el-commit,	el_commit,	"event=0xc8,umask=0x2");
2234EVENT_ATTR_STR(el-abort,	el_abort,	"event=0xc8,umask=0x4");
2235EVENT_ATTR_STR(el-capacity,	el_capacity,	"event=0x54,umask=0x2");
2236EVENT_ATTR_STR(el-conflict,	el_conflict,	"event=0x54,umask=0x1");
2237EVENT_ATTR_STR(cycles-t,	cycles_t,	"event=0x3c,in_tx=1");
2238EVENT_ATTR_STR(cycles-ct,	cycles_ct,	"event=0x3c,in_tx=1,in_tx_cp=1");
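
/*
 * Editor's note, not part of the original file: given the format
 * attributes declared earlier (event in config:0-7, in_tx in config:32,
 * in_tx_cp in config:33), the cycles-ct string above,
 * "event=0x3c,in_tx=1,in_tx_cp=1", decodes to the raw config computed
 * by this illustrative helper.
 */
static inline u64 cycles_ct_raw_config_sketch(void)
{
	return 0x3c | (1ULL << 32) | (1ULL << 33);	/* 0x30000003cULL */
}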
2239
2240static struct attribute *hsw_events_attrs[] = {
2241	EVENT_PTR(tx_start),
2242	EVENT_PTR(tx_commit),
2243	EVENT_PTR(tx_abort),
2244	EVENT_PTR(tx_capacity),
2245	EVENT_PTR(tx_conflict),
2246	EVENT_PTR(el_start),
2247	EVENT_PTR(el_commit),
2248	EVENT_PTR(el_abort),
2249	EVENT_PTR(el_capacity),
2250	EVENT_PTR(el_conflict),
2251	EVENT_PTR(cycles_t),
2252	EVENT_PTR(cycles_ct),
2253	EVENT_PTR(mem_ld_hsw),
2254	EVENT_PTR(mem_st_hsw),
2255	NULL
2256};
2257
2258__init int intel_pmu_init(void)
2259{
2260	union cpuid10_edx edx;
2261	union cpuid10_eax eax;
2262	union cpuid10_ebx ebx;
2263	struct event_constraint *c;
2264	unsigned int unused;
2265	int version;
2266
2267	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
2268		switch (boot_cpu_data.x86) {
2269		case 0x6:
2270			return p6_pmu_init();
2271		case 0xb:
2272			return knc_pmu_init();
2273		case 0xf:
2274			return p4_pmu_init();
2275		}
2276		return -ENODEV;
2277	}
2278
2279	/*
2280	 * Check whether the Architectural PerfMon supports
2281	 * Branch Misses Retired hw_event or not.
2282	 */
2283	cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
2284	if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
2285		return -ENODEV;
2286
2287	version = eax.split.version_id;
2288	if (version < 2)
2289		x86_pmu = core_pmu;
2290	else
2291		x86_pmu = intel_pmu;
2292
2293	x86_pmu.version			= version;
2294	x86_pmu.num_counters		= eax.split.num_counters;
2295	x86_pmu.cntval_bits		= eax.split.bit_width;
2296	x86_pmu.cntval_mask		= (1ULL << eax.split.bit_width) - 1;
2297
2298	x86_pmu.events_maskl		= ebx.full;
2299	x86_pmu.events_mask_len		= eax.split.mask_length;
2300
2301	x86_pmu.max_pebs_events		= min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
2302
2303	/*
2304	 * Quirk: v2 perfmon does not report fixed-purpose events, so
2305	 * assume at least 3 events:
2306	 */
2307	if (version > 1)
2308		x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
2309
2310	if (boot_cpu_has(X86_FEATURE_PDCM)) {
2311		u64 capabilities;
2312
2313		rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
2314		x86_pmu.intel_cap.capabilities = capabilities;
2315	}
2316
2317	intel_ds_init();
2318
2319	x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
2320
2321	/*
2322	 * Install the hw-cache-events table:
2323	 */
2324	switch (boot_cpu_data.x86_model) {
2325	case 14: /* 65 nm core solo/duo, "Yonah" */
2326		pr_cont("Core events, ");
2327		break;
2328
2329	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
2330		x86_add_quirk(intel_clovertown_quirk);
2331	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
2332	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
2333	case 29: /* six-core 45 nm xeon "Dunnington" */
2334		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
2335		       sizeof(hw_cache_event_ids));
2336
2337		intel_pmu_lbr_init_core();
2338
2339		x86_pmu.event_constraints = intel_core2_event_constraints;
2340		x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
2341		pr_cont("Core2 events, ");
2342		break;
2343
2344	case 26: /* 45 nm nehalem, "Bloomfield" */
2345	case 30: /* 45 nm nehalem, "Lynnfield" */
2346	case 46: /* 45 nm nehalem-ex, "Beckton" */
2347		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
2348		       sizeof(hw_cache_event_ids));
2349		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
2350		       sizeof(hw_cache_extra_regs));
2351
2352		intel_pmu_lbr_init_nhm();
2353
2354		x86_pmu.event_constraints = intel_nehalem_event_constraints;
2355		x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
2356		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
2357		x86_pmu.extra_regs = intel_nehalem_extra_regs;
2358
2359		x86_pmu.cpu_events = nhm_events_attrs;
2360
2361		/* UOPS_ISSUED.STALLED_CYCLES */
2362		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
2363			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
2364		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
2365		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
2366			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
2367
2368		x86_add_quirk(intel_nehalem_quirk);
2369
2370		pr_cont("Nehalem events, ");
2371		break;
2372
2373	case 28: /* Atom */
2374	case 38: /* Lincroft */
2375	case 39: /* Penwell */
2376	case 53: /* Cloverview */
2377	case 54: /* Cedarview */
2378		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2379		       sizeof(hw_cache_event_ids));
2380
2381		intel_pmu_lbr_init_atom();
2382
2383		x86_pmu.event_constraints = intel_gen_event_constraints;
2384		x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
2385		pr_cont("Atom events, ");
2386		break;
2387
2388	case 55: /* Atom 22nm "Silvermont" */
2389	case 77: /* Avoton "Silvermont" */
2390		memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
2391			sizeof(hw_cache_event_ids));
2392		memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
2393		       sizeof(hw_cache_extra_regs));
2394
2395		intel_pmu_lbr_init_atom();
2396
2397		x86_pmu.event_constraints = intel_slm_event_constraints;
2398		x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
2399		x86_pmu.extra_regs = intel_slm_extra_regs;
2400		x86_pmu.er_flags |= ERF_HAS_RSP_1;
2401		pr_cont("Silvermont events, ");
2402		break;
2403
2404	case 37: /* 32 nm nehalem, "Clarkdale" */
2405	case 44: /* 32 nm nehalem, "Gulftown" */
2406	case 47: /* 32 nm Xeon E7 */
2407		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
2408		       sizeof(hw_cache_event_ids));
2409		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
2410		       sizeof(hw_cache_extra_regs));
2411
2412		intel_pmu_lbr_init_nhm();
2413
2414		x86_pmu.event_constraints = intel_westmere_event_constraints;
2415		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
2416		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
2417		x86_pmu.extra_regs = intel_westmere_extra_regs;
2418		x86_pmu.er_flags |= ERF_HAS_RSP_1;
2419
2420		x86_pmu.cpu_events = nhm_events_attrs;
2421
2422		/* UOPS_ISSUED.STALLED_CYCLES */
2423		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
2424			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
2425		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
2426		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
2427			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
2428
2429		pr_cont("Westmere events, ");
2430		break;
2431
2432	case 42: /* SandyBridge */
2433	case 45: /* SandyBridge, "Romley-EP" */
2434		x86_add_quirk(intel_sandybridge_quirk);
2435		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
2436		       sizeof(hw_cache_event_ids));
2437		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
2438		       sizeof(hw_cache_extra_regs));
2439
2440		intel_pmu_lbr_init_snb();
2441
2442		x86_pmu.event_constraints = intel_snb_event_constraints;
2443		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
2444		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
2445		if (boot_cpu_data.x86_model == 45)
2446			x86_pmu.extra_regs = intel_snbep_extra_regs;
2447		else
2448			x86_pmu.extra_regs = intel_snb_extra_regs;
2449		/* all extra regs are per-cpu when HT is on */
2450		x86_pmu.er_flags |= ERF_HAS_RSP_1;
2451		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
2452
2453		x86_pmu.cpu_events = snb_events_attrs;
2454
2455		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
2456		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
2457			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
2458		/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
2459		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
2460			X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
2461
2462		pr_cont("SandyBridge events, ");
2463		break;
2464	case 58: /* IvyBridge */
2465	case 62: /* IvyBridge EP */
2466		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
2467		       sizeof(hw_cache_event_ids));
2468		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
2469		       sizeof(hw_cache_extra_regs));
2470
2471		intel_pmu_lbr_init_snb();
2472
2473		x86_pmu.event_constraints = intel_ivb_event_constraints;
2474		x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
2475		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
2476		if (boot_cpu_data.x86_model == 62)
2477			x86_pmu.extra_regs = intel_snbep_extra_regs;
2478		else
2479			x86_pmu.extra_regs = intel_snb_extra_regs;
2480		/* all extra regs are per-cpu when HT is on */
2481		x86_pmu.er_flags |= ERF_HAS_RSP_1;
2482		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
2483
2484		x86_pmu.cpu_events = snb_events_attrs;
2485
2486		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
2487		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
2488			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
2489
2490		pr_cont("IvyBridge events, ");
2491		break;
2492
2493
2494	case 60: /* Haswell Client */
2495	case 70: /* Haswell + GT3e (Crystal Well) */
2496	case 71:
2497	case 63: /* Haswell Server */
2498	case 69: /* Haswell ULT */
2499		x86_pmu.late_ack = true;
2500		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
2501		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
2502
2503		intel_pmu_lbr_init_snb();
2504
2505		x86_pmu.event_constraints = intel_hsw_event_constraints;
2506		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
2507		x86_pmu.extra_regs = intel_snb_extra_regs;
2508		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
2509		/* all extra regs are per-cpu when HT is on */
2510		x86_pmu.er_flags |= ERF_HAS_RSP_1;
2511		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
2512
2513		x86_pmu.hw_config = hsw_hw_config;
2514		x86_pmu.get_event_constraints = hsw_get_event_constraints;
2515		x86_pmu.cpu_events = hsw_events_attrs;
2516		x86_pmu.lbr_double_abort = true;
2517		pr_cont("Haswell events, ");
2518		break;
2519
2520	default:
2521		switch (x86_pmu.version) {
2522		case 1:
2523			x86_pmu.event_constraints = intel_v1_event_constraints;
2524			pr_cont("generic architected perfmon v1, ");
2525			break;
2526		default:
2527			/*
2528			 * default constraints for v2 and up
2529			 */
2530			x86_pmu.event_constraints = intel_gen_event_constraints;
2531			pr_cont("generic architected perfmon, ");
2532			break;
2533		}
2534	}
2535
2536	if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
2537		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
2538		     x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
2539		x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
2540	}
2541	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
2542
2543	if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
2544		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
2545		     x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
2546		x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
2547	}
2548
2549	x86_pmu.intel_ctrl |=
2550		((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
2551
2552	if (x86_pmu.event_constraints) {
2553		/*
2554		 * The event on fixed counter 2 (REF_CYCLES) only works on that
2555		 * counter, so do not extend its mask to the generic counters.
2556		 */
2557		for_each_event_constraint(c, x86_pmu.event_constraints) {
2558			if (c->cmask != FIXED_EVENT_FLAGS
2559			    || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
2560				continue;
2561			}
2562
2563			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
2564			c->weight += x86_pmu.num_counters;
2565		}
2566	}
2567
2568	/* Support full width counters using alternative MSR range */
2569	if (x86_pmu.intel_cap.full_width_write) {
2570		x86_pmu.max_period = x86_pmu.cntval_mask;
2571		x86_pmu.perfctr = MSR_IA32_PMC0;
2572		pr_cont("full-width counters, ");
2573	}
2574
2575	return 0;
2576}