   1// SPDX-License-Identifier: GPL-2.0-only
   2#include <linux/module.h>
   3
   4#include <asm/cpu_device_id.h>
   5#include <asm/intel-family.h>
   6#include "uncore.h"
   7#include "uncore_discovery.h"
   8
   9static bool uncore_no_discover;
  10module_param(uncore_no_discover, bool, 0);
  11MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism "
  12				     "(default: enable the discovery mechanism).");
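    /*
     * Note: when the driver is built as a module (assuming the usual
     * CONFIG_PERF_EVENTS_INTEL_UNCORE=m / intel-uncore.ko build), this can
     * be set at load time, e.g. "modprobe intel-uncore uncore_no_discover=1";
     * when built in, the equivalent kernel command line option is
     * "intel_uncore.uncore_no_discover=1".
     */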
  13struct intel_uncore_type *empty_uncore[] = { NULL, };
  14struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
  15struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
  16struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
  17
  18static bool pcidrv_registered;
  19struct pci_driver *uncore_pci_driver;
  20/* The PCI driver for the device which the uncore doesn't own. */
  21struct pci_driver *uncore_pci_sub_driver;
  22/* pci bus to socket mapping */
  23DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
  24struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
  25struct pci_extra_dev *uncore_extra_pci_dev;
  26int __uncore_max_dies;
  27
  28/* mask of cpus that collect uncore events */
  29static cpumask_t uncore_cpu_mask;
  30
  31/* constraint for the fixed counter */
  32static struct event_constraint uncore_constraint_fixed =
  33	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
  34struct event_constraint uncore_constraint_empty =
  35	EVENT_CONSTRAINT(0, 0, 0);
  36
  37MODULE_LICENSE("GPL");
  38
  39int uncore_pcibus_to_dieid(struct pci_bus *bus)
  40{
  41	struct pci2phy_map *map;
  42	int die_id = -1;
  43
  44	raw_spin_lock(&pci2phy_map_lock);
  45	list_for_each_entry(map, &pci2phy_map_head, list) {
  46		if (map->segment == pci_domain_nr(bus)) {
  47			die_id = map->pbus_to_dieid[bus->number];
  48			break;
  49		}
  50	}
  51	raw_spin_unlock(&pci2phy_map_lock);
  52
  53	return die_id;
  54}
  55
  56int uncore_die_to_segment(int die)
  57{
  58	struct pci_bus *bus = NULL;
  59
  60	/* Find the first pci bus that maps to the specified die. */
  61	while ((bus = pci_find_next_bus(bus)) &&
  62	       (die != uncore_pcibus_to_dieid(bus)))
  63		;
  64
  65	return bus ? pci_domain_nr(bus) : -EINVAL;
  66}
  67
  68int uncore_device_to_die(struct pci_dev *dev)
  69{
  70	int node = pcibus_to_node(dev->bus);
  71	int cpu;
  72
  73	for_each_cpu(cpu, cpumask_of_pcibus(dev->bus)) {
  74		struct cpuinfo_x86 *c = &cpu_data(cpu);
  75
  76		if (c->initialized && cpu_to_node(cpu) == node)
  77			return c->topo.logical_die_id;
  78	}
  79
  80	return -1;
  81}
  82
  83static void uncore_free_pcibus_map(void)
  84{
  85	struct pci2phy_map *map, *tmp;
  86
  87	list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
  88		list_del(&map->list);
  89		kfree(map);
  90	}
  91}
  92
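    /*
     * Look up (or lazily allocate) the pci2phy map entry for a PCI segment.
     * Called with pci2phy_map_lock held.  Because kmalloc(GFP_KERNEL) may
     * sleep, the raw spinlock is dropped around the allocation and the
     * lookup is retried, since another CPU may have inserted the entry in
     * the meantime; an unused allocation is freed at the "end" label.
     */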
  93struct pci2phy_map *__find_pci2phy_map(int segment)
  94{
  95	struct pci2phy_map *map, *alloc = NULL;
  96	int i;
  97
  98	lockdep_assert_held(&pci2phy_map_lock);
  99
 100lookup:
 101	list_for_each_entry(map, &pci2phy_map_head, list) {
 102		if (map->segment == segment)
 103			goto end;
 104	}
 105
 106	if (!alloc) {
 107		raw_spin_unlock(&pci2phy_map_lock);
 108		alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
 109		raw_spin_lock(&pci2phy_map_lock);
 110
 111		if (!alloc)
 112			return NULL;
 113
 114		goto lookup;
 115	}
 116
 117	map = alloc;
 118	alloc = NULL;
 119	map->segment = segment;
 120	for (i = 0; i < 256; i++)
 121		map->pbus_to_dieid[i] = -1;
 122	list_add_tail(&map->list, &pci2phy_map_head);
 123
 124end:
 125	kfree(alloc);
 126	return map;
 127}
 128
 129ssize_t uncore_event_show(struct device *dev,
 130			  struct device_attribute *attr, char *buf)
 131{
 132	struct uncore_event_desc *event =
 133		container_of(attr, struct uncore_event_desc, attr);
 134	return sprintf(buf, "%s", event->config);
 135}
 136
 137struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
 138{
 139	unsigned int dieid = topology_logical_die_id(cpu);
 140
 141	/*
 142	 * The unsigned check also catches the '-1' return value for
 143	 * non-existent mappings in the topology map.
 144	 */
 145	return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
 146}
 147
 148u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
 149{
 150	u64 count;
 151
 152	rdmsrl(event->hw.event_base, count);
 153
 154	return count;
 155}
 156
 157void uncore_mmio_exit_box(struct intel_uncore_box *box)
 158{
 159	if (box->io_addr)
 160		iounmap(box->io_addr);
 161}
 162
 163u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
 164			     struct perf_event *event)
 165{
 166	if (!box->io_addr)
 167		return 0;
 168
 169	if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
 170		return 0;
 171
 172	return readq(box->io_addr + event->hw.event_base);
 173}
 174
 175/*
 176 * generic get constraint function for shared match/mask registers.
 177 */
 178struct event_constraint *
 179uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
 180{
 181	struct intel_uncore_extra_reg *er;
 182	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
 183	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
 184	unsigned long flags;
 185	bool ok = false;
 186
 187	/*
 188	 * reg->alloc can be set due to existing state, so for fake box we
 189	 * need to ignore this, otherwise we might fail to allocate proper
 190	 * fake state for this extra reg constraint.
 191	 */
 192	if (reg1->idx == EXTRA_REG_NONE ||
 193	    (!uncore_box_is_fake(box) && reg1->alloc))
 194		return NULL;
 195
 196	er = &box->shared_regs[reg1->idx];
 197	raw_spin_lock_irqsave(&er->lock, flags);
 198	if (!atomic_read(&er->ref) ||
 199	    (er->config1 == reg1->config && er->config2 == reg2->config)) {
 200		atomic_inc(&er->ref);
 201		er->config1 = reg1->config;
 202		er->config2 = reg2->config;
 203		ok = true;
 204	}
 205	raw_spin_unlock_irqrestore(&er->lock, flags);
 206
 207	if (ok) {
 208		if (!uncore_box_is_fake(box))
 209			reg1->alloc = 1;
 210		return NULL;
 211	}
 212
 213	return &uncore_constraint_empty;
 214}
 215
 216void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
 217{
 218	struct intel_uncore_extra_reg *er;
 219	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
 220
 221	/*
 222	 * Only put constraint if extra reg was actually allocated. Also
 223	 * takes care of events which do not use an extra shared reg.
 224	 *
 225	 * Also, if this is a fake box we shouldn't touch any event state
 226	 * (reg->alloc) and we don't care about leaving inconsistent box
 227	 * state either since it will be thrown out.
 228	 */
 229	if (uncore_box_is_fake(box) || !reg1->alloc)
 230		return;
 231
 232	er = &box->shared_regs[reg1->idx];
 233	atomic_dec(&er->ref);
 234	reg1->alloc = 0;
 235}
 236
 237u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
 238{
 239	struct intel_uncore_extra_reg *er;
 240	unsigned long flags;
 241	u64 config;
 242
 243	er = &box->shared_regs[idx];
 244
 245	raw_spin_lock_irqsave(&er->lock, flags);
 246	config = er->config;
 247	raw_spin_unlock_irqrestore(&er->lock, flags);
 248
 249	return config;
 250}
 251
 252static void uncore_assign_hw_event(struct intel_uncore_box *box,
 253				   struct perf_event *event, int idx)
 254{
 255	struct hw_perf_event *hwc = &event->hw;
 256
 257	hwc->idx = idx;
 258	hwc->last_tag = ++box->tags[idx];
 259
 260	if (uncore_pmc_fixed(hwc->idx)) {
 261		hwc->event_base = uncore_fixed_ctr(box);
 262		hwc->config_base = uncore_fixed_ctl(box);
 263		return;
 264	}
 265
 266	hwc->config_base = uncore_event_ctl(box, hwc->idx);
 267	hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
 268}
 269
 270void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
 271{
 272	u64 prev_count, new_count, delta;
 273	int shift;
 274
 275	if (uncore_pmc_freerunning(event->hw.idx))
 276		shift = 64 - uncore_freerunning_bits(box, event);
 277	else if (uncore_pmc_fixed(event->hw.idx))
 278		shift = 64 - uncore_fixed_ctr_bits(box);
 279	else
 280		shift = 64 - uncore_perf_ctr_bits(box);
 281
 282	/* the hrtimer might modify the previous event value */
 283again:
 284	prev_count = local64_read(&event->hw.prev_count);
 285	new_count = uncore_read_counter(box, event);
 286	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
 287		goto again;
 288
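    	/*
    	 * The hardware counter is only (64 - shift) bits wide.  Shifting
    	 * both snapshots up by 'shift' makes the subtraction wrap at the
    	 * counter width, so the delta stays correct even if the counter
    	 * wrapped between the two reads; shifting back down restores the
    	 * magnitude.  E.g. for a 48-bit counter (shift = 16),
    	 * prev = 0xfffffffffffe and new = 0x3 gives delta = 5.
    	 */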
 289	delta = (new_count << shift) - (prev_count << shift);
 290	delta >>= shift;
 291
 292	local64_add(delta, &event->count);
 293}
 294
 295/*
 296 * The overflow interrupt is unavailable for SandyBridge-EP and is broken
 297 * on SandyBridge, so we use an hrtimer to periodically poll the counter
 298 * to avoid overflow.
 299 */
 300static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
 301{
 302	struct intel_uncore_box *box;
 303	struct perf_event *event;
 304	unsigned long flags;
 305	int bit;
 306
 307	box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
 308	if (!box->n_active || box->cpu != smp_processor_id())
 309		return HRTIMER_NORESTART;
 310	/*
 311	 * Disable local interrupts to prevent uncore_pmu_event_start/stop
 312	 * from interrupting the update process.
 313	 */
 314	local_irq_save(flags);
 315
 316	/*
 317	 * handle boxes with an active event list as opposed to active
 318	 * counters
 319	 */
 320	list_for_each_entry(event, &box->active_list, active_entry) {
 321		uncore_perf_event_update(box, event);
 322	}
 323
 324	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
 325		uncore_perf_event_update(box, box->events[bit]);
 326
 327	local_irq_restore(flags);
 328
 329	hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
 330	return HRTIMER_RESTART;
 331}
 332
 333void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
 334{
 335	hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
 336		      HRTIMER_MODE_REL_PINNED);
 337}
 338
 339void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
 340{
 341	hrtimer_cancel(&box->hrtimer);
 342}
 343
 344static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
 345{
 346	hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 347	box->hrtimer.function = uncore_pmu_hrtimer;
 348}
 349
 350static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
 351						 int node)
 352{
 353	int i, size, numshared = type->num_shared_regs;
 354	struct intel_uncore_box *box;
 355
 356	size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
 357
 358	box = kzalloc_node(size, GFP_KERNEL, node);
 359	if (!box)
 360		return NULL;
 361
 362	for (i = 0; i < numshared; i++)
 363		raw_spin_lock_init(&box->shared_regs[i].lock);
 364
 365	uncore_pmu_init_hrtimer(box);
 366	box->cpu = -1;
 367	box->dieid = -1;
 368
 369	/* set default hrtimer timeout */
 370	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
 371
 372	INIT_LIST_HEAD(&box->active_list);
 373
 374	return box;
 375}
 376
 377/*
 378 * The uncore_pmu_event_init() pmu event_init callback is used
 379 * as a detection point for uncore events.
 380 */
 381static int uncore_pmu_event_init(struct perf_event *event);
 382
 383static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
 384{
 385	return &box->pmu->pmu == event->pmu;
 386}
 387
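    /*
     * Collect the leader (and, if 'dogrp' is set, its active siblings that
     * belong to this box) into box->event_list for scheduling.  Returns the
     * new number of collected events, or -EINVAL if the box would exceed
     * its counter capacity.
     */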
 388static int
 389uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
 390		      bool dogrp)
 391{
 392	struct perf_event *event;
 393	int n, max_count;
 394
 395	max_count = box->pmu->type->num_counters;
 396	if (box->pmu->type->fixed_ctl)
 397		max_count++;
 398
 399	if (box->n_events >= max_count)
 400		return -EINVAL;
 401
 402	n = box->n_events;
 403
 404	if (is_box_event(box, leader)) {
 405		box->event_list[n] = leader;
 406		n++;
 407	}
 408
 409	if (!dogrp)
 410		return n;
 411
 412	for_each_sibling_event(event, leader) {
 413		if (!is_box_event(box, event) ||
 414		    event->state <= PERF_EVENT_STATE_OFF)
 415			continue;
 416
 417		if (n >= max_count)
 418			return -EINVAL;
 419
 420		box->event_list[n] = event;
 421		n++;
 422	}
 423	return n;
 424}
 425
 426static struct event_constraint *
 427uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
 428{
 429	struct intel_uncore_type *type = box->pmu->type;
 430	struct event_constraint *c;
 431
 432	if (type->ops->get_constraint) {
 433		c = type->ops->get_constraint(box, event);
 434		if (c)
 435			return c;
 436	}
 437
 438	if (event->attr.config == UNCORE_FIXED_EVENT)
 439		return &uncore_constraint_fixed;
 440
 441	if (type->constraints) {
 442		for_each_event_constraint(c, type->constraints) {
 443			if ((event->hw.config & c->cmask) == c->code)
 444				return c;
 445		}
 446	}
 447
 448	return &type->unconstrainted;
 449}
 450
 451static void uncore_put_event_constraint(struct intel_uncore_box *box,
 452					struct perf_event *event)
 453{
 454	if (box->pmu->type->ops->put_constraint)
 455		box->pmu->type->ops->put_constraint(box, event);
 456}
 457
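    /*
     * Assign counters to the first 'n' events in box->event_list.  Each
     * event's constraint is looked up first; the fast path simply keeps a
     * previous counter assignment while it is still legal, otherwise the
     * generic weight-ordered solver perf_assign_events() is used.  When
     * called with assign == NULL (group validation) or when scheduling
     * fails, the acquired constraints are released again.
     */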
 458static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
 459{
 460	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
 461	struct event_constraint *c;
 462	int i, wmin, wmax, ret = 0;
 463	struct hw_perf_event *hwc;
 464
 465	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
 466
 467	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
 468		c = uncore_get_event_constraint(box, box->event_list[i]);
 469		box->event_constraint[i] = c;
 470		wmin = min(wmin, c->weight);
 471		wmax = max(wmax, c->weight);
 472	}
 473
 474	/* fastpath, try to reuse previous register */
 475	for (i = 0; i < n; i++) {
 476		hwc = &box->event_list[i]->hw;
 477		c = box->event_constraint[i];
 478
 479		/* never assigned */
 480		if (hwc->idx == -1)
 481			break;
 482
 483		/* constraint still honored */
 484		if (!test_bit(hwc->idx, c->idxmsk))
 485			break;
 486
 487		/* not already used */
 488		if (test_bit(hwc->idx, used_mask))
 489			break;
 490
 491		__set_bit(hwc->idx, used_mask);
 492		if (assign)
 493			assign[i] = hwc->idx;
 494	}
 495	/* slow path */
 496	if (i != n)
 497		ret = perf_assign_events(box->event_constraint, n,
 498					 wmin, wmax, n, assign);
 499
 500	if (!assign || ret) {
 501		for (i = 0; i < n; i++)
 502			uncore_put_event_constraint(box, box->event_list[i]);
 503	}
 504	return ret ? -EINVAL : 0;
 505}
 506
 507void uncore_pmu_event_start(struct perf_event *event, int flags)
 508{
 509	struct intel_uncore_box *box = uncore_event_to_box(event);
 510	int idx = event->hw.idx;
 511
 512	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
 513		return;
 514
 515	/*
 516	 * A free running counter is read-only and always active.
 517	 * Use the current counter value as the start point.
 518	 * There is no overflow interrupt for a free running counter,
 519	 * so use an hrtimer to periodically poll the counter to avoid overflow.
 520	 */
 521	if (uncore_pmc_freerunning(event->hw.idx)) {
 522		list_add_tail(&event->active_entry, &box->active_list);
 523		local64_set(&event->hw.prev_count,
 524			    uncore_read_counter(box, event));
 525		if (box->n_active++ == 0)
 526			uncore_pmu_start_hrtimer(box);
 527		return;
 528	}
 529
 530	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
 531		return;
 532
 533	event->hw.state = 0;
 534	box->events[idx] = event;
 535	box->n_active++;
 536	__set_bit(idx, box->active_mask);
 537
 538	local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
 539	uncore_enable_event(box, event);
 540
 541	if (box->n_active == 1)
 542		uncore_pmu_start_hrtimer(box);
 543}
 544
 545void uncore_pmu_event_stop(struct perf_event *event, int flags)
 546{
 547	struct intel_uncore_box *box = uncore_event_to_box(event);
 548	struct hw_perf_event *hwc = &event->hw;
 549
 550	/* Cannot disable free running counter which is read-only */
 551	if (uncore_pmc_freerunning(hwc->idx)) {
 552		list_del(&event->active_entry);
 553		if (--box->n_active == 0)
 554			uncore_pmu_cancel_hrtimer(box);
 555		uncore_perf_event_update(box, event);
 556		return;
 557	}
 558
 559	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
 560		uncore_disable_event(box, event);
 561		box->n_active--;
 562		box->events[hwc->idx] = NULL;
 563		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
 564		hwc->state |= PERF_HES_STOPPED;
 565
 566		if (box->n_active == 0)
 567			uncore_pmu_cancel_hrtimer(box);
 568	}
 569
 570	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
 571		/*
 572		 * Drain the remaining delta count out of an event
 573		 * that we are disabling:
 574		 */
 575		uncore_perf_event_update(box, event);
 576		hwc->state |= PERF_HES_UPTODATE;
 577	}
 578}
 579
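    /*
     * pmu::add callback: collect the new event into the box, (re)assign
     * counters for all collected events, stop and reprogram the events
     * whose counter assignment changed, and finally (re)start the events
     * that are not held off with PERF_HES_ARCH.
     */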
 580int uncore_pmu_event_add(struct perf_event *event, int flags)
 581{
 582	struct intel_uncore_box *box = uncore_event_to_box(event);
 583	struct hw_perf_event *hwc = &event->hw;
 584	int assign[UNCORE_PMC_IDX_MAX];
 585	int i, n, ret;
 586
 587	if (!box)
 588		return -ENODEV;
 589
 590	/*
 591	 * The free running counter is assigned in event_init().
 592	 * The free running counter event and free running counter
 593	 * are 1:1 mapped. It doesn't need to be tracked in event_list.
 594	 */
 595	if (uncore_pmc_freerunning(hwc->idx)) {
 596		if (flags & PERF_EF_START)
 597			uncore_pmu_event_start(event, 0);
 598		return 0;
 599	}
 600
 601	ret = n = uncore_collect_events(box, event, false);
 602	if (ret < 0)
 603		return ret;
 604
 605	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 606	if (!(flags & PERF_EF_START))
 607		hwc->state |= PERF_HES_ARCH;
 608
 609	ret = uncore_assign_events(box, assign, n);
 610	if (ret)
 611		return ret;
 612
 613	/* save events moving to new counters */
 614	for (i = 0; i < box->n_events; i++) {
 615		event = box->event_list[i];
 616		hwc = &event->hw;
 617
 618		if (hwc->idx == assign[i] &&
 619			hwc->last_tag == box->tags[assign[i]])
 620			continue;
 621		/*
 622		 * Ensure we don't accidentally enable a stopped
 623		 * counter simply because we rescheduled.
 624		 */
 625		if (hwc->state & PERF_HES_STOPPED)
 626			hwc->state |= PERF_HES_ARCH;
 627
 628		uncore_pmu_event_stop(event, PERF_EF_UPDATE);
 629	}
 630
 631	/* reprogram moved events into new counters */
 632	for (i = 0; i < n; i++) {
 633		event = box->event_list[i];
 634		hwc = &event->hw;
 635
 636		if (hwc->idx != assign[i] ||
 637			hwc->last_tag != box->tags[assign[i]])
 638			uncore_assign_hw_event(box, event, assign[i]);
 639		else if (i < box->n_events)
 640			continue;
 641
 642		if (hwc->state & PERF_HES_ARCH)
 643			continue;
 644
 645		uncore_pmu_event_start(event, 0);
 646	}
 647	box->n_events = n;
 648
 649	return 0;
 650}
 651
 652void uncore_pmu_event_del(struct perf_event *event, int flags)
 653{
 654	struct intel_uncore_box *box = uncore_event_to_box(event);
 655	int i;
 656
 657	uncore_pmu_event_stop(event, PERF_EF_UPDATE);
 658
 659	/*
 660	 * The event for a free running counter is not tracked by event_list.
 661	 * There is no need to force event->hw.idx = -1 to reassign the counter,
 662	 * because the event and the free running counter are 1:1 mapped.
 663	 */
 664	if (uncore_pmc_freerunning(event->hw.idx))
 665		return;
 666
 667	for (i = 0; i < box->n_events; i++) {
 668		if (event == box->event_list[i]) {
 669			uncore_put_event_constraint(box, event);
 670
 671			for (++i; i < box->n_events; i++)
 672				box->event_list[i - 1] = box->event_list[i];
 673
 674			--box->n_events;
 675			break;
 676		}
 677	}
 678
 679	event->hw.idx = -1;
 680	event->hw.last_tag = ~0ULL;
 681}
 682
 683void uncore_pmu_event_read(struct perf_event *event)
 684{
 685	struct intel_uncore_box *box = uncore_event_to_box(event);
 686	uncore_perf_event_update(box, event);
 687}
 688
 689/*
 690 * validation ensures the group can be loaded onto the
 691 * PMU if it was the only group available.
 692 */
 693static int uncore_validate_group(struct intel_uncore_pmu *pmu,
 694				struct perf_event *event)
 695{
 696	struct perf_event *leader = event->group_leader;
 697	struct intel_uncore_box *fake_box;
 698	int ret = -EINVAL, n;
 699
 700	/* The free running counter is always active. */
 701	if (uncore_pmc_freerunning(event->hw.idx))
 702		return 0;
 703
 704	fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
 705	if (!fake_box)
 706		return -ENOMEM;
 707
 708	fake_box->pmu = pmu;
 709	/*
 710	 * The event is not yet connected with its
 711	 * siblings, therefore we must first collect
 712	 * the existing siblings, then add the new event
 713	 * before we can simulate the scheduling.
 714	 */
 715	n = uncore_collect_events(fake_box, leader, true);
 716	if (n < 0)
 717		goto out;
 718
 719	fake_box->n_events = n;
 720	n = uncore_collect_events(fake_box, event, false);
 721	if (n < 0)
 722		goto out;
 723
 724	fake_box->n_events = n;
 725
 726	ret = uncore_assign_events(fake_box, NULL, n);
 727out:
 728	kfree(fake_box);
 729	return ret;
 730}
 731
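    /*
     * pmu::event_init callback.  Rejects events that do not target this
     * PMU, sampling events and events without a valid CPU; binds the event
     * to the CPU that collects uncore events for the target die; and
     * computes the hardware configuration for fixed, free-running and
     * normal events respectively.
     */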
 732static int uncore_pmu_event_init(struct perf_event *event)
 733{
 734	struct intel_uncore_pmu *pmu;
 735	struct intel_uncore_box *box;
 736	struct hw_perf_event *hwc = &event->hw;
 737	int ret;
 738
 739	if (event->attr.type != event->pmu->type)
 740		return -ENOENT;
 741
 742	pmu = uncore_event_to_pmu(event);
 743	/* no device found for this pmu */
 744	if (pmu->func_id < 0)
 745		return -ENOENT;
 746
 747	/* Sampling not supported yet */
 748	if (hwc->sample_period)
 749		return -EINVAL;
 750
 751	/*
 752	 * Place all uncore events for a particular physical package
 753	 * onto a single cpu
 754	 */
 755	if (event->cpu < 0)
 756		return -EINVAL;
 757	box = uncore_pmu_to_box(pmu, event->cpu);
 758	if (!box || box->cpu < 0)
 759		return -EINVAL;
 760	event->cpu = box->cpu;
 761	event->pmu_private = box;
 762
 763	event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
 764
 765	event->hw.idx = -1;
 766	event->hw.last_tag = ~0ULL;
 767	event->hw.extra_reg.idx = EXTRA_REG_NONE;
 768	event->hw.branch_reg.idx = EXTRA_REG_NONE;
 769
 770	if (event->attr.config == UNCORE_FIXED_EVENT) {
 771		/* no fixed counter */
 772		if (!pmu->type->fixed_ctl)
 773			return -EINVAL;
 774		/*
 775		 * if there is only one fixed counter, only the first pmu
 776		 * can access the fixed counter
 777		 */
 778		if (pmu->type->single_fixed && pmu->pmu_idx > 0)
 779			return -EINVAL;
 780
 781		/* fixed counters have event field hardcoded to zero */
 782		hwc->config = 0ULL;
 783	} else if (is_freerunning_event(event)) {
 784		hwc->config = event->attr.config;
 785		if (!check_valid_freerunning_event(box, event))
 786			return -EINVAL;
 787		event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
 788		/*
 789		 * The free running counter event and free running counter
 790		 * are always 1:1 mapped.
 791		 * The free running counter is always active.
 792		 * Assign the free running counter here.
 793		 */
 794		event->hw.event_base = uncore_freerunning_counter(box, event);
 795	} else {
 796		hwc->config = event->attr.config &
 797			      (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
 798		if (pmu->type->ops->hw_config) {
 799			ret = pmu->type->ops->hw_config(box, event);
 800			if (ret)
 801				return ret;
 802		}
 803	}
 804
 805	if (event->group_leader != event)
 806		ret = uncore_validate_group(pmu, event);
 807	else
 808		ret = 0;
 809
 810	return ret;
 811}
 812
 813static void uncore_pmu_enable(struct pmu *pmu)
 814{
 815	struct intel_uncore_pmu *uncore_pmu;
 816	struct intel_uncore_box *box;
 817
 818	uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
 819
 820	box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
 821	if (!box)
 822		return;
 823
 824	if (uncore_pmu->type->ops->enable_box)
 825		uncore_pmu->type->ops->enable_box(box);
 826}
 827
 828static void uncore_pmu_disable(struct pmu *pmu)
 829{
 830	struct intel_uncore_pmu *uncore_pmu;
 831	struct intel_uncore_box *box;
 832
 833	uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
 834
 835	box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
 836	if (!box)
 837		return;
 838
 839	if (uncore_pmu->type->ops->disable_box)
 840		uncore_pmu->type->ops->disable_box(box);
 841}
 842
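    /*
     * The "cpumask" attribute below is exposed as
     * /sys/bus/event_source/devices/uncore_*/cpumask and reports, per die,
     * the CPU that currently collects the uncore events; the perf tool
     * reads it to pick the CPU on which to open uncore events.
     */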
 843static ssize_t uncore_get_attr_cpumask(struct device *dev,
 844				struct device_attribute *attr, char *buf)
 845{
 846	return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
 847}
 848
 849static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
 850
 851static struct attribute *uncore_pmu_attrs[] = {
 852	&dev_attr_cpumask.attr,
 853	NULL,
 854};
 855
 856static const struct attribute_group uncore_pmu_attr_group = {
 857	.attrs = uncore_pmu_attrs,
 858};
 859
 860static inline int uncore_get_box_id(struct intel_uncore_type *type,
 861				    struct intel_uncore_pmu *pmu)
 862{
 863	return type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx;
 864}
 865
 866void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
 867{
 868	struct intel_uncore_type *type = pmu->type;
 869
 870	if (type->num_boxes == 1)
 871		sprintf(pmu_name, "uncore_type_%u", type->type_id);
 872	else {
 873		sprintf(pmu_name, "uncore_type_%u_%d",
 874			type->type_id, uncore_get_box_id(type, pmu));
 875	}
 876}
 877
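    /*
     * Derive the perf PMU name.  The registered PMUs are visible under
     * /sys/bus/event_source/devices/ (e.g. "uncore_cha_0", or
     * "uncore_type_<typeid>_<boxid>" for unnamed units from the discovery
     * table) and are addressed by that name from the perf tool; the exact
     * set of names depends on the platform.
     */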
 878static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
 879{
 880	struct intel_uncore_type *type = pmu->type;
 881
 882	/*
 883	 * There is no uncore block name in the discovery table.
 884	 * Use "uncore_type_<typeid>_<boxid>" as the name.
 885	 */
 886	if (!type->name) {
 887		uncore_get_alias_name(pmu->name, pmu);
 888		return;
 889	}
 890
 891	if (type->num_boxes == 1) {
 892		if (strlen(type->name) > 0)
 893			sprintf(pmu->name, "uncore_%s", type->name);
 894		else
 895			sprintf(pmu->name, "uncore");
 896	} else {
 897		/*
 898		 * Use the box ID from the discovery table if applicable.
 899		 */
 900		sprintf(pmu->name, "uncore_%s_%d", type->name,
 901			uncore_get_box_id(type, pmu));
 902	}
 903}
 904
 905static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
 906{
 907	int ret;
 908
 909	if (!pmu->type->pmu) {
 910		pmu->pmu = (struct pmu) {
 911			.attr_groups	= pmu->type->attr_groups,
 912			.task_ctx_nr	= perf_invalid_context,
 913			.pmu_enable	= uncore_pmu_enable,
 914			.pmu_disable	= uncore_pmu_disable,
 915			.event_init	= uncore_pmu_event_init,
 916			.add		= uncore_pmu_event_add,
 917			.del		= uncore_pmu_event_del,
 918			.start		= uncore_pmu_event_start,
 919			.stop		= uncore_pmu_event_stop,
 920			.read		= uncore_pmu_event_read,
 921			.module		= THIS_MODULE,
 922			.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
 923			.attr_update	= pmu->type->attr_update,
 924		};
 925	} else {
 926		pmu->pmu = *pmu->type->pmu;
 927		pmu->pmu.attr_groups = pmu->type->attr_groups;
 928		pmu->pmu.attr_update = pmu->type->attr_update;
 929	}
 930
 931	uncore_get_pmu_name(pmu);
 932
 933	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
 934	if (!ret)
 935		pmu->registered = true;
 936	return ret;
 937}
 938
 939static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
 940{
 941	if (!pmu->registered)
 942		return;
 943	perf_pmu_unregister(&pmu->pmu);
 944	pmu->registered = false;
 945}
 946
 947static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
 948{
 949	int die;
 950
 951	for (die = 0; die < uncore_max_dies(); die++)
 952		kfree(pmu->boxes[die]);
 953	kfree(pmu->boxes);
 954}
 955
 956static void uncore_type_exit(struct intel_uncore_type *type)
 957{
 958	struct intel_uncore_pmu *pmu = type->pmus;
 959	int i;
 960
 961	if (type->cleanup_mapping)
 962		type->cleanup_mapping(type);
 963
 964	if (pmu) {
 965		for (i = 0; i < type->num_boxes; i++, pmu++) {
 966			uncore_pmu_unregister(pmu);
 967			uncore_free_boxes(pmu);
 968		}
 969		kfree(type->pmus);
 970		type->pmus = NULL;
 971	}
 972	if (type->box_ids) {
 973		kfree(type->box_ids);
 974		type->box_ids = NULL;
 975	}
 976	kfree(type->events_group);
 977	type->events_group = NULL;
 978}
 979
 980static void uncore_types_exit(struct intel_uncore_type **types)
 981{
 982	for (; *types; types++)
 983		uncore_type_exit(*types);
 984}
 985
 986static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
 987{
 988	struct intel_uncore_pmu *pmus;
 989	size_t size;
 990	int i, j;
 991
 992	pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL);
 993	if (!pmus)
 994		return -ENOMEM;
 995
 996	size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
 997
 998	for (i = 0; i < type->num_boxes; i++) {
 999		pmus[i].func_id	= setid ? i : -1;
1000		pmus[i].pmu_idx	= i;
1001		pmus[i].type	= type;
1002		pmus[i].boxes	= kzalloc(size, GFP_KERNEL);
1003		if (!pmus[i].boxes)
1004			goto err;
1005	}
1006
1007	type->pmus = pmus;
1008	type->unconstrainted = (struct event_constraint)
1009		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
1010				0, type->num_counters, 0, 0);
1011
1012	if (type->event_descs) {
1013		struct {
1014			struct attribute_group group;
1015			struct attribute *attrs[];
1016		} *attr_group;
1017		for (i = 0; type->event_descs[i].attr.attr.name; i++);
1018
1019		attr_group = kzalloc(struct_size(attr_group, attrs, i + 1),
1020								GFP_KERNEL);
1021		if (!attr_group)
1022			goto err;
1023
1024		attr_group->group.name = "events";
1025		attr_group->group.attrs = attr_group->attrs;
1026
1027		for (j = 0; j < i; j++)
1028			attr_group->attrs[j] = &type->event_descs[j].attr.attr;
1029
1030		type->events_group = &attr_group->group;
1031	}
1032
1033	type->pmu_group = &uncore_pmu_attr_group;
1034
1035	if (type->set_mapping)
1036		type->set_mapping(type);
1037
1038	return 0;
1039
1040err:
1041	for (i = 0; i < type->num_boxes; i++)
1042		kfree(pmus[i].boxes);
1043	kfree(pmus);
1044
1045	return -ENOMEM;
1046}
1047
1048static int __init
1049uncore_types_init(struct intel_uncore_type **types, bool setid)
1050{
1051	int ret;
1052
1053	for (; *types; types++) {
1054		ret = uncore_type_init(*types, setid);
1055		if (ret)
1056			return ret;
1057	}
1058	return 0;
1059}
1060
1061/*
1062 * Get the die information of a PCI device.
1063 * @pdev: The PCI device.
1064 * @die: The die id which the device maps to.
1065 */
1066static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die)
1067{
1068	*die = uncore_pcibus_to_dieid(pdev->bus);
1069	if (*die < 0)
1070		return -EINVAL;
1071
1072	return 0;
1073}
1074
1075static struct intel_uncore_pmu *
1076uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev)
1077{
1078	struct intel_uncore_type **types = uncore_pci_uncores;
1079	struct intel_uncore_type *type;
1080	u64 box_ctl;
1081	int i, die;
1082
1083	for (; *types; types++) {
1084		type = *types;
1085		for (die = 0; die < __uncore_max_dies; die++) {
1086			for (i = 0; i < type->num_boxes; i++) {
1087				if (!type->box_ctls[die])
1088					continue;
1089				box_ctl = type->box_ctls[die] + type->pci_offsets[i];
1090				if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) &&
1091				    pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) &&
1092				    pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl))
1093					return &type->pmus[i];
1094			}
1095		}
1096	}
1097
1098	return NULL;
1099}
1100
1101/*
1102 * Find the PMU of a PCI device.
1103 * @pdev: The PCI device.
1104 * @ids: The ID table of the available PCI devices with a PMU.
1105 *       If NULL, search the whole uncore_pci_uncores.
1106 */
1107static struct intel_uncore_pmu *
1108uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
1109{
1110	struct intel_uncore_pmu *pmu = NULL;
1111	struct intel_uncore_type *type;
1112	kernel_ulong_t data;
1113	unsigned int devfn;
1114
1115	if (!ids)
1116		return uncore_pci_find_dev_pmu_from_types(pdev);
1117
1118	while (ids && ids->vendor) {
1119		if ((ids->vendor == pdev->vendor) &&
1120		    (ids->device == pdev->device)) {
1121			data = ids->driver_data;
1122			devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
1123					  UNCORE_PCI_DEV_FUNC(data));
1124			if (devfn == pdev->devfn) {
1125				type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
1126				pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
1127				break;
1128			}
1129		}
1130		ids++;
1131	}
1132	return pmu;
1133}
1134
1135/*
1136 * Register the PMU for a PCI device
1137 * @pdev: The PCI device.
1138 * @type: The corresponding PMU type of the device.
1139 * @pmu: The corresponding PMU of the device.
1140 * @die: The die id which the device maps to.
1141 */
1142static int uncore_pci_pmu_register(struct pci_dev *pdev,
1143				   struct intel_uncore_type *type,
1144				   struct intel_uncore_pmu *pmu,
1145				   int die)
1146{
1147	struct intel_uncore_box *box;
1148	int ret;
1149
1150	if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
1151		return -EINVAL;
1152
1153	box = uncore_alloc_box(type, NUMA_NO_NODE);
1154	if (!box)
1155		return -ENOMEM;
1156
1157	if (pmu->func_id < 0)
1158		pmu->func_id = pdev->devfn;
1159	else
1160		WARN_ON_ONCE(pmu->func_id != pdev->devfn);
1161
1162	atomic_inc(&box->refcnt);
1163	box->dieid = die;
1164	box->pci_dev = pdev;
1165	box->pmu = pmu;
1166	uncore_box_init(box);
1167
1168	pmu->boxes[die] = box;
1169	if (atomic_inc_return(&pmu->activeboxes) > 1)
1170		return 0;
1171
1172	/* First active box registers the pmu */
1173	ret = uncore_pmu_register(pmu);
1174	if (ret) {
1175		pmu->boxes[die] = NULL;
1176		uncore_box_exit(box);
1177		kfree(box);
1178	}
1179	return ret;
1180}
1181
1182/*
1183 * add a pci uncore device
1184 */
1185static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1186{
1187	struct intel_uncore_type *type;
1188	struct intel_uncore_pmu *pmu = NULL;
1189	int die, ret;
1190
1191	ret = uncore_pci_get_dev_die_info(pdev, &die);
1192	if (ret)
1193		return ret;
1194
1195	if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
1196		int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
1197
1198		uncore_extra_pci_dev[die].dev[idx] = pdev;
1199		pci_set_drvdata(pdev, NULL);
1200		return 0;
1201	}
1202
1203	type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
1204
1205	/*
1206	 * Some platforms, e.g. Knights Landing, use a common PCI device ID
1207	 * for multiple instances of an uncore PMU device type. Check the
1208	 * PCI slot and function to identify the uncore box.
1209	 */
1210	if (id->driver_data & ~0xffff) {
1211		struct pci_driver *pci_drv = to_pci_driver(pdev->dev.driver);
1212
1213		pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
1214		if (pmu == NULL)
1215			return -ENODEV;
1216	} else {
1217		/*
1218		 * For a performance monitoring unit with multiple boxes,
1219		 * each box has a different function id.
1220		 */
1221		pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
1222	}
1223
1224	ret = uncore_pci_pmu_register(pdev, type, pmu, die);
1225
1226	pci_set_drvdata(pdev, pmu->boxes[die]);
1227
1228	return ret;
1229}
1230
1231/*
1232 * Unregister the PMU of a PCI device
1233 * @pmu: The corresponding PMU is unregistered.
1234 * @die: The die id which the device maps to.
1235 */
1236static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die)
1237{
1238	struct intel_uncore_box *box = pmu->boxes[die];
1239
1240	pmu->boxes[die] = NULL;
1241	if (atomic_dec_return(&pmu->activeboxes) == 0)
1242		uncore_pmu_unregister(pmu);
1243	uncore_box_exit(box);
1244	kfree(box);
1245}
1246
1247static void uncore_pci_remove(struct pci_dev *pdev)
1248{
1249	struct intel_uncore_box *box;
1250	struct intel_uncore_pmu *pmu;
1251	int i, die;
1252
1253	if (uncore_pci_get_dev_die_info(pdev, &die))
1254		return;
1255
1256	box = pci_get_drvdata(pdev);
1257	if (!box) {
1258		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
1259			if (uncore_extra_pci_dev[die].dev[i] == pdev) {
1260				uncore_extra_pci_dev[die].dev[i] = NULL;
1261				break;
1262			}
1263		}
1264		WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
1265		return;
1266	}
1267
1268	pmu = box->pmu;
1269
1270	pci_set_drvdata(pdev, NULL);
1271
1272	uncore_pci_pmu_unregister(pmu, die);
1273}
1274
1275static int uncore_bus_notify(struct notifier_block *nb,
1276			     unsigned long action, void *data,
1277			     const struct pci_device_id *ids)
1278{
1279	struct device *dev = data;
1280	struct pci_dev *pdev = to_pci_dev(dev);
1281	struct intel_uncore_pmu *pmu;
1282	int die;
1283
1284	/* Unregister the PMU when the device is going to be deleted. */
1285	if (action != BUS_NOTIFY_DEL_DEVICE)
1286		return NOTIFY_DONE;
1287
1288	pmu = uncore_pci_find_dev_pmu(pdev, ids);
1289	if (!pmu)
1290		return NOTIFY_DONE;
1291
1292	if (uncore_pci_get_dev_die_info(pdev, &die))
1293		return NOTIFY_DONE;
1294
1295	uncore_pci_pmu_unregister(pmu, die);
1296
1297	return NOTIFY_OK;
1298}
1299
1300static int uncore_pci_sub_bus_notify(struct notifier_block *nb,
1301				     unsigned long action, void *data)
1302{
1303	return uncore_bus_notify(nb, action, data,
1304				 uncore_pci_sub_driver->id_table);
1305}
1306
1307static struct notifier_block uncore_pci_sub_notifier = {
1308	.notifier_call = uncore_pci_sub_bus_notify,
1309};
1310
1311static void uncore_pci_sub_driver_init(void)
1312{
1313	const struct pci_device_id *ids = uncore_pci_sub_driver->id_table;
1314	struct intel_uncore_type *type;
1315	struct intel_uncore_pmu *pmu;
1316	struct pci_dev *pci_sub_dev;
1317	bool notify = false;
1318	unsigned int devfn;
1319	int die;
1320
1321	while (ids && ids->vendor) {
1322		pci_sub_dev = NULL;
1323		type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)];
1324		/*
1325		 * Search the available device, and register the
1326		 * corresponding PMU.
1327		 */
1328		while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
1329						     ids->device, pci_sub_dev))) {
1330			devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
1331					  UNCORE_PCI_DEV_FUNC(ids->driver_data));
1332			if (devfn != pci_sub_dev->devfn)
1333				continue;
1334
1335			pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
1336			if (!pmu)
1337				continue;
1338
1339			if (uncore_pci_get_dev_die_info(pci_sub_dev, &die))
1340				continue;
1341
1342			if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
1343						     die))
1344				notify = true;
1345		}
1346		ids++;
1347	}
1348
1349	if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier))
1350		notify = false;
1351
1352	if (!notify)
1353		uncore_pci_sub_driver = NULL;
1354}
1355
1356static int uncore_pci_bus_notify(struct notifier_block *nb,
1357				     unsigned long action, void *data)
1358{
1359	return uncore_bus_notify(nb, action, data, NULL);
1360}
1361
1362static struct notifier_block uncore_pci_notifier = {
1363	.notifier_call = uncore_pci_bus_notify,
1364};
1365
1366
1367static void uncore_pci_pmus_register(void)
1368{
1369	struct intel_uncore_type **types = uncore_pci_uncores;
1370	struct intel_uncore_type *type;
1371	struct intel_uncore_pmu *pmu;
1372	struct pci_dev *pdev;
1373	u64 box_ctl;
1374	int i, die;
1375
1376	for (; *types; types++) {
1377		type = *types;
1378		for (die = 0; die < __uncore_max_dies; die++) {
1379			for (i = 0; i < type->num_boxes; i++) {
1380				if (!type->box_ctls[die])
1381					continue;
1382				box_ctl = type->box_ctls[die] + type->pci_offsets[i];
1383				pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl),
1384								   UNCORE_DISCOVERY_PCI_BUS(box_ctl),
1385								   UNCORE_DISCOVERY_PCI_DEVFN(box_ctl));
1386				if (!pdev)
1387					continue;
1388				pmu = &type->pmus[i];
1389
1390				uncore_pci_pmu_register(pdev, type, pmu, die);
1391			}
1392		}
1393	}
1394
1395	bus_register_notifier(&pci_bus_type, &uncore_pci_notifier);
1396}
1397
1398static int __init uncore_pci_init(void)
1399{
1400	size_t size;
1401	int ret;
1402
1403	size = uncore_max_dies() * sizeof(struct pci_extra_dev);
1404	uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
1405	if (!uncore_extra_pci_dev) {
1406		ret = -ENOMEM;
1407		goto err;
1408	}
1409
1410	ret = uncore_types_init(uncore_pci_uncores, false);
1411	if (ret)
1412		goto errtype;
1413
1414	if (uncore_pci_driver) {
1415		uncore_pci_driver->probe = uncore_pci_probe;
1416		uncore_pci_driver->remove = uncore_pci_remove;
1417
1418		ret = pci_register_driver(uncore_pci_driver);
1419		if (ret)
1420			goto errtype;
1421	} else
1422		uncore_pci_pmus_register();
1423
1424	if (uncore_pci_sub_driver)
1425		uncore_pci_sub_driver_init();
1426
1427	pcidrv_registered = true;
1428	return 0;
1429
1430errtype:
1431	uncore_types_exit(uncore_pci_uncores);
1432	kfree(uncore_extra_pci_dev);
1433	uncore_extra_pci_dev = NULL;
1434	uncore_free_pcibus_map();
1435err:
1436	uncore_pci_uncores = empty_uncore;
1437	return ret;
1438}
1439
1440static void uncore_pci_exit(void)
1441{
1442	if (pcidrv_registered) {
1443		pcidrv_registered = false;
1444		if (uncore_pci_sub_driver)
1445			bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier);
1446		if (uncore_pci_driver)
1447			pci_unregister_driver(uncore_pci_driver);
1448		else
1449			bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier);
1450		uncore_types_exit(uncore_pci_uncores);
1451		kfree(uncore_extra_pci_dev);
1452		uncore_free_pcibus_map();
1453	}
1454}
1455
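    /*
     * Move the uncore context of one die from old_cpu to new_cpu, e.g. when
     * the CPU that collects uncore events for that die goes offline.  The
     * perf contexts are migrated with perf_pmu_migrate_context() and the
     * boxes are retargeted to the new CPU.
     */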
1456static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
1457				   int new_cpu)
1458{
1459	struct intel_uncore_pmu *pmu = type->pmus;
1460	struct intel_uncore_box *box;
1461	int i, die;
1462
1463	die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
1464	for (i = 0; i < type->num_boxes; i++, pmu++) {
1465		box = pmu->boxes[die];
1466		if (!box)
1467			continue;
1468
1469		if (old_cpu < 0) {
1470			WARN_ON_ONCE(box->cpu != -1);
1471			box->cpu = new_cpu;
1472			continue;
1473		}
1474
1475		WARN_ON_ONCE(box->cpu != old_cpu);
1476		box->cpu = -1;
1477		if (new_cpu < 0)
1478			continue;
1479
1480		uncore_pmu_cancel_hrtimer(box);
1481		perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
1482		box->cpu = new_cpu;
1483	}
1484}
1485
1486static void uncore_change_context(struct intel_uncore_type **uncores,
1487				  int old_cpu, int new_cpu)
1488{
1489	for (; *uncores; uncores++)
1490		uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
1491}
1492
1493static void uncore_box_unref(struct intel_uncore_type **types, int id)
1494{
1495	struct intel_uncore_type *type;
1496	struct intel_uncore_pmu *pmu;
1497	struct intel_uncore_box *box;
1498	int i;
1499
1500	for (; *types; types++) {
1501		type = *types;
1502		pmu = type->pmus;
1503		for (i = 0; i < type->num_boxes; i++, pmu++) {
1504			box = pmu->boxes[id];
1505			if (box && atomic_dec_return(&box->refcnt) == 0)
1506				uncore_box_exit(box);
1507		}
1508	}
1509}
1510
1511static int uncore_event_cpu_offline(unsigned int cpu)
1512{
1513	int die, target;
1514
1515	/* Check if exiting cpu is used for collecting uncore events */
1516	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1517		goto unref;
1518	/* Find a new cpu to collect uncore events */
1519	target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
1520
1521	/* Migrate uncore events to the new target */
1522	if (target < nr_cpu_ids)
1523		cpumask_set_cpu(target, &uncore_cpu_mask);
1524	else
1525		target = -1;
1526
1527	uncore_change_context(uncore_msr_uncores, cpu, target);
1528	uncore_change_context(uncore_mmio_uncores, cpu, target);
1529	uncore_change_context(uncore_pci_uncores, cpu, target);
1530
1531unref:
1532	/* Clear the references */
1533	die = topology_logical_die_id(cpu);
1534	uncore_box_unref(uncore_msr_uncores, die);
1535	uncore_box_unref(uncore_mmio_uncores, die);
1536	return 0;
1537}
1538
1539static int allocate_boxes(struct intel_uncore_type **types,
1540			 unsigned int die, unsigned int cpu)
1541{
1542	struct intel_uncore_box *box, *tmp;
1543	struct intel_uncore_type *type;
1544	struct intel_uncore_pmu *pmu;
1545	LIST_HEAD(allocated);
1546	int i;
1547
1548	/* Try to allocate all required boxes */
1549	for (; *types; types++) {
1550		type = *types;
1551		pmu = type->pmus;
1552		for (i = 0; i < type->num_boxes; i++, pmu++) {
1553			if (pmu->boxes[die])
1554				continue;
1555			box = uncore_alloc_box(type, cpu_to_node(cpu));
1556			if (!box)
1557				goto cleanup;
1558			box->pmu = pmu;
1559			box->dieid = die;
1560			list_add(&box->active_list, &allocated);
1561		}
1562	}
1563	/* Install them in the pmus */
1564	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1565		list_del_init(&box->active_list);
1566		box->pmu->boxes[die] = box;
1567	}
1568	return 0;
1569
1570cleanup:
1571	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1572		list_del_init(&box->active_list);
1573		kfree(box);
1574	}
1575	return -ENOMEM;
1576}
1577
1578static int uncore_box_ref(struct intel_uncore_type **types,
1579			  int id, unsigned int cpu)
1580{
1581	struct intel_uncore_type *type;
1582	struct intel_uncore_pmu *pmu;
1583	struct intel_uncore_box *box;
1584	int i, ret;
1585
1586	ret = allocate_boxes(types, id, cpu);
1587	if (ret)
1588		return ret;
1589
1590	for (; *types; types++) {
1591		type = *types;
1592		pmu = type->pmus;
1593		for (i = 0; i < type->num_boxes; i++, pmu++) {
1594			box = pmu->boxes[id];
1595			if (box && atomic_inc_return(&box->refcnt) == 1)
1596				uncore_box_init(box);
1597		}
1598	}
1599	return 0;
1600}
1601
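    /*
     * CPU hotplug online callback: take a reference on (and, for the first
     * CPU of a die, initialize) the MSR and MMIO boxes, and make this CPU
     * the uncore event collector for the die if there is none yet.
     */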
1602static int uncore_event_cpu_online(unsigned int cpu)
1603{
1604	int die, target, msr_ret, mmio_ret;
1605
1606	die = topology_logical_die_id(cpu);
1607	msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
1608	mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
1609	if (msr_ret && mmio_ret)
1610		return -ENOMEM;
1611
1612	/*
1613	 * Check if there is an online cpu in the package
1614	 * which collects uncore events already.
1615	 */
1616	target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
1617	if (target < nr_cpu_ids)
1618		return 0;
1619
1620	cpumask_set_cpu(cpu, &uncore_cpu_mask);
1621
1622	if (!msr_ret)
1623		uncore_change_context(uncore_msr_uncores, -1, cpu);
1624	if (!mmio_ret)
1625		uncore_change_context(uncore_mmio_uncores, -1, cpu);
1626	uncore_change_context(uncore_pci_uncores, -1, cpu);
1627	return 0;
1628}
1629
1630static int __init type_pmu_register(struct intel_uncore_type *type)
1631{
1632	int i, ret;
1633
1634	for (i = 0; i < type->num_boxes; i++) {
1635		ret = uncore_pmu_register(&type->pmus[i]);
1636		if (ret)
1637			return ret;
1638	}
1639	return 0;
1640}
1641
1642static int __init uncore_msr_pmus_register(void)
1643{
1644	struct intel_uncore_type **types = uncore_msr_uncores;
1645	int ret;
1646
1647	for (; *types; types++) {
1648		ret = type_pmu_register(*types);
1649		if (ret)
1650			return ret;
1651	}
1652	return 0;
1653}
1654
1655static int __init uncore_cpu_init(void)
1656{
1657	int ret;
1658
1659	ret = uncore_types_init(uncore_msr_uncores, true);
1660	if (ret)
1661		goto err;
1662
1663	ret = uncore_msr_pmus_register();
1664	if (ret)
1665		goto err;
1666	return 0;
1667err:
1668	uncore_types_exit(uncore_msr_uncores);
1669	uncore_msr_uncores = empty_uncore;
1670	return ret;
1671}
1672
1673static int __init uncore_mmio_init(void)
1674{
1675	struct intel_uncore_type **types = uncore_mmio_uncores;
1676	int ret;
1677
1678	ret = uncore_types_init(types, true);
1679	if (ret)
1680		goto err;
1681
1682	for (; *types; types++) {
1683		ret = type_pmu_register(*types);
1684		if (ret)
1685			goto err;
1686	}
1687	return 0;
1688err:
1689	uncore_types_exit(uncore_mmio_uncores);
1690	uncore_mmio_uncores = empty_uncore;
1691	return ret;
1692}
1693
1694struct intel_uncore_init_fun {
1695	void	(*cpu_init)(void);
1696	int	(*pci_init)(void);
1697	void	(*mmio_init)(void);
1698	/* Discovery table is required */
1699	bool	use_discovery;
1700	/* The units in the discovery table should be ignored. */
1701	int	*uncore_units_ignore;
1702};
1703
1704static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
1705	.cpu_init = nhm_uncore_cpu_init,
1706};
1707
1708static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
1709	.cpu_init = snb_uncore_cpu_init,
1710	.pci_init = snb_uncore_pci_init,
1711};
1712
1713static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
1714	.cpu_init = snb_uncore_cpu_init,
1715	.pci_init = ivb_uncore_pci_init,
1716};
1717
1718static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
1719	.cpu_init = snb_uncore_cpu_init,
1720	.pci_init = hsw_uncore_pci_init,
1721};
1722
1723static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
1724	.cpu_init = snb_uncore_cpu_init,
1725	.pci_init = bdw_uncore_pci_init,
1726};
1727
1728static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
1729	.cpu_init = snbep_uncore_cpu_init,
1730	.pci_init = snbep_uncore_pci_init,
1731};
1732
1733static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
1734	.cpu_init = nhmex_uncore_cpu_init,
1735};
1736
1737static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
1738	.cpu_init = ivbep_uncore_cpu_init,
1739	.pci_init = ivbep_uncore_pci_init,
1740};
1741
1742static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
1743	.cpu_init = hswep_uncore_cpu_init,
1744	.pci_init = hswep_uncore_pci_init,
1745};
1746
1747static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
1748	.cpu_init = bdx_uncore_cpu_init,
1749	.pci_init = bdx_uncore_pci_init,
1750};
1751
1752static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
1753	.cpu_init = knl_uncore_cpu_init,
1754	.pci_init = knl_uncore_pci_init,
1755};
1756
1757static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
1758	.cpu_init = skl_uncore_cpu_init,
1759	.pci_init = skl_uncore_pci_init,
1760};
1761
1762static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
1763	.cpu_init = skx_uncore_cpu_init,
1764	.pci_init = skx_uncore_pci_init,
1765};
1766
1767static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
1768	.cpu_init = icl_uncore_cpu_init,
1769	.pci_init = skl_uncore_pci_init,
1770};
1771
1772static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
1773	.cpu_init = tgl_uncore_cpu_init,
1774	.mmio_init = tgl_uncore_mmio_init,
1775};
1776
1777static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
1778	.cpu_init = tgl_uncore_cpu_init,
1779	.mmio_init = tgl_l_uncore_mmio_init,
1780};
1781
1782static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
1783	.cpu_init = tgl_uncore_cpu_init,
1784	.pci_init = skl_uncore_pci_init,
1785};
1786
1787static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
1788	.cpu_init = adl_uncore_cpu_init,
1789	.mmio_init = adl_uncore_mmio_init,
1790};
1791
1792static const struct intel_uncore_init_fun mtl_uncore_init __initconst = {
1793	.cpu_init = mtl_uncore_cpu_init,
1794	.mmio_init = adl_uncore_mmio_init,
1795};
1796
1797static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
1798	.cpu_init = icx_uncore_cpu_init,
1799	.pci_init = icx_uncore_pci_init,
1800	.mmio_init = icx_uncore_mmio_init,
1801};
1802
1803static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
1804	.cpu_init = snr_uncore_cpu_init,
1805	.pci_init = snr_uncore_pci_init,
1806	.mmio_init = snr_uncore_mmio_init,
1807};
1808
1809static const struct intel_uncore_init_fun spr_uncore_init __initconst = {
1810	.cpu_init = spr_uncore_cpu_init,
1811	.pci_init = spr_uncore_pci_init,
1812	.mmio_init = spr_uncore_mmio_init,
1813	.use_discovery = true,
1814	.uncore_units_ignore = spr_uncore_units_ignore,
1815};
1816
1817static const struct intel_uncore_init_fun gnr_uncore_init __initconst = {
1818	.cpu_init = gnr_uncore_cpu_init,
1819	.pci_init = gnr_uncore_pci_init,
1820	.mmio_init = gnr_uncore_mmio_init,
1821	.use_discovery = true,
1822	.uncore_units_ignore = gnr_uncore_units_ignore,
1823};
1824
1825static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
1826	.cpu_init = intel_uncore_generic_uncore_cpu_init,
1827	.pci_init = intel_uncore_generic_uncore_pci_init,
1828	.mmio_init = intel_uncore_generic_uncore_mmio_init,
1829};
1830
1831static const struct x86_cpu_id intel_uncore_match[] __initconst = {
1832	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&nhm_uncore_init),
1833	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&nhm_uncore_init),
1834	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,		&nhm_uncore_init),
1835	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,		&nhm_uncore_init),
1836	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&snb_uncore_init),
1837	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&ivb_uncore_init),
1838	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&hsw_uncore_init),
1839	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&hsw_uncore_init),
1840	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&hsw_uncore_init),
1841	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&bdw_uncore_init),
1842	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&bdw_uncore_init),
1843	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&snbep_uncore_init),
1844	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		&nhmex_uncore_init),
1845	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,		&nhmex_uncore_init),
1846	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&ivbep_uncore_init),
1847	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&hswep_uncore_init),
1848	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&bdx_uncore_init),
1849	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&bdx_uncore_init),
1850	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&knl_uncore_init),
1851	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&knl_uncore_init),
1852	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&skl_uncore_init),
1853	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&skl_uncore_init),
1854	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&skx_uncore_init),
1855	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&skl_uncore_init),
1856	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&skl_uncore_init),
1857	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,		&skl_uncore_init),
1858	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,		&skl_uncore_init),
1859	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,		&icl_uncore_init),
1860	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI,	&icl_uncore_init),
1861	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,		&icl_uncore_init),
1862	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&icx_uncore_init),
1863	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&icx_uncore_init),
1864	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,		&tgl_l_uncore_init),
1865	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,		&tgl_uncore_init),
1866	X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,		&rkl_uncore_init),
1867	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&adl_uncore_init),
1868	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&adl_uncore_init),
1869	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,		&adl_uncore_init),
1870	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,	&adl_uncore_init),
1871	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S,	&adl_uncore_init),
1872	X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE,		&mtl_uncore_init),
1873	X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L,	&mtl_uncore_init),
1874	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&spr_uncore_init),
1875	X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X,	&spr_uncore_init),
1876	X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X,	&gnr_uncore_init),
1877	X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D,	&gnr_uncore_init),
1878	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&snr_uncore_init),
1879	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,	&adl_uncore_init),
1880	X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X,	&gnr_uncore_init),
1881	X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT,	&gnr_uncore_init),
1882	{},
1883};
1884MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
1885
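    /*
     * Module init: bail out under a hypervisor, pick the per-model init
     * functions from intel_uncore_match (or fall back to the generic
     * discovery-table based support), initialize the PCI, MSR and MMIO
     * uncore units, and install the CPU hotplug callbacks that assign the
     * per-die collector CPUs.
     */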
1886static int __init intel_uncore_init(void)
1887{
1888	const struct x86_cpu_id *id;
1889	struct intel_uncore_init_fun *uncore_init;
1890	int pret = 0, cret = 0, mret = 0, ret;
1891
1892	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
1893		return -ENODEV;
1894
1895	__uncore_max_dies =
1896		topology_max_packages() * topology_max_dies_per_package();
1897
1898	id = x86_match_cpu(intel_uncore_match);
1899	if (!id) {
1900		if (!uncore_no_discover && intel_uncore_has_discovery_tables(NULL))
1901			uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
1902		else
1903			return -ENODEV;
1904	} else {
1905		uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
1906		if (uncore_no_discover && uncore_init->use_discovery)
1907			return -ENODEV;
1908		if (uncore_init->use_discovery &&
1909		    !intel_uncore_has_discovery_tables(uncore_init->uncore_units_ignore))
1910			return -ENODEV;
1911	}
1912
1913	if (uncore_init->pci_init) {
1914		pret = uncore_init->pci_init();
1915		if (!pret)
1916			pret = uncore_pci_init();
1917	}
1918
1919	if (uncore_init->cpu_init) {
1920		uncore_init->cpu_init();
1921		cret = uncore_cpu_init();
1922	}
1923
1924	if (uncore_init->mmio_init) {
1925		uncore_init->mmio_init();
1926		mret = uncore_mmio_init();
1927	}
1928
1929	if (cret && pret && mret) {
1930		ret = -ENODEV;
1931		goto free_discovery;
1932	}
1933
1934	/* Install hotplug callbacks to set up the targets for each package */
1935	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
1936				"perf/x86/intel/uncore:online",
1937				uncore_event_cpu_online,
1938				uncore_event_cpu_offline);
1939	if (ret)
1940		goto err;
1941	return 0;
1942
1943err:
1944	uncore_types_exit(uncore_msr_uncores);
1945	uncore_types_exit(uncore_mmio_uncores);
1946	uncore_pci_exit();
1947free_discovery:
1948	intel_uncore_clear_discovery_tables();
1949	return ret;
1950}
1951module_init(intel_uncore_init);
1952
1953static void __exit intel_uncore_exit(void)
1954{
1955	cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
1956	uncore_types_exit(uncore_msr_uncores);
1957	uncore_types_exit(uncore_mmio_uncores);
1958	uncore_pci_exit();
1959	intel_uncore_clear_discovery_tables();
1960}
1961module_exit(intel_uncore_exit);