/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/perf_event.h>
#include <linux/pm_runtime.h>

#include "i915_drv.h"
#include "i915_pmu.h"
#include "intel_ringbuffer.h"

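/*
 * This file implements the "i915" perf PMU, exposing per-engine
 * busy/wait/semaphore counters as well as GPU frequency, interrupt and RC6
 * residency counters through the core perf_event interface. Event names and
 * formats are published via the sysfs attribute groups registered below,
 * under the "i915" event source. A rough usage sketch, assuming the standard
 * perf tool and events supported on the running platform (see config_status()
 * and engine_event_status()):
 *
 *   perf stat -e i915/actual-frequency/ -e i915/rc6-residency/ -a sleep 1
 *
 * Per-engine events are named "<engine>-busy", "<engine>-sema" and
 * "<engine>-wait", see create_event_attributes().
 */
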
/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)

static cpumask_t i915_pmu_cpumask;

static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(u64 config)
{
	return config < __I915_PMU_OTHER(0);
}

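/*
 * In the i915->pmu.enable bitmask, engine sampling events occupy the low
 * ENGINE_SAMPLE_BITS bits (one bit per sample type, shared by all engines),
 * while the remaining events (frequency, interrupts, RC6) follow above them
 * in the order of their __I915_PMU_OTHER() config values.
 */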
static unsigned int config_enabled_bit(u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
}

static u64 config_enabled_mask(u64 config)
{
	return BIT_ULL(config_enabled_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

static unsigned int event_enabled_bit(struct perf_event *event)
{
	return config_enabled_bit(event->attr.config);
}

static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
{
	u64 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = i915->pmu.enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
		  ENGINE_SAMPLE_MASK;

	/*
	 * When the GPU is idle per-engine counters do not need to be
	 * running so clear those bits out.
	 */
	if (!gpu_active)
		enable &= ~ENGINE_SAMPLE_MASK;
	/*
	 * Also, if software busyness tracking is available, we do not need
	 * the timer for the I915_SAMPLE_BUSY counter.
	 *
	 * Use RCS as a proxy for all engines.
	 */
	else if (intel_engine_supports_stats(i915->engine[RCS]))
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}

void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
	spin_unlock_irq(&i915->pmu.lock);
}

static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
{
	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
		i915->pmu.timer_enabled = true;
		hrtimer_start_range_ns(&i915->pmu.timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}

void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(i915);
	spin_unlock_irq(&i915->pmu.lock);
}

static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
{
	if (!fw)
		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);

	return true;
}

static void
update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
{
	sample->cur += mul_u32_u32(val, unit);
}

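/*
 * Sample the engines once per timer tick: an engine is accounted as busy
 * while its current seqno trails the last submitted seqno; if it is busy
 * and the WAIT/SEMA events are enabled, RING_CTL is also read under
 * forcewake to sample the RING_WAIT and RING_WAIT_SEMAPHORE states.
 */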
static void engines_sample(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	bool fw = false;

	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	if (!dev_priv->gt.awake)
		return;

	if (!intel_runtime_pm_get_if_in_use(dev_priv))
		return;

	for_each_engine(engine, dev_priv, id) {
		u32 current_seqno = intel_engine_get_seqno(engine);
		u32 last_seqno = intel_engine_last_submit(engine);
		u32 val;

		val = !i915_seqno_passed(current_seqno, last_seqno);

		update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
			      PERIOD, val);

		if (val && (engine->pmu.enable &
		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
			fw = grab_forcewake(dev_priv, fw);

			val = I915_READ_FW(RING_CTL(engine->mmio_base));
		} else {
			val = 0;
		}

		update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
			      PERIOD, !!(val & RING_WAIT));

		update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
			      PERIOD, !!(val & RING_WAIT_SEMAPHORE));
	}

	if (fw)
		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

	intel_runtime_pm_put(dev_priv);
}

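/*
 * Frequency counters accumulate one sample of the current frequency (in MHz)
 * per timer tick; __i915_pmu_event_read() divides the running total by
 * FREQUENCY (the tick rate in Hz), so over one second of sampling the
 * exported counter advances by the average frequency in MHz.
 */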
static void frequency_sample(struct drm_i915_private *dev_priv)
{
	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
		u32 val;

		val = dev_priv->gt_pm.rps.cur_freq;
		if (dev_priv->gt.awake &&
		    intel_runtime_pm_get_if_in_use(dev_priv)) {
			val = intel_get_cagf(dev_priv,
					     I915_READ_NOTRACE(GEN6_RPSTAT1));
			intel_runtime_pm_put(dev_priv);
		}

		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
			      1, intel_gpu_freq(dev_priv, val));
	}

	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
			      intel_gpu_freq(dev_priv,
					     dev_priv->gt_pm.rps.cur_freq));
	}
}

static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);

	if (!READ_ONCE(i915->pmu.timer_enabled))
		return HRTIMER_NORESTART;

	engines_sample(i915);
	frequency_sample(i915);

	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
	return HRTIMER_RESTART;
}

static u64 count_interrupts(struct drm_i915_private *i915)
{
	/* open-coded kstat_irqs() */
	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
	u64 sum = 0;
	int cpu;

	if (!desc || !desc->kstat_irqs)
		return 0;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);

	return sum;
}

static void engine_event_destroy(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915,
					  engine_event_class(event),
					  engine_event_instance(event));
	if (WARN_ON_ONCE(!engine))
		return;

	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
	    intel_engine_supports_stats(engine))
		intel_disable_engine_stats(engine);
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
	WARN_ON(event->parent);

	if (is_engine_event(event))
		engine_event_destroy(event);
}

static int
engine_event_status(struct intel_engine_cs *engine,
		    enum drm_i915_pmu_engine_sample sample)
{
	switch (sample) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (INTEL_GEN(engine->i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int
config_status(struct drm_i915_private *i915, u64 config)
{
	switch (config) {
	case I915_PMU_ACTUAL_FREQUENCY:
		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
			/* Requires a mutex for sampling! */
			return -ENODEV;
		/* Fall-through. */
	case I915_PMU_REQUESTED_FREQUENCY:
		if (INTEL_GEN(i915) < 6)
			return -ENODEV;
		break;
	case I915_PMU_INTERRUPTS:
		break;
	case I915_PMU_RC6_RESIDENCY:
		if (!HAS_RC6(i915))
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;
	u8 sample;
	int ret;

	engine = intel_engine_lookup_user(i915, engine_event_class(event),
					  engine_event_instance(event));
	if (!engine)
		return -ENODEV;

	sample = engine_event_sample(event);
	ret = engine_event_status(engine, sample);
	if (ret)
		return ret;

	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
		ret = intel_enable_engine_stats(engine);

	return ret;
}

static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event))
		ret = engine_event_init(event);
	else
		ret = config_status(i915, event->attr.config);
	if (ret)
		return ret;

	if (!event->parent)
		event->destroy = i915_pmu_event_destroy;

	return 0;
}

static u64 __get_rc6(struct drm_i915_private *i915)
{
	u64 val;

	val = intel_rc6_residency_ns(i915,
				     IS_VALLEYVIEW(i915) ?
				     VLV_GT_RENDER_RC6 :
				     GEN6_GT_GFX_RC6);

	if (HAS_RC6p(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);

	if (HAS_RC6pp(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);

	return val;
}

static u64 get_rc6(struct drm_i915_private *i915)
{
#if IS_ENABLED(CONFIG_PM)
	unsigned long flags;
	u64 val;

	if (intel_runtime_pm_get_if_in_use(i915)) {
		val = __get_rc6(i915);
		intel_runtime_pm_put(i915);

		/*
		 * If we are coming back from being runtime suspended we must
		 * be careful not to report a larger value than returned
		 * previously.
		 */

		spin_lock_irqsave(&i915->pmu.lock, flags);

		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
			i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		}

		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	} else {
		struct pci_dev *pdev = i915->drm.pdev;
		struct device *kdev = &pdev->dev;

		/*
		 * We are runtime suspended.
		 *
		 * Report the delta from when the device was suspended to now,
		 * on top of the last known real value, as the approximated RC6
		 * counter value.
		 */
		spin_lock_irqsave(&i915->pmu.lock, flags);
		spin_lock(&kdev->power.lock);

		/*
		 * Although in the branch above intel_runtime_pm_get_if_in_use
		 * failed to get the runtime PM reference, we cannot assume we
		 * are in runtime suspend, since we could either: a) race with
		 * coming out of it before we took the power.lock, or b) be in
		 * one of the other states which can bring us here.
		 *
		 * We need to double-check that we are indeed currently runtime
		 * suspended and, if not, we cannot do better than report the
		 * last known RC6 value.
		 */
		if (kdev->power.runtime_status == RPM_SUSPENDED) {
			if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
				i915->pmu.suspended_jiffies_last =
						  kdev->power.suspended_jiffies;

			val = kdev->power.suspended_jiffies -
			      i915->pmu.suspended_jiffies_last;
			val += jiffies - kdev->power.accounting_timestamp;

			val = jiffies_to_nsecs(val);
			val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;

			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
		} else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
		}

		spin_unlock(&kdev->power.lock);
		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	}

	return val;
#else
	return __get_rc6(i915);
#endif
}

static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (WARN_ON_ONCE(!engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   intel_engine_supports_stats(engine)) {
			val = ktime_to_ns(intel_engine_get_busy_time(engine));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
				   FREQUENCY);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
				   FREQUENCY);
			break;
		case I915_PMU_INTERRUPTS:
			val = count_interrupts(i915);
			break;
		case I915_PMU_RC6_RESIDENCY:
			val = get_rc6(i915);
			break;
		}
	}

	return val;
}

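/*
 * Counters are free-running; each read publishes the delta between the new
 * value and the previously recorded one, retrying the cmpxchg if another
 * reader updated prev_count concurrently.
 */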
static void i915_pmu_event_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;

again:
	prev = local64_read(&hwc->prev_count);
	new = __i915_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}

static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
	i915->pmu.enable |= BIT_ULL(bit);
	i915->pmu.enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(i915);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		engine->pmu.enable |= BIT(sample);

		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
		engine->pmu.enable_count[sample]++;
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);

	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners. Even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}

static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0)
			engine->pmu.enable &= ~BIT(sample);
	}

	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--i915->pmu.enable_count[bit] == 0) {
		i915->pmu.enable &= ~BIT_ULL(bit);
		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);
}

static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	i915_pmu_enable(event);
	event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}

struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static struct attribute_group i915_pmu_events_attr_group = {
	.name = "events",
	/* Patch in attrs at runtime. */
};

static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
			  struct device_attribute *attr,
			  char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};

static const struct attribute_group *i915_pmu_attr_groups[] = {
	&i915_pmu_format_attr_group,
	&i915_pmu_events_attr_group,
	&i915_pmu_cpumask_attr_group,
	NULL
};

#define __event(__config, __name, __unit) \
{ \
	.config = (__config), \
	.name = (__name), \
	.unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}

static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = i915_pmu_event_show;
	attr->val = config;

	return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}

static struct attribute **
create_event_attributes(struct drm_i915_private *i915)
{
	static const struct {
		u64 config;
		const char *name;
		const char *unit;
	} events[] = {
		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
	};
	static const struct {
		enum drm_i915_pmu_engine_sample sample;
		char *name;
	} engine_events[] = {
		__engine_event(I915_SAMPLE_BUSY, "busy"),
		__engine_event(I915_SAMPLE_SEMA, "sema"),
		__engine_event(I915_SAMPLE_WAIT, "wait"),
	};
	unsigned int count = 0;
	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
	struct attribute **attr = NULL, **attr_iter;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int i;

	/* Count how many counters we will be exposing. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		if (!config_status(i915, events[i].config))
			count++;
	}

	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			if (!engine_event_status(engine,
						 engine_events[i].sample))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
	if (!i915_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/* Max one pointer of each attribute type plus a termination entry. */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	i915_iter = i915_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	/* Initialize supported non-engine counters. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		char *str;

		if (config_status(i915, events[i].config))
			continue;

		str = kstrdup(events[i].name, GFP_KERNEL);
		if (!str)
			goto err;

		*attr_iter++ = &i915_iter->attr.attr;
		i915_iter = add_i915_attr(i915_iter, str, events[i].config);

		if (events[i].unit) {
			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
		}
	}

	/* Initialize supported engine counters. */
	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			char *str;

			if (engine_event_status(engine,
						engine_events[i].sample))
				continue;

			str = kasprintf(GFP_KERNEL, "%s-%s",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter =
				add_i915_attr(i915_iter, str,
					      __I915_PMU_ENGINE(engine->uabi_class,
								engine->instance,
								engine_events[i].sample));

			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
		}
	}

	i915->pmu.i915_attr = i915_attr;
	i915->pmu.pmu_attr = pmu_attr;

	return attr;

err:;
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(i915_attr);
	kfree(pmu_attr);

	return NULL;
}

static void free_event_attributes(struct drm_i915_private *i915)
{
	struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(i915_pmu_events_attr_group.attrs);
	kfree(i915->pmu.i915_attr);
	kfree(i915->pmu.pmu_attr);

	i915_pmu_events_attr_group.attrs = NULL;
	i915->pmu.i915_attr = NULL;
	i915->pmu.pmu_attr = NULL;
}

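/*
 * i915 counters describe global GPU state rather than per-task activity, so
 * events are restricted to a single designated CPU, advertised to userspace
 * via the "cpumask" attribute. The hotplug callbacks below elect that CPU
 * and migrate the perf context to a sibling if it goes offline.
 */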
static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);

	GEM_BUG_ON(!pmu->base.event_init);

	/* Select the first online CPU as a designated reader. */
	if (!cpumask_weight(&i915_pmu_cpumask))
		cpumask_set_cpu(cpu, &i915_pmu_cpumask);

	return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
	unsigned int target;

	GEM_BUG_ON(!pmu->base.event_init);

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			perf_pmu_migrate_context(&pmu->base, cpu, target);
		}
	}

	return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
{
	enum cpuhp_state slot;
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		return ret;

	slot = ret;
	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
	if (ret) {
		cpuhp_remove_multi_state(slot);
		return ret;
	}

	cpuhp_slot = slot;
	return 0;
}

static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
{
	WARN_ON(cpuhp_slot == CPUHP_INVALID);
	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
	cpuhp_remove_multi_state(cpuhp_slot);
}

void i915_pmu_register(struct drm_i915_private *i915)
{
	int ret;

	if (INTEL_GEN(i915) <= 2) {
		DRM_INFO("PMU not supported for this GPU.");
		return;
	}

	i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
	if (!i915_pmu_events_attr_group.attrs) {
		ret = -ENOMEM;
		goto err;
	}

	i915->pmu.base.attr_groups	= i915_pmu_attr_groups;
	i915->pmu.base.task_ctx_nr	= perf_invalid_context;
	i915->pmu.base.event_init	= i915_pmu_event_init;
	i915->pmu.base.add		= i915_pmu_event_add;
	i915->pmu.base.del		= i915_pmu_event_del;
	i915->pmu.base.start		= i915_pmu_event_start;
	i915->pmu.base.stop		= i915_pmu_event_stop;
	i915->pmu.base.read		= i915_pmu_event_read;
	i915->pmu.base.event_idx	= i915_pmu_event_event_idx;

	spin_lock_init(&i915->pmu.lock);
	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	i915->pmu.timer.function = i915_sample;

	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
	if (ret)
		goto err;

	ret = i915_pmu_register_cpuhp_state(i915);
	if (ret)
		goto err_unreg;

	return;

err_unreg:
	perf_pmu_unregister(&i915->pmu.base);
err:
	i915->pmu.base.event_init = NULL;
	free_event_attributes(i915);
	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
}

void i915_pmu_unregister(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	WARN_ON(i915->pmu.enable);

	hrtimer_cancel(&i915->pmu.timer);

	i915_pmu_unregister_cpuhp_state(i915);

	perf_pmu_unregister(&i915->pmu.base);
	i915->pmu.base.event_init = NULL;
	free_event_attributes(i915);
}