// SPDX-License-Identifier: GPL-2.0-only
/*
 * Support Intel IOMMU PerfMon
 * Copyright(c) 2023 Intel Corporation.
 */
#define pr_fmt(fmt)	"DMAR: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/dmar.h>
#include "iommu.h"
#include "perfmon.h"

PMU_FORMAT_ATTR(event,		"config:0-27");		/* ES: Events Select */
PMU_FORMAT_ATTR(event_group,	"config:28-31");	/* EGI: Event Group Index */

static struct attribute *iommu_pmu_format_attrs[] = {
	&format_attr_event_group.attr,
	&format_attr_event.attr,
	NULL
};

static struct attribute_group iommu_pmu_format_attr_group = {
	.name = "format",
	.attrs = iommu_pmu_format_attrs,
};

/* The available events are added in attr_update later */
static struct attribute *attrs_empty[] = {
	NULL
};

static struct attribute_group iommu_pmu_events_attr_group = {
	.name = "events",
	.attrs = attrs_empty,
};

static const struct attribute_group *iommu_pmu_attr_groups[] = {
	&iommu_pmu_format_attr_group,
	&iommu_pmu_events_attr_group,
	NULL
};

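/*
 * With the attribute groups above, the registered PMU is expected to show
 * up under /sys/bus/event_source/devices/<pmu name>/ with "format" and
 * "events" subdirectories.  A usage sketch, assuming the unit is
 * enumerated as "dmar0":
 *
 *   perf stat -e dmar0/event_group=0x0,event=0x1/ -a sleep 1
 *
 * which selects event group 0, event select 0x1 (iommu_clocks).
 */
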
static inline struct iommu_pmu *dev_to_iommu_pmu(struct device *dev)
{
	/*
	 * The perf_event creates its own dev for each PMU.
	 * See pmu_dev_alloc()
	 */
	return container_of(dev_get_drvdata(dev), struct iommu_pmu, pmu);
}

#define IOMMU_PMU_ATTR(_name, _format, _filter)				\
	PMU_FORMAT_ATTR(_name, _format);				\
									\
static struct attribute *_name##_attr[] = {				\
	&format_attr_##_name.attr,					\
	NULL								\
};									\
									\
static umode_t								\
_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i)	\
{									\
	struct device *dev = kobj_to_dev(kobj);				\
	struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev);		\
									\
	if (!iommu_pmu)							\
		return 0;						\
	return (iommu_pmu->filter & _filter) ? attr->mode : 0;		\
}									\
									\
static struct attribute_group _name = {					\
	.name		= "format",					\
	.attrs		= _name##_attr,					\
	.is_visible	= _name##_is_visible,				\
};

IOMMU_PMU_ATTR(filter_requester_id_en,	"config1:0",		IOMMU_PMU_FILTER_REQUESTER_ID);
IOMMU_PMU_ATTR(filter_domain_en,	"config1:1",		IOMMU_PMU_FILTER_DOMAIN);
IOMMU_PMU_ATTR(filter_pasid_en,		"config1:2",		IOMMU_PMU_FILTER_PASID);
IOMMU_PMU_ATTR(filter_ats_en,		"config1:3",		IOMMU_PMU_FILTER_ATS);
IOMMU_PMU_ATTR(filter_page_table_en,	"config1:4",		IOMMU_PMU_FILTER_PAGE_TABLE);
IOMMU_PMU_ATTR(filter_requester_id,	"config1:16-31",	IOMMU_PMU_FILTER_REQUESTER_ID);
IOMMU_PMU_ATTR(filter_domain,		"config1:32-47",	IOMMU_PMU_FILTER_DOMAIN);
IOMMU_PMU_ATTR(filter_pasid,		"config2:0-21",		IOMMU_PMU_FILTER_PASID);
IOMMU_PMU_ATTR(filter_ats,		"config2:24-28",	IOMMU_PMU_FILTER_ATS);
IOMMU_PMU_ATTR(filter_page_table,	"config2:32-36",	IOMMU_PMU_FILTER_PAGE_TABLE);

#define iommu_pmu_en_requester_id(e)		((e) & 0x1)
#define iommu_pmu_en_domain(e)			(((e) >> 1) & 0x1)
#define iommu_pmu_en_pasid(e)			(((e) >> 2) & 0x1)
#define iommu_pmu_en_ats(e)			(((e) >> 3) & 0x1)
#define iommu_pmu_en_page_table(e)		(((e) >> 4) & 0x1)
#define iommu_pmu_get_requester_id(filter)	(((filter) >> 16) & 0xffff)
#define iommu_pmu_get_domain(filter)		(((filter) >> 32) & 0xffff)
#define iommu_pmu_get_pasid(filter)		((filter) & 0x3fffff)
#define iommu_pmu_get_ats(filter)		(((filter) >> 24) & 0x1f)
#define iommu_pmu_get_page_table(filter)	(((filter) >> 32) & 0x1f)

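/*
 * Filters are expressed through config1/config2 as laid out above: bits
 * 0-4 of config1 enable the individual filters and the remaining fields
 * carry the match values.  A sketch of a filtered count, assuming a
 * "dmar0" PMU and a device at 0000:02:00.0 (requester ID 0x200):
 *
 *   perf stat -e dmar0/iommu_requests,filter_requester_id_en=1,filter_requester_id=0x200/ -a sleep 1
 */
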
#define iommu_pmu_set_filter(_name, _config, _filter, _idx, _econfig)		\
{										\
	if ((iommu_pmu->filter & _filter) && iommu_pmu_en_##_name(_econfig)) {	\
		dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET +	\
			    IOMMU_PMU_CFG_SIZE +				\
			    (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET,	\
			    iommu_pmu_get_##_name(_config) | IOMMU_PMU_FILTER_EN);\
	}									\
}

#define iommu_pmu_clear_filter(_filter, _idx)					\
{										\
	if (iommu_pmu->filter & _filter) {					\
		dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET +	\
			    IOMMU_PMU_CFG_SIZE +				\
			    (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET,	\
			    0);							\
	}									\
}

/*
 * Define the event attr related functions
 * Input: _name: event attr name
 *        _string: string of the event in sysfs
 *        _g_idx: event group encoding
 *        _event: event encoding
 */
#define IOMMU_PMU_EVENT_ATTR(_name, _string, _g_idx, _event)			\
	PMU_EVENT_ATTR_STRING(_name, event_attr_##_name, _string)		\
										\
static struct attribute *_name##_attr[] = {					\
	&event_attr_##_name.attr.attr,						\
	NULL									\
};										\
										\
static umode_t									\
_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i)		\
{										\
	struct device *dev = kobj_to_dev(kobj);					\
	struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev);			\
										\
	if (!iommu_pmu)								\
		return 0;							\
	return (iommu_pmu->evcap[_g_idx] & _event) ? attr->mode : 0;		\
}										\
										\
static struct attribute_group _name = {						\
	.name		= "events",						\
	.attrs		= _name##_attr,						\
	.is_visible	= _name##_is_visible,					\
};

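/*
 * Each definition below becomes an "events" attribute whose visibility is
 * gated by the matching bit in the enumerated event capability (evcap) of
 * its group.  For example, iommu_clocks is only exported when bit 0x001 of
 * evcap[0] is set, and can then be used by name, e.g.
 * "perf stat -e dmar0/iommu_clocks/ -a" (PMU name assumed).
 */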
IOMMU_PMU_EVENT_ATTR(iommu_clocks,		"event_group=0x0,event=0x001", 0x0, 0x001)
IOMMU_PMU_EVENT_ATTR(iommu_requests,		"event_group=0x0,event=0x002", 0x0, 0x002)
IOMMU_PMU_EVENT_ATTR(pw_occupancy,		"event_group=0x0,event=0x004", 0x0, 0x004)
IOMMU_PMU_EVENT_ATTR(ats_blocked,		"event_group=0x0,event=0x008", 0x0, 0x008)
IOMMU_PMU_EVENT_ATTR(iommu_mrds,		"event_group=0x1,event=0x001", 0x1, 0x001)
IOMMU_PMU_EVENT_ATTR(iommu_mem_blocked,		"event_group=0x1,event=0x020", 0x1, 0x020)
IOMMU_PMU_EVENT_ATTR(pg_req_posted,		"event_group=0x1,event=0x040", 0x1, 0x040)
IOMMU_PMU_EVENT_ATTR(ctxt_cache_lookup,		"event_group=0x2,event=0x001", 0x2, 0x001)
IOMMU_PMU_EVENT_ATTR(ctxt_cache_hit,		"event_group=0x2,event=0x002", 0x2, 0x002)
IOMMU_PMU_EVENT_ATTR(pasid_cache_lookup,	"event_group=0x2,event=0x004", 0x2, 0x004)
IOMMU_PMU_EVENT_ATTR(pasid_cache_hit,		"event_group=0x2,event=0x008", 0x2, 0x008)
IOMMU_PMU_EVENT_ATTR(ss_nonleaf_lookup,		"event_group=0x2,event=0x010", 0x2, 0x010)
IOMMU_PMU_EVENT_ATTR(ss_nonleaf_hit,		"event_group=0x2,event=0x020", 0x2, 0x020)
IOMMU_PMU_EVENT_ATTR(fs_nonleaf_lookup,		"event_group=0x2,event=0x040", 0x2, 0x040)
IOMMU_PMU_EVENT_ATTR(fs_nonleaf_hit,		"event_group=0x2,event=0x080", 0x2, 0x080)
IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_lookup,	"event_group=0x2,event=0x100", 0x2, 0x100)
IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_hit,		"event_group=0x2,event=0x200", 0x2, 0x200)
IOMMU_PMU_EVENT_ATTR(iotlb_lookup,		"event_group=0x3,event=0x001", 0x3, 0x001)
IOMMU_PMU_EVENT_ATTR(iotlb_hit,			"event_group=0x3,event=0x002", 0x3, 0x002)
IOMMU_PMU_EVENT_ATTR(hpt_leaf_lookup,		"event_group=0x3,event=0x004", 0x3, 0x004)
IOMMU_PMU_EVENT_ATTR(hpt_leaf_hit,		"event_group=0x3,event=0x008", 0x3, 0x008)
IOMMU_PMU_EVENT_ATTR(int_cache_lookup,		"event_group=0x4,event=0x001", 0x4, 0x001)
IOMMU_PMU_EVENT_ATTR(int_cache_hit_nonposted,	"event_group=0x4,event=0x002", 0x4, 0x002)
IOMMU_PMU_EVENT_ATTR(int_cache_hit_posted,	"event_group=0x4,event=0x004", 0x4, 0x004)

static const struct attribute_group *iommu_pmu_attr_update[] = {
	&filter_requester_id_en,
	&filter_domain_en,
	&filter_pasid_en,
	&filter_ats_en,
	&filter_page_table_en,
	&filter_requester_id,
	&filter_domain,
	&filter_pasid,
	&filter_ats,
	&filter_page_table,
	&iommu_clocks,
	&iommu_requests,
	&pw_occupancy,
	&ats_blocked,
	&iommu_mrds,
	&iommu_mem_blocked,
	&pg_req_posted,
	&ctxt_cache_lookup,
	&ctxt_cache_hit,
	&pasid_cache_lookup,
	&pasid_cache_hit,
	&ss_nonleaf_lookup,
	&ss_nonleaf_hit,
	&fs_nonleaf_lookup,
	&fs_nonleaf_hit,
	&hpt_nonleaf_lookup,
	&hpt_nonleaf_hit,
	&iotlb_lookup,
	&iotlb_hit,
	&hpt_leaf_lookup,
	&hpt_leaf_hit,
	&int_cache_lookup,
	&int_cache_hit_nonposted,
	&int_cache_hit_posted,
	NULL
};

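/*
 * The attr_update list above relies on the per-attribute is_visible()
 * callbacks, so only the filters and events actually reported by the
 * hardware (iommu_pmu->filter and iommu_pmu->evcap) end up visible in
 * sysfs; everything else is silently hidden.
 */
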
static inline void __iomem *
iommu_event_base(struct iommu_pmu *iommu_pmu, int idx)
{
	return iommu_pmu->cntr_reg + idx * iommu_pmu->cntr_stride;
}

static inline void __iomem *
iommu_config_base(struct iommu_pmu *iommu_pmu, int idx)
{
	return iommu_pmu->cfg_reg + idx * IOMMU_PMU_CFG_OFFSET;
}

static inline struct iommu_pmu *iommu_event_to_pmu(struct perf_event *event)
{
	return container_of(event->pmu, struct iommu_pmu, pmu);
}

static inline u64 iommu_event_config(struct perf_event *event)
{
	u64 config = event->attr.config;

	return (iommu_event_select(config) << IOMMU_EVENT_CFG_ES_SHIFT) |
	       (iommu_event_group(config) << IOMMU_EVENT_CFG_EGI_SHIFT) |
	       IOMMU_EVENT_CFG_INT;
}

static inline bool is_iommu_pmu_event(struct iommu_pmu *iommu_pmu,
				      struct perf_event *event)
{
	return event->pmu == &iommu_pmu->pmu;
}

static int iommu_pmu_validate_event(struct perf_event *event)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	u32 event_group = iommu_event_group(event->attr.config);

	if (event_group >= iommu_pmu->num_eg)
		return -EINVAL;

	return 0;
}

static int iommu_pmu_validate_group(struct perf_event *event)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	struct perf_event *sibling;
	int nr = 0;

	/*
	 * All events in a group must be scheduled simultaneously.
	 * Check whether there are enough counters for all the events.
	 */
	for_each_sibling_event(sibling, event->group_leader) {
		if (!is_iommu_pmu_event(iommu_pmu, sibling) ||
		    sibling->state <= PERF_EVENT_STATE_OFF)
			continue;

		if (++nr > iommu_pmu->num_cntr)
			return -EINVAL;
	}

	return 0;
}

static int iommu_pmu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* sampling not supported */
	if (event->attr.sample_period)
		return -EINVAL;

	if (event->cpu < 0)
		return -EINVAL;

	if (iommu_pmu_validate_event(event))
		return -EINVAL;

	hwc->config = iommu_event_config(event);

	return iommu_pmu_validate_group(event);
}

static void iommu_pmu_event_update(struct perf_event *event)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	struct hw_perf_event *hwc = &event->hw;
	u64 prev_count, new_count, delta;
	int shift = 64 - iommu_pmu->cntr_width;

again:
	prev_count = local64_read(&hwc->prev_count);
	new_count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx));
	if (local64_xchg(&hwc->prev_count, new_count) != prev_count)
		goto again;

	/*
	 * The counter width is enumerated from the hardware and may be
	 * narrower than 64 bits, so always shift the raw values up to
	 * the full width before computing the delta.
	 */
	delta = (new_count << shift) - (prev_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
}

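/*
 * A worked example of the shift above, assuming a 36-bit counter
 * (shift = 28): if the counter wraps from 0xf_ffff_ffff to 0x1, then
 * (new << 28) - (prev << 28) = 0x20000000 in 64-bit arithmetic, and
 * shifting back down by 28 yields the expected delta of 2.
 */
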
static void iommu_pmu_start(struct perf_event *event, int flags)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	struct intel_iommu *iommu = iommu_pmu->iommu;
	struct hw_perf_event *hwc = &event->hw;
	u64 count;

	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	if (WARN_ON_ONCE(hwc->idx < 0 || hwc->idx >= IOMMU_PMU_IDX_MAX))
		return;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	hwc->state = 0;

	/* Always reprogram the period */
	count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx));
	local64_set((&hwc->prev_count), count);

	/*
	 * Any error from ecmd is ignored:
	 * - The existing perf_event subsystem doesn't handle the error.
	 *   Only the IOMMU PMU returns a runtime HW error. We don't want
	 *   to change the existing generic interfaces for this specific
	 *   case.
	 * - It's a corner case caused by HW, which is very unlikely to
	 *   happen. There is nothing SW can do about it.
	 * - The worst case is that the user sees <not counted> in the
	 *   perf output, which at least gives a hint.
	 */
	ecmd_submit_sync(iommu, DMA_ECMD_ENABLE, hwc->idx, 0);

	perf_event_update_userpage(event);
}

static void iommu_pmu_stop(struct perf_event *event, int flags)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	struct intel_iommu *iommu = iommu_pmu->iommu;
	struct hw_perf_event *hwc = &event->hw;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		ecmd_submit_sync(iommu, DMA_ECMD_DISABLE, hwc->idx, 0);

		iommu_pmu_event_update(event);

		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
	}
}

static inline int
iommu_pmu_validate_per_cntr_event(struct iommu_pmu *iommu_pmu,
				  int idx, struct perf_event *event)
{
	u32 event_group = iommu_event_group(event->attr.config);
	u32 select = iommu_event_select(event->attr.config);

	if (!(iommu_pmu->cntr_evcap[idx][event_group] & select))
		return -EINVAL;

	return 0;
}

static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu,
				  struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx;

	/*
	 * The counters which support limited events are usually at the end.
	 * Schedule them first to accommodate more events.
	 */
	for (idx = iommu_pmu->num_cntr - 1; idx >= 0; idx--) {
		if (test_and_set_bit(idx, iommu_pmu->used_mask))
			continue;
		/* Check per-counter event capabilities */
		if (!iommu_pmu_validate_per_cntr_event(iommu_pmu, idx, event))
			break;
		clear_bit(idx, iommu_pmu->used_mask);
	}
	if (idx < 0)
		return -EINVAL;

	iommu_pmu->event_list[idx] = event;
	hwc->idx = idx;

	/* config events */
	dmar_writeq(iommu_config_base(iommu_pmu, idx), hwc->config);

	iommu_pmu_set_filter(requester_id, event->attr.config1,
			     IOMMU_PMU_FILTER_REQUESTER_ID, idx,
			     event->attr.config1);
	iommu_pmu_set_filter(domain, event->attr.config1,
			     IOMMU_PMU_FILTER_DOMAIN, idx,
			     event->attr.config1);
	iommu_pmu_set_filter(pasid, event->attr.config2,
			     IOMMU_PMU_FILTER_PASID, idx,
			     event->attr.config1);
	iommu_pmu_set_filter(ats, event->attr.config2,
			     IOMMU_PMU_FILTER_ATS, idx,
			     event->attr.config1);
	iommu_pmu_set_filter(page_table, event->attr.config2,
			     IOMMU_PMU_FILTER_PAGE_TABLE, idx,
			     event->attr.config1);

	return 0;
}

static int iommu_pmu_add(struct perf_event *event, int flags)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	struct hw_perf_event *hwc = &event->hw;
	int ret;

	ret = iommu_pmu_assign_event(iommu_pmu, event);
	if (ret < 0)
		return ret;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (flags & PERF_EF_START)
		iommu_pmu_start(event, 0);

	return 0;
}

static void iommu_pmu_del(struct perf_event *event, int flags)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	int idx = event->hw.idx;

	iommu_pmu_stop(event, PERF_EF_UPDATE);

	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_REQUESTER_ID, idx);
	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_DOMAIN, idx);
	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PASID, idx);
	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_ATS, idx);
	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PAGE_TABLE, idx);

	iommu_pmu->event_list[idx] = NULL;
	event->hw.idx = -1;
	clear_bit(idx, iommu_pmu->used_mask);

	perf_event_update_userpage(event);
}

static void iommu_pmu_enable(struct pmu *pmu)
{
	struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu);
	struct intel_iommu *iommu = iommu_pmu->iommu;

	ecmd_submit_sync(iommu, DMA_ECMD_UNFREEZE, 0, 0);
}

static void iommu_pmu_disable(struct pmu *pmu)
{
	struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu);
	struct intel_iommu *iommu = iommu_pmu->iommu;

	ecmd_submit_sync(iommu, DMA_ECMD_FREEZE, 0, 0);
}

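/*
 * The perf core calls pmu_disable()/pmu_enable() around counter
 * scheduling, so the DMA_ECMD_FREEZE/UNFREEZE pair above is used to stop
 * and restart counting across the whole unit while events are being
 * (re)programmed.
 */
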
static void iommu_pmu_counter_overflow(struct iommu_pmu *iommu_pmu)
{
	struct perf_event *event;
	u64 status;
	int i;

	/*
	 * Two counters may overflow at almost the same time. Always check
	 * whether there are more to handle.
	 */
	while ((status = dmar_readq(iommu_pmu->overflow))) {
		for_each_set_bit(i, (unsigned long *)&status, iommu_pmu->num_cntr) {
			/*
			 * Find the assigned event of the counter.
			 * Accumulate the value into the event->count.
			 */
			event = iommu_pmu->event_list[i];
			if (!event) {
				pr_warn_once("Cannot find the assigned event for counter %d\n", i);
				continue;
			}
			iommu_pmu_event_update(event);
		}

		dmar_writeq(iommu_pmu->overflow, status);
	}
}

static irqreturn_t iommu_pmu_irq_handler(int irq, void *dev_id)
{
	struct intel_iommu *iommu = dev_id;

	if (!dmar_readl(iommu->reg + DMAR_PERFINTRSTS_REG))
		return IRQ_NONE;

	iommu_pmu_counter_overflow(iommu->pmu);

	/* Clear the status bit */
	dmar_writel(iommu->reg + DMAR_PERFINTRSTS_REG, DMA_PERFINTRSTS_PIS);

	return IRQ_HANDLED;
}

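/*
 * Note that overflow handling only folds the counter value back into
 * event->count; sampling is rejected in iommu_pmu_event_init(), so no
 * sample records are ever generated from this interrupt.
 */
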
static int __iommu_pmu_register(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu = iommu->pmu;

	iommu_pmu->pmu.name		= iommu->name;
	iommu_pmu->pmu.task_ctx_nr	= perf_invalid_context;
	iommu_pmu->pmu.event_init	= iommu_pmu_event_init;
	iommu_pmu->pmu.pmu_enable	= iommu_pmu_enable;
	iommu_pmu->pmu.pmu_disable	= iommu_pmu_disable;
	iommu_pmu->pmu.add		= iommu_pmu_add;
	iommu_pmu->pmu.del		= iommu_pmu_del;
	iommu_pmu->pmu.start		= iommu_pmu_start;
	iommu_pmu->pmu.stop		= iommu_pmu_stop;
	iommu_pmu->pmu.read		= iommu_pmu_event_update;
	iommu_pmu->pmu.attr_groups	= iommu_pmu_attr_groups;
	iommu_pmu->pmu.attr_update	= iommu_pmu_attr_update;
	iommu_pmu->pmu.capabilities	= PERF_PMU_CAP_NO_EXCLUDE;
	iommu_pmu->pmu.scope		= PERF_PMU_SCOPE_SYS_WIDE;
	iommu_pmu->pmu.module		= THIS_MODULE;

	return perf_pmu_register(&iommu_pmu->pmu, iommu_pmu->pmu.name, -1);
}

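/*
 * perf_pmu_register() is called with type -1, so the PMU gets a
 * dynamically allocated type and is looked up by name (iommu->name,
 * e.g. "dmar0") under /sys/bus/event_source/devices/.
 */
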
static inline void __iomem *
get_perf_reg_address(struct intel_iommu *iommu, u32 offset)
{
	u32 off = dmar_readl(iommu->reg + offset);

	return iommu->reg + off;
}

int alloc_iommu_pmu(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu;
	int i, j, ret;
	u64 perfcap;
	u32 cap;

	if (!ecap_pms(iommu->ecap))
		return 0;

	/* The IOMMU PMU requires the ECMD support as well */
	if (!cap_ecmds(iommu->cap))
		return -ENODEV;

	perfcap = dmar_readq(iommu->reg + DMAR_PERFCAP_REG);
	/* Performance monitoring is not supported. */
	if (!perfcap)
		return -ENODEV;

	/* Sanity check for the number of the counters and event groups */
	if (!pcap_num_cntr(perfcap) || !pcap_num_event_group(perfcap))
		return -ENODEV;

	/* The interrupt on overflow is required */
	if (!pcap_interrupt(perfcap))
		return -ENODEV;

	/* Check required Enhanced Command Capability */
	if (!ecmd_has_pmu_essential(iommu))
		return -ENODEV;

	iommu_pmu = kzalloc(sizeof(*iommu_pmu), GFP_KERNEL);
	if (!iommu_pmu)
		return -ENOMEM;

	iommu_pmu->num_cntr = pcap_num_cntr(perfcap);
	if (iommu_pmu->num_cntr > IOMMU_PMU_IDX_MAX) {
		pr_warn_once("The number of IOMMU counters %d > max(%d), clipping!",
			     iommu_pmu->num_cntr, IOMMU_PMU_IDX_MAX);
		iommu_pmu->num_cntr = IOMMU_PMU_IDX_MAX;
	}

	iommu_pmu->cntr_width = pcap_cntr_width(perfcap);
	iommu_pmu->filter = pcap_filters_mask(perfcap);
	iommu_pmu->cntr_stride = pcap_cntr_stride(perfcap);
	iommu_pmu->num_eg = pcap_num_event_group(perfcap);

	iommu_pmu->evcap = kcalloc(iommu_pmu->num_eg, sizeof(u64), GFP_KERNEL);
	if (!iommu_pmu->evcap) {
		ret = -ENOMEM;
		goto free_pmu;
	}

	/* Parse event group capabilities */
	for (i = 0; i < iommu_pmu->num_eg; i++) {
		u64 pcap;

		pcap = dmar_readq(iommu->reg + DMAR_PERFEVNTCAP_REG +
				  i * IOMMU_PMU_CAP_REGS_STEP);
		iommu_pmu->evcap[i] = pecap_es(pcap);
	}

	iommu_pmu->cntr_evcap = kcalloc(iommu_pmu->num_cntr, sizeof(u32 *), GFP_KERNEL);
	if (!iommu_pmu->cntr_evcap) {
		ret = -ENOMEM;
		goto free_pmu_evcap;
	}
	for (i = 0; i < iommu_pmu->num_cntr; i++) {
		iommu_pmu->cntr_evcap[i] = kcalloc(iommu_pmu->num_eg, sizeof(u32), GFP_KERNEL);
		if (!iommu_pmu->cntr_evcap[i]) {
			ret = -ENOMEM;
			goto free_pmu_cntr_evcap;
		}
		/*
		 * Set to the global capabilities, will adjust according
		 * to per-counter capabilities later.
		 */
		for (j = 0; j < iommu_pmu->num_eg; j++)
			iommu_pmu->cntr_evcap[i][j] = (u32)iommu_pmu->evcap[j];
	}

	iommu_pmu->cfg_reg = get_perf_reg_address(iommu, DMAR_PERFCFGOFF_REG);
	iommu_pmu->cntr_reg = get_perf_reg_address(iommu, DMAR_PERFCNTROFF_REG);
	iommu_pmu->overflow = get_perf_reg_address(iommu, DMAR_PERFOVFOFF_REG);

	/*
	 * Check per-counter capabilities. All counters should have the
	 * same capabilities on Interrupt on Overflow Support and Counter
	 * Width.
	 */
	for (i = 0; i < iommu_pmu->num_cntr; i++) {
		cap = dmar_readl(iommu_pmu->cfg_reg +
				 i * IOMMU_PMU_CFG_OFFSET +
				 IOMMU_PMU_CFG_CNTRCAP_OFFSET);
		if (!iommu_cntrcap_pcc(cap))
			continue;

		/*
		 * It's possible that some counters have a different
		 * capability because of e.g., HW bug. Check the corner
		 * case here and simply drop those counters.
		 */
		if ((iommu_cntrcap_cw(cap) != iommu_pmu->cntr_width) ||
		    !iommu_cntrcap_ios(cap)) {
			iommu_pmu->num_cntr = i;
			pr_warn("PMU counter capability inconsistent, counter number reduced to %d\n",
				iommu_pmu->num_cntr);
		}

		/* Clear the pre-defined events group */
		for (j = 0; j < iommu_pmu->num_eg; j++)
			iommu_pmu->cntr_evcap[i][j] = 0;

		/* Override with per-counter event capabilities */
		for (j = 0; j < iommu_cntrcap_egcnt(cap); j++) {
			cap = dmar_readl(iommu_pmu->cfg_reg + i * IOMMU_PMU_CFG_OFFSET +
					 IOMMU_PMU_CFG_CNTREVCAP_OFFSET +
					 (j * IOMMU_PMU_OFF_REGS_STEP));
			iommu_pmu->cntr_evcap[i][iommu_event_group(cap)] = iommu_event_select(cap);
			/*
			 * Some events may only be supported by a specific counter.
			 * Track them in the evcap as well.
			 */
			iommu_pmu->evcap[iommu_event_group(cap)] |= iommu_event_select(cap);
		}
	}

	iommu_pmu->iommu = iommu;
	iommu->pmu = iommu_pmu;

	return 0;

free_pmu_cntr_evcap:
	for (i = 0; i < iommu_pmu->num_cntr; i++)
		kfree(iommu_pmu->cntr_evcap[i]);
	kfree(iommu_pmu->cntr_evcap);
free_pmu_evcap:
	kfree(iommu_pmu->evcap);
free_pmu:
	kfree(iommu_pmu);

	return ret;
}

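/*
 * alloc_iommu_pmu() above only enumerates capabilities and allocates the
 * bookkeeping structures; the PMU is not visible to perf until
 * iommu_pmu_register() below succeeds.
 */
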
void free_iommu_pmu(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu = iommu->pmu;

	if (!iommu_pmu)
		return;

	if (iommu_pmu->evcap) {
		int i;

		for (i = 0; i < iommu_pmu->num_cntr; i++)
			kfree(iommu_pmu->cntr_evcap[i]);
		kfree(iommu_pmu->cntr_evcap);
	}
	kfree(iommu_pmu->evcap);
	kfree(iommu_pmu);
	iommu->pmu = NULL;
}

static int iommu_pmu_set_interrupt(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu = iommu->pmu;
	int irq, ret;

	irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PERF + iommu->seq_id, iommu->node, iommu);
	if (irq <= 0)
		return -EINVAL;

	snprintf(iommu_pmu->irq_name, sizeof(iommu_pmu->irq_name), "dmar%d-perf", iommu->seq_id);

	iommu->perf_irq = irq;
	ret = request_threaded_irq(irq, NULL, iommu_pmu_irq_handler,
				   IRQF_ONESHOT, iommu_pmu->irq_name, iommu);
	if (ret) {
		dmar_free_hwirq(irq);
		iommu->perf_irq = 0;
		return ret;
	}
	return 0;
}

static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu)
{
	if (!iommu->perf_irq)
		return;

	free_irq(iommu->perf_irq, iommu);
	dmar_free_hwirq(iommu->perf_irq);
	iommu->perf_irq = 0;
}

void iommu_pmu_register(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu = iommu->pmu;

	if (!iommu_pmu)
		return;

	if (__iommu_pmu_register(iommu))
		goto err;

	/* Set interrupt for overflow */
	if (iommu_pmu_set_interrupt(iommu))
		goto unregister;

	return;

unregister:
	perf_pmu_unregister(&iommu_pmu->pmu);
err:
	pr_err("Failed to register PMU for iommu (seq_id = %d)\n", iommu->seq_id);
	free_iommu_pmu(iommu);
}

void iommu_pmu_unregister(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu = iommu->pmu;

	if (!iommu_pmu)
		return;

	iommu_pmu_unset_interrupt(iommu);
	perf_pmu_unregister(&iommu_pmu->pmu);
}