// SPDX-License-Identifier: GPL-2.0-only
/*
 * Support Intel IOMMU PerfMon
 * Copyright(c) 2023 Intel Corporation.
 */
#define pr_fmt(fmt) "DMAR: " fmt
#define dev_fmt(fmt) pr_fmt(fmt)

#include <linux/dmar.h>
#include "iommu.h"
#include "perfmon.h"

PMU_FORMAT_ATTR(event, "config:0-27");		/* ES: Events Select */
PMU_FORMAT_ATTR(event_group, "config:28-31");	/* EGI: Event Group Index */

static struct attribute *iommu_pmu_format_attrs[] = {
	&format_attr_event_group.attr,
	&format_attr_event.attr,
	NULL
};

static struct attribute_group iommu_pmu_format_attr_group = {
	.name = "format",
	.attrs = iommu_pmu_format_attrs,
};
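
/*
 * Example: perf encodes raw events with the format strings above. Assuming
 * an IOMMU PMU instance named "dmar0" (the name comes from iommu->name in
 * __iommu_pmu_register() below), a raw event can be opened with e.g.
 *
 *   perf stat -e dmar0/event_group=0x2,event=0x001/
 *
 * which places 0x2 in attr.config bits 28-31 and 0x001 in bits 0-27.
 */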

/* The available events are added in attr_update later */
static struct attribute *attrs_empty[] = {
	NULL
};

static struct attribute_group iommu_pmu_events_attr_group = {
	.name = "events",
	.attrs = attrs_empty,
};

static const struct attribute_group *iommu_pmu_attr_groups[] = {
	&iommu_pmu_format_attr_group,
	&iommu_pmu_events_attr_group,
	NULL
};

static inline struct iommu_pmu *dev_to_iommu_pmu(struct device *dev)
{
	/*
	 * The perf_event creates its own dev for each PMU.
	 * See pmu_dev_alloc()
	 */
	return container_of(dev_get_drvdata(dev), struct iommu_pmu, pmu);
}

#define IOMMU_PMU_ATTR(_name, _format, _filter) \
	PMU_FORMAT_ATTR(_name, _format); \
 \
static struct attribute *_name##_attr[] = { \
	&format_attr_##_name.attr, \
	NULL \
}; \
 \
static umode_t \
_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i) \
{ \
	struct device *dev = kobj_to_dev(kobj); \
	struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev); \
 \
	if (!iommu_pmu) \
		return 0; \
	return (iommu_pmu->filter & _filter) ? attr->mode : 0; \
} \
 \
static struct attribute_group _name = { \
	.name = "format", \
	.attrs = _name##_attr, \
	.is_visible = _name##_is_visible, \
};

IOMMU_PMU_ATTR(filter_requester_id_en, "config1:0", IOMMU_PMU_FILTER_REQUESTER_ID);
IOMMU_PMU_ATTR(filter_domain_en, "config1:1", IOMMU_PMU_FILTER_DOMAIN);
IOMMU_PMU_ATTR(filter_pasid_en, "config1:2", IOMMU_PMU_FILTER_PASID);
IOMMU_PMU_ATTR(filter_ats_en, "config1:3", IOMMU_PMU_FILTER_ATS);
IOMMU_PMU_ATTR(filter_page_table_en, "config1:4", IOMMU_PMU_FILTER_PAGE_TABLE);
IOMMU_PMU_ATTR(filter_requester_id, "config1:16-31", IOMMU_PMU_FILTER_REQUESTER_ID);
IOMMU_PMU_ATTR(filter_domain, "config1:32-47", IOMMU_PMU_FILTER_DOMAIN);
IOMMU_PMU_ATTR(filter_pasid, "config2:0-21", IOMMU_PMU_FILTER_PASID);
IOMMU_PMU_ATTR(filter_ats, "config2:24-28", IOMMU_PMU_FILTER_ATS);
IOMMU_PMU_ATTR(filter_page_table, "config2:32-36", IOMMU_PMU_FILTER_PAGE_TABLE);

#define iommu_pmu_en_requester_id(e)		((e) & 0x1)
#define iommu_pmu_en_domain(e)			(((e) >> 1) & 0x1)
#define iommu_pmu_en_pasid(e)			(((e) >> 2) & 0x1)
#define iommu_pmu_en_ats(e)			(((e) >> 3) & 0x1)
#define iommu_pmu_en_page_table(e)		(((e) >> 4) & 0x1)
#define iommu_pmu_get_requester_id(filter)	(((filter) >> 16) & 0xffff)
#define iommu_pmu_get_domain(filter)		(((filter) >> 32) & 0xffff)
#define iommu_pmu_get_pasid(filter)		((filter) & 0x3fffff)
#define iommu_pmu_get_ats(filter)		(((filter) >> 24) & 0x1f)
#define iommu_pmu_get_page_table(filter)	(((filter) >> 32) & 0x1f)

#define iommu_pmu_set_filter(_name, _config, _filter, _idx, _econfig) \
{ \
	if ((iommu_pmu->filter & _filter) && iommu_pmu_en_##_name(_econfig)) { \
		dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET + \
			    IOMMU_PMU_CFG_SIZE + \
			    (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET, \
			    iommu_pmu_get_##_name(_config) | IOMMU_PMU_FILTER_EN); \
	} \
}

#define iommu_pmu_clear_filter(_filter, _idx) \
{ \
	if (iommu_pmu->filter & _filter) { \
		dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET + \
			    IOMMU_PMU_CFG_SIZE + \
			    (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET, \
			    0); \
	} \
}
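
/*
 * A worked example of the register layout the two macros above assume:
 * for counter _idx and a filter whose capability bit is bit N (so
 * ffs(_filter) - 1 == N), the register written is
 *
 *   cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET + IOMMU_PMU_CFG_SIZE +
 *   N * IOMMU_PMU_CFG_FILTERS_OFFSET
 *
 * i.e. the per-counter filter registers follow that counter's
 * configuration register and are indexed by the filter's bit position.
 */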

/*
 * Define the event attr related functions
 * Input: _name: event attr name
 *        _string: string of the event in sysfs
 *        _g_idx: event group encoding
 *        _event: event encoding
 */
#define IOMMU_PMU_EVENT_ATTR(_name, _string, _g_idx, _event) \
	PMU_EVENT_ATTR_STRING(_name, event_attr_##_name, _string) \
 \
static struct attribute *_name##_attr[] = { \
	&event_attr_##_name.attr.attr, \
	NULL \
}; \
 \
static umode_t \
_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i) \
{ \
	struct device *dev = kobj_to_dev(kobj); \
	struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev); \
 \
	if (!iommu_pmu) \
		return 0; \
	return (iommu_pmu->evcap[_g_idx] & _event) ? attr->mode : 0; \
} \
 \
static struct attribute_group _name = { \
	.name = "events", \
	.attrs = _name##_attr, \
	.is_visible = _name##_is_visible, \
};

IOMMU_PMU_EVENT_ATTR(iommu_clocks, "event_group=0x0,event=0x001", 0x0, 0x001)
IOMMU_PMU_EVENT_ATTR(iommu_requests, "event_group=0x0,event=0x002", 0x0, 0x002)
IOMMU_PMU_EVENT_ATTR(pw_occupancy, "event_group=0x0,event=0x004", 0x0, 0x004)
IOMMU_PMU_EVENT_ATTR(ats_blocked, "event_group=0x0,event=0x008", 0x0, 0x008)
IOMMU_PMU_EVENT_ATTR(iommu_mrds, "event_group=0x1,event=0x001", 0x1, 0x001)
IOMMU_PMU_EVENT_ATTR(iommu_mem_blocked, "event_group=0x1,event=0x020", 0x1, 0x020)
IOMMU_PMU_EVENT_ATTR(pg_req_posted, "event_group=0x1,event=0x040", 0x1, 0x040)
IOMMU_PMU_EVENT_ATTR(ctxt_cache_lookup, "event_group=0x2,event=0x001", 0x2, 0x001)
IOMMU_PMU_EVENT_ATTR(ctxt_cache_hit, "event_group=0x2,event=0x002", 0x2, 0x002)
IOMMU_PMU_EVENT_ATTR(pasid_cache_lookup, "event_group=0x2,event=0x004", 0x2, 0x004)
IOMMU_PMU_EVENT_ATTR(pasid_cache_hit, "event_group=0x2,event=0x008", 0x2, 0x008)
IOMMU_PMU_EVENT_ATTR(ss_nonleaf_lookup, "event_group=0x2,event=0x010", 0x2, 0x010)
IOMMU_PMU_EVENT_ATTR(ss_nonleaf_hit, "event_group=0x2,event=0x020", 0x2, 0x020)
IOMMU_PMU_EVENT_ATTR(fs_nonleaf_lookup, "event_group=0x2,event=0x040", 0x2, 0x040)
IOMMU_PMU_EVENT_ATTR(fs_nonleaf_hit, "event_group=0x2,event=0x080", 0x2, 0x080)
IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_lookup, "event_group=0x2,event=0x100", 0x2, 0x100)
IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_hit, "event_group=0x2,event=0x200", 0x2, 0x200)
IOMMU_PMU_EVENT_ATTR(iotlb_lookup, "event_group=0x3,event=0x001", 0x3, 0x001)
IOMMU_PMU_EVENT_ATTR(iotlb_hit, "event_group=0x3,event=0x002", 0x3, 0x002)
IOMMU_PMU_EVENT_ATTR(hpt_leaf_lookup, "event_group=0x3,event=0x004", 0x3, 0x004)
IOMMU_PMU_EVENT_ATTR(hpt_leaf_hit, "event_group=0x3,event=0x008", 0x3, 0x008)
IOMMU_PMU_EVENT_ATTR(int_cache_lookup, "event_group=0x4,event=0x001", 0x4, 0x001)
IOMMU_PMU_EVENT_ATTR(int_cache_hit_nonposted, "event_group=0x4,event=0x002", 0x4, 0x002)
IOMMU_PMU_EVENT_ATTR(int_cache_hit_posted, "event_group=0x4,event=0x004", 0x4, 0x004)
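
/*
 * Each attribute above appears in sysfs under the PMU's events directory,
 * e.g. (assuming the instance is registered as "dmar0"):
 *
 *   /sys/bus/event_source/devices/dmar0/events/iommu_requests
 *
 * contains "event_group=0x0,event=0x002", so the event can be counted
 * symbolically with "perf stat -e dmar0/iommu_requests/".
 */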

static const struct attribute_group *iommu_pmu_attr_update[] = {
	&filter_requester_id_en,
	&filter_domain_en,
	&filter_pasid_en,
	&filter_ats_en,
	&filter_page_table_en,
	&filter_requester_id,
	&filter_domain,
	&filter_pasid,
	&filter_ats,
	&filter_page_table,
	&iommu_clocks,
	&iommu_requests,
	&pw_occupancy,
	&ats_blocked,
	&iommu_mrds,
	&iommu_mem_blocked,
	&pg_req_posted,
	&ctxt_cache_lookup,
	&ctxt_cache_hit,
	&pasid_cache_lookup,
	&pasid_cache_hit,
	&ss_nonleaf_lookup,
	&ss_nonleaf_hit,
	&fs_nonleaf_lookup,
	&fs_nonleaf_hit,
	&hpt_nonleaf_lookup,
	&hpt_nonleaf_hit,
	&iotlb_lookup,
	&iotlb_hit,
	&hpt_leaf_lookup,
	&hpt_leaf_hit,
	&int_cache_lookup,
	&int_cache_hit_nonposted,
	&int_cache_hit_posted,
	NULL
};

static inline void __iomem *
iommu_event_base(struct iommu_pmu *iommu_pmu, int idx)
{
	return iommu_pmu->cntr_reg + idx * iommu_pmu->cntr_stride;
}

static inline void __iomem *
iommu_config_base(struct iommu_pmu *iommu_pmu, int idx)
{
	return iommu_pmu->cfg_reg + idx * IOMMU_PMU_CFG_OFFSET;
}

static inline struct iommu_pmu *iommu_event_to_pmu(struct perf_event *event)
{
	return container_of(event->pmu, struct iommu_pmu, pmu);
}

static inline u64 iommu_event_config(struct perf_event *event)
{
	u64 config = event->attr.config;

	return (iommu_event_select(config) << IOMMU_EVENT_CFG_ES_SHIFT) |
	       (iommu_event_group(config) << IOMMU_EVENT_CFG_EGI_SHIFT) |
	       IOMMU_EVENT_CFG_INT;
}

static inline bool is_iommu_pmu_event(struct iommu_pmu *iommu_pmu,
				      struct perf_event *event)
{
	return event->pmu == &iommu_pmu->pmu;
}

static int iommu_pmu_validate_event(struct perf_event *event)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	u32 event_group = iommu_event_group(event->attr.config);

	if (event_group >= iommu_pmu->num_eg)
		return -EINVAL;

	return 0;
}

static int iommu_pmu_validate_group(struct perf_event *event)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	struct perf_event *sibling;
	int nr = 0;

	/*
	 * All events in a group must be scheduled simultaneously.
	 * Check whether there are enough counters for all the events.
	 */
	for_each_sibling_event(sibling, event->group_leader) {
		if (!is_iommu_pmu_event(iommu_pmu, sibling) ||
		    sibling->state <= PERF_EVENT_STATE_OFF)
			continue;

		if (++nr > iommu_pmu->num_cntr)
			return -EINVAL;
	}

	return 0;
}

static int iommu_pmu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* sampling not supported */
	if (event->attr.sample_period)
		return -EINVAL;

	if (event->cpu < 0)
		return -EINVAL;

	if (iommu_pmu_validate_event(event))
		return -EINVAL;

	hwc->config = iommu_event_config(event);

	return iommu_pmu_validate_group(event);
}

static void iommu_pmu_event_update(struct perf_event *event)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	struct hw_perf_event *hwc = &event->hw;
	u64 prev_count, new_count, delta;
	int shift = 64 - iommu_pmu->cntr_width;

again:
	prev_count = local64_read(&hwc->prev_count);
	new_count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx));
	if (local64_xchg(&hwc->prev_count, new_count) != prev_count)
		goto again;

	/*
	 * The counter width is enumerated. Always shift the counter
	 * before using it.
	 */
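	/*
	 * For example, with a 48-bit counter the shift is 16: shifting both
	 * snapshots left by 16 discards the unimplemented upper bits, so the
	 * subtraction below remains correct across a hardware counter
	 * wraparound, and the final right shift restores the magnitude.
	 */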
	delta = (new_count << shift) - (prev_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
}

static void iommu_pmu_start(struct perf_event *event, int flags)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	struct intel_iommu *iommu = iommu_pmu->iommu;
	struct hw_perf_event *hwc = &event->hw;
	u64 count;

	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	if (WARN_ON_ONCE(hwc->idx < 0 || hwc->idx >= IOMMU_PMU_IDX_MAX))
		return;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	hwc->state = 0;

	/* Always reprogram the period */
	count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx));
	local64_set((&hwc->prev_count), count);

	/*
	 * Any error from ecmd is ignored.
	 * - The existing perf_event subsystem doesn't handle the error.
	 *   Only the IOMMU PMU returns a runtime HW error. We don't want to
	 *   change the existing generic interfaces for this specific case.
	 * - It's a corner case caused by HW, which is very unlikely to
	 *   happen. There is nothing SW can do.
	 * - The worst case is that the user will get <not counted> from the
	 *   perf command, which can give the user some hints.
	 */
	ecmd_submit_sync(iommu, DMA_ECMD_ENABLE, hwc->idx, 0);

	perf_event_update_userpage(event);
}

static void iommu_pmu_stop(struct perf_event *event, int flags)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	struct intel_iommu *iommu = iommu_pmu->iommu;
	struct hw_perf_event *hwc = &event->hw;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		ecmd_submit_sync(iommu, DMA_ECMD_DISABLE, hwc->idx, 0);

		iommu_pmu_event_update(event);

		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
	}
}

static inline int
iommu_pmu_validate_per_cntr_event(struct iommu_pmu *iommu_pmu,
				  int idx, struct perf_event *event)
{
	u32 event_group = iommu_event_group(event->attr.config);
	u32 select = iommu_event_select(event->attr.config);

	if (!(iommu_pmu->cntr_evcap[idx][event_group] & select))
		return -EINVAL;

	return 0;
}

static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu,
				  struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx;

	/*
	 * The counters which support limited events are usually at the end.
	 * Schedule them first to accommodate more events.
	 */
	for (idx = iommu_pmu->num_cntr - 1; idx >= 0; idx--) {
		if (test_and_set_bit(idx, iommu_pmu->used_mask))
			continue;
		/* Check per-counter event capabilities */
		if (!iommu_pmu_validate_per_cntr_event(iommu_pmu, idx, event))
			break;
		clear_bit(idx, iommu_pmu->used_mask);
	}
	if (idx < 0)
		return -EINVAL;

	iommu_pmu->event_list[idx] = event;
	hwc->idx = idx;

	/* config events */
	dmar_writeq(iommu_config_base(iommu_pmu, idx), hwc->config);

	iommu_pmu_set_filter(requester_id, event->attr.config1,
			     IOMMU_PMU_FILTER_REQUESTER_ID, idx,
			     event->attr.config1);
	iommu_pmu_set_filter(domain, event->attr.config1,
			     IOMMU_PMU_FILTER_DOMAIN, idx,
			     event->attr.config1);
	iommu_pmu_set_filter(pasid, event->attr.config2,
			     IOMMU_PMU_FILTER_PASID, idx,
			     event->attr.config1);
	iommu_pmu_set_filter(ats, event->attr.config2,
			     IOMMU_PMU_FILTER_ATS, idx,
			     event->attr.config1);
	iommu_pmu_set_filter(page_table, event->attr.config2,
			     IOMMU_PMU_FILTER_PAGE_TABLE, idx,
			     event->attr.config1);

	return 0;
}
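
/*
 * Filters are programmed per counter from attr.config1/config2 using the
 * filter_* format attributes defined above. For example (again assuming a
 * "dmar0" instance):
 *
 *   perf stat -e dmar0/iommu_requests,filter_domain_en=0x1,filter_domain=0x10/
 *
 * counts requests only for domain ID 0x10, provided the hardware reports
 * the domain filter capability in iommu_pmu->filter.
 */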

static int iommu_pmu_add(struct perf_event *event, int flags)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	struct hw_perf_event *hwc = &event->hw;
	int ret;

	ret = iommu_pmu_assign_event(iommu_pmu, event);
	if (ret < 0)
		return ret;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (flags & PERF_EF_START)
		iommu_pmu_start(event, 0);

	return 0;
}

static void iommu_pmu_del(struct perf_event *event, int flags)
{
	struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
	int idx = event->hw.idx;

	iommu_pmu_stop(event, PERF_EF_UPDATE);

	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_REQUESTER_ID, idx);
	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_DOMAIN, idx);
	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PASID, idx);
	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_ATS, idx);
	iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PAGE_TABLE, idx);

	iommu_pmu->event_list[idx] = NULL;
	event->hw.idx = -1;
	clear_bit(idx, iommu_pmu->used_mask);

	perf_event_update_userpage(event);
}

static void iommu_pmu_enable(struct pmu *pmu)
{
	struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu);
	struct intel_iommu *iommu = iommu_pmu->iommu;

	ecmd_submit_sync(iommu, DMA_ECMD_UNFREEZE, 0, 0);
}

static void iommu_pmu_disable(struct pmu *pmu)
{
	struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu);
	struct intel_iommu *iommu = iommu_pmu->iommu;

	ecmd_submit_sync(iommu, DMA_ECMD_FREEZE, 0, 0);
}

static void iommu_pmu_counter_overflow(struct iommu_pmu *iommu_pmu)
{
	struct perf_event *event;
	u64 status;
	int i;

	/*
	 * Two counters may overflow very close together. Always check
	 * whether there are more to handle.
	 */
	while ((status = dmar_readq(iommu_pmu->overflow))) {
		for_each_set_bit(i, (unsigned long *)&status, iommu_pmu->num_cntr) {
			/*
			 * Find the assigned event of the counter.
			 * Accumulate the value into the event->count.
			 */
			event = iommu_pmu->event_list[i];
			if (!event) {
				pr_warn_once("Cannot find the assigned event for counter %d\n", i);
				continue;
			}
			iommu_pmu_event_update(event);
		}

		dmar_writeq(iommu_pmu->overflow, status);
	}
}

static irqreturn_t iommu_pmu_irq_handler(int irq, void *dev_id)
{
	struct intel_iommu *iommu = dev_id;

	if (!dmar_readl(iommu->reg + DMAR_PERFINTRSTS_REG))
		return IRQ_NONE;

	iommu_pmu_counter_overflow(iommu->pmu);

	/* Clear the status bit */
	dmar_writel(iommu->reg + DMAR_PERFINTRSTS_REG, DMA_PERFINTRSTS_PIS);

	return IRQ_HANDLED;
}

static int __iommu_pmu_register(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu = iommu->pmu;

	iommu_pmu->pmu.name = iommu->name;
	iommu_pmu->pmu.task_ctx_nr = perf_invalid_context;
	iommu_pmu->pmu.event_init = iommu_pmu_event_init;
	iommu_pmu->pmu.pmu_enable = iommu_pmu_enable;
	iommu_pmu->pmu.pmu_disable = iommu_pmu_disable;
	iommu_pmu->pmu.add = iommu_pmu_add;
	iommu_pmu->pmu.del = iommu_pmu_del;
	iommu_pmu->pmu.start = iommu_pmu_start;
	iommu_pmu->pmu.stop = iommu_pmu_stop;
	iommu_pmu->pmu.read = iommu_pmu_event_update;
	iommu_pmu->pmu.attr_groups = iommu_pmu_attr_groups;
	iommu_pmu->pmu.attr_update = iommu_pmu_attr_update;
	iommu_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
	iommu_pmu->pmu.scope = PERF_PMU_SCOPE_SYS_WIDE;
	iommu_pmu->pmu.module = THIS_MODULE;

	return perf_pmu_register(&iommu_pmu->pmu, iommu_pmu->pmu.name, -1);
}

static inline void __iomem *
get_perf_reg_address(struct intel_iommu *iommu, u32 offset)
{
	u32 off = dmar_readl(iommu->reg + offset);

	return iommu->reg + off;
}

int alloc_iommu_pmu(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu;
	int i, j, ret;
	u64 perfcap;
	u32 cap;

	if (!ecap_pms(iommu->ecap))
		return 0;

	/* The IOMMU PMU requires ECMD support as well */
	if (!cap_ecmds(iommu->cap))
		return -ENODEV;

	perfcap = dmar_readq(iommu->reg + DMAR_PERFCAP_REG);
	/* Performance monitoring is not supported. */
	if (!perfcap)
		return -ENODEV;

	/* Sanity check the number of counters and event groups */
	if (!pcap_num_cntr(perfcap) || !pcap_num_event_group(perfcap))
		return -ENODEV;

	/* The interrupt on overflow is required */
	if (!pcap_interrupt(perfcap))
		return -ENODEV;

	/* Check required Enhanced Command Capability */
	if (!ecmd_has_pmu_essential(iommu))
		return -ENODEV;

	iommu_pmu = kzalloc(sizeof(*iommu_pmu), GFP_KERNEL);
	if (!iommu_pmu)
		return -ENOMEM;

	iommu_pmu->num_cntr = pcap_num_cntr(perfcap);
	if (iommu_pmu->num_cntr > IOMMU_PMU_IDX_MAX) {
		pr_warn_once("The number of IOMMU counters %d > max(%d), clipping!",
			     iommu_pmu->num_cntr, IOMMU_PMU_IDX_MAX);
		iommu_pmu->num_cntr = IOMMU_PMU_IDX_MAX;
	}

	iommu_pmu->cntr_width = pcap_cntr_width(perfcap);
	iommu_pmu->filter = pcap_filters_mask(perfcap);
	iommu_pmu->cntr_stride = pcap_cntr_stride(perfcap);
	iommu_pmu->num_eg = pcap_num_event_group(perfcap);

	iommu_pmu->evcap = kcalloc(iommu_pmu->num_eg, sizeof(u64), GFP_KERNEL);
	if (!iommu_pmu->evcap) {
		ret = -ENOMEM;
		goto free_pmu;
	}

	/* Parse event group capabilities */
	for (i = 0; i < iommu_pmu->num_eg; i++) {
		u64 pcap;

		pcap = dmar_readq(iommu->reg + DMAR_PERFEVNTCAP_REG +
				  i * IOMMU_PMU_CAP_REGS_STEP);
		iommu_pmu->evcap[i] = pecap_es(pcap);
	}

	iommu_pmu->cntr_evcap = kcalloc(iommu_pmu->num_cntr, sizeof(u32 *), GFP_KERNEL);
	if (!iommu_pmu->cntr_evcap) {
		ret = -ENOMEM;
		goto free_pmu_evcap;
	}
	for (i = 0; i < iommu_pmu->num_cntr; i++) {
		iommu_pmu->cntr_evcap[i] = kcalloc(iommu_pmu->num_eg, sizeof(u32), GFP_KERNEL);
		if (!iommu_pmu->cntr_evcap[i]) {
			ret = -ENOMEM;
			goto free_pmu_cntr_evcap;
		}
		/*
		 * Start from the global capabilities; adjust according
		 * to the per-counter capabilities later.
		 */
		for (j = 0; j < iommu_pmu->num_eg; j++)
			iommu_pmu->cntr_evcap[i][j] = (u32)iommu_pmu->evcap[j];
	}

	iommu_pmu->cfg_reg = get_perf_reg_address(iommu, DMAR_PERFCFGOFF_REG);
	iommu_pmu->cntr_reg = get_perf_reg_address(iommu, DMAR_PERFCNTROFF_REG);
	iommu_pmu->overflow = get_perf_reg_address(iommu, DMAR_PERFOVFOFF_REG);

	/*
	 * Check per-counter capabilities. All counters should have the
	 * same capabilities on Interrupt on Overflow Support and Counter
	 * Width.
	 */
	for (i = 0; i < iommu_pmu->num_cntr; i++) {
		cap = dmar_readl(iommu_pmu->cfg_reg +
				 i * IOMMU_PMU_CFG_OFFSET +
				 IOMMU_PMU_CFG_CNTRCAP_OFFSET);
		if (!iommu_cntrcap_pcc(cap))
			continue;

		/*
		 * It's possible that some counters have a different
		 * capability because of, e.g., a HW bug. Check the corner
		 * case here and simply drop those counters.
		 */
		if ((iommu_cntrcap_cw(cap) != iommu_pmu->cntr_width) ||
		    !iommu_cntrcap_ios(cap)) {
			iommu_pmu->num_cntr = i;
			pr_warn("PMU counter capability inconsistent, counter number reduced to %d\n",
				iommu_pmu->num_cntr);
		}

		/* Clear the pre-defined events group */
		for (j = 0; j < iommu_pmu->num_eg; j++)
			iommu_pmu->cntr_evcap[i][j] = 0;

		/* Override with per-counter event capabilities */
		for (j = 0; j < iommu_cntrcap_egcnt(cap); j++) {
			cap = dmar_readl(iommu_pmu->cfg_reg + i * IOMMU_PMU_CFG_OFFSET +
					 IOMMU_PMU_CFG_CNTREVCAP_OFFSET +
					 (j * IOMMU_PMU_OFF_REGS_STEP));
			iommu_pmu->cntr_evcap[i][iommu_event_group(cap)] = iommu_event_select(cap);
			/*
			 * Some events may only be supported by a specific counter.
			 * Track them in the evcap as well.
			 */
			iommu_pmu->evcap[iommu_event_group(cap)] |= iommu_event_select(cap);
		}
	}

	iommu_pmu->iommu = iommu;
	iommu->pmu = iommu_pmu;

	return 0;

free_pmu_cntr_evcap:
	for (i = 0; i < iommu_pmu->num_cntr; i++)
		kfree(iommu_pmu->cntr_evcap[i]);
	kfree(iommu_pmu->cntr_evcap);
free_pmu_evcap:
	kfree(iommu_pmu->evcap);
free_pmu:
	kfree(iommu_pmu);

	return ret;
}

void free_iommu_pmu(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu = iommu->pmu;

	if (!iommu_pmu)
		return;

	if (iommu_pmu->evcap) {
		int i;

		for (i = 0; i < iommu_pmu->num_cntr; i++)
			kfree(iommu_pmu->cntr_evcap[i]);
		kfree(iommu_pmu->cntr_evcap);
	}
	kfree(iommu_pmu->evcap);
	kfree(iommu_pmu);
	iommu->pmu = NULL;
}

static int iommu_pmu_set_interrupt(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu = iommu->pmu;
	int irq, ret;

	irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PERF + iommu->seq_id, iommu->node, iommu);
	if (irq <= 0)
		return -EINVAL;

	snprintf(iommu_pmu->irq_name, sizeof(iommu_pmu->irq_name), "dmar%d-perf", iommu->seq_id);

	iommu->perf_irq = irq;
	ret = request_threaded_irq(irq, NULL, iommu_pmu_irq_handler,
				   IRQF_ONESHOT, iommu_pmu->irq_name, iommu);
	if (ret) {
		dmar_free_hwirq(irq);
		iommu->perf_irq = 0;
		return ret;
	}
	return 0;
}

static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu)
{
	if (!iommu->perf_irq)
		return;

	free_irq(iommu->perf_irq, iommu);
	dmar_free_hwirq(iommu->perf_irq);
	iommu->perf_irq = 0;
}

void iommu_pmu_register(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu = iommu->pmu;

	if (!iommu_pmu)
		return;

	if (__iommu_pmu_register(iommu))
		goto err;

	/* Set interrupt for overflow */
	if (iommu_pmu_set_interrupt(iommu))
		goto unregister;

	return;

unregister:
	perf_pmu_unregister(&iommu_pmu->pmu);
err:
	pr_err("Failed to register PMU for iommu (seq_id = %d)\n", iommu->seq_id);
	free_iommu_pmu(iommu);
}

void iommu_pmu_unregister(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu = iommu->pmu;

	if (!iommu_pmu)
		return;

	iommu_pmu_unset_interrupt(iommu);
	perf_pmu_unregister(&iommu_pmu->pmu);
}