// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023 Rivos Inc
 *
 * Authors:
 *     Atish Patra <atishp@rivosinc.com>
 */

#define pr_fmt(fmt)	"riscv-kvm-pmu: " fmt
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/perf/riscv_pmu.h>
#include <asm/csr.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/kvm_vcpu_pmu.h>
#include <asm/sbi.h>
#include <linux/bitops.h>

#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
#define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
#define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK)

static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
	[SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
	[SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
	[SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
	[SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
	[SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
	[SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
	[SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
	[SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
	[SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
	[SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
};

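/*
 * Program the host perf event so that it overflows when the guest counter
 * wraps: the sample period is the distance from the current guest counter
 * value to the counter's maximum (e.g. a counter value of 0 maps to the
 * full counter mask).
 */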
static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
{
	u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
	u64 sample_period;

	if (!pmc->counter_val)
		sample_period = counter_val_mask;
	else
		sample_period = (-pmc->counter_val) & counter_val_mask;

	return sample_period;
}

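/*
 * Translate an SBI event type into the corresponding perf subsystem type.
 * Raw and firmware events are programmed through PERF_TYPE_RAW; anything
 * else returns PERF_TYPE_MAX to flag an unsupported event.
 */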
static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 type = PERF_TYPE_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
		type = PERF_TYPE_HARDWARE;
		break;
	case SBI_PMU_EVENT_TYPE_CACHE:
		type = PERF_TYPE_HW_CACHE;
		break;
	case SBI_PMU_EVENT_TYPE_RAW:
	case SBI_PMU_EVENT_TYPE_FW:
		type = PERF_TYPE_RAW;
		break;
	default:
		break;
	}

	return type;
}

static bool kvm_pmu_is_fw_event(unsigned long eidx)
{
	return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
}

static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
{
	return hw_event_perf_map[sbi_event_code];
}

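/*
 * Build the perf hw-cache config from the SBI cache event code using the
 * standard perf encoding: cache_id | (op_id << 8) | (result_id << 16).
 * Out-of-range fields return U64_MAX so the caller can reject the event.
 */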
static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
{
	u64 config = U64_MAX;
	unsigned int cache_type, cache_op, cache_result;

	/* All the cache event masks lie within 0xFF. No separate masking is necessary */
	cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
		      SBI_PMU_EVENT_CACHE_ID_SHIFT;
	cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
		    SBI_PMU_EVENT_CACHE_OP_SHIFT;
	cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;

	if (cache_type >= PERF_COUNT_HW_CACHE_MAX ||
	    cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
	    cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return config;

	config = cache_type | (cache_op << 8) | (cache_result << 16);

	return config;
}

static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 ecode = get_event_code(eidx);
	u64 config = U64_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
		if (ecode < SBI_PMU_HW_GENERAL_MAX)
			config = kvm_pmu_get_perf_event_hw_config(ecode);
		break;
	case SBI_PMU_EVENT_TYPE_CACHE:
		config = kvm_pmu_get_perf_event_cache_config(ecode);
		break;
	case SBI_PMU_EVENT_TYPE_RAW:
		config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
		break;
	case SBI_PMU_EVENT_TYPE_FW:
		if (ecode < SBI_PMU_FW_MAX)
			config = (1ULL << 63) | ecode;
		break;
	default:
		break;
	}

	return config;
}

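/*
 * Cycle and instruction-retired events always map to the fixed counters
 * 0 (CY) and 2 (IR). Counter index 1 is the TIME counter and is never
 * handed out to the guest.
 */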
static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
{
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	u32 ecode = get_event_code(eidx);

	if (etype != SBI_PMU_EVENT_TYPE_HW)
		return -EINVAL;

	if (ecode == SBI_PMU_HW_CPU_CYCLES)
		return 0;
	else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
		return 2;
	else
		return -EINVAL;
}

static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
					      unsigned long cbase, unsigned long cmask)
{
	int ctr_idx = -1;
	int i, pmc_idx;
	int min, max;

	if (kvm_pmu_is_fw_event(eidx)) {
		/* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
		min = kvpmu->num_hw_ctrs;
		max = min + kvpmu->num_fw_ctrs;
	} else {
		/* First 3 counters are reserved for fixed counters */
		min = 3;
		max = kvpmu->num_hw_ctrs;
	}

	for_each_set_bit(i, &cmask, BITS_PER_LONG) {
		pmc_idx = i + cbase;
		if ((pmc_idx >= min && pmc_idx < max) &&
		    !test_bit(pmc_idx, kvpmu->pmc_in_use)) {
			ctr_idx = pmc_idx;
			break;
		}
	}

	return ctr_idx;
}

static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
			     unsigned long cbase, unsigned long cmask)
{
	int ret;

	/* Fixed counters need to have a fixed mapping as they have a different width */
	ret = kvm_pmu_get_fixed_pmc_index(eidx);
	if (ret >= 0)
		return ret;

	return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
}

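/*
 * Return the upper 32 bits of a firmware counter for the SBI
 * counter-read-hi call. This path is only meaningful on RV32, where the
 * 64-bit counter value is reported in two halves.
 */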
static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
			      unsigned long *out_val)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int fevent_code;

	if (!IS_ENABLED(CONFIG_32BIT)) {
		pr_warn("%s: should only be invoked for RV32\n", __func__);
		return -EINVAL;
	}

	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
		pr_warn("Invalid counter id [%ld] during read\n", cidx);
		return -EINVAL;
	}

	pmc = &kvpmu->pmc[cidx];

	if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
		return -EINVAL;

	fevent_code = get_event_code(pmc->event_idx);
	pmc->counter_val = kvpmu->fw_event[fevent_code].value;

	*out_val = pmc->counter_val >> 32;

	return 0;
}

static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
			unsigned long *out_val)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u64 enabled, running;
	int fevent_code;

	if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
		pr_warn("Invalid counter id [%ld] during read\n", cidx);
		return -EINVAL;
	}

	pmc = &kvpmu->pmc[cidx];

	if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
		fevent_code = get_event_code(pmc->event_idx);
		pmc->counter_val = kvpmu->fw_event[fevent_code].value;
	} else if (pmc->perf_event) {
		pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
	} else {
		return -EINVAL;
	}
	*out_val = pmc->counter_val;

	return 0;
}

static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
					 unsigned long ctr_mask)
{
	/* Make sure we have a valid counter mask requested by the caller */
	if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
		return -EINVAL;

	return 0;
}

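/*
 * Overflow handler attached to every guest-backed perf event. It stops
 * the event, reloads the sample period, marks the virtual counter as
 * overflown and injects the PMU overflow interrupt into the guest.
 */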
static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_vcpu *vcpu = pmc->vcpu;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
	u64 period;

	/*
	 * Stop the event counting by directly accessing the perf_event.
	 * Otherwise, this needs to be deferred via a workqueue.
	 * That will introduce skew in the counter value because the actual
	 * physical counter would start after returning from this function.
	 * It will be stopped again once the workqueue is scheduled.
	 */
	rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * The hw counter would start automatically when this function returns.
	 * Thus, the host may continue to interrupt and inject it into the guest
	 * even without the guest configuring the next event. Depending on the
	 * hardware, the host may have some sluggishness only if privilege mode
	 * filtering is not available. In an ideal world, where qemu is not the
	 * only capable hardware, this can be removed.
	 * FYI: ARM64 does it this way while x86 doesn't do anything like this.
	 * TODO: Should we keep it for RISC-V?
	 */
	period = -(local64_read(&perf_event->count));

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	set_bit(pmc->idx, kvpmu->pmc_overflown);
	kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);

	rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

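/*
 * Create the host perf event that backs a guest counter. Any event
 * previously attached to the counter is released first; the new event is
 * enabled immediately only when the guest asked for AUTO_START.
 */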
static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
				      unsigned long flags, unsigned long eidx,
				      unsigned long evtdata)
{
	struct perf_event *event;

	kvm_pmu_release_perf_event(pmc);
	attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
	if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
		//TODO: Do we really want to clear the value in the hardware counter?
		pmc->counter_val = 0;
	}

	/*
	 * Set the default sample_period for now. The guest-specified value
	 * will be updated in the start call.
	 */
	attr->sample_period = kvm_pmu_get_sample_period(pmc);

	event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
	if (IS_ERR(event)) {
		pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
		return PTR_ERR(event);
	}

	pmc->perf_event = event;
	if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
		perf_event_enable(pmc->perf_event);

	return 0;
}

int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_fw_event *fevent;

	if (!kvpmu || fid >= SBI_PMU_FW_MAX)
		return -EINVAL;

	fevent = &kvpmu->fw_event[fid];
	if (fevent->started)
		fevent->value++;

	return 0;
}

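/*
 * Handle a trapped guest read of the cycle/instret/hpmcounter CSRs and
 * forward it to the corresponding virtual counter. Writes are rejected
 * with an illegal instruction trap since these CSRs are read-only.
 */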
int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
				unsigned long *val, unsigned long new_val,
				unsigned long wr_mask)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;

	if (!kvpmu || !kvpmu->init_done) {
		/*
		 * In the absence of sscofpmf in the platform, the guest OS may use
		 * the legacy PMU driver to read cycle/instret. In that case,
		 * just return 0 to avoid any illegal trap. However, any other
		 * hpmcounter access should result in an illegal trap as they must
		 * be accessed through the SBI PMU only.
		 */
		if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
			*val = 0;
			return ret;
		} else {
			return KVM_INSN_ILLEGAL_TRAP;
		}
	}

	/* The counter CSRs are read-only. Thus, any write should result in an illegal trap */
	if (wr_mask)
		return KVM_INSN_ILLEGAL_TRAP;

	cidx = csr_num - CSR_CYCLE;

	if (pmu_ctr_read(vcpu, cidx, val) < 0)
		return KVM_INSN_ILLEGAL_TRAP;

	return ret;
}

static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	kfree(kvpmu->sdata);
	kvpmu->sdata = NULL;
	kvpmu->snapshot_addr = INVALID_GPA;
}

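/*
 * Handle the SBI PMU snapshot shared memory call: validate the guest
 * physical address supplied by the guest, zero the shared area and
 * remember it so counter values can be published there on counter stop.
 */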
int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
				      unsigned long saddr_high, unsigned long flags,
				      struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
	int sbiret = 0;
	gpa_t saddr;
	unsigned long hva;
	bool writable;

	if (!kvpmu || flags) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
		kvm_pmu_clear_snapshot_area(vcpu);
		return 0;
	}

	saddr = saddr_low;

	if (saddr_high != 0) {
		if (IS_ENABLED(CONFIG_32BIT))
			saddr |= ((gpa_t)saddr_high << 32);
		else
			sbiret = SBI_ERR_INVALID_ADDRESS;
		goto out;
	}

	hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable);
	if (kvm_is_error_hva(hva) || !writable) {
		sbiret = SBI_ERR_INVALID_ADDRESS;
		goto out;
	}

	kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
	if (!kvpmu->sdata)
		return -ENOMEM;

	if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
		kfree(kvpmu->sdata);
		sbiret = SBI_ERR_FAILURE;
		goto out;
	}

	kvpmu->snapshot_addr = saddr;

out:
	retdata->err_val = sbiret;

	return 0;
}

int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	retdata->out_val = kvm_pmu_num_counters(kvpmu);

	return 0;
}

int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	if (cidx > RISCV_KVM_MAX_COUNTERS || cidx == 1) {
		retdata->err_val = SBI_ERR_INVALID_PARAM;
		return 0;
	}

	retdata->out_val = kvpmu->pmc[cidx].cinfo.value;

	return 0;
}

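/*
 * SBI counter-start handler: optionally seed the counter values from the
 * initial value or the snapshot area, then start the selected firmware
 * and hardware counters.
 */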
int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				 unsigned long ctr_mask, unsigned long flags, u64 ival,
				 struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set) {
		if (kvpmu->snapshot_addr == INVALID_GPA) {
			sbiret = SBI_ERR_NO_SHMEM;
			goto out;
		}
		if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
					sizeof(struct riscv_pmu_snapshot_data))) {
			pr_warn("Unable to read snapshot shared memory while starting counters\n");
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
	}
	/* Start the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
		pmc_index = i + ctr_base;
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		/* The guest started the counter again. Reset the overflow status */
		clear_bit(pmc_index, kvpmu->pmc_overflown);
		pmc = &kvpmu->pmc[pmc_index];
		if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
			pmc->counter_val = ival;
		} else if (snap_flag_set) {
			/* The counter indices in the snapshot are relative to the counter base */
			pmc->counter_val = kvpmu->sdata->ctr_values[i];
		}

		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			/* Check if the counter was already started for some reason */
			if (kvpmu->fw_event[fevent_code].started) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}

			kvpmu->fw_event[fevent_code].started = true;
			kvpmu->fw_event[fevent_code].value = pmc->counter_val;
		} else if (pmc->perf_event) {
			if (unlikely(pmc->started)) {
				sbiret = SBI_ERR_ALREADY_STARTED;
				continue;
			}
			perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
			perf_event_enable(pmc->perf_event);
			pmc->started = true;
		} else {
			sbiret = SBI_ERR_INVALID_PARAM;
		}
	}

out:
	retdata->err_val = sbiret;

	return 0;
}

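/*
 * SBI counter-stop handler: stop the selected counters and, when the
 * guest asked for it, publish the final counter values and overflow
 * status to the snapshot shared memory or reset the counters entirely.
 */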
int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				unsigned long ctr_mask, unsigned long flags,
				struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	u64 enabled, running;
	struct kvm_pmc *pmc;
	int fevent_code;
	bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
	bool shmem_needs_update = false;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
		sbiret = SBI_ERR_NO_SHMEM;
		goto out;
	}

	/* Stop the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
		pmc_index = i + ctr_base;
		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
			continue;
		pmc = &kvpmu->pmc[pmc_index];
		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fevent_code = get_event_code(pmc->event_idx);
			if (fevent_code >= SBI_PMU_FW_MAX) {
				sbiret = SBI_ERR_INVALID_PARAM;
				goto out;
			}

			if (!kvpmu->fw_event[fevent_code].started)
				sbiret = SBI_ERR_ALREADY_STOPPED;

			kvpmu->fw_event[fevent_code].started = false;
		} else if (pmc->perf_event) {
			if (pmc->started) {
				/* Stop counting the counter */
				perf_event_disable(pmc->perf_event);
				pmc->started = false;
			} else {
				sbiret = SBI_ERR_ALREADY_STOPPED;
			}

			if (flags & SBI_PMU_STOP_FLAG_RESET)
				/* Release the counter if this is a reset request */
				kvm_pmu_release_perf_event(pmc);
		} else {
			sbiret = SBI_ERR_INVALID_PARAM;
		}

		if (snap_flag_set && !sbiret) {
			if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
				pmc->counter_val = kvpmu->fw_event[fevent_code].value;
			else if (pmc->perf_event)
				pmc->counter_val += perf_event_read_value(pmc->perf_event,
									  &enabled, &running);
			/*
			 * The counter and overflow indices in the snapshot region are
			 * relative to cbase. Modify the set bit in the counter mask instead
			 * of the pmc_index, which indicates the absolute counter index.
			 */
			if (test_bit(pmc_index, kvpmu->pmc_overflown))
				kvpmu->sdata->ctr_overflow_mask |= BIT(i);
			kvpmu->sdata->ctr_values[i] = pmc->counter_val;
			shmem_needs_update = true;
		}

		if (flags & SBI_PMU_STOP_FLAG_RESET) {
			pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
			clear_bit(pmc_index, kvpmu->pmc_in_use);
			clear_bit(pmc_index, kvpmu->pmc_overflown);
			if (snap_flag_set) {
				/*
				 * Only clear the given counter as the caller is responsible
				 * for validating both the overflow mask and configured counters.
				 */
				kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
				shmem_needs_update = true;
			}
		}
	}

	if (shmem_needs_update)
		kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
					     sizeof(struct riscv_pmu_snapshot_data));

out:
	retdata->err_val = sbiret;

	return 0;
}

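/*
 * SBI config-matching handler: pick a free counter for the requested
 * event, build the perf_event_attr with guest-only filtering, and create
 * the backing perf event (or mark the firmware event as started).
 */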
int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				     unsigned long ctr_mask, unsigned long flags,
				     unsigned long eidx, u64 evtdata,
				     struct kvm_vcpu_sbi_return *retdata)
{
	int ctr_idx, sbiret = 0;
	long ret;
	bool is_fevent;
	unsigned long event_code;
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	struct perf_event_attr attr = {
		.type = etype,
		.size = sizeof(struct perf_event_attr),
		.pinned = true,
		/*
		 * We should never reach here if the platform doesn't support the
		 * sscofpmf extension, as mode filtering won't work without it.
		 */
		.exclude_host = true,
		.exclude_hv = true,
		.exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
		.exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
		.config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
	};

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		sbiret = SBI_ERR_INVALID_PARAM;
		goto out;
	}

	event_code = get_event_code(eidx);
	is_fevent = kvm_pmu_is_fw_event(eidx);
	if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
		sbiret = SBI_ERR_NOT_SUPPORTED;
		goto out;
	}

	/*
	 * The SKIP_MATCH flag indicates that the caller is aware of the counter
	 * assigned for this event. Just do a sanity check that it is already
	 * marked as used.
	 */
	if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
		if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
			sbiret = SBI_ERR_FAILURE;
			goto out;
		}
		ctr_idx = ctr_base + __ffs(ctr_mask);
	} else {
		ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
		if (ctr_idx < 0) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	pmc = &kvpmu->pmc[ctr_idx];
	pmc->idx = ctr_idx;

	if (is_fevent) {
		if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
			kvpmu->fw_event[event_code].started = true;
	} else {
		ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
		if (ret) {
			sbiret = SBI_ERR_NOT_SUPPORTED;
			goto out;
		}
	}

	set_bit(ctr_idx, kvpmu->pmc_in_use);
	pmc->event_idx = eidx;
	retdata->out_val = ctr_idx;
out:
	retdata->err_val = sbiret;

	return 0;
}

int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
				      struct kvm_vcpu_sbi_return *retdata)
{
	int ret;

	ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
	if (ret == -EINVAL)
		retdata->err_val = SBI_ERR_INVALID_PARAM;

	return 0;
}

int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
				struct kvm_vcpu_sbi_return *retdata)
{
	int ret;

	ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
	if (ret == -EINVAL)
		retdata->err_val = SBI_ERR_INVALID_PARAM;

	return 0;
}

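/*
 * Per-VCPU PMU initialization: requires sscofpmf in the host so guest
 * events can be filtered by privilege mode. The virtual counters mirror
 * the hardware counters plus a fixed set of firmware counters.
 */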
void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
{
	int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;

	/*
	 * PMU functionality should only be available to guests if privilege mode
	 * filtering is available in the host. Otherwise, the guest will always
	 * count events while execution is in hypervisor mode.
	 */
	if (!riscv_isa_extension_available(NULL, SSCOFPMF))
		return;

	ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
	if (ret < 0 || !hpm_width || !num_hw_ctrs)
		return;

	/*
	 * Increase the number of hardware counters to offset the time counter.
	 */
	kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
	kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
	kvpmu->snapshot_addr = INVALID_GPA;

	if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
		pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
		kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
	}

	/*
	 * There is no correlation between the logical hardware counters and the
	 * virtual counters. However, we need to encode an hpmcounter CSR in the
	 * counter info field so that KVM can trap and emulate the read. This works
	 * well in the migration use case as KVM doesn't care whether the actual
	 * hpmcounter is available in the hardware or not.
	 */
	for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
		/* TIME CSR shouldn't be read from the perf interface */
		if (i == 1)
			continue;
		pmc = &kvpmu->pmc[i];
		pmc->idx = i;
		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
		pmc->vcpu = vcpu;
		if (i < kvpmu->num_hw_ctrs) {
			pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
			if (i < 3)
				/* CY, IR counters */
				pmc->cinfo.width = 63;
			else
				pmc->cinfo.width = hpm_width;
			/*
			 * The CSR number doesn't have any relation with the logical
			 * hardware counters. The CSR numbers are encoded sequentially
			 * to avoid maintaining a map between the virtual counter
			 * and CSR number.
			 */
			pmc->cinfo.csr = CSR_CYCLE + i;
		} else {
			pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
			pmc->cinfo.width = 63;
		}
	}

	kvpmu->init_done = true;
}

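/*
 * Tear down the per-VCPU PMU state: release every backing perf event,
 * clear the in-use/overflow bitmaps and the firmware event table, and
 * drop the snapshot shared memory mapping.
 */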
void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int i;

	if (!kvpmu)
		return;

	for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
		pmc = &kvpmu->pmc[i];
		pmc->counter_val = 0;
		kvm_pmu_release_perf_event(pmc);
		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
	}
	bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
	bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
	kvm_pmu_clear_snapshot_area(vcpu);
}

void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
{
	kvm_riscv_vcpu_pmu_deinit(vcpu);
}