   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2008-2017 Andes Technology Corporation
   4 *
   5 * Reference ARMv7: Jean Pihet <jpihet@mvista.com>
   6 * 2010 (c) MontaVista Software, LLC.
   7 */
   8
   9#include <linux/perf_event.h>
  10#include <linux/bitmap.h>
  11#include <linux/export.h>
  12#include <linux/kernel.h>
  13#include <linux/of.h>
  14#include <linux/platform_device.h>
  15#include <linux/slab.h>
  16#include <linux/spinlock.h>
  17#include <linux/pm_runtime.h>
  18#include <linux/ftrace.h>
  19#include <linux/uaccess.h>
  20#include <linux/sched/clock.h>
  21#include <linux/percpu-defs.h>
  22
  23#include <asm/pmu.h>
  24#include <asm/irq_regs.h>
  25#include <asm/nds32.h>
  26#include <asm/stacktrace.h>
  27#include <asm/perf_event.h>
  28#include <nds32_intrinsic.h>
  29
  30/* Set at runtime when we know what CPU type we are. */
  31static struct nds32_pmu *cpu_pmu;
  32
  33static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
  34static void nds32_pmu_start(struct nds32_pmu *cpu_pmu);
  35static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu);
  36static struct platform_device_id cpu_pmu_plat_device_ids[] = {
  37	{.name = "nds32-pfm"},
  38	{},
  39};
  40
  41static int nds32_pmu_map_cache_event(const unsigned int (*cache_map)
  42				  [PERF_COUNT_HW_CACHE_MAX]
  43				  [PERF_COUNT_HW_CACHE_OP_MAX]
  44				  [PERF_COUNT_HW_CACHE_RESULT_MAX], u64 config)
  45{
  46	unsigned int cache_type, cache_op, cache_result, ret;
  47
  48	cache_type = (config >> 0) & 0xff;
  49	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
  50		return -EINVAL;
  51
  52	cache_op = (config >> 8) & 0xff;
  53	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
  54		return -EINVAL;
  55
  56	cache_result = (config >> 16) & 0xff;
  57	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
  58		return -EINVAL;
  59
  60	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
  61
  62	if (ret == CACHE_OP_UNSUPPORTED)
  63		return -ENOENT;
  64
  65	return ret;
  66}
  67
  68static int
  69nds32_pmu_map_hw_event(const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
  70		       u64 config)
  71{
  72	int mapping;
  73
  74	if (config >= PERF_COUNT_HW_MAX)
  75		return -ENOENT;
  76
  77	mapping = (*event_map)[config];
  78	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
  79}
  80
  81static int nds32_pmu_map_raw_event(u32 raw_event_mask, u64 config)
  82{
  83	int ev_type = (int)(config & raw_event_mask);
  84	int idx = config >> 8;
  85
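	/*
	 * Raw config layout used here: the low bits (raw_event_mask) select
	 * the event number, and config >> 8 selects which of the three SPAv3
	 * counters (0, 1 or 2) the event belongs to.
	 */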
  86	switch (idx) {
  87	case 0:
  88		ev_type = PFM_OFFSET_MAGIC_0 + ev_type;
  89		if (ev_type >= SPAV3_0_SEL_LAST || ev_type <= SPAV3_0_SEL_BASE)
  90			return -ENOENT;
  91		break;
  92	case 1:
  93		ev_type = PFM_OFFSET_MAGIC_1 + ev_type;
  94		if (ev_type >= SPAV3_1_SEL_LAST || ev_type <= SPAV3_1_SEL_BASE)
  95			return -ENOENT;
  96		break;
  97	case 2:
  98		ev_type = PFM_OFFSET_MAGIC_2 + ev_type;
  99		if (ev_type >= SPAV3_2_SEL_LAST || ev_type <= SPAV3_2_SEL_BASE)
 100			return -ENOENT;
 101		break;
 102	default:
 103		return -ENOENT;
 104	}
 105
 106	return ev_type;
 107}
 108
 109int
 110nds32_pmu_map_event(struct perf_event *event,
 111		    const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
 112		    const unsigned int (*cache_map)
 113		    [PERF_COUNT_HW_CACHE_MAX]
 114		    [PERF_COUNT_HW_CACHE_OP_MAX]
 115		    [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask)
 116{
 117	u64 config = event->attr.config;
 118
 119	switch (event->attr.type) {
 120	case PERF_TYPE_HARDWARE:
 121		return nds32_pmu_map_hw_event(event_map, config);
 122	case PERF_TYPE_HW_CACHE:
 123		return nds32_pmu_map_cache_event(cache_map, config);
 124	case PERF_TYPE_RAW:
 125		return nds32_pmu_map_raw_event(raw_event_mask, config);
 126	}
 127
 128	return -ENOENT;
 129}
 130
 131static int nds32_spav3_map_event(struct perf_event *event)
 132{
 133	return nds32_pmu_map_event(event, &nds32_pfm_perf_map,
 134				&nds32_pfm_perf_cache_map, SOFTWARE_EVENT_MASK);
 135}
 136
 137static inline u32 nds32_pfm_getreset_flags(void)
 138{
 139	/* Read overflow status */
 140	u32 val = __nds32__mfsr(NDS32_SR_PFM_CTL);
 141	u32 old_val = val;
 142
 143	/* Write 1 to the overflow bits to clear them; leave the other bits as-is */
 144	u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
 145
 146	__nds32__mtsr(val | ov_flag, NDS32_SR_PFM_CTL);
 147
 148	return old_val;
 149}
 150
 151static inline int nds32_pfm_has_overflowed(u32 pfm)
 152{
 153	u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
 154
 155	return pfm & ov_flag;
 156}
 157
 158static inline int nds32_pfm_counter_has_overflowed(u32 pfm, int idx)
 159{
 160	u32 mask = 0;
 161
 162	switch (idx) {
 163	case 0:
 164		mask = PFM_CTL_OVF[0];
 165		break;
 166	case 1:
 167		mask = PFM_CTL_OVF[1];
 168		break;
 169	case 2:
 170		mask = PFM_CTL_OVF[2];
 171		break;
 172	default:
 173		pr_err("%s index wrong\n", __func__);
 174		break;
 175	}
 176	return pfm & mask;
 177}
 178
 179/*
 180 * Set the next IRQ period, based on the hwc->period_left value.
 181 * To be called with the event disabled in hw:
 182 */
 183int nds32_pmu_event_set_period(struct perf_event *event)
 184{
 185	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 186	struct hw_perf_event *hwc = &event->hw;
 187	s64 left = local64_read(&hwc->period_left);
 188	s64 period = hwc->sample_period;
 189	int ret = 0;
 190
 191	/* The period may have been changed by PERF_EVENT_IOC_PERIOD */
 192	if (unlikely(period != hwc->last_period))
 193		left = period - (hwc->last_period - left);
 194
 195	if (unlikely(left <= -period)) {
 196		left = period;
 197		local64_set(&hwc->period_left, left);
 198		hwc->last_period = period;
 199		ret = 1;
 200	}
 201
 202	if (unlikely(left <= 0)) {
 203		left += period;
 204		local64_set(&hwc->period_left, left);
 205		hwc->last_period = period;
 206		ret = 1;
 207	}
 208
 209	if (left > (s64)nds32_pmu->max_period)
 210		left = nds32_pmu->max_period;
 211
 212	/*
 213	 * The hw event starts counting from this event offset,
 214	 * mark it to be able to extract future "deltas":
 215	 */
 216	local64_set(&hwc->prev_count, (u64)(-left));
 217
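	/*
	 * Program the counter with -left (truncated to the counter width) so
	 * that it overflows, and raises the PMU interrupt, after "left" more
	 * events have been counted.
	 */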
 218	nds32_pmu->write_counter(event, (u64)(-left) & nds32_pmu->max_period);
 219
 220	perf_event_update_userpage(event);
 221
 222	return ret;
 223}
 224
 225static irqreturn_t nds32_pmu_handle_irq(int irq_num, void *dev)
 226{
 227	u32 pfm;
 228	struct perf_sample_data data;
 229	struct nds32_pmu *cpu_pmu = (struct nds32_pmu *)dev;
 230	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
 231	struct pt_regs *regs;
 232	int idx;
 233	/*
 234	 * Get and reset the IRQ flags
 235	 */
 236	pfm = nds32_pfm_getreset_flags();
 237
 238	/*
 239	 * Did an overflow occur?
 240	 */
 241	if (!nds32_pfm_has_overflowed(pfm))
 242		return IRQ_NONE;
 243
 244	/*
 245	 * Handle the counter(s) overflow(s)
 246	 */
 247	regs = get_irq_regs();
 248
 249	nds32_pmu_stop(cpu_pmu);
 250	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 251		struct perf_event *event = cpuc->events[idx];
 252		struct hw_perf_event *hwc;
 253
 254		/* Ignore if we don't have an event. */
 255		if (!event)
 256			continue;
 257
 258		/*
 259		 * We have a single interrupt for all counters. Check that
 260		 * each counter has overflowed before we process it.
 261		 */
 262		if (!nds32_pfm_counter_has_overflowed(pfm, idx))
 263			continue;
 264
 265		hwc = &event->hw;
 266		nds32_pmu_event_update(event);
 267		perf_sample_data_init(&data, 0, hwc->last_period);
 268		if (!nds32_pmu_event_set_period(event))
 269			continue;
 270
 271		if (perf_event_overflow(event, &data, regs))
 272			cpu_pmu->disable(event);
 273	}
 274	nds32_pmu_start(cpu_pmu);
 275	/*
 276	 * Handle the pending perf events.
 277	 *
 278	 * Note: this call *must* be run with interrupts disabled. For
 279	 * platforms that can have the PMU interrupts raised as an NMI, this
 280	 * will not work.
 281	 */
 282	irq_work_run();
 283
 284	return IRQ_HANDLED;
 285}
 286
 287static inline int nds32_pfm_counter_valid(struct nds32_pmu *cpu_pmu, int idx)
 288{
 289	return ((idx >= 0) && (idx < cpu_pmu->num_events));
 290}
 291
 292static inline int nds32_pfm_disable_counter(int idx)
 293{
 294	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
 295	u32 mask = 0;
 296
 297	mask = PFM_CTL_EN[idx];
 298	val &= ~mask;
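	/*
	 * The OVF bits are cleared by writing 1 (see nds32_pfm_getreset_flags()),
	 * so mask them out here to avoid discarding pending overflow status.
	 */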
 299	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
 300	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
 301	return idx;
 302}
 303
 304/*
 305 * Add an event filter to a given event.
 306 */
 307static int nds32_pmu_set_event_filter(struct hw_perf_event *event,
 308				      struct perf_event_attr *attr)
 309{
 310	unsigned long config_base = 0;
 311	int idx = event->idx;
 312	unsigned long no_kernel_tracing = 0;
 313	unsigned long no_user_tracing = 0;
 314	/* If index is -1, do not do anything */
 315	if (idx == -1)
 316		return 0;
 317
 318	no_kernel_tracing = PFM_CTL_KS[idx];
 319	no_user_tracing = PFM_CTL_KU[idx];
 320	/*
 321	 * Default: enable both kernel and user mode tracing.
 322	 */
 323	if (attr->exclude_user)
 324		config_base |= no_user_tracing;
 325
 326	if (attr->exclude_kernel)
 327		config_base |= no_kernel_tracing;
 328
 329	/*
 330	 * Install the filter into config_base as this is used to
 331	 * construct the event type.
 332	 */
 333	event->config_base |= config_base;
 334	return 0;
 335}
 336
 337static inline void nds32_pfm_write_evtsel(int idx, u32 evnum)
 338{
 339	u32 offset = 0;
 340	u32 ori_val = __nds32__mfsr(NDS32_SR_PFM_CTL);
 341	u32 ev_mask = 0;
 342	u32 no_kernel_mask = 0;
 343	u32 no_user_mask = 0;
 344	u32 val;
 345
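	/*
	 * evnum is hwc->config_base from the caller, so it carries both the
	 * event selector (low bits) and the per-counter KS/KU filter bits
	 * installed by nds32_pmu_set_event_filter().
	 */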
 346	offset = PFM_CTL_OFFSEL[idx];
 347	/* Clear previous mode selection, and write new one */
 348	no_kernel_mask = PFM_CTL_KS[idx];
 349	no_user_mask = PFM_CTL_KU[idx];
 350	ori_val &= ~no_kernel_mask;
 351	ori_val &= ~no_user_mask;
 352	if (evnum & no_kernel_mask)
 353		ori_val |= no_kernel_mask;
 354
 355	if (evnum & no_user_mask)
 356		ori_val |= no_user_mask;
 357
 358	/* Clear previous event selection */
 359	ev_mask = PFM_CTL_SEL[idx];
 360	ori_val &= ~ev_mask;
 361	evnum &= SOFTWARE_EVENT_MASK;
 362
 363	/* undo the linear mapping */
 364	evnum = get_converted_evet_hw_num(evnum);
 365	val = ori_val | (evnum << offset);
 366	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
 367	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
 368}
 369
 370static inline int nds32_pfm_enable_counter(int idx)
 371{
 372	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
 373	u32 mask = 0;
 374
 375	mask = PFM_CTL_EN[idx];
 376	val |= mask;
 377	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
 378	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
 379	return idx;
 380}
 381
 382static inline int nds32_pfm_enable_intens(int idx)
 383{
 384	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
 385	u32 mask = 0;
 386
 387	mask = PFM_CTL_IE[idx];
 388	val |= mask;
 389	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
 390	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
 391	return idx;
 392}
 393
 394static inline int nds32_pfm_disable_intens(int idx)
 395{
 396	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
 397	u32 mask = 0;
 398
 399	mask = PFM_CTL_IE[idx];
 400	val &= ~mask;
 401	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
 402	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
 403	return idx;
 404}
 405
 406static int event_requires_mode_exclusion(struct perf_event_attr *attr)
 407{
 408	/* NDS32 does not support other modes */
 409	return attr->exclude_user || attr->exclude_kernel;
 410}
 411
 412static void nds32_pmu_enable_event(struct perf_event *event)
 413{
 414	unsigned long flags;
 415	unsigned int evnum = 0;
 416	struct hw_perf_event *hwc = &event->hw;
 417	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
 418	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 419	int idx = hwc->idx;
 420
 421	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
 422		pr_err("CPU enabling wrong pfm counter IRQ enable\n");
 423		return;
 424	}
 425
 426	/*
 427	 * Enable counter and interrupt, and set the counter to count
 428	 * the event that we're interested in.
 429	 */
 430	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 431
 432	/*
 433	 * Disable counter
 434	 */
 435	nds32_pfm_disable_counter(idx);
 436
 437	/*
 438	 * Check whether we need to exclude the counter from certain modes.
 439	 */
 440	if ((!cpu_pmu->set_event_filter ||
 441	     cpu_pmu->set_event_filter(hwc, &event->attr)) &&
 442	     event_requires_mode_exclusion(&event->attr)) {
 443		pr_notice
 444		("NDS32 performance counters do not support mode exclusion\n");
 445		hwc->config_base = 0;
 446	}
 447	/* Write event */
 448	evnum = hwc->config_base;
 449	nds32_pfm_write_evtsel(idx, evnum);
 450
 451	/*
 452	 * Enable interrupt for this counter
 453	 */
 454	nds32_pfm_enable_intens(idx);
 455
 456	/*
 457	 * Enable counter
 458	 */
 459	nds32_pfm_enable_counter(idx);
 460
 461	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 462}
 463
 464static void nds32_pmu_disable_event(struct perf_event *event)
 465{
 466	unsigned long flags;
 467	struct hw_perf_event *hwc = &event->hw;
 468	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
 469	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 470	int idx = hwc->idx;
 471
 472	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
 473		pr_err("CPU disabling wrong pfm counter IRQ enable %d\n", idx);
 474		return;
 475	}
 476
 477	/*
 478	 * Disable counter and interrupt
 479	 */
 480	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 481
 482	/*
 483	 * Disable counter
 484	 */
 485	nds32_pfm_disable_counter(idx);
 486
 487	/*
 488	 * Disable interrupt for this counter
 489	 */
 490	nds32_pfm_disable_intens(idx);
 491
 492	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 493}
 494
 495static inline u32 nds32_pmu_read_counter(struct perf_event *event)
 496{
 497	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
 498	struct hw_perf_event *hwc = &event->hw;
 499	int idx = hwc->idx;
 500	u32 count = 0;
 501
 502	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
 503		pr_err("CPU reading wrong counter %d\n", idx);
 504	} else {
 505		switch (idx) {
 506		case PFMC0:
 507			count = __nds32__mfsr(NDS32_SR_PFMC0);
 508			break;
 509		case PFMC1:
 510			count = __nds32__mfsr(NDS32_SR_PFMC1);
 511			break;
 512		case PFMC2:
 513			count = __nds32__mfsr(NDS32_SR_PFMC2);
 514			break;
 515		default:
 516			pr_err
 517			    ("%s: CPU has no performance counters %d\n",
 518			     __func__, idx);
 519		}
 520	}
 521	return count;
 522}
 523
 524static inline void nds32_pmu_write_counter(struct perf_event *event, u32 value)
 525{
 526	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
 527	struct hw_perf_event *hwc = &event->hw;
 528	int idx = hwc->idx;
 529
 530	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
 531		pr_err("CPU writing wrong counter %d\n", idx);
 532	} else {
 533		switch (idx) {
 534		case PFMC0:
 535			__nds32__mtsr_isb(value, NDS32_SR_PFMC0);
 536			break;
 537		case PFMC1:
 538			__nds32__mtsr_isb(value, NDS32_SR_PFMC1);
 539			break;
 540		case PFMC2:
 541			__nds32__mtsr_isb(value, NDS32_SR_PFMC2);
 542			break;
 543		default:
 544			pr_err
 545			    ("%s: CPU has no performance counters %d\n",
 546			     __func__, idx);
 547		}
 548	}
 549}
 550
 551static int nds32_pmu_get_event_idx(struct pmu_hw_events *cpuc,
 552				   struct perf_event *event)
 553{
 554	int idx;
 555	struct hw_perf_event *hwc = &event->hw;
 556	/*
 557	 * The current implementation maps cycles, instruction count and
 558	 * cache-miss events to specific counters.
 559	 * However, more than one of the 3 counters is able to count these
 560	 * events.
 561	 *
 562	 * SOFTWARE_EVENT_MASK is the mask for extracting the event number.
 563	 * This is defined by Jia-Rung; you can change the policies.
 564	 * However, do not exceed 8 bits. This is hardware specific.
 565	 * The last number is SPAV3_2_SEL_LAST.
 566	 */
 567	unsigned long evtype = hwc->config_base & SOFTWARE_EVENT_MASK;
 568
 569	idx = get_converted_event_idx(evtype);
 570	/*
 571	 * Try to get the counter for the corresponding event
 572	 */
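	/*
	 * Cycle counting may fall back to counters 0 and 1, and instruction
	 * counting may fall back to counter 1 or the cycle counter; any other
	 * event must use its dedicated counter.
	 */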
 573	if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) {
 574		if (!test_and_set_bit(idx, cpuc->used_mask))
 575			return idx;
 576		if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask))
 577			return NDS32_IDX_COUNTER0;
 578		if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
 579			return NDS32_IDX_COUNTER1;
 580	} else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) {
 581		if (!test_and_set_bit(idx, cpuc->used_mask))
 582			return idx;
 583		else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
 584			return NDS32_IDX_COUNTER1;
 585		else if (!test_and_set_bit
 586			 (NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask))
 587			return NDS32_IDX_CYCLE_COUNTER;
 588	} else {
 589		if (!test_and_set_bit(idx, cpuc->used_mask))
 590			return idx;
 591	}
 592	return -EAGAIN;
 593}
 594
 595static void nds32_pmu_start(struct nds32_pmu *cpu_pmu)
 596{
 597	unsigned long flags;
 598	unsigned int val;
 599	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 600
 601	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 602
 603	/* Enable all counters; the NDS32 PFM has 3 counters */
 604	val = __nds32__mfsr(NDS32_SR_PFM_CTL);
 605	val |= (PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
 606	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
 607	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
 608
 609	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 610}
 611
 612static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu)
 613{
 614	unsigned long flags;
 615	unsigned int val;
 616	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 617
 618	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 619
 620	/* Disable all counters; the NDS32 PFM has 3 counters */
 621	val = __nds32__mfsr(NDS32_SR_PFM_CTL);
 622	val &= ~(PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
 623	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
 624	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
 625
 626	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 627}
 628
 629static void nds32_pmu_reset(void *info)
 630{
 631	u32 val = 0;
 632
 633	val |= (PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
 634	__nds32__mtsr(val, NDS32_SR_PFM_CTL);
 635	__nds32__mtsr(0, NDS32_SR_PFM_CTL);
 636	__nds32__mtsr(0, NDS32_SR_PFMC0);
 637	__nds32__mtsr(0, NDS32_SR_PFMC1);
 638	__nds32__mtsr(0, NDS32_SR_PFMC2);
 639}
 640
 641static void nds32_pmu_init(struct nds32_pmu *cpu_pmu)
 642{
 643	cpu_pmu->handle_irq = nds32_pmu_handle_irq;
 644	cpu_pmu->enable = nds32_pmu_enable_event;
 645	cpu_pmu->disable = nds32_pmu_disable_event;
 646	cpu_pmu->read_counter = nds32_pmu_read_counter;
 647	cpu_pmu->write_counter = nds32_pmu_write_counter;
 648	cpu_pmu->get_event_idx = nds32_pmu_get_event_idx;
 649	cpu_pmu->start = nds32_pmu_start;
 650	cpu_pmu->stop = nds32_pmu_stop;
 651	cpu_pmu->reset = nds32_pmu_reset;
 652	cpu_pmu->max_period = 0xFFFFFFFF;	/* Maximum counts */
 653}
 654
 655static u32 nds32_read_num_pfm_events(void)
 656{
 657	/* The NDS32 SPAv3 PMU supports 3 counters */
 658	return 3;
 659}
 660
 661static int device_pmu_init(struct nds32_pmu *cpu_pmu)
 662{
 663	nds32_pmu_init(cpu_pmu);
 664	/*
 665	 * This name should be a device-specific name, whatever you like :)
 666	 * I think "PMU" would be a good generic name.
 667	 */
 668	cpu_pmu->name = "nds32v3-pmu";
 669	cpu_pmu->map_event = nds32_spav3_map_event;
 670	cpu_pmu->num_events = nds32_read_num_pfm_events();
 671	cpu_pmu->set_event_filter = nds32_pmu_set_event_filter;
 672	return 0;
 673}
 674
 675/*
 676 * CPU PMU identification and probing.
 677 */
 678static int probe_current_pmu(struct nds32_pmu *pmu)
 679{
 680	int ret;
 681
 682	get_cpu();
 683	ret = -ENODEV;
 684	/*
 685	 * If there are various CPU types, each with its own PMU,
 686	 * initialize the PMU with
 687	 * the corresponding one.
 688	 */
 689	device_pmu_init(pmu);
 690	put_cpu();
 691	return ret;
 692}
 693
 694static void nds32_pmu_enable(struct pmu *pmu)
 695{
 696	struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
 697	struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
 698	int enabled = bitmap_weight(hw_events->used_mask,
 699				    nds32_pmu->num_events);
 700
 701	if (enabled)
 702		nds32_pmu->start(nds32_pmu);
 703}
 704
 705static void nds32_pmu_disable(struct pmu *pmu)
 706{
 707	struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
 708
 709	nds32_pmu->stop(nds32_pmu);
 710}
 711
 712static void nds32_pmu_release_hardware(struct nds32_pmu *nds32_pmu)
 713{
 714	nds32_pmu->free_irq(nds32_pmu);
 715	pm_runtime_put_sync(&nds32_pmu->plat_device->dev);
 716}
 717
 718static irqreturn_t nds32_pmu_dispatch_irq(int irq, void *dev)
 719{
 720	struct nds32_pmu *nds32_pmu = (struct nds32_pmu *)dev;
 721	int ret;
 722	u64 start_clock, finish_clock;
 723
 724	start_clock = local_clock();
 725	ret = nds32_pmu->handle_irq(irq, dev);
 726	finish_clock = local_clock();
 727
 728	perf_sample_event_took(finish_clock - start_clock);
 729	return ret;
 730}
 731
 732static int nds32_pmu_reserve_hardware(struct nds32_pmu *nds32_pmu)
 733{
 734	int err;
 735	struct platform_device *pmu_device = nds32_pmu->plat_device;
 736
 737	if (!pmu_device)
 738		return -ENODEV;
 739
 740	pm_runtime_get_sync(&pmu_device->dev);
 741	err = nds32_pmu->request_irq(nds32_pmu, nds32_pmu_dispatch_irq);
 742	if (err) {
 743		nds32_pmu_release_hardware(nds32_pmu);
 744		return err;
 745	}
 746
 747	return 0;
 748}
 749
 750static int
 751validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
 752	       struct perf_event *event)
 753{
 754	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 755
 756	if (is_software_event(event))
 757		return 1;
 758
 759	if (event->pmu != pmu)
 760		return 0;
 761
 762	if (event->state < PERF_EVENT_STATE_OFF)
 763		return 1;
 764
 765	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
 766		return 1;
 767
 768	return nds32_pmu->get_event_idx(hw_events, event) >= 0;
 769}
 770
 771static int validate_group(struct perf_event *event)
 772{
 773	struct perf_event *sibling, *leader = event->group_leader;
 774	struct pmu_hw_events fake_pmu;
 775
 776	/*
 777	 * Initialize the fake PMU. We only need to populate the
 778	 * used_mask for the purposes of validation.
 779	 */
 780	memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
 781
 782	if (!validate_event(event->pmu, &fake_pmu, leader))
 783		return -EINVAL;
 784
 785	for_each_sibling_event(sibling, leader) {
 786		if (!validate_event(event->pmu, &fake_pmu, sibling))
 787			return -EINVAL;
 788	}
 789
 790	if (!validate_event(event->pmu, &fake_pmu, event))
 791		return -EINVAL;
 792
 793	return 0;
 794}
 795
 796static int __hw_perf_event_init(struct perf_event *event)
 797{
 798	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 799	struct hw_perf_event *hwc = &event->hw;
 800	int mapping;
 801
 802	mapping = nds32_pmu->map_event(event);
 803
 804	if (mapping < 0) {
 805		pr_debug("event %x:%llx not supported\n", event->attr.type,
 806			 event->attr.config);
 807		return mapping;
 808	}
 809
 810	/*
 811	 * We don't assign an index until we actually place the event onto
 812	 * hardware. Use -1 to signify that we haven't decided where to put it
 813	 * yet. For SMP systems, each core has its own PMU so we can't do any
 814	 * clever allocation or constraints checking at this point.
 815	 */
 816	hwc->idx = -1;
 817	hwc->config_base = 0;
 818	hwc->config = 0;
 819	hwc->event_base = 0;
 820
 821	/*
 822	 * Check whether we need to exclude the counter from certain modes.
 823	 */
 824	if ((!nds32_pmu->set_event_filter ||
 825	     nds32_pmu->set_event_filter(hwc, &event->attr)) &&
 826	    event_requires_mode_exclusion(&event->attr)) {
 827		pr_debug
 828			("NDS performance counters do not support mode exclusion\n");
 829		return -EOPNOTSUPP;
 830	}
 831
 832	/*
 833	 * Store the event encoding into the config_base field.
 834	 */
 835	hwc->config_base |= (unsigned long)mapping;
 836
 837	if (!hwc->sample_period) {
 838		/*
 839		 * For non-sampling runs, limit the sample_period to half
 840		 * of the counter width. That way, the new counter value
 841		 * is far less likely to overtake the previous one unless
 842		 * you have some serious IRQ latency issues.
 843		 */
 844		hwc->sample_period = nds32_pmu->max_period >> 1;
 845		hwc->last_period = hwc->sample_period;
 846		local64_set(&hwc->period_left, hwc->sample_period);
 847	}
 848
 849	if (event->group_leader != event) {
 850		if (validate_group(event) != 0)
 851			return -EINVAL;
 852	}
 853
 854	return 0;
 855}
 856
 857static int nds32_pmu_event_init(struct perf_event *event)
 858{
 859	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 860	int err = 0;
 861	atomic_t *active_events = &nds32_pmu->active_events;
 862
 863	/* does not support taken branch sampling */
 864	if (has_branch_stack(event))
 865		return -EOPNOTSUPP;
 866
 867	if (nds32_pmu->map_event(event) == -ENOENT)
 868		return -ENOENT;
 869
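	/*
	 * The PMU interrupt is reserved lazily: the first active event
	 * requests it via nds32_pmu_reserve_hardware(), and active_events
	 * counts how many events currently share the hardware.
	 */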
 870	if (!atomic_inc_not_zero(active_events)) {
 871		if (atomic_read(active_events) == 0) {
 872			/* Register irq handler */
 873			err = nds32_pmu_reserve_hardware(nds32_pmu);
 874		}
 875
 876		if (!err)
 877			atomic_inc(active_events);
 878	}
 879
 880	if (err)
 881		return err;
 882
 883	err = __hw_perf_event_init(event);
 884
 885	return err;
 886}
 887
 888static void nds32_start(struct perf_event *event, int flags)
 889{
 890	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 891	struct hw_perf_event *hwc = &event->hw;
 892	/*
 893	 * NDS pmu always has to reprogram the period, so ignore
 894	 * PERF_EF_RELOAD, see the comment below.
 895	 */
 896	if (flags & PERF_EF_RELOAD)
 897		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
 898
 899	hwc->state = 0;
 900	/* Set the period for the event. */
 901	nds32_pmu_event_set_period(event);
 902
 903	nds32_pmu->enable(event);
 904}
 905
 906static int nds32_pmu_add(struct perf_event *event, int flags)
 907{
 908	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 909	struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
 910	struct hw_perf_event *hwc = &event->hw;
 911	int idx;
 912	int err = 0;
 913
 914	perf_pmu_disable(event->pmu);
 915
 916	/* If we don't have a space for the counter then finish early. */
 917	idx = nds32_pmu->get_event_idx(hw_events, event);
 918	if (idx < 0) {
 919		err = idx;
 920		goto out;
 921	}
 922
 923	/*
 924	 * If there is an event in the counter we are going to use then make
 925	 * sure it is disabled.
 926	 */
 927	event->hw.idx = idx;
 928	nds32_pmu->disable(event);
 929	hw_events->events[idx] = event;
 930
 931	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
 932	if (flags & PERF_EF_START)
 933		nds32_start(event, PERF_EF_RELOAD);
 934
 935	/* Propagate our changes to the userspace mapping. */
 936	perf_event_update_userpage(event);
 937
 938out:
 939	perf_pmu_enable(event->pmu);
 940	return err;
 941}
 942
 943u64 nds32_pmu_event_update(struct perf_event *event)
 944{
 945	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 946	struct hw_perf_event *hwc = &event->hw;
 947	u64 delta, prev_raw_count, new_raw_count;
 948
 949again:
 950	prev_raw_count = local64_read(&hwc->prev_count);
 951	new_raw_count = nds32_pmu->read_counter(event);
 952
 953	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 954			    new_raw_count) != prev_raw_count) {
 955		goto again;
 956	}
 957	/*
 958	 * Whether the counter overflowed or not, "unsigned subtraction"
 959	 * will always yield the correct delta
 960	 */
 961	delta = (new_raw_count - prev_raw_count) & nds32_pmu->max_period;
 962
 963	local64_add(delta, &event->count);
 964	local64_sub(delta, &hwc->period_left);
 965
 966	return new_raw_count;
 967}
 968
 969static void nds32_stop(struct perf_event *event, int flags)
 970{
 971	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 972	struct hw_perf_event *hwc = &event->hw;
 973	/*
 974	 * NDS pmu always has to update the counter, so ignore
 975	 * PERF_EF_UPDATE, see comments in nds32_start().
 976	 */
 977	if (!(hwc->state & PERF_HES_STOPPED)) {
 978		nds32_pmu->disable(event);
 979		nds32_pmu_event_update(event);
 980		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
 981	}
 982}
 983
 984static void nds32_pmu_del(struct perf_event *event, int flags)
 985{
 986	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 987	struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
 988	struct hw_perf_event *hwc = &event->hw;
 989	int idx = hwc->idx;
 990
 991	nds32_stop(event, PERF_EF_UPDATE);
 992	hw_events->events[idx] = NULL;
 993	clear_bit(idx, hw_events->used_mask);
 994
 995	perf_event_update_userpage(event);
 996}
 997
 998static void nds32_pmu_read(struct perf_event *event)
 999{
1000	nds32_pmu_event_update(event);
1001}
1002
1003/* Please refer to SPAv3 for more hardware specific details */
1004PMU_FORMAT_ATTR(event, "config:0-63");
1005
1006static struct attribute *nds32_arch_formats_attr[] = {
1007	&format_attr_event.attr,
1008	NULL,
1009};
1010
1011static struct attribute_group nds32_pmu_format_group = {
1012	.name = "format",
1013	.attrs = nds32_arch_formats_attr,
1014};
1015
1016static ssize_t nds32_pmu_cpumask_show(struct device *dev,
1017				      struct device_attribute *attr,
1018				      char *buf)
1019{
1020	return 0;
1021}
1022
1023static DEVICE_ATTR(cpus, 0444, nds32_pmu_cpumask_show, NULL);
1024
1025static struct attribute *nds32_pmu_common_attrs[] = {
1026	&dev_attr_cpus.attr,
1027	NULL,
1028};
1029
1030static struct attribute_group nds32_pmu_common_group = {
1031	.attrs = nds32_pmu_common_attrs,
1032};
1033
1034static const struct attribute_group *nds32_pmu_attr_groups[] = {
1035	&nds32_pmu_format_group,
1036	&nds32_pmu_common_group,
1037	NULL,
1038};
1039
1040static void nds32_init(struct nds32_pmu *nds32_pmu)
1041{
1042	atomic_set(&nds32_pmu->active_events, 0);
1043
1044	nds32_pmu->pmu = (struct pmu) {
1045		.pmu_enable = nds32_pmu_enable,
1046		.pmu_disable = nds32_pmu_disable,
1047		.attr_groups = nds32_pmu_attr_groups,
1048		.event_init = nds32_pmu_event_init,
1049		.add = nds32_pmu_add,
1050		.del = nds32_pmu_del,
1051		.start = nds32_start,
1052		.stop = nds32_stop,
1053		.read = nds32_pmu_read,
1054	};
1055}
1056
1057int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type)
1058{
1059	nds32_init(nds32_pmu);
1060	pm_runtime_enable(&nds32_pmu->plat_device->dev);
1061	pr_info("enabled with %s PMU driver, %d counters available\n",
1062		nds32_pmu->name, nds32_pmu->num_events);
1063	return perf_pmu_register(&nds32_pmu->pmu, nds32_pmu->name, type);
1064}
1065
1066static struct pmu_hw_events *cpu_pmu_get_cpu_events(void)
1067{
1068	return this_cpu_ptr(&cpu_hw_events);
1069}
1070
1071static int cpu_pmu_request_irq(struct nds32_pmu *cpu_pmu, irq_handler_t handler)
1072{
1073	int err, irq, irqs;
1074	struct platform_device *pmu_device = cpu_pmu->plat_device;
1075
1076	if (!pmu_device)
1077		return -ENODEV;
1078
1079	irqs = min(pmu_device->num_resources, num_possible_cpus());
1080	if (irqs < 1) {
1081		pr_err("no irqs for PMUs defined\n");
1082		return -ENODEV;
1083	}
1084
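	/* Only the first PMU interrupt (resource 0) is requested */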
1085	irq = platform_get_irq(pmu_device, 0);
1086	err = request_irq(irq, handler, IRQF_NOBALANCING, "nds32-pfm",
1087			  cpu_pmu);
1088	if (err) {
1089		pr_err("unable to request IRQ%d for NDS PMU counters\n",
1090		       irq);
1091		return err;
1092	}
1093	return 0;
1094}
1095
1096static void cpu_pmu_free_irq(struct nds32_pmu *cpu_pmu)
1097{
1098	int irq;
1099	struct platform_device *pmu_device = cpu_pmu->plat_device;
1100
1101	irq = platform_get_irq(pmu_device, 0);
1102	if (irq >= 0)
1103		free_irq(irq, cpu_pmu);
1104}
1105
1106static void cpu_pmu_init(struct nds32_pmu *cpu_pmu)
1107{
1108	int cpu;
1109
1110	for_each_possible_cpu(cpu)
1111		raw_spin_lock_init(&per_cpu(cpu_hw_events, cpu).pmu_lock);
1112
1113	cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events;
1114	cpu_pmu->request_irq = cpu_pmu_request_irq;
1115	cpu_pmu->free_irq = cpu_pmu_free_irq;
1116
1117	/* Ensure the PMU has sane values out of reset. */
1118	if (cpu_pmu->reset)
1119		on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
1120}
1121
1122static const struct of_device_id cpu_pmu_of_device_ids[] = {
1123	{.compatible = "andestech,nds32v3-pmu",
1124	 .data = device_pmu_init},
1125	{},
1126};
1127
1128static int cpu_pmu_device_probe(struct platform_device *pdev)
1129{
1130	const struct of_device_id *of_id;
1131	int (*init_fn)(struct nds32_pmu *nds32_pmu);
1132	struct device_node *node = pdev->dev.of_node;
1133	struct nds32_pmu *pmu;
1134	int ret = -ENODEV;
1135
1136	if (cpu_pmu) {
1137		pr_notice("[perf] attempt to register multiple PMU devices!\n");
1138		return -ENOSPC;
1139	}
1140
1141	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
1142	if (!pmu)
1143		return -ENOMEM;
1144
1145	of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node);
1146	if (node && of_id) {
1147		init_fn = of_id->data;
1148		ret = init_fn(pmu);
1149	} else {
1150		ret = probe_current_pmu(pmu);
1151	}
1152
1153	if (ret) {
1154		pr_notice("[perf] failed to probe PMU!\n");
1155		goto out_free;
1156	}
1157
1158	cpu_pmu = pmu;
1159	cpu_pmu->plat_device = pdev;
1160	cpu_pmu_init(cpu_pmu);
1161	ret = nds32_pmu_register(cpu_pmu, PERF_TYPE_RAW);
1162
1163	if (!ret)
1164		return 0;
1165
1166out_free:
1167	pr_notice("[perf] failed to register PMU devices!\n");
1168	kfree(pmu);
1169	return ret;
1170}
1171
1172static struct platform_driver cpu_pmu_driver = {
1173	.driver = {
1174		   .name = "nds32-pfm",
1175		   .of_match_table = cpu_pmu_of_device_ids,
1176		   },
1177	.probe = cpu_pmu_device_probe,
1178	.id_table = cpu_pmu_plat_device_ids,
1179};
1180
1181static int __init register_pmu_driver(void)
1182{
1183	int err = 0;
1184
1185	err = platform_driver_register(&cpu_pmu_driver);
1186	if (err)
1187		pr_notice("[perf] PMU initialization failed\n");
1188	else
1189		pr_notice("[perf] PMU initialization done\n");
1190
1191	return err;
1192}
1193
1194device_initcall(register_pmu_driver);
1195
1196/*
1197 * References: arch/nds32/kernel/traps.c:__dump()
1198 * You will need to know the NDS ABI first.
1199 */
1200static int unwind_frame_kernel(struct stackframe *frame)
1201{
1202	int graph = 0;
1203#ifdef CONFIG_FRAME_POINTER
1204	/* 0x3 means misalignment */
1205	if (!kstack_end((void *)frame->fp) &&
1206	    !((unsigned long)frame->fp & 0x3) &&
1207	    ((unsigned long)frame->fp >= TASK_SIZE)) {
1208		/*
1209		 *	The array index is based on the ABI; the graph below
1210		 *	illustrates the reasons.
1211		 *	Function call procedure: "smw" and "lmw" will always
1212		 *	update SP and FP for you automatically.
1213		 *
1214		 *	Stack                                 Relative Address
1215		 *	|  |                                          0
1216		 *	----
1217		 *	|LP| <-- SP(before smw)  <-- FP(after smw)   -1
1218		 *	----
1219		 *	|FP|                                         -2
1220		 *	----
1221		 *	|  | <-- SP(after smw)                       -3
1222		 */
1223		frame->lp = ((unsigned long *)frame->fp)[-1];
1224		frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET];
1225		/* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */
1226		if (__kernel_text_address(frame->lp))
1227			frame->lp = ftrace_graph_ret_addr
1228						(NULL, &graph, frame->lp, NULL);
1229
1230		return 0;
1231	} else {
1232		return -EPERM;
1233	}
1234#else
1235	/*
1236	 * You can refer to arch/nds32/kernel/traps.c:__dump()
1237	 * Treat "sp" as "fp", but the "sp" is one frame ahead of "fp".
1238	 * And, the "sp" is not always correct.
1239	 *
1240	 *   Stack                                 Relative Address
1241	 *   |  |                                          0
1242	 *   ----
1243	 *   |LP| <-- SP(before smw)                      -1
1244	 *   ----
1245	 *   |  | <-- SP(after smw)                       -2
1246	 *   ----
1247	 */
1248	if (!kstack_end((void *)frame->sp)) {
1249		frame->lp = ((unsigned long *)frame->sp)[1];
1250		/* TODO: How do we deal with the case where the value in the
1251		 * first "sp" is not correct?
1252		 */
1253		if (__kernel_text_address(frame->lp))
1254			frame->lp = ftrace_graph_ret_addr
1255						(NULL, &graph, frame->lp, NULL);
1256
1257		frame->sp = ((unsigned long *)frame->sp) + 1;
1258
1259		return 0;
1260	} else {
1261		return -EPERM;
1262	}
1263#endif
1264}
1265
1266static void notrace
1267walk_stackframe(struct stackframe *frame,
1268		int (*fn_record)(struct stackframe *, void *),
1269		void *data)
1270{
1271	while (1) {
1272		int ret;
1273
1274		if (fn_record(frame, data))
1275			break;
1276
1277		ret = unwind_frame_kernel(frame);
1278		if (ret < 0)
1279			break;
1280	}
1281}
1282
1283/*
1284 * Gets called by walk_stackframe() for every stackframe. This will be called
1285 * whilst unwinding the stackframe and is like a subroutine return, so we
1286 * use the return address (lp).
1287 */
1288static int callchain_trace(struct stackframe *fr, void *data)
1289{
1290	struct perf_callchain_entry_ctx *entry = data;
1291
1292	perf_callchain_store(entry, fr->lp);
1293	return 0;
1294}
1295
1296/*
1297 * Get the return address for a single stackframe and return a pointer to the
1298 * next frame tail.
1299 */
1300static unsigned long
1301user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp)
1302{
1303	struct frame_tail buftail;
1304	unsigned long lp = 0;
1305	unsigned long *user_frame_tail =
1306		(unsigned long *)(fp - (unsigned long)sizeof(buftail));
1307
1308	/* Check accessibility of one struct frame_tail beyond */
1309	if (!access_ok(user_frame_tail, sizeof(buftail)))
1310		return 0;
1311	if (__copy_from_user_inatomic
1312		(&buftail, user_frame_tail, sizeof(buftail)))
1313		return 0;
1314
1315	/*
1316	 * Refer to unwind_frame_kernel() for a fuller illustration
1317	 */
1318	lp = buftail.stack_lp;  /* ((unsigned long *)fp)[-1] */
1319	fp = buftail.stack_fp;  /* ((unsigned long *)fp)[FP_OFFSET] */
1320	perf_callchain_store(entry, lp);
1321	return fp;
1322}
1323
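/*
 * Same as user_backtrace(), but for frames created by code built with
 * optimize-for-size, which use the smaller struct frame_tail_opt_size.
 */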
1324static unsigned long
1325user_backtrace_opt_size(struct perf_callchain_entry_ctx *entry,
1326			unsigned long fp)
1327{
1328	struct frame_tail_opt_size buftail;
1329	unsigned long lp = 0;
1330
1331	unsigned long *user_frame_tail =
1332		(unsigned long *)(fp - (unsigned long)sizeof(buftail));
1333
1334	/* Check accessibility of one struct frame_tail beyond */
1335	if (!access_ok(user_frame_tail, sizeof(buftail)))
1336		return 0;
1337	if (__copy_from_user_inatomic
1338		(&buftail, user_frame_tail, sizeof(buftail)))
1339		return 0;
1340
1341	/*
1342	 * Refer to unwind_frame_kernel() for a fuller illustration
1343	 */
1344	lp = buftail.stack_lp;  /* ((unsigned long *)fp)[-1] */
1345	fp = buftail.stack_fp;  /* ((unsigned long *)fp)[FP_OFFSET] */
1346
1347	perf_callchain_store(entry, lp);
1348	return fp;
1349}
1350
1351/*
1352 * This will be called when the target is in user mode.
1353 * This function will only be called when we use
1354 * "PERF_SAMPLE_CALLCHAIN" in
1355 * kernel/events/core.c:perf_prepare_sample().
1356 *
1357 * How to trigger perf_callchain_[user/kernel]:
1358 * $ perf record -e cpu-clock --call-graph fp ./program
1359 * $ perf report --call-graph
1360 */
1361unsigned long leaf_fp;
1362void
1363perf_callchain_user(struct perf_callchain_entry_ctx *entry,
1364		    struct pt_regs *regs)
1365{
1366	unsigned long fp = 0;
1367	unsigned long gp = 0;
1368	unsigned long lp = 0;
1369	unsigned long sp = 0;
1370	unsigned long *user_frame_tail;
1371
1372	leaf_fp = 0;
1373
1374	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1375		/* We don't support guest os callchain now */
1376		return;
1377	}
1378
1379	perf_callchain_store(entry, regs->ipc);
1380	fp = regs->fp;
1381	gp = regs->gp;
1382	lp = regs->lp;
1383	sp = regs->sp;
1384	if (entry->nr < PERF_MAX_STACK_DEPTH &&
1385	    (unsigned long)fp && !((unsigned long)fp & 0x7) && fp > sp) {
1386		user_frame_tail =
1387			(unsigned long *)(fp - (unsigned long)sizeof(fp));
1388
1389		if (!access_ok(user_frame_tail, sizeof(fp)))
1390			return;
1391
1392		if (__copy_from_user_inatomic
1393			(&leaf_fp, user_frame_tail, sizeof(fp)))
1394			return;
1395
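		/*
		 * Compare the word just below fp (leaf_fp) with the live lp to
		 * guess the frame layout: if they match, walk the frame records
		 * saved on the stack; otherwise handle the current frame as a
		 * leaf function in the else branch below.
		 */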
1396		if (leaf_fp == lp) {
1397			/*
1398			 * Maybe this is a non-leaf function
1399			 * compiled with optimize-for-size,
1400			 * or maybe this is a function
1401			 * compiled with optimize-for-size
1402			 */
1403			struct frame_tail buftail;
1404
1405			user_frame_tail =
1406				(unsigned long *)(fp -
1407					(unsigned long)sizeof(buftail));
1408
1409			if (!access_ok(user_frame_tail, sizeof(buftail)))
1410				return;
1411
1412			if (__copy_from_user_inatomic
1413				(&buftail, user_frame_tail, sizeof(buftail)))
1414				return;
1415
1416			if (buftail.stack_fp == gp) {
1417				/* non-leaf function with the
1418				 * optimize-for-size condition
1419				 */
1420				struct frame_tail_opt_size buftail_opt_size;
1421
1422				user_frame_tail =
1423					(unsigned long *)(fp - (unsigned long)
1424						sizeof(buftail_opt_size));
1425
1426				if (!access_ok(user_frame_tail,
1427					       sizeof(buftail_opt_size)))
1428					return;
1429
1430				if (__copy_from_user_inatomic
1431				   (&buftail_opt_size, user_frame_tail,
1432				   sizeof(buftail_opt_size)))
1433					return;
1434
1435				perf_callchain_store(entry, lp);
1436				fp = buftail_opt_size.stack_fp;
1437
1438				while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
1439				       (unsigned long)fp &&
1440						!((unsigned long)fp & 0x7) &&
1441						fp > sp) {
1442					sp = fp;
1443					fp = user_backtrace_opt_size(entry, fp);
1444				}
1445
1446			} else {
1447				/* this is a function
1448				 * without optimize-for-size
1449				 */
1450				fp = buftail.stack_fp;
1451				perf_callchain_store(entry, lp);
1452				while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
1453				       (unsigned long)fp &&
1454						!((unsigned long)fp & 0x7) &&
1455						fp > sp) {
1456					sp = fp;
1457					fp = user_backtrace(entry, fp);
1458				}
1459			}
1460		} else {
1461			/* this is a leaf function */
1462			fp = leaf_fp;
1463			perf_callchain_store(entry, lp);
1464
1465			/* previous function callchain */
1466			while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
1467			       (unsigned long)fp &&
1468				   !((unsigned long)fp & 0x7) && fp > sp) {
1469				sp = fp;
1470				fp = user_backtrace(entry, fp);
1471			}
1472		}
1473		return;
1474	}
1475}
1476
1477/* This will be called when the target is in kernel mode */
1478void
1479perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
1480		      struct pt_regs *regs)
1481{
1482	struct stackframe fr;
1483
1484	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1485		/* We don't support guest os callchain now */
1486		return;
1487	}
1488	fr.fp = regs->fp;
1489	fr.lp = regs->lp;
1490	fr.sp = regs->sp;
1491	walk_stackframe(&fr, callchain_trace, entry);
1492}
1493
1494unsigned long perf_instruction_pointer(struct pt_regs *regs)
1495{
1496	/* However, NDS32 does not support virtualization */
1497	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
1498		return perf_guest_cbs->get_guest_ip();
1499
1500	return instruction_pointer(regs);
1501}
1502
1503unsigned long perf_misc_flags(struct pt_regs *regs)
1504{
1505	int misc = 0;
1506
1507	/* However, NDS32 does not support virtualization */
1508	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1509		if (perf_guest_cbs->is_user_mode())
1510			misc |= PERF_RECORD_MISC_GUEST_USER;
1511		else
1512			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
1513	} else {
1514		if (user_mode(regs))
1515			misc |= PERF_RECORD_MISC_USER;
1516		else
1517			misc |= PERF_RECORD_MISC_KERNEL;
1518	}
1519
1520	return misc;
1521}