/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "vid.h"
#include "vce/vce_3_0_d.h"
#include "vce/vce_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "ivsrcid/ivsrcid_vislands30.h"

#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
#define GRBM_GFX_INDEX__VCE_ALL_PIPE		0x07

#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
#define mmGRBM_GFX_INDEX_DEFAULT 0xE0000000

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V3_0_FW_SIZE	(384 * 1024)
#define VCE_V3_0_STACK_SIZE	(64 * 1024)
#define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

#define FW_52_8_3	((52 << 24) | (8 << 16) | (3 << 8))

#define GET_VCE_INSTANCE(i)  ((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT \
					| GRBM_GFX_INDEX__VCE_ALL_PIPE)

static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);
static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state);

/**
 * vce_v3_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 v;

	mutex_lock(&adev->grbm_idx_mutex);
	if (adev->vce.harvest_config == 0 ||
		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

	if (ring->me == 0)
		v = RREG32(mmVCE_RB_RPTR);
	else if (ring->me == 1)
		v = RREG32(mmVCE_RB_RPTR2);
	else
		v = RREG32(mmVCE_RB_RPTR3);

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return v;
}

/**
 * vce_v3_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 v;

	mutex_lock(&adev->grbm_idx_mutex);
	if (adev->vce.harvest_config == 0 ||
		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

	if (ring->me == 0)
		v = RREG32(mmVCE_RB_WPTR);
	else if (ring->me == 1)
		v = RREG32(mmVCE_RB_WPTR2);
	else
		v = RREG32(mmVCE_RB_WPTR3);

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return v;
}

/**
 * vce_v3_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	mutex_lock(&adev->grbm_idx_mutex);
	if (adev->vce.harvest_config == 0 ||
		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

	if (ring->me == 0)
		WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
	else if (ring->me == 1)
		WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
	else
		WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);
}

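/**
 * vce_v3_0_override_vce_clock_gating - set or clear the CGTT override
 *
 * @adev: amdgpu_device pointer
 * @override: true to override (disable) VCE clock gating, false to release it
 */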
static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
}

static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v3_0_override_vce_clock_gating(adev, true);

	/*
	 * This function enables MGCG which is controlled by firmware.
	 * With the clocks in the gated state the core is still
	 * accessible but the firmware will throttle the clocks on the
	 * fly as necessary.
	 */
	if (!gated) {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data |= 0x37f;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	} else {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x10000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data &= ~0x3ff;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	}
	vce_v3_0_override_vce_clock_gating(adev, false);
}

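/**
 * vce_v3_0_firmware_loaded - wait for the VCE firmware to report ready
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS for the FW_LOADED bit, resetting the ECPU between
 * retries.  Returns 0 on success or -ETIMEDOUT if the firmware never
 * comes up.
 */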
static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status = RREG32(mmVCE_STATUS);

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
		mdelay(10);
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

/**
 * vce_v3_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Set up and start the VCE block
 */
static int vce_v3_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int idx, r;

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

		/* Program the instance 0 register space when both instances
		 * (or only instance 0) are present; program the instance 1
		 * register space only when instance 1 is the sole one available.
		 */
		if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) {
			ring = &adev->vce.ring[0];
			WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
			WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
			WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

			ring = &adev->vce.ring[1];
			WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
			WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
			WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

			ring = &adev->vce.ring[2];
			WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
			WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
			WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
		}

		vce_v3_0_mc_resume(adev, idx);
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);

		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(100);

		r = vce_v3_0_firmware_loaded(adev);

		/* clear BUSY flag */
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);

		if (r) {
			DRM_ERROR("VCE not responding, giving up!!!\n");
			mutex_unlock(&adev->grbm_idx_mutex);
			return r;
		}
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

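/**
 * vce_v3_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU clock, hold the ECPU in reset and clear VCE_STATUS
 * on every instance that has not been harvested.
 */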
static int vce_v3_0_stop(struct amdgpu_device *adev)
{
	int idx;

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);

		/* hold on ECPU */
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);

		/* clear VCE STATUS */
		WREG32(mmVCE_STATUS, 0);
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

#define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
#define VCE_HARVEST_FUSE_MACRO__SHIFT       27
#define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000

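/**
 * vce_v3_0_get_harvest_config - query which VCE instances are harvested
 *
 * @adev: amdgpu_device pointer
 *
 * Read the harvest fuses (or use a fixed configuration on Fiji/Stoney
 * and the Polaris family) and return the AMDGPU_VCE_HARVEST_* mask of
 * disabled instances.
 */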
static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
{
	u32 tmp;

	if ((adev->asic_type == CHIP_FIJI) ||
	    (adev->asic_type == CHIP_STONEY))
		return AMDGPU_VCE_HARVEST_VCE1;

	if (adev->flags & AMD_IS_APU)
		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
		       VCE_HARVEST_FUSE_MACRO__MASK) >>
			VCE_HARVEST_FUSE_MACRO__SHIFT;
	else
		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;

	switch (tmp) {
	case 1:
		return AMDGPU_VCE_HARVEST_VCE0;
	case 2:
		return AMDGPU_VCE_HARVEST_VCE1;
	case 3:
		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
	default:
		if ((adev->asic_type == CHIP_POLARIS10) ||
		    (adev->asic_type == CHIP_POLARIS11) ||
		    (adev->asic_type == CHIP_POLARIS12) ||
		    (adev->asic_type == CHIP_VEGAM))
			return AMDGPU_VCE_HARVEST_VCE1;

		return 0;
	}
}

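/**
 * vce_v3_0_early_init - early hardware-independent init
 *
 * @handle: amdgpu_device pointer
 *
 * Determine the harvest configuration, set the number of rings and
 * hook up the ring and IRQ callbacks.  Returns -ENOENT when both VCE
 * instances are harvested.
 */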
static int vce_v3_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);

	if ((adev->vce.harvest_config &
	     (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
	    (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
		return -ENOENT;

	adev->vce.num_rings = 3;

	vce_v3_0_set_ring_funcs(adev);
	vce_v3_0_set_irq_funcs(adev);

	return 0;
}

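/**
 * vce_v3_0_sw_init - software init
 *
 * @handle: amdgpu_device pointer
 *
 * Register the VCE trap interrupt source, request the firmware and
 * allocate the VCE BO, then initialize the rings (two rings only if
 * the firmware is older than 52.8.3).
 */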
static int vce_v3_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int r, i;

	/* VCE */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq);
	if (r)
		return r;

	r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
	if (r)
		return r;

	/* 52.8.3 required for 3 ring support */
	if (adev->vce.fw_version < FW_52_8_3)
		adev->vce.num_rings = 2;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	r = amdgpu_vce_entity_init(adev);

	return r;
}

static int vce_v3_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

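/**
 * vce_v3_0_hw_init - start the hardware and test the rings
 *
 * @handle: amdgpu_device pointer
 *
 * Override clock gating, request the VCE clocks and run a ring test
 * on every enabled ring.
 */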
static int vce_v3_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	vce_v3_0_override_vce_clock_gating(adev, true);

	amdgpu_asic_set_vce_clocks(adev, 10000, 10000);

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
		if (r)
			return r;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v3_0_hw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_wait_for_idle(handle);
	if (r)
		return r;

	vce_v3_0_stop(adev);
	return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
}

static int vce_v3_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v3_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	return vce_v3_0_hw_init(adev);
}

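/**
 * vce_v3_0_mc_resume - program the memory controller interface
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance index
 *
 * Program the LMI and VCPU cache registers with the location of the
 * firmware, stack and data segments for the given instance.
 */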
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
	uint32_t offset, size;

	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
	WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);

	WREG32(mmVCE_LMI_CTRL, 0x00398000);
	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
	WREG32(mmVCE_LMI_VM_CTRL, 0);
	WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);

	if (adev->asic_type >= CHIP_STONEY) {
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
	} else
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V3_0_FW_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

	if (idx == 0) {
		offset += size;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	} else {
		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	}

	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
	WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}

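/**
 * vce_v3_0_is_idle - check whether the VCE block is idle
 *
 * @handle: amdgpu_device pointer
 *
 * Check the SRBM_STATUS2 busy bits for every instance that has not
 * been harvested.
 */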
static bool vce_v3_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v3_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v3_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v3_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX selects which VCE instance's
	 * registers are accessed (0 selects the 1st instance, 1 the 2nd).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3 to 6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v3_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v3_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_suspend(adev);
}

static int vce_v3_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_resume(adev);
}

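/**
 * vce_v3_0_set_interrupt_state - enable or disable the VCE trap interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type
 * @state: requested interrupt state
 */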
static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

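/**
 * vce_v3_0_process_interrupt - handle a VCE trap interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry
 *
 * Acknowledge the trap and process fences on the ring that raised it.
 */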
static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

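/**
 * vce_v3_0_set_clockgating_state - enable or disable VCE clock gating
 *
 * @handle: amdgpu_device pointer
 * @state: requested clockgating state
 *
 * Program the software clock gating registers on every instance that
 * has not been harvested.  Only takes effect when MGCG is supported.
 */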
static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);
	int i;

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i));

		if (!enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_CLOCK_GATING_A, data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(mmVCE_UENC_CLOCK_GATING);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_UENC_CLOCK_GATING, data);
		}

		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v3_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks.
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret = 0;

	if (state == AMD_PG_STATE_GATE) {
		ret = vce_v3_0_stop(adev);
		if (ret)
			goto out;
	} else {
		ret = vce_v3_0_start(adev);
		if (ret)
			goto out;
	}

out:
	return ret;
}

static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	mutex_lock(&adev->pm.mutex);

	if (adev->flags & AMD_IS_APU)
		data = RREG32_SMC(ixCURRENT_PG_STATUS_APU);
	else
		data = RREG32_SMC(ixCURRENT_PG_STATUS);

	if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
		DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
		goto out;
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);

	/* AMD_CG_SUPPORT_VCE_MGCG */
	data = RREG32(mmVCE_CLOCK_GATING_A);
	if (data & (0x04 << 4))
		*flags |= AMD_CG_SUPPORT_VCE_MGCG;

out:
	mutex_unlock(&adev->pm.mutex);
}

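/**
 * vce_v3_0_ring_emit_ib - emit an indirect buffer in VM mode
 *
 * @ring: amdgpu_ring pointer
 * @job: job to retrieve the VMID from
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Write the VCE_CMD_IB_VM command with the IB address and size to the ring.
 */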
static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
				  struct amdgpu_job *job,
				  struct amdgpu_ib *ib,
				  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

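/**
 * vce_v3_0_emit_vm_flush - flush the VM TLB for a given VMID
 *
 * @ring: amdgpu_ring pointer
 * @vmid: VMID to flush
 * @pd_addr: page directory address
 *
 * Update the page table base and flush the TLB through VCE ring commands.
 */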
static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, pd_addr >> 12);

	amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, VCE_CMD_END);
}

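/**
 * vce_v3_0_emit_pipeline_sync - wait for the last synchronized fence
 *
 * @ring: amdgpu_ring pointer
 *
 * Emit a VCE_CMD_WAIT_GE command so the ring stalls until the fence
 * memory reaches the last synchronized sequence number.
 */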
static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
}

static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
	.name = "vce_v3_0",
	.early_init = vce_v3_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v3_0_sw_init,
	.sw_fini = vce_v3_0_sw_fini,
	.hw_init = vce_v3_0_hw_init,
	.hw_fini = vce_v3_0_hw_fini,
	.suspend = vce_v3_0_suspend,
	.resume = vce_v3_0_resume,
	.is_idle = vce_v3_0_is_idle,
	.wait_for_idle = vce_v3_0_wait_for_idle,
	.check_soft_reset = vce_v3_0_check_soft_reset,
	.pre_soft_reset = vce_v3_0_pre_soft_reset,
	.soft_reset = vce_v3_0_soft_reset,
	.post_soft_reset = vce_v3_0_post_soft_reset,
	.set_clockgating_state = vce_v3_0_set_clockgating_state,
	.set_powergating_state = vce_v3_0_set_powergating_state,
	.get_clockgating_state = vce_v3_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs,
	.emit_frame_size =
		4 + /* vce_v3_0_emit_pipeline_sync */
		6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
	.emit_ib = amdgpu_vce_ring_emit_ib,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		6 + /* vce_v3_0_emit_vm_flush */
		4 + /* vce_v3_0_emit_pipeline_sync */
		6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
	.emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
	.emit_ib = vce_v3_0_ring_emit_ib,
	.emit_vm_flush = vce_v3_0_emit_vm_flush,
	.emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

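/**
 * vce_v3_0_set_ring_funcs - select physical or VM ring callbacks
 *
 * @adev: amdgpu_device pointer
 *
 * Use the VM variants on Stoney and newer ASICs, otherwise the
 * physical-mode variants.
 */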
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	if (adev->asic_type >= CHIP_STONEY) {
		for (i = 0; i < adev->vce.num_rings; i++) {
			adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
			adev->vce.ring[i].me = i;
		}
		DRM_INFO("VCE enabled in VM mode\n");
	} else {
		for (i = 0; i < adev->vce.num_rings; i++) {
			adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
			adev->vce.ring[i].me = i;
		}
		DRM_INFO("VCE enabled in physical mode\n");
	}
}

static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
	.set = vce_v3_0_set_interrupt_state,
	.process = vce_v3_0_process_interrupt,
};

static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v3_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 1,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_4_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 4,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};