1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * The above copyright notice and this permission notice (including the
22 * next paragraph) shall be included in all copies or substantial portions
23 * of the Software.
24 *
25 */
26
27#include <linux/firmware.h>
28#include <drm/drm_drv.h>
29
30#include "amdgpu.h"
31#include "amdgpu_vce.h"
32#include "soc15.h"
33#include "soc15d.h"
34#include "soc15_common.h"
35#include "mmsch_v1_0.h"
36
37#include "vce/vce_4_0_offset.h"
38#include "vce/vce_4_0_default.h"
39#include "vce/vce_4_0_sh_mask.h"
40#include "mmhub/mmhub_1_0_offset.h"
41#include "mmhub/mmhub_1_0_sh_mask.h"
42
43#include "ivsrcid/vce/irqsrcs_vce_4_0.h"
44
45#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
46
47#define VCE_V4_0_FW_SIZE (384 * 1024)
48#define VCE_V4_0_STACK_SIZE (64 * 1024)
49#define VCE_V4_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
50
51static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
52static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
53static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
54
55/**
56 * vce_v4_0_ring_get_rptr - get read pointer
57 *
58 * @ring: amdgpu_ring pointer
59 *
60 * Returns the current hardware read pointer
61 */
62static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
63{
64 struct amdgpu_device *adev = ring->adev;
65
66 if (ring->me == 0)
67 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
68 else if (ring->me == 1)
69 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
70 else
71 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
72}
73
74/**
75 * vce_v4_0_ring_get_wptr - get write pointer
76 *
77 * @ring: amdgpu_ring pointer
78 *
79 * Returns the current hardware write pointer
80 */
81static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
82{
83 struct amdgpu_device *adev = ring->adev;
84
85 if (ring->use_doorbell)
86 return *ring->wptr_cpu_addr;
87
88 if (ring->me == 0)
89 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
90 else if (ring->me == 1)
91 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
92 else
93 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
94}
95
96/**
97 * vce_v4_0_ring_set_wptr - set write pointer
98 *
99 * @ring: amdgpu_ring pointer
100 *
101 * Commits the write pointer to the hardware
102 */
103static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
104{
105 struct amdgpu_device *adev = ring->adev;
106
107 if (ring->use_doorbell) {
108 /* XXX check if swapping is necessary on BE */
109 *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
110 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
111 return;
112 }
113
114 if (ring->me == 0)
115 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
116 lower_32_bits(ring->wptr));
117 else if (ring->me == 1)
118 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
119 lower_32_bits(ring->wptr));
120 else
121 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
122 lower_32_bits(ring->wptr));
123}
124
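/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to boot
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS for the FW_LOADED bit, resetting the ECPU between
 * retry rounds.  Returns 0 on success or -ETIMEDOUT if the firmware
 * never reports itself as loaded.
 */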
125static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
126{
127 int i, j;
128
129 for (i = 0; i < 10; ++i) {
130 for (j = 0; j < 100; ++j) {
131 uint32_t status =
132 RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
133
134 if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
135 return 0;
136 mdelay(10);
137 }
138
139 DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
140 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
141 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
142 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
143 mdelay(10);
144 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
145 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
146 mdelay(10);
147
148 }
149
150 return -ETIMEDOUT;
151}
152
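/**
 * vce_v4_0_mmsch_start - hand the init table over to the MM scheduler
 *
 * @adev: amdgpu_device pointer
 * @table: descriptor table previously filled by vce_v4_0_sriov_start()
 *
 * Program the MMSCH VF context registers with the table address, VMID
 * and size, kick off initialization via the mailbox and poll for the
 * response.  Returns 0 on success or -EBUSY on timeout.
 */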
153static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
154 struct amdgpu_mm_table *table)
155{
156 uint32_t data = 0, loop;
157 uint64_t addr = table->gpu_addr;
158 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
159 uint32_t size;
160
161 size = header->header_size + header->vce_table_size + header->uvd_table_size;
162
163 /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
164 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
165 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
166
167 /* 2, update vmid of descriptor */
168 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
169 data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
170 data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
171 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
172
173 /* 3, notify mmsch about the size of this descriptor */
174 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
175
176 /* 4, set resp to zero */
177 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
178
179 WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
180 *adev->vce.ring[0].wptr_cpu_addr = 0;
181 adev->vce.ring[0].wptr = 0;
182 adev->vce.ring[0].wptr_old = 0;
183
184 /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
185 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
186
187 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
188 loop = 1000;
189 while ((data & 0x10000002) != 0x10000002) {
190 udelay(10);
191 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
192 loop--;
193 if (!loop)
194 break;
195 }
196
197 if (!loop) {
198 dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
199 return -EBUSY;
200 }
201
202 return 0;
203}
204
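/**
 * vce_v4_0_sriov_start - start VCE under SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Build the MMSCH v1.0 init table (ring setup, MC resume programming
 * and the VCPU start sequence) and submit it through
 * vce_v4_0_mmsch_start() instead of writing the registers directly.
 */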
205static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
206{
207 struct amdgpu_ring *ring;
208 uint32_t offset, size;
209 uint32_t table_size = 0;
210 struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
211 struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
212 struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
213 struct mmsch_v1_0_cmd_end end = { { 0 } };
214 uint32_t *init_table = adev->virt.mm_table.cpu_addr;
215 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
216
217 direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
218 direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
219 direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
220 end.cmd_header.command_type = MMSCH_COMMAND__END;
221
222 if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
223 header->version = MMSCH_VERSION;
224 header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
225
226 if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
227 header->vce_table_offset = header->header_size;
228 else
229 header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
230
231 init_table += header->vce_table_offset;
232
233 ring = &adev->vce.ring[0];
234 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
235 lower_32_bits(ring->gpu_addr));
236 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
237 upper_32_bits(ring->gpu_addr));
238 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
239 ring->ring_size / 4);
240
241 /* BEGIN OF MC_RESUME */
242 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
243 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
244 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
245 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
246 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
247
248 offset = AMDGPU_VCE_FIRMWARE_OFFSET;
249 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
250 uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
251 uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
252 uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;
253
254 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
255 mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
256 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
257 mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
258 (tmr_mc_addr >> 40) & 0xff);
259 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
260 } else {
261 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
262 mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
263 adev->vce.gpu_addr >> 8);
264 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
265 mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
266 (adev->vce.gpu_addr >> 40) & 0xff);
267 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
268 offset & ~0x0f000000);
269
270 }
271 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
272 mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
273 adev->vce.gpu_addr >> 8);
274 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
275 mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
276 (adev->vce.gpu_addr >> 40) & 0xff);
277 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
278 mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
279 adev->vce.gpu_addr >> 8);
280 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
281 mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
282 (adev->vce.gpu_addr >> 40) & 0xff);
283
284 size = VCE_V4_0_FW_SIZE;
285 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
286
287 offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
288 size = VCE_V4_0_STACK_SIZE;
289 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
290 (offset & ~0x0f000000) | (1 << 24));
291 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
292
293 offset += size;
294 size = VCE_V4_0_DATA_SIZE;
295 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
296 (offset & ~0x0f000000) | (2 << 24));
297 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
298
299 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
300 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
301 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
302 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
303
304 /* end of MC_RESUME */
305 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
306 VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
307 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
308 ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
309 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
310 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
311
312 MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
313 VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
314 VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
315
316 /* clear BUSY flag */
317 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
318 ~VCE_STATUS__JOB_BUSY_MASK, 0);
319
320 /* add end packet */
321 memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
322 table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
323 header->vce_table_size = table_size;
324 }
325
326 return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
327}
328
329/**
330 * vce_v4_0_start - start VCE block
331 *
332 * @adev: amdgpu_device pointer
333 *
334 * Setup and start the VCE block
335 */
336static int vce_v4_0_start(struct amdgpu_device *adev)
337{
338 struct amdgpu_ring *ring;
339 int r;
340
341 ring = &adev->vce.ring[0];
342
343 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
344 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
345 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
346 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
347 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
348
349 ring = &adev->vce.ring[1];
350
351 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
352 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
353 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
354 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
355 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
356
357 ring = &adev->vce.ring[2];
358
359 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
360 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
361 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
362 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
363 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
364
365 vce_v4_0_mc_resume(adev);
366 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
367 ~VCE_STATUS__JOB_BUSY_MASK);
368
369 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
370
371 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
372 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
373 mdelay(100);
374
375 r = vce_v4_0_firmware_loaded(adev);
376
377 /* clear BUSY flag */
378 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
379
380 if (r) {
381 DRM_ERROR("VCE not responding, giving up!!!\n");
382 return r;
383 }
384
385 return 0;
386}
387
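/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU, hold the ECPU in soft reset and clear VCE_STATUS.
 */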
388static int vce_v4_0_stop(struct amdgpu_device *adev)
389{
390
391 /* Disable VCPU */
392 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
393
394 /* hold on ECPU */
395 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
396 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
397 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
398
399 /* clear VCE_STATUS */
400 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);
401
402 /* Set Clock-Gating off */
403 /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
404 vce_v4_0_set_vce_sw_clock_gating(adev, false);
405 */
406
407 return 0;
408}
409
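/**
 * vce_v4_0_early_init - set up the VCE IP block
 *
 * @handle: amdgpu_device pointer
 *
 * Pick the number of rings (one under SR-IOV, three otherwise) and
 * install the ring and interrupt callbacks.
 */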
410static int vce_v4_0_early_init(void *handle)
411{
412 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
413
414 if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
415 adev->vce.num_rings = 1;
416 else
417 adev->vce.num_rings = 3;
418
419 vce_v4_0_set_ring_funcs(adev);
420 vce_v4_0_set_irq_funcs(adev);
421
422 return 0;
423}
424
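/**
 * vce_v4_0_sw_init - sw init for VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Register the interrupt source, allocate the firmware/stack/data BO,
 * initialize the rings (doorbell based under SR-IOV) and allocate the
 * MM table used by the MMSCH.
 */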
425static int vce_v4_0_sw_init(void *handle)
426{
427 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
428 struct amdgpu_ring *ring;
429
430 unsigned size;
431 int r, i;
432
433 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
434 if (r)
435 return r;
436
437 size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
438 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
439 size += VCE_V4_0_FW_SIZE;
440
441 r = amdgpu_vce_sw_init(adev, size);
442 if (r)
443 return r;
444
445 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
446 const struct common_firmware_header *hdr;
447 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
448
449 adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
450 if (!adev->vce.saved_bo)
451 return -ENOMEM;
452
453 hdr = (const struct common_firmware_header *)adev->vce.fw->data;
454 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
455 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
456 adev->firmware.fw_size +=
457 ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
458 DRM_INFO("PSP loading VCE firmware\n");
459 } else {
460 r = amdgpu_vce_resume(adev);
461 if (r)
462 return r;
463 }
464
465 for (i = 0; i < adev->vce.num_rings; i++) {
466 enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
467
468 ring = &adev->vce.ring[i];
469 ring->vm_hub = AMDGPU_MMHUB0(0);
470 sprintf(ring->name, "vce%d", i);
471 if (amdgpu_sriov_vf(adev)) {
472 /* DOORBELL only works under SRIOV */
473 ring->use_doorbell = true;
474
475 /* currently only the first encoding ring is used under sriov,
476 * so assign an unused doorbell location to the other rings.
477 */
478 if (i == 0)
479 ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
480 else
481 ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
482 }
483 r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
484 hw_prio, NULL);
485 if (r)
486 return r;
487 }
488
489 r = amdgpu_virt_alloc_mm_table(adev);
490 if (r)
491 return r;
492
493 return r;
494}
495
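/**
 * vce_v4_0_sw_fini - sw fini for VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Free the MM table and the saved firmware copy, then tear down the
 * common VCE state.
 */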
496static int vce_v4_0_sw_fini(void *handle)
497{
498 int r;
499 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
500
501 /* free MM table */
502 amdgpu_virt_free_mm_table(adev);
503
504 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
505 kvfree(adev->vce.saved_bo);
506 adev->vce.saved_bo = NULL;
507 }
508
509 r = amdgpu_vce_suspend(adev);
510 if (r)
511 return r;
512
513 return amdgpu_vce_sw_fini(adev);
514}
515
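/**
 * vce_v4_0_hw_init - start the VCE hardware
 *
 * @handle: amdgpu_device pointer
 *
 * Start VCE through the MMSCH under SR-IOV or directly otherwise,
 * then run a ring test on each enabled ring.
 */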
516static int vce_v4_0_hw_init(void *handle)
517{
518 int r, i;
519 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
520
521 if (amdgpu_sriov_vf(adev))
522 r = vce_v4_0_sriov_start(adev);
523 else
524 r = vce_v4_0_start(adev);
525 if (r)
526 return r;
527
528 for (i = 0; i < adev->vce.num_rings; i++) {
529 r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
530 if (r)
531 return r;
532 }
533
534 DRM_INFO("VCE initialized successfully.\n");
535
536 return 0;
537}
538
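/**
 * vce_v4_0_hw_fini - stop the VCE hardware
 *
 * @handle: amdgpu_device pointer
 *
 * Cancel the idle work and stop the block on bare metal; under SR-IOV
 * the VCE registers are left untouched.
 */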
539static int vce_v4_0_hw_fini(void *handle)
540{
541 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
542
543 cancel_delayed_work_sync(&adev->vce.idle_work);
544
545 if (!amdgpu_sriov_vf(adev)) {
546 /* vce_v4_0_wait_for_idle(handle); */
547 vce_v4_0_stop(adev);
548 } else {
549 /* full access mode, so don't touch any VCE register */
550 DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
551 }
552
553 return 0;
554}
555
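/**
 * vce_v4_0_suspend - suspend the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Save the VCPU BO contents when the firmware was loaded by the PSP,
 * gate power and clocks, then shut the hardware down.
 */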
556static int vce_v4_0_suspend(void *handle)
557{
558 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
559 int r, idx;
560
561 if (adev->vce.vcpu_bo == NULL)
562 return 0;
563
564 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
565 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
566 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
567 void *ptr = adev->vce.cpu_addr;
568
569 memcpy_fromio(adev->vce.saved_bo, ptr, size);
570 }
571 drm_dev_exit(idx);
572 }
573
574 /*
575 * Proper cleanups before halting the HW engine:
576 * - cancel the delayed idle work
577 * - enable powergating
578 * - enable clockgating
579 * - disable dpm
580 *
581 * TODO: to align with the VCN implementation, move the
582 * jobs for clockgating/powergating/dpm setting to
583 * ->set_powergating_state().
584 */
585 cancel_delayed_work_sync(&adev->vce.idle_work);
586
587 if (adev->pm.dpm_enabled) {
588 amdgpu_dpm_enable_vce(adev, false);
589 } else {
590 amdgpu_asic_set_vce_clocks(adev, 0, 0);
591 amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
592 AMD_PG_STATE_GATE);
593 amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
594 AMD_CG_STATE_GATE);
595 }
596
597 r = vce_v4_0_hw_fini(adev);
598 if (r)
599 return r;
600
601 return amdgpu_vce_suspend(adev);
602}
603
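/**
 * vce_v4_0_resume - resume the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Restore the saved VCPU BO contents (PSP load) or re-validate the
 * firmware image (direct load), then re-initialize the hardware.
 */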
604static int vce_v4_0_resume(void *handle)
605{
606 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
607 int r, idx;
608
609 if (adev->vce.vcpu_bo == NULL)
610 return -EINVAL;
611
612 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
613
614 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
615 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
616 void *ptr = adev->vce.cpu_addr;
617
618 memcpy_toio(ptr, adev->vce.saved_bo, size);
619 drm_dev_exit(idx);
620 }
621 } else {
622 r = amdgpu_vce_resume(adev);
623 if (r)
624 return r;
625 }
626
627 return vce_v4_0_hw_init(adev);
628}
629
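/**
 * vce_v4_0_mc_resume - program the memory controller interface
 *
 * @adev: amdgpu_device pointer
 *
 * Program the clock gating defaults and the LMI/VCPU cache registers
 * with the firmware, stack and data layout so the VCPU can fetch its
 * code and data.
 */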
630static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
631{
632 uint32_t offset, size;
633 uint64_t tmr_mc_addr;
634
635 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
636 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
637 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
638 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
639
640 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
641 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
642 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
643 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
644 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
645
646 offset = AMDGPU_VCE_FIRMWARE_OFFSET;
647
648 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
649 tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
650 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
651 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
652 (tmr_mc_addr >> 8));
653 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
654 (tmr_mc_addr >> 40) & 0xff);
655 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
656 } else {
657 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
658 (adev->vce.gpu_addr >> 8));
659 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
660 (adev->vce.gpu_addr >> 40) & 0xff);
661 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
662 }
663
664 size = VCE_V4_0_FW_SIZE;
665 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
666
667 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
668 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
669 offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
670 size = VCE_V4_0_STACK_SIZE;
671 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
672 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
673
674 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
675 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
676 offset += size;
677 size = VCE_V4_0_DATA_SIZE;
678 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
679 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
680
681 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
682 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
683 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
684 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
685}
686
687static int vce_v4_0_set_clockgating_state(void *handle,
688 enum amd_clockgating_state state)
689{
690 /* needed for driver unload */
691 return 0;
692}
693
694#if 0
695static bool vce_v4_0_is_idle(void *handle)
696{
697 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
698 u32 mask = 0;
699
700 mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
701 mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
702
703 return !(RREG32(mmSRBM_STATUS2) & mask);
704}
705
706static int vce_v4_0_wait_for_idle(void *handle)
707{
708 unsigned i;
709 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
710
711 for (i = 0; i < adev->usec_timeout; i++)
712 if (vce_v4_0_is_idle(handle))
713 return 0;
714
715 return -ETIMEDOUT;
716}
717
718#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
719#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
720#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
721#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
722 VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
723
724static bool vce_v4_0_check_soft_reset(void *handle)
725{
726 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
727 u32 srbm_soft_reset = 0;
728
729 /* According to the VCE team, we should use VCE_STATUS instead of the
730 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
731 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
732 * instance's registers are accessed
733 * (0 for 1st instance, 0x10 for 2nd instance).
734 *
735 *VCE_STATUS
736 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB |
737 *|----+----+-----------+----+----+----+----------+---------+----|
738 *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0|
739 *
740 * The VCE team suggests using bits 3 to 6 for the busy status check
741 */
742 mutex_lock(&adev->grbm_idx_mutex);
743 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
744 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
745 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
746 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
747 }
748 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
749 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
750 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
751 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
752 }
753 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
754 mutex_unlock(&adev->grbm_idx_mutex);
755
756 if (srbm_soft_reset) {
757 adev->vce.srbm_soft_reset = srbm_soft_reset;
758 return true;
759 } else {
760 adev->vce.srbm_soft_reset = 0;
761 return false;
762 }
763}
764
765static int vce_v4_0_soft_reset(void *handle)
766{
767 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
768 u32 srbm_soft_reset;
769
770 if (!adev->vce.srbm_soft_reset)
771 return 0;
772 srbm_soft_reset = adev->vce.srbm_soft_reset;
773
774 if (srbm_soft_reset) {
775 u32 tmp;
776
777 tmp = RREG32(mmSRBM_SOFT_RESET);
778 tmp |= srbm_soft_reset;
779 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
780 WREG32(mmSRBM_SOFT_RESET, tmp);
781 tmp = RREG32(mmSRBM_SOFT_RESET);
782
783 udelay(50);
784
785 tmp &= ~srbm_soft_reset;
786 WREG32(mmSRBM_SOFT_RESET, tmp);
787 tmp = RREG32(mmSRBM_SOFT_RESET);
788
789 /* Wait a little for things to settle down */
790 udelay(50);
791 }
792
793 return 0;
794}
795
796static int vce_v4_0_pre_soft_reset(void *handle)
797{
798 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
799
800 if (!adev->vce.srbm_soft_reset)
801 return 0;
802
803 mdelay(5);
804
805 return vce_v4_0_suspend(adev);
806}
807
808
809static int vce_v4_0_post_soft_reset(void *handle)
810{
811 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
812
813 if (!adev->vce.srbm_soft_reset)
814 return 0;
815
816 mdelay(5);
817
818 return vce_v4_0_resume(adev);
819}
820
821static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
822{
823 u32 tmp, data;
824
825 tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
826 if (override)
827 data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
828 else
829 data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
830
831 if (tmp != data)
832 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
833}
834
835static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
836 bool gated)
837{
838 u32 data;
839
840 /* Set Override to disable Clock Gating */
841 vce_v4_0_override_vce_clock_gating(adev, true);
842
843 /* This function enables MGCG which is controlled by firmware.
844 With the clocks in the gated state the core is still
845 accessible but the firmware will throttle the clocks on the
846 fly as necessary.
847 */
848 if (gated) {
849 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
850 data |= 0x1ff;
851 data &= ~0xef0000;
852 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
853
854 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
855 data |= 0x3ff000;
856 data &= ~0xffc00000;
857 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
858
859 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
860 data |= 0x2;
861 data &= ~0x00010000;
862 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
863
864 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
865 data |= 0x37f;
866 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
867
868 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
869 data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
870 VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
871 VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
872 0x8;
873 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
874 } else {
875 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
876 data &= ~0x80010;
877 data |= 0xe70008;
878 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
879
880 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
881 data |= 0xffc00000;
882 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
883
884 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
885 data |= 0x10000;
886 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
887
888 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
889 data &= ~0xffc00000;
890 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
891
892 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
893 data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
894 VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
895 VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
896 0x8);
897 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
898 }
899 vce_v4_0_override_vce_clock_gating(adev, false);
900}
901
902static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
903{
904 u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
905
906 if (enable)
907 tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
908 else
909 tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
910
911 WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
912}
913
914static int vce_v4_0_set_clockgating_state(void *handle,
915 enum amd_clockgating_state state)
916{
917 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
918 bool enable = (state == AMD_CG_STATE_GATE);
919 int i;
920
921 if ((adev->asic_type == CHIP_POLARIS10) ||
922 (adev->asic_type == CHIP_TONGA) ||
923 (adev->asic_type == CHIP_FIJI))
924 vce_v4_0_set_bypass_mode(adev, enable);
925
926 if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
927 return 0;
928
929 mutex_lock(&adev->grbm_idx_mutex);
930 for (i = 0; i < 2; i++) {
931 /* Program VCE Instance 0 or 1 if not harvested */
932 if (adev->vce.harvest_config & (1 << i))
933 continue;
934
935 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
936
937 if (enable) {
938 /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
939 uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
940 data &= ~(0xf | 0xff0);
941 data |= ((0x0 << 0) | (0x04 << 4));
942 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);
943
944 /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
945 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
946 data &= ~(0xf | 0xff0);
947 data |= ((0x0 << 0) | (0x04 << 4));
948 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
949 }
950
951 vce_v4_0_set_vce_sw_clock_gating(adev, enable);
952 }
953
954 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
955 mutex_unlock(&adev->grbm_idx_mutex);
956
957 return 0;
958}
959#endif
960
961static int vce_v4_0_set_powergating_state(void *handle,
962 enum amd_powergating_state state)
963{
964 /* This doesn't actually powergate the VCE block.
965 * That's done in the dpm code via the SMC. This
966 * just re-inits the block as necessary. The actual
967 * gating still happens in the dpm code. We should
968 * revisit this when there is a cleaner line between
969 * the smc and the hw blocks
970 */
971 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
972
973 if (state == AMD_PG_STATE_GATE)
974 return vce_v4_0_stop(adev);
975 else
976 return vce_v4_0_start(adev);
977}
978
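/**
 * vce_v4_0_ring_emit_ib - execute an indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @job: job to retrieve the VMID from
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Emit a VCE_CMD_IB_VM packet referencing the indirect buffer.
 */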
979static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
980 struct amdgpu_ib *ib, uint32_t flags)
981{
982 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
983
984 amdgpu_ring_write(ring, VCE_CMD_IB_VM);
985 amdgpu_ring_write(ring, vmid);
986 amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
987 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
988 amdgpu_ring_write(ring, ib->length_dw);
989}
990
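/**
 * vce_v4_0_ring_emit_fence - emit a fence command
 *
 * @ring: amdgpu_ring pointer
 * @addr: address the sequence number is written to
 * @seq: sequence number to write
 * @flags: fence flags; 64-bit sequence numbers are not supported
 *
 * Write a fence packet followed by a trap command to the ring.
 */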
991static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
992 u64 seq, unsigned flags)
993{
994 WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
995
996 amdgpu_ring_write(ring, VCE_CMD_FENCE);
997 amdgpu_ring_write(ring, addr);
998 amdgpu_ring_write(ring, upper_32_bits(addr));
999 amdgpu_ring_write(ring, seq);
1000 amdgpu_ring_write(ring, VCE_CMD_TRAP);
1001}
1002
1003static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
1004{
1005 amdgpu_ring_write(ring, VCE_CMD_END);
1006}
1007
1008static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1009 uint32_t val, uint32_t mask)
1010{
1011 amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
1012 amdgpu_ring_write(ring, reg << 2);
1013 amdgpu_ring_write(ring, mask);
1014 amdgpu_ring_write(ring, val);
1015}
1016
1017static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
1018 unsigned int vmid, uint64_t pd_addr)
1019{
1020 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
1021
1022 pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1023
1024 /* wait for reg writes */
1025 vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
1026 vmid * hub->ctx_addr_distance,
1027 lower_32_bits(pd_addr), 0xffffffff);
1028}
1029
1030static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
1031 uint32_t reg, uint32_t val)
1032{
1033 amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
1034 amdgpu_ring_write(ring, reg << 2);
1035 amdgpu_ring_write(ring, val);
1036}
1037
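/**
 * vce_v4_0_set_interrupt_state - toggle the VCE trap interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: unused
 * @state: enable or disable the interrupt
 *
 * Skipped under SR-IOV where the host owns the interrupt enable
 * register.
 */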
1038static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1039 struct amdgpu_irq_src *source,
1040 unsigned type,
1041 enum amdgpu_interrupt_state state)
1042{
1043 uint32_t val = 0;
1044
1045 if (!amdgpu_sriov_vf(adev)) {
1046 if (state == AMDGPU_IRQ_STATE_ENABLE)
1047 val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1048
1049 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1050 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1051 }
1052 return 0;
1053}
1054
1055static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1056 struct amdgpu_irq_src *source,
1057 struct amdgpu_iv_entry *entry)
1058{
1059 DRM_DEBUG("IH: VCE\n");
1060
1061 switch (entry->src_data[0]) {
1062 case 0:
1063 case 1:
1064 case 2:
1065 amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1066 break;
1067 default:
1068 DRM_ERROR("Unhandled interrupt: %d %d\n",
1069 entry->src_id, entry->src_data[0]);
1070 break;
1071 }
1072
1073 return 0;
1074}
1075
1076const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1077 .name = "vce_v4_0",
1078 .early_init = vce_v4_0_early_init,
1079 .late_init = NULL,
1080 .sw_init = vce_v4_0_sw_init,
1081 .sw_fini = vce_v4_0_sw_fini,
1082 .hw_init = vce_v4_0_hw_init,
1083 .hw_fini = vce_v4_0_hw_fini,
1084 .suspend = vce_v4_0_suspend,
1085 .resume = vce_v4_0_resume,
1086 .is_idle = NULL /* vce_v4_0_is_idle */,
1087 .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1088 .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1089 .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1090 .soft_reset = NULL /* vce_v4_0_soft_reset */,
1091 .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1092 .set_clockgating_state = vce_v4_0_set_clockgating_state,
1093 .set_powergating_state = vce_v4_0_set_powergating_state,
1094};
1095
1096static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1097 .type = AMDGPU_RING_TYPE_VCE,
1098 .align_mask = 0x3f,
1099 .nop = VCE_CMD_NO_OP,
1100 .support_64bit_ptrs = false,
1101 .no_user_fence = true,
1102 .get_rptr = vce_v4_0_ring_get_rptr,
1103 .get_wptr = vce_v4_0_ring_get_wptr,
1104 .set_wptr = vce_v4_0_ring_set_wptr,
1105 .parse_cs = amdgpu_vce_ring_parse_cs_vm,
1106 .emit_frame_size =
1107 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1108 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1109 4 + /* vce_v4_0_emit_vm_flush */
1110 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1111 1, /* vce_v4_0_ring_insert_end */
1112 .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1113 .emit_ib = vce_v4_0_ring_emit_ib,
1114 .emit_vm_flush = vce_v4_0_emit_vm_flush,
1115 .emit_fence = vce_v4_0_ring_emit_fence,
1116 .test_ring = amdgpu_vce_ring_test_ring,
1117 .test_ib = amdgpu_vce_ring_test_ib,
1118 .insert_nop = amdgpu_ring_insert_nop,
1119 .insert_end = vce_v4_0_ring_insert_end,
1120 .pad_ib = amdgpu_ring_generic_pad_ib,
1121 .begin_use = amdgpu_vce_ring_begin_use,
1122 .end_use = amdgpu_vce_ring_end_use,
1123 .emit_wreg = vce_v4_0_emit_wreg,
1124 .emit_reg_wait = vce_v4_0_emit_reg_wait,
1125 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1126};
1127
1128static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1129{
1130 int i;
1131
1132 for (i = 0; i < adev->vce.num_rings; i++) {
1133 adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1134 adev->vce.ring[i].me = i;
1135 }
1136 DRM_INFO("VCE enabled in VM mode\n");
1137}
1138
1139static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1140 .set = vce_v4_0_set_interrupt_state,
1141 .process = vce_v4_0_process_interrupt,
1142};
1143
1144static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1145{
1146 adev->vce.irq.num_types = 1;
1147 adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1148}
1149
1150const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1151{
1152 .type = AMD_IP_BLOCK_TYPE_VCE,
1153 .major = 4,
1154 .minor = 0,
1155 .rev = 0,
1156 .funcs = &vce_v4_0_ip_funcs,
1157};
1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * The above copyright notice and this permission notice (including the
22 * next paragraph) shall be included in all copies or substantial portions
23 * of the Software.
24 *
25 */
26
27#include <linux/firmware.h>
28
29#include "amdgpu.h"
30#include "amdgpu_vce.h"
31#include "soc15.h"
32#include "soc15d.h"
33#include "soc15_common.h"
34#include "mmsch_v1_0.h"
35
36#include "vce/vce_4_0_offset.h"
37#include "vce/vce_4_0_default.h"
38#include "vce/vce_4_0_sh_mask.h"
39#include "mmhub/mmhub_1_0_offset.h"
40#include "mmhub/mmhub_1_0_sh_mask.h"
41
42#include "ivsrcid/vce/irqsrcs_vce_4_0.h"
43
44#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
45
46#define VCE_V4_0_FW_SIZE (384 * 1024)
47#define VCE_V4_0_STACK_SIZE (64 * 1024)
48#define VCE_V4_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
49
50static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
51static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
52static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
53
54/**
55 * vce_v4_0_ring_get_rptr - get read pointer
56 *
57 * @ring: amdgpu_ring pointer
58 *
59 * Returns the current hardware read pointer
60 */
61static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
62{
63 struct amdgpu_device *adev = ring->adev;
64
65 if (ring->me == 0)
66 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
67 else if (ring->me == 1)
68 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
69 else
70 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
71}
72
73/**
74 * vce_v4_0_ring_get_wptr - get write pointer
75 *
76 * @ring: amdgpu_ring pointer
77 *
78 * Returns the current hardware write pointer
79 */
80static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
81{
82 struct amdgpu_device *adev = ring->adev;
83
84 if (ring->use_doorbell)
85 return adev->wb.wb[ring->wptr_offs];
86
87 if (ring->me == 0)
88 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
89 else if (ring->me == 1)
90 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
91 else
92 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
93}
94
95/**
96 * vce_v4_0_ring_set_wptr - set write pointer
97 *
98 * @ring: amdgpu_ring pointer
99 *
100 * Commits the write pointer to the hardware
101 */
102static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
103{
104 struct amdgpu_device *adev = ring->adev;
105
106 if (ring->use_doorbell) {
107 /* XXX check if swapping is necessary on BE */
108 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
109 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
110 return;
111 }
112
113 if (ring->me == 0)
114 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
115 lower_32_bits(ring->wptr));
116 else if (ring->me == 1)
117 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
118 lower_32_bits(ring->wptr));
119 else
120 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
121 lower_32_bits(ring->wptr));
122}
123
124static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
125{
126 int i, j;
127
128 for (i = 0; i < 10; ++i) {
129 for (j = 0; j < 100; ++j) {
130 uint32_t status =
131 RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
132
133 if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
134 return 0;
135 mdelay(10);
136 }
137
138 DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
139 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
140 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
141 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
142 mdelay(10);
143 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
144 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
145 mdelay(10);
146
147 }
148
149 return -ETIMEDOUT;
150}
151
152static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
153 struct amdgpu_mm_table *table)
154{
155 uint32_t data = 0, loop;
156 uint64_t addr = table->gpu_addr;
157 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
158 uint32_t size;
159
160 size = header->header_size + header->vce_table_size + header->uvd_table_size;
161
162 /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
163 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
164 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
165
166 /* 2, update vmid of descriptor */
167 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
168 data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
169 data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
170 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
171
172 /* 3, notify mmsch about the size of this descriptor */
173 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
174
175 /* 4, set resp to zero */
176 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
177
178 WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
179 adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
180 adev->vce.ring[0].wptr = 0;
181 adev->vce.ring[0].wptr_old = 0;
182
183 /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
184 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
185
186 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
187 loop = 1000;
188 while ((data & 0x10000002) != 0x10000002) {
189 udelay(10);
190 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
191 loop--;
192 if (!loop)
193 break;
194 }
195
196 if (!loop) {
197 dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
198 return -EBUSY;
199 }
200
201 return 0;
202}
203
204static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
205{
206 struct amdgpu_ring *ring;
207 uint32_t offset, size;
208 uint32_t table_size = 0;
209 struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
210 struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
211 struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
212 struct mmsch_v1_0_cmd_end end = { { 0 } };
213 uint32_t *init_table = adev->virt.mm_table.cpu_addr;
214 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
215
216 direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
217 direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
218 direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
219 end.cmd_header.command_type = MMSCH_COMMAND__END;
220
221 if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
222 header->version = MMSCH_VERSION;
223 header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
224
225 if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
226 header->vce_table_offset = header->header_size;
227 else
228 header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
229
230 init_table += header->vce_table_offset;
231
232 ring = &adev->vce.ring[0];
233 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
234 lower_32_bits(ring->gpu_addr));
235 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
236 upper_32_bits(ring->gpu_addr));
237 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
238 ring->ring_size / 4);
239
240 /* BEGING OF MC_RESUME */
241 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
242 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
243 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
244 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
245 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
246
247 offset = AMDGPU_VCE_FIRMWARE_OFFSET;
248 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
249 uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
250 uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
251 uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;
252
253 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
254 mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
255 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
256 mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
257 (tmr_mc_addr >> 40) & 0xff);
258 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
259 } else {
260 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
261 mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
262 adev->vce.gpu_addr >> 8);
263 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
264 mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
265 (adev->vce.gpu_addr >> 40) & 0xff);
266 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
267 offset & ~0x0f000000);
268
269 }
270 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
271 mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
272 adev->vce.gpu_addr >> 8);
273 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
274 mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
275 (adev->vce.gpu_addr >> 40) & 0xff);
276 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
277 mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
278 adev->vce.gpu_addr >> 8);
279 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
280 mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
281 (adev->vce.gpu_addr >> 40) & 0xff);
282
283 size = VCE_V4_0_FW_SIZE;
284 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
285
286 offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
287 size = VCE_V4_0_STACK_SIZE;
288 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
289 (offset & ~0x0f000000) | (1 << 24));
290 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
291
292 offset += size;
293 size = VCE_V4_0_DATA_SIZE;
294 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
295 (offset & ~0x0f000000) | (2 << 24));
296 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
297
298 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
299 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
300 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
301 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
302
303 /* end of MC_RESUME */
304 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
305 VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
306 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
307 ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
308 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
309 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
310
311 MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
312 VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
313 VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
314
315 /* clear BUSY flag */
316 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
317 ~VCE_STATUS__JOB_BUSY_MASK, 0);
318
319 /* add end packet */
320 memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
321 table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
322 header->vce_table_size = table_size;
323 }
324
325 return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
326}
327
328/**
329 * vce_v4_0_start - start VCE block
330 *
331 * @adev: amdgpu_device pointer
332 *
333 * Setup and start the VCE block
334 */
335static int vce_v4_0_start(struct amdgpu_device *adev)
336{
337 struct amdgpu_ring *ring;
338 int r;
339
340 ring = &adev->vce.ring[0];
341
342 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
343 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
344 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
345 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
346 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
347
348 ring = &adev->vce.ring[1];
349
350 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
351 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
352 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
353 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
354 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
355
356 ring = &adev->vce.ring[2];
357
358 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
359 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
360 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
361 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
362 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
363
364 vce_v4_0_mc_resume(adev);
365 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
366 ~VCE_STATUS__JOB_BUSY_MASK);
367
368 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
369
370 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
371 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
372 mdelay(100);
373
374 r = vce_v4_0_firmware_loaded(adev);
375
376 /* clear BUSY flag */
377 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
378
379 if (r) {
380 DRM_ERROR("VCE not responding, giving up!!!\n");
381 return r;
382 }
383
384 return 0;
385}
386
387static int vce_v4_0_stop(struct amdgpu_device *adev)
388{
389
390 /* Disable VCPU */
391 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
392
393 /* hold on ECPU */
394 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
395 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
396 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
397
398 /* clear VCE_STATUS */
399 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);
400
401 /* Set Clock-Gating off */
402 /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
403 vce_v4_0_set_vce_sw_clock_gating(adev, false);
404 */
405
406 return 0;
407}
408
409static int vce_v4_0_early_init(void *handle)
410{
411 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
412
413 if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
414 adev->vce.num_rings = 1;
415 else
416 adev->vce.num_rings = 3;
417
418 vce_v4_0_set_ring_funcs(adev);
419 vce_v4_0_set_irq_funcs(adev);
420
421 return 0;
422}
423
424static int vce_v4_0_sw_init(void *handle)
425{
426 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
427 struct amdgpu_ring *ring;
428
429 unsigned size;
430 int r, i;
431
432 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
433 if (r)
434 return r;
435
436 size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
437 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
438 size += VCE_V4_0_FW_SIZE;
439
440 r = amdgpu_vce_sw_init(adev, size);
441 if (r)
442 return r;
443
444 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
445 const struct common_firmware_header *hdr;
446 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
447
448 adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
449 if (!adev->vce.saved_bo)
450 return -ENOMEM;
451
452 hdr = (const struct common_firmware_header *)adev->vce.fw->data;
453 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
454 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
455 adev->firmware.fw_size +=
456 ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
457 DRM_INFO("PSP loading VCE firmware\n");
458 } else {
459 r = amdgpu_vce_resume(adev);
460 if (r)
461 return r;
462 }
463
464 for (i = 0; i < adev->vce.num_rings; i++) {
465 ring = &adev->vce.ring[i];
466 sprintf(ring->name, "vce%d", i);
467 if (amdgpu_sriov_vf(adev)) {
468 /* DOORBELL only works under SRIOV */
469 ring->use_doorbell = true;
470
471 /* currently only use the first encoding ring for sriov,
472 * so set unused location for other unused rings.
473 */
474 if (i == 0)
475 ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
476 else
477 ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
478 }
479 r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
480 if (r)
481 return r;
482 }
483
484
485 r = amdgpu_vce_entity_init(adev);
486 if (r)
487 return r;
488
489 r = amdgpu_virt_alloc_mm_table(adev);
490 if (r)
491 return r;
492
493 return r;
494}
495
static int vce_v4_0_sw_fini(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* free MM table */
        amdgpu_virt_free_mm_table(adev);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                kvfree(adev->vce.saved_bo);
                adev->vce.saved_bo = NULL;
        }

        r = amdgpu_vce_suspend(adev);
        if (r)
                return r;

        return amdgpu_vce_sw_fini(adev);
}

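/**
 * vce_v4_0_hw_init - start and test VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Bring up the VCE block (through the SR-IOV path when running as a VF)
 * and run a ring test on each enabled ring.
 */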
static int vce_v4_0_hw_init(void *handle)
{
        int r, i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (amdgpu_sriov_vf(adev))
                r = vce_v4_0_sriov_start(adev);
        else
                r = vce_v4_0_start(adev);
        if (r)
                return r;

        for (i = 0; i < adev->vce.num_rings; i++) {
                r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
                if (r)
                        return r;
        }

        DRM_INFO("VCE initialized successfully.\n");

        return 0;
}

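/**
 * vce_v4_0_hw_fini - stop the hardware block
 *
 * @handle: amdgpu_device pointer
 *
 * Stop VCE on bare metal (registers are left untouched under SR-IOV)
 * and mark the rings as not ready.
 */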
static int vce_v4_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i;

        if (!amdgpu_sriov_vf(adev)) {
                /* vce_v4_0_wait_for_idle(handle); */
                vce_v4_0_stop(adev);
        } else {
                /* full access mode, so don't touch any VCE register */
                DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
        }

        for (i = 0; i < adev->vce.num_rings; i++)
                adev->vce.ring[i].sched.ready = false;

        return 0;
}

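/**
 * vce_v4_0_suspend - suspend VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Save the PSP-loaded firmware image, stop the hardware and suspend
 * the common VCE state.
 */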
static int vce_v4_0_suspend(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        if (adev->vce.vcpu_bo == NULL)
                return 0;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
                void *ptr = adev->vce.cpu_addr;

                memcpy_fromio(adev->vce.saved_bo, ptr, size);
        }

        r = vce_v4_0_hw_fini(adev);
        if (r)
                return r;

        return amdgpu_vce_suspend(adev);
}

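/**
 * vce_v4_0_resume - resume VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Restore the saved firmware image (or let the common VCE code resume
 * the VCPU BO), then bring the hardware back up.
 */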
static int vce_v4_0_resume(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        if (adev->vce.vcpu_bo == NULL)
                return -EINVAL;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
                void *ptr = adev->vce.cpu_addr;

                memcpy_toio(ptr, adev->vce.saved_bo, size);
        } else {
                r = amdgpu_vce_resume(adev);
                if (r)
                        return r;
        }

        return vce_v4_0_hw_init(adev);
}

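/**
 * vce_v4_0_mc_resume - program memory controller settings
 *
 * @adev: amdgpu_device pointer
 *
 * Program the LMI/cache registers with the firmware, stack and data
 * locations and enable the system interrupt.
 */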
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
        uint32_t offset, size;
        uint64_t tmr_mc_addr;

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

        offset = AMDGPU_VCE_FIRMWARE_OFFSET;

        /* firmware region: either the PSP TMR location or our own BO */
        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
                        adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (tmr_mc_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (tmr_mc_addr >> 40) & 0xff);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
        } else {
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                        (adev->vce.gpu_addr >> 8));
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                        (adev->vce.gpu_addr >> 40) & 0xff);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
        }

        size = VCE_V4_0_FW_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

        /* stack region */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
        offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
        size = VCE_V4_0_STACK_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

        /* data region */
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
        offset += size;
        size = VCE_V4_0_DATA_SIZE;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
                 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        /* needed for driver unload */
        return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 mask = 0;

        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
        mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

        return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
        unsigned i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        for (i = 0; i < adev->usec_timeout; i++)
                if (vce_v4_0_is_idle(handle))
                        return 0;

        return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
                                     VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;

        /* According to the VCE team, we should use VCE_STATUS instead of
         * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
         * GRBM_GFX_INDEX.INSTANCE_INDEX selects which VCE instance's
         * registers are accessed (0 for the 1st instance, 0x10 for the 2nd).
         *
         * VCE_STATUS
         * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
         * |----+----+-----------+----+----+----+----------+---------+----|
         * |bit8|bit7|   bit6    |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
         *
         * The VCE team suggests using bits 3..6 for the busy status check.
         */
        mutex_lock(&adev->grbm_idx_mutex);
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
        if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        if (srbm_soft_reset) {
                adev->vce.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->vce.srbm_soft_reset = 0;
                return false;
        }
}

static int vce_v4_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;

        if (!adev->vce.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->vce.srbm_soft_reset;

        if (srbm_soft_reset) {
                u32 tmp;

                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                /* Wait a little for things to settle down */
                udelay(50);
        }

        return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (!adev->vce.srbm_soft_reset)
                return 0;

        mdelay(5);

        return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
        u32 tmp, data;

        tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
        if (override)
                data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
        else
                data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

        if (tmp != data)
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
                                             bool gated)
{
        u32 data;

        /* Set Override to disable Clock Gating */
        vce_v4_0_override_vce_clock_gating(adev, true);

        /* This function enables MGCG which is controlled by firmware.
         * With the clocks in the gated state the core is still
         * accessible but the firmware will throttle the clocks on the
         * fly as necessary.
         */
        if (gated) {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data |= 0x1ff;
                data &= ~0xef0000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0x3ff000;
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x2;
                data &= ~0x00010000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data |= 0x37f;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                        VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
                        0x8;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        } else {
                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
                data &= ~0x80010;
                data |= 0xe70008;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                data |= 0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
                data |= 0x10000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
                data &= ~0xffc00000;
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

                data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
                data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                          VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
                          0x8);
                WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
        }
        vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
        u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

        if (enable)
                tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
        else
                tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

        WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = (state == AMD_CG_STATE_GATE);
        int i;

        if ((adev->asic_type == CHIP_POLARIS10) ||
            (adev->asic_type == CHIP_TONGA) ||
            (adev->asic_type == CHIP_FIJI))
                vce_v4_0_set_bypass_mode(adev, enable);

        if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
                return 0;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < 2; i++) {
                /* Program VCE Instance 0 or 1 if not harvested */
                if (adev->vce.harvest_config & (1 << i))
                        continue;

                WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

                if (enable) {
                        /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
                        uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

                        /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
                        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
                        data &= ~(0xf | 0xff0);
                        data |= ((0x0 << 0) | (0x04 << 4));
                        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
                }

                vce_v4_0_set_vce_sw_clock_gating(adev, enable);
        }

        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
        mutex_unlock(&adev->grbm_idx_mutex);

        return 0;
}
#endif

static int vce_v4_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
        /* This doesn't actually powergate the VCE block.
         * That's done in the dpm code via the SMC.  This
         * just re-inits the block as necessary.  The actual
         * gating still happens in the dpm code.  We should
         * revisit this when there is a cleaner line between
         * the smc and the hw blocks.
         */
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (state == AMD_PG_STATE_GATE)
                return vce_v4_0_stop(adev);
        else
                return vce_v4_0_start(adev);
}

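/**
 * vce_v4_0_ring_emit_ib - emit an indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @job: job to retrieve the vmid from
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Write commands to the ring to schedule the indirect buffer.
 */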
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
                                  struct amdgpu_ib *ib, uint32_t flags)
{
        unsigned vmid = AMDGPU_JOB_GET_VMID(job);

        amdgpu_ring_write(ring, VCE_CMD_IB_VM);
        amdgpu_ring_write(ring, vmid);
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, ib->length_dw);
}

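/**
 * vce_v4_0_ring_emit_fence - emit a fence command
 *
 * @ring: amdgpu_ring pointer
 * @addr: address to write the sequence number to
 * @seq: sequence number
 * @flags: fence flags
 *
 * Write a fence followed by a trap command to the ring.
 */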
static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
                                     u64 seq, unsigned flags)
{
        WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        amdgpu_ring_write(ring, VCE_CMD_FENCE);
        amdgpu_ring_write(ring, addr);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, seq);
        amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
                                   uint32_t val, uint32_t mask)
{
        amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
        amdgpu_ring_write(ring, reg << 2);
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring, val);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
                                   unsigned int vmid, uint64_t pd_addr)
{
        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

        pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

        /* wait for reg writes */
        vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
                               lower_32_bits(pd_addr), 0xffffffff);
}

static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
                               uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
        amdgpu_ring_write(ring, reg << 2);
        amdgpu_ring_write(ring, val);
}

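/**
 * vce_v4_0_set_interrupt_state - enable/disable the VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type
 * @state: requested interrupt state
 *
 * Toggle the system interrupt enable bit (left untouched under SR-IOV).
 */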
static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *source,
                                        unsigned type,
                                        enum amdgpu_interrupt_state state)
{
        uint32_t val = 0;

        if (!amdgpu_sriov_vf(adev)) {
                if (state == AMDGPU_IRQ_STATE_ENABLE)
                        val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

                WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
                         ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
        }
        return 0;
}

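/**
 * vce_v4_0_process_interrupt - process a VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry
 *
 * Dispatch fence processing to the ring identified by the source data.
 */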
static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
        DRM_DEBUG("IH: VCE\n");

        switch (entry->src_data[0]) {
        case 0:
        case 1:
        case 2:
                amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
                break;
        default:
                DRM_ERROR("Unhandled interrupt: %d %d\n",
                          entry->src_id, entry->src_data[0]);
                break;
        }

        return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
        .name = "vce_v4_0",
        .early_init = vce_v4_0_early_init,
        .late_init = NULL,
        .sw_init = vce_v4_0_sw_init,
        .sw_fini = vce_v4_0_sw_fini,
        .hw_init = vce_v4_0_hw_init,
        .hw_fini = vce_v4_0_hw_fini,
        .suspend = vce_v4_0_suspend,
        .resume = vce_v4_0_resume,
        .is_idle = NULL /* vce_v4_0_is_idle */,
        .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
        .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
        .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
        .soft_reset = NULL /* vce_v4_0_soft_reset */,
        .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
        .set_clockgating_state = vce_v4_0_set_clockgating_state,
        .set_powergating_state = vce_v4_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCE,
        .align_mask = 0x3f,
        .nop = VCE_CMD_NO_OP,
        .support_64bit_ptrs = false,
        .no_user_fence = true,
        .vmhub = AMDGPU_MMHUB_0,
        .get_rptr = vce_v4_0_ring_get_rptr,
        .get_wptr = vce_v4_0_ring_get_wptr,
        .set_wptr = vce_v4_0_ring_set_wptr,
        .parse_cs = amdgpu_vce_ring_parse_cs_vm,
        .emit_frame_size =
                SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
                4 + /* vce_v4_0_emit_vm_flush */
                5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
                1, /* vce_v4_0_ring_insert_end */
        .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
        .emit_ib = vce_v4_0_ring_emit_ib,
        .emit_vm_flush = vce_v4_0_emit_vm_flush,
        .emit_fence = vce_v4_0_ring_emit_fence,
        .test_ring = amdgpu_vce_ring_test_ring,
        .test_ib = amdgpu_vce_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .insert_end = vce_v4_0_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_vce_ring_begin_use,
        .end_use = amdgpu_vce_ring_end_use,
        .emit_wreg = vce_v4_0_emit_wreg,
        .emit_reg_wait = vce_v4_0_emit_reg_wait,
        .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
        int i;

        for (i = 0; i < adev->vce.num_rings; i++) {
                adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
                adev->vce.ring[i].me = i;
        }
        DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
        .set = vce_v4_0_set_interrupt_state,
        .process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->vce.irq.num_types = 1;
        adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block = {
        .type = AMD_IP_BLOCK_TYPE_VCE,
        .major = 4,
        .minor = 0,
        .rev = 0,
        .funcs = &vce_v4_0_ip_funcs,
};