/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
26
27#include <linux/firmware.h>
28#include <drm/drm_drv.h>
29
30#include "amdgpu.h"
31#include "amdgpu_vce.h"
32#include "soc15.h"
33#include "soc15d.h"
34#include "soc15_common.h"
35#include "mmsch_v1_0.h"
36
37#include "vce/vce_4_0_offset.h"
38#include "vce/vce_4_0_default.h"
39#include "vce/vce_4_0_sh_mask.h"
40#include "mmhub/mmhub_1_0_offset.h"
41#include "mmhub/mmhub_1_0_sh_mask.h"
42
43#include "ivsrcid/vce/irqsrcs_vce_4_0.h"
44
45#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
46
47#define VCE_V4_0_FW_SIZE (384 * 1024)
48#define VCE_V4_0_STACK_SIZE (64 * 1024)
49#define VCE_V4_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
50
51static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
52static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
53static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
54
55/**
56 * vce_v4_0_ring_get_rptr - get read pointer
57 *
58 * @ring: amdgpu_ring pointer
59 *
60 * Returns the current hardware read pointer
61 */
62static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
63{
64 struct amdgpu_device *adev = ring->adev;
65
66 if (ring->me == 0)
67 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
68 else if (ring->me == 1)
69 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
70 else
71 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
72}
73
74/**
75 * vce_v4_0_ring_get_wptr - get write pointer
76 *
77 * @ring: amdgpu_ring pointer
78 *
79 * Returns the current hardware write pointer
80 */
81static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
82{
83 struct amdgpu_device *adev = ring->adev;
84
85 if (ring->use_doorbell)
86 return *ring->wptr_cpu_addr;
87
88 if (ring->me == 0)
89 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
90 else if (ring->me == 1)
91 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
92 else
93 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
94}
95
96/**
97 * vce_v4_0_ring_set_wptr - set write pointer
98 *
99 * @ring: amdgpu_ring pointer
100 *
101 * Commits the write pointer to the hardware
102 */
103static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
104{
105 struct amdgpu_device *adev = ring->adev;
106
107 if (ring->use_doorbell) {
108 /* XXX check if swapping is necessary on BE */
109 *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
110 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
111 return;
112 }
113
114 if (ring->me == 0)
115 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
116 lower_32_bits(ring->wptr));
117 else if (ring->me == 1)
118 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
119 lower_32_bits(ring->wptr));
120 else
121 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
122 lower_32_bits(ring->wptr));
123}
124
125static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
126{
127 int i, j;
128
129 for (i = 0; i < 10; ++i) {
130 for (j = 0; j < 100; ++j) {
131 uint32_t status =
132 RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
133
134 if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
135 return 0;
136 mdelay(10);
137 }
138
139 DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
140 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
141 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
142 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
143 mdelay(10);
144 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
145 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
146 mdelay(10);
147
148 }
149
150 return -ETIMEDOUT;
151}
152
153static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
154 struct amdgpu_mm_table *table)
155{
156 uint32_t data = 0, loop;
157 uint64_t addr = table->gpu_addr;
158 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
159 uint32_t size;
160
161 size = header->header_size + header->vce_table_size + header->uvd_table_size;
162
163 /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
164 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
165 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
166
167 /* 2, update vmid of descriptor */
168 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
169 data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
170 data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
171 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
172
173 /* 3, notify mmsch about the size of this descriptor */
174 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
175
176 /* 4, set resp to zero */
177 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
178
179 WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
180 *adev->vce.ring[0].wptr_cpu_addr = 0;
181 adev->vce.ring[0].wptr = 0;
182 adev->vce.ring[0].wptr_old = 0;
183
184 /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
185 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
186
187 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
188 loop = 1000;
189 while ((data & 0x10000002) != 0x10000002) {
190 udelay(10);
191 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
192 loop--;
193 if (!loop)
194 break;
195 }
196
197 if (!loop) {
198 dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
199 return -EBUSY;
200 }
201
202 return 0;
203}
204
205static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
206{
207 struct amdgpu_ring *ring;
208 uint32_t offset, size;
209 uint32_t table_size = 0;
210 struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
211 struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
212 struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
213 struct mmsch_v1_0_cmd_end end = { { 0 } };
214 uint32_t *init_table = adev->virt.mm_table.cpu_addr;
215 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
216
217 direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
218 direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
219 direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
220 end.cmd_header.command_type = MMSCH_COMMAND__END;
221
222 if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
223 header->version = MMSCH_VERSION;
224 header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
225
226 if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
227 header->vce_table_offset = header->header_size;
228 else
229 header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
230
231 init_table += header->vce_table_offset;
232
233 ring = &adev->vce.ring[0];
234 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
235 lower_32_bits(ring->gpu_addr));
236 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
237 upper_32_bits(ring->gpu_addr));
238 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
239 ring->ring_size / 4);
240
241 /* BEGING OF MC_RESUME */
242 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
243 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
244 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
245 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
246 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
247
248 offset = AMDGPU_VCE_FIRMWARE_OFFSET;
249 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
250 uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
251 uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
252 uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;
253
254 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
255 mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
256 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
257 mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
258 (tmr_mc_addr >> 40) & 0xff);
259 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
260 } else {
261 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
262 mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
263 adev->vce.gpu_addr >> 8);
264 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
265 mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
266 (adev->vce.gpu_addr >> 40) & 0xff);
267 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
268 offset & ~0x0f000000);
269
270 }
271 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
272 mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
273 adev->vce.gpu_addr >> 8);
274 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
275 mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
276 (adev->vce.gpu_addr >> 40) & 0xff);
277 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
278 mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
279 adev->vce.gpu_addr >> 8);
280 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
281 mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
282 (adev->vce.gpu_addr >> 40) & 0xff);
283
284 size = VCE_V4_0_FW_SIZE;
285 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
286
287 offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
288 size = VCE_V4_0_STACK_SIZE;
289 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
290 (offset & ~0x0f000000) | (1 << 24));
291 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
292
293 offset += size;
294 size = VCE_V4_0_DATA_SIZE;
295 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
296 (offset & ~0x0f000000) | (2 << 24));
297 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
298
299 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
300 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
301 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
302 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
303
304 /* end of MC_RESUME */
305 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
306 VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
307 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
308 ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
309 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
310 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
311
312 MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
313 VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
314 VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
315
316 /* clear BUSY flag */
317 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
318 ~VCE_STATUS__JOB_BUSY_MASK, 0);
319
320 /* add end packet */
321 memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
322 table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
323 header->vce_table_size = table_size;
324 }
325
326 return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
327}
328
329/**
330 * vce_v4_0_start - start VCE block
331 *
332 * @adev: amdgpu_device pointer
333 *
334 * Setup and start the VCE block
335 */
336static int vce_v4_0_start(struct amdgpu_device *adev)
337{
338 struct amdgpu_ring *ring;
339 int r;
340
341 ring = &adev->vce.ring[0];
342
343 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
344 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
345 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
346 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
347 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
348
349 ring = &adev->vce.ring[1];
350
351 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
352 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
353 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
354 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
355 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
356
357 ring = &adev->vce.ring[2];
358
359 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
360 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
361 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
362 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
363 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
364
365 vce_v4_0_mc_resume(adev);
366 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
367 ~VCE_STATUS__JOB_BUSY_MASK);
368
369 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
370
371 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
372 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
373 mdelay(100);
374
375 r = vce_v4_0_firmware_loaded(adev);
376
377 /* clear BUSY flag */
378 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
379
380 if (r) {
381 DRM_ERROR("VCE not responding, giving up!!!\n");
382 return r;
383 }
384
385 return 0;
386}
387
388static int vce_v4_0_stop(struct amdgpu_device *adev)
389{
390
391 /* Disable VCPU */
392 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
393
394 /* hold on ECPU */
395 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
396 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
397 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
398
399 /* clear VCE_STATUS */
400 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);
401
402 /* Set Clock-Gating off */
403 /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
404 vce_v4_0_set_vce_sw_clock_gating(adev, false);
405 */
406
407 return 0;
408}
409
410static int vce_v4_0_early_init(void *handle)
411{
412 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
413
414 if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
415 adev->vce.num_rings = 1;
416 else
417 adev->vce.num_rings = 3;
418
419 vce_v4_0_set_ring_funcs(adev);
420 vce_v4_0_set_irq_funcs(adev);
421
422 return 0;
423}
424
425static int vce_v4_0_sw_init(void *handle)
426{
427 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
428 struct amdgpu_ring *ring;
429
430 unsigned size;
431 int r, i;
432
433 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
434 if (r)
435 return r;
436
437 size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
438 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
439 size += VCE_V4_0_FW_SIZE;
440
441 r = amdgpu_vce_sw_init(adev, size);
442 if (r)
443 return r;
444
445 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
446 const struct common_firmware_header *hdr;
447 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
448
449 adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
450 if (!adev->vce.saved_bo)
451 return -ENOMEM;
452
453 hdr = (const struct common_firmware_header *)adev->vce.fw->data;
454 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
455 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
456 adev->firmware.fw_size +=
457 ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
458 DRM_INFO("PSP loading VCE firmware\n");
459 } else {
460 r = amdgpu_vce_resume(adev);
461 if (r)
462 return r;
463 }
464
465 for (i = 0; i < adev->vce.num_rings; i++) {
466 enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
467
468 ring = &adev->vce.ring[i];
469 sprintf(ring->name, "vce%d", i);
470 if (amdgpu_sriov_vf(adev)) {
471 /* DOORBELL only works under SRIOV */
472 ring->use_doorbell = true;
473
474 /* currently only use the first encoding ring for sriov,
475 * so set unused location for other unused rings.
476 */
477 if (i == 0)
478 ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
479 else
480 ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
481 }
482 r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
483 hw_prio, NULL);
484 if (r)
485 return r;
486 }
487
488
489 r = amdgpu_vce_entity_init(adev);
490 if (r)
491 return r;
492
493 r = amdgpu_virt_alloc_mm_table(adev);
494 if (r)
495 return r;
496
497 return r;
498}
499
500static int vce_v4_0_sw_fini(void *handle)
501{
502 int r;
503 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
504
505 /* free MM table */
506 amdgpu_virt_free_mm_table(adev);
507
508 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
509 kvfree(adev->vce.saved_bo);
510 adev->vce.saved_bo = NULL;
511 }
512
513 r = amdgpu_vce_suspend(adev);
514 if (r)
515 return r;
516
517 return amdgpu_vce_sw_fini(adev);
518}
519
520static int vce_v4_0_hw_init(void *handle)
521{
522 int r, i;
523 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
524
525 if (amdgpu_sriov_vf(adev))
526 r = vce_v4_0_sriov_start(adev);
527 else
528 r = vce_v4_0_start(adev);
529 if (r)
530 return r;
531
532 for (i = 0; i < adev->vce.num_rings; i++) {
533 r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
534 if (r)
535 return r;
536 }
537
538 DRM_INFO("VCE initialized successfully.\n");
539
540 return 0;
541}
542
543static int vce_v4_0_hw_fini(void *handle)
544{
545 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
546
547 cancel_delayed_work_sync(&adev->vce.idle_work);
548
549 if (!amdgpu_sriov_vf(adev)) {
550 /* vce_v4_0_wait_for_idle(handle); */
551 vce_v4_0_stop(adev);
552 } else {
553 /* full access mode, so don't touch any VCE register */
554 DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
555 }
556
557 return 0;
558}
559
560static int vce_v4_0_suspend(void *handle)
561{
562 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
563 int r, idx;
564
565 if (adev->vce.vcpu_bo == NULL)
566 return 0;
567
568 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
569 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
570 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
571 void *ptr = adev->vce.cpu_addr;
572
573 memcpy_fromio(adev->vce.saved_bo, ptr, size);
574 }
575 drm_dev_exit(idx);
576 }
577
578 /*
579 * Proper cleanups before halting the HW engine:
580 * - cancel the delayed idle work
581 * - enable powergating
582 * - enable clockgating
583 * - disable dpm
584 *
585 * TODO: to align with the VCN implementation, move the
586 * jobs for clockgating/powergating/dpm setting to
587 * ->set_powergating_state().
588 */
589 cancel_delayed_work_sync(&adev->vce.idle_work);
590
591 if (adev->pm.dpm_enabled) {
592 amdgpu_dpm_enable_vce(adev, false);
593 } else {
594 amdgpu_asic_set_vce_clocks(adev, 0, 0);
595 amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
596 AMD_PG_STATE_GATE);
597 amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
598 AMD_CG_STATE_GATE);
599 }
600
601 r = vce_v4_0_hw_fini(adev);
602 if (r)
603 return r;
604
605 return amdgpu_vce_suspend(adev);
606}
607
608static int vce_v4_0_resume(void *handle)
609{
610 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
611 int r, idx;
612
613 if (adev->vce.vcpu_bo == NULL)
614 return -EINVAL;
615
616 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
617
618 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
619 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
620 void *ptr = adev->vce.cpu_addr;
621
622 memcpy_toio(ptr, adev->vce.saved_bo, size);
623 drm_dev_exit(idx);
624 }
625 } else {
626 r = amdgpu_vce_resume(adev);
627 if (r)
628 return r;
629 }
630
631 return vce_v4_0_hw_init(adev);
632}
633
634static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
635{
636 uint32_t offset, size;
637 uint64_t tmr_mc_addr;
638
639 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
640 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
641 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
642 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
643
644 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
645 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
646 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
647 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
648 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
649
650 offset = AMDGPU_VCE_FIRMWARE_OFFSET;
651
652 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
653 tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
654 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
655 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
656 (tmr_mc_addr >> 8));
657 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
658 (tmr_mc_addr >> 40) & 0xff);
659 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
660 } else {
661 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
662 (adev->vce.gpu_addr >> 8));
663 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
664 (adev->vce.gpu_addr >> 40) & 0xff);
665 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
666 }
667
668 size = VCE_V4_0_FW_SIZE;
669 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
670
671 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
672 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
673 offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
674 size = VCE_V4_0_STACK_SIZE;
675 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
676 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
677
678 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
679 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
680 offset += size;
681 size = VCE_V4_0_DATA_SIZE;
682 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
683 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
684
685 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
686 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
687 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
688 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
689}
690
691static int vce_v4_0_set_clockgating_state(void *handle,
692 enum amd_clockgating_state state)
693{
694 /* needed for driver unload*/
695 return 0;
696}
697
698#if 0
699static bool vce_v4_0_is_idle(void *handle)
700{
701 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
702 u32 mask = 0;
703
704 mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
705 mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
706
707 return !(RREG32(mmSRBM_STATUS2) & mask);
708}
709
710static int vce_v4_0_wait_for_idle(void *handle)
711{
712 unsigned i;
713 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
714
715 for (i = 0; i < adev->usec_timeout; i++)
716 if (vce_v4_0_is_idle(handle))
717 return 0;
718
719 return -ETIMEDOUT;
720}
721
722#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
723#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
724#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
725#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
726 VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
727
728static bool vce_v4_0_check_soft_reset(void *handle)
729{
730 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
731 u32 srbm_soft_reset = 0;
732
733 /* According to VCE team , we should use VCE_STATUS instead
734 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
735 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
736 * instance's registers are accessed
737 * (0 for 1st instance, 10 for 2nd instance).
738 *
739 *VCE_STATUS
740 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB |
741 *|----+----+-----------+----+----+----+----------+---------+----|
742 *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0|
743 *
744 * VCE team suggest use bit 3--bit 6 for busy status check
745 */
746 mutex_lock(&adev->grbm_idx_mutex);
747 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
748 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
749 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
750 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
751 }
752 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
753 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
754 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
755 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
756 }
757 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
758 mutex_unlock(&adev->grbm_idx_mutex);
759
760 if (srbm_soft_reset) {
761 adev->vce.srbm_soft_reset = srbm_soft_reset;
762 return true;
763 } else {
764 adev->vce.srbm_soft_reset = 0;
765 return false;
766 }
767}
768
769static int vce_v4_0_soft_reset(void *handle)
770{
771 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
772 u32 srbm_soft_reset;
773
774 if (!adev->vce.srbm_soft_reset)
775 return 0;
776 srbm_soft_reset = adev->vce.srbm_soft_reset;
777
778 if (srbm_soft_reset) {
779 u32 tmp;
780
781 tmp = RREG32(mmSRBM_SOFT_RESET);
782 tmp |= srbm_soft_reset;
783 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
784 WREG32(mmSRBM_SOFT_RESET, tmp);
785 tmp = RREG32(mmSRBM_SOFT_RESET);
786
787 udelay(50);
788
789 tmp &= ~srbm_soft_reset;
790 WREG32(mmSRBM_SOFT_RESET, tmp);
791 tmp = RREG32(mmSRBM_SOFT_RESET);
792
793 /* Wait a little for things to settle down */
794 udelay(50);
795 }
796
797 return 0;
798}
799
800static int vce_v4_0_pre_soft_reset(void *handle)
801{
802 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
803
804 if (!adev->vce.srbm_soft_reset)
805 return 0;
806
807 mdelay(5);
808
809 return vce_v4_0_suspend(adev);
810}
811
812
813static int vce_v4_0_post_soft_reset(void *handle)
814{
815 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
816
817 if (!adev->vce.srbm_soft_reset)
818 return 0;
819
820 mdelay(5);
821
822 return vce_v4_0_resume(adev);
823}
824
825static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
826{
827 u32 tmp, data;
828
829 tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
830 if (override)
831 data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
832 else
833 data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
834
835 if (tmp != data)
836 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
837}
838
839static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
840 bool gated)
841{
842 u32 data;
843
844 /* Set Override to disable Clock Gating */
845 vce_v4_0_override_vce_clock_gating(adev, true);
846
847 /* This function enables MGCG which is controlled by firmware.
848 With the clocks in the gated state the core is still
849 accessible but the firmware will throttle the clocks on the
850 fly as necessary.
851 */
852 if (gated) {
853 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
854 data |= 0x1ff;
855 data &= ~0xef0000;
856 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
857
858 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
859 data |= 0x3ff000;
860 data &= ~0xffc00000;
861 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
862
863 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
864 data |= 0x2;
865 data &= ~0x00010000;
866 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
867
868 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
869 data |= 0x37f;
870 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
871
872 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
873 data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
874 VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
875 VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
876 0x8;
877 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
878 } else {
879 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
880 data &= ~0x80010;
881 data |= 0xe70008;
882 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
883
884 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
885 data |= 0xffc00000;
886 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
887
888 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
889 data |= 0x10000;
890 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
891
892 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
893 data &= ~0xffc00000;
894 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
895
896 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
897 data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
898 VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
899 VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
900 0x8);
901 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
902 }
903 vce_v4_0_override_vce_clock_gating(adev, false);
904}
905
906static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
907{
908 u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
909
910 if (enable)
911 tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
912 else
913 tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
914
915 WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
916}
917
918static int vce_v4_0_set_clockgating_state(void *handle,
919 enum amd_clockgating_state state)
920{
921 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
922 bool enable = (state == AMD_CG_STATE_GATE);
923 int i;
924
925 if ((adev->asic_type == CHIP_POLARIS10) ||
926 (adev->asic_type == CHIP_TONGA) ||
927 (adev->asic_type == CHIP_FIJI))
928 vce_v4_0_set_bypass_mode(adev, enable);
929
930 if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
931 return 0;
932
933 mutex_lock(&adev->grbm_idx_mutex);
934 for (i = 0; i < 2; i++) {
935 /* Program VCE Instance 0 or 1 if not harvested */
936 if (adev->vce.harvest_config & (1 << i))
937 continue;
938
939 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
940
941 if (enable) {
942 /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
943 uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
944 data &= ~(0xf | 0xff0);
945 data |= ((0x0 << 0) | (0x04 << 4));
946 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
947
948 /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
949 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
950 data &= ~(0xf | 0xff0);
951 data |= ((0x0 << 0) | (0x04 << 4));
952 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
953 }
954
955 vce_v4_0_set_vce_sw_clock_gating(adev, enable);
956 }
957
958 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
959 mutex_unlock(&adev->grbm_idx_mutex);
960
961 return 0;
962}
963#endif
964
965static int vce_v4_0_set_powergating_state(void *handle,
966 enum amd_powergating_state state)
967{
968 /* This doesn't actually powergate the VCE block.
969 * That's done in the dpm code via the SMC. This
970 * just re-inits the block as necessary. The actual
971 * gating still happens in the dpm code. We should
972 * revisit this when there is a cleaner line between
973 * the smc and the hw blocks
974 */
975 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
976
977 if (state == AMD_PG_STATE_GATE)
978 return vce_v4_0_stop(adev);
979 else
980 return vce_v4_0_start(adev);
981}
982
983static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
984 struct amdgpu_ib *ib, uint32_t flags)
985{
986 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
987
988 amdgpu_ring_write(ring, VCE_CMD_IB_VM);
989 amdgpu_ring_write(ring, vmid);
990 amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
991 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
992 amdgpu_ring_write(ring, ib->length_dw);
993}
994
995static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
996 u64 seq, unsigned flags)
997{
998 WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
999
1000 amdgpu_ring_write(ring, VCE_CMD_FENCE);
1001 amdgpu_ring_write(ring, addr);
1002 amdgpu_ring_write(ring, upper_32_bits(addr));
1003 amdgpu_ring_write(ring, seq);
1004 amdgpu_ring_write(ring, VCE_CMD_TRAP);
1005}
1006
1007static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
1008{
1009 amdgpu_ring_write(ring, VCE_CMD_END);
1010}
1011
1012static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1013 uint32_t val, uint32_t mask)
1014{
1015 amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
1016 amdgpu_ring_write(ring, reg << 2);
1017 amdgpu_ring_write(ring, mask);
1018 amdgpu_ring_write(ring, val);
1019}
1020
1021static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
1022 unsigned int vmid, uint64_t pd_addr)
1023{
1024 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
1025
1026 pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1027
1028 /* wait for reg writes */
1029 vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
1030 vmid * hub->ctx_addr_distance,
1031 lower_32_bits(pd_addr), 0xffffffff);
1032}
1033
1034static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
1035 uint32_t reg, uint32_t val)
1036{
1037 amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
1038 amdgpu_ring_write(ring, reg << 2);
1039 amdgpu_ring_write(ring, val);
1040}
1041
1042static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1043 struct amdgpu_irq_src *source,
1044 unsigned type,
1045 enum amdgpu_interrupt_state state)
1046{
1047 uint32_t val = 0;
1048
1049 if (!amdgpu_sriov_vf(adev)) {
1050 if (state == AMDGPU_IRQ_STATE_ENABLE)
1051 val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1052
1053 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1054 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1055 }
1056 return 0;
1057}
1058
1059static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1060 struct amdgpu_irq_src *source,
1061 struct amdgpu_iv_entry *entry)
1062{
1063 DRM_DEBUG("IH: VCE\n");
1064
1065 switch (entry->src_data[0]) {
1066 case 0:
1067 case 1:
1068 case 2:
1069 amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1070 break;
1071 default:
1072 DRM_ERROR("Unhandled interrupt: %d %d\n",
1073 entry->src_id, entry->src_data[0]);
1074 break;
1075 }
1076
1077 return 0;
1078}
1079
1080const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1081 .name = "vce_v4_0",
1082 .early_init = vce_v4_0_early_init,
1083 .late_init = NULL,
1084 .sw_init = vce_v4_0_sw_init,
1085 .sw_fini = vce_v4_0_sw_fini,
1086 .hw_init = vce_v4_0_hw_init,
1087 .hw_fini = vce_v4_0_hw_fini,
1088 .suspend = vce_v4_0_suspend,
1089 .resume = vce_v4_0_resume,
1090 .is_idle = NULL /* vce_v4_0_is_idle */,
1091 .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1092 .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1093 .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1094 .soft_reset = NULL /* vce_v4_0_soft_reset */,
1095 .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1096 .set_clockgating_state = vce_v4_0_set_clockgating_state,
1097 .set_powergating_state = vce_v4_0_set_powergating_state,
1098};
1099
1100static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1101 .type = AMDGPU_RING_TYPE_VCE,
1102 .align_mask = 0x3f,
1103 .nop = VCE_CMD_NO_OP,
1104 .support_64bit_ptrs = false,
1105 .no_user_fence = true,
1106 .vmhub = AMDGPU_MMHUB_0,
1107 .get_rptr = vce_v4_0_ring_get_rptr,
1108 .get_wptr = vce_v4_0_ring_get_wptr,
1109 .set_wptr = vce_v4_0_ring_set_wptr,
1110 .parse_cs = amdgpu_vce_ring_parse_cs_vm,
1111 .emit_frame_size =
1112 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1113 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1114 4 + /* vce_v4_0_emit_vm_flush */
1115 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1116 1, /* vce_v4_0_ring_insert_end */
1117 .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1118 .emit_ib = vce_v4_0_ring_emit_ib,
1119 .emit_vm_flush = vce_v4_0_emit_vm_flush,
1120 .emit_fence = vce_v4_0_ring_emit_fence,
1121 .test_ring = amdgpu_vce_ring_test_ring,
1122 .test_ib = amdgpu_vce_ring_test_ib,
1123 .insert_nop = amdgpu_ring_insert_nop,
1124 .insert_end = vce_v4_0_ring_insert_end,
1125 .pad_ib = amdgpu_ring_generic_pad_ib,
1126 .begin_use = amdgpu_vce_ring_begin_use,
1127 .end_use = amdgpu_vce_ring_end_use,
1128 .emit_wreg = vce_v4_0_emit_wreg,
1129 .emit_reg_wait = vce_v4_0_emit_reg_wait,
1130 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1131};
1132
1133static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1134{
1135 int i;
1136
1137 for (i = 0; i < adev->vce.num_rings; i++) {
1138 adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1139 adev->vce.ring[i].me = i;
1140 }
1141 DRM_INFO("VCE enabled in VM mode\n");
1142}
1143
1144static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1145 .set = vce_v4_0_set_interrupt_state,
1146 .process = vce_v4_0_process_interrupt,
1147};
1148
1149static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1150{
1151 adev->vce.irq.num_types = 1;
1152 adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1153};
1154
1155const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1156{
1157 .type = AMD_IP_BLOCK_TYPE_VCE,
1158 .major = 4,
1159 .minor = 0,
1160 .rev = 0,
1161 .funcs = &vce_v4_0_ip_funcs,
1162};
1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * The above copyright notice and this permission notice (including the
22 * next paragraph) shall be included in all copies or substantial portions
23 * of the Software.
24 *
25 */
26
27#include <linux/firmware.h>
28#include <drm/drmP.h>
29#include "amdgpu.h"
30#include "amdgpu_vce.h"
31#include "soc15.h"
32#include "soc15d.h"
33#include "soc15_common.h"
34#include "mmsch_v1_0.h"
35
36#include "vce/vce_4_0_offset.h"
37#include "vce/vce_4_0_default.h"
38#include "vce/vce_4_0_sh_mask.h"
39#include "mmhub/mmhub_1_0_offset.h"
40#include "mmhub/mmhub_1_0_sh_mask.h"
41
42#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
43
44#define VCE_V4_0_FW_SIZE (384 * 1024)
45#define VCE_V4_0_STACK_SIZE (64 * 1024)
46#define VCE_V4_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
47
48static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
49static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
50static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
51
52/**
53 * vce_v4_0_ring_get_rptr - get read pointer
54 *
55 * @ring: amdgpu_ring pointer
56 *
57 * Returns the current hardware read pointer
58 */
59static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
60{
61 struct amdgpu_device *adev = ring->adev;
62
63 if (ring == &adev->vce.ring[0])
64 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
65 else if (ring == &adev->vce.ring[1])
66 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
67 else
68 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
69}
70
71/**
72 * vce_v4_0_ring_get_wptr - get write pointer
73 *
74 * @ring: amdgpu_ring pointer
75 *
76 * Returns the current hardware write pointer
77 */
78static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
79{
80 struct amdgpu_device *adev = ring->adev;
81
82 if (ring->use_doorbell)
83 return adev->wb.wb[ring->wptr_offs];
84
85 if (ring == &adev->vce.ring[0])
86 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
87 else if (ring == &adev->vce.ring[1])
88 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
89 else
90 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
91}
92
93/**
94 * vce_v4_0_ring_set_wptr - set write pointer
95 *
96 * @ring: amdgpu_ring pointer
97 *
98 * Commits the write pointer to the hardware
99 */
100static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
101{
102 struct amdgpu_device *adev = ring->adev;
103
104 if (ring->use_doorbell) {
105 /* XXX check if swapping is necessary on BE */
106 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
107 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
108 return;
109 }
110
111 if (ring == &adev->vce.ring[0])
112 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
113 lower_32_bits(ring->wptr));
114 else if (ring == &adev->vce.ring[1])
115 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
116 lower_32_bits(ring->wptr));
117 else
118 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
119 lower_32_bits(ring->wptr));
120}
121
122static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
123{
124 int i, j;
125
126 for (i = 0; i < 10; ++i) {
127 for (j = 0; j < 100; ++j) {
128 uint32_t status =
129 RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
130
131 if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
132 return 0;
133 mdelay(10);
134 }
135
136 DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
137 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
138 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
139 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
140 mdelay(10);
141 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
142 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
143 mdelay(10);
144
145 }
146
147 return -ETIMEDOUT;
148}
149
150static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
151 struct amdgpu_mm_table *table)
152{
153 uint32_t data = 0, loop;
154 uint64_t addr = table->gpu_addr;
155 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
156 uint32_t size;
157
158 size = header->header_size + header->vce_table_size + header->uvd_table_size;
159
160 /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
161 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
162 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
163
164 /* 2, update vmid of descriptor */
165 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
166 data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
167 data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
168 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
169
170 /* 3, notify mmsch about the size of this descriptor */
171 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
172
173 /* 4, set resp to zero */
174 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
175
176 WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
177 adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
178 adev->vce.ring[0].wptr = 0;
179 adev->vce.ring[0].wptr_old = 0;
180
181 /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
182 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
183
184 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
185 loop = 1000;
186 while ((data & 0x10000002) != 0x10000002) {
187 udelay(10);
188 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
189 loop--;
190 if (!loop)
191 break;
192 }
193
194 if (!loop) {
195 dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
196 return -EBUSY;
197 }
198
199 return 0;
200}
201
202static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
203{
204 struct amdgpu_ring *ring;
205 uint32_t offset, size;
206 uint32_t table_size = 0;
207 struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
208 struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
209 struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
210 struct mmsch_v1_0_cmd_end end = { { 0 } };
211 uint32_t *init_table = adev->virt.mm_table.cpu_addr;
212 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
213
214 direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
215 direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
216 direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
217 end.cmd_header.command_type = MMSCH_COMMAND__END;
218
219 if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
220 header->version = MMSCH_VERSION;
221 header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
222
223 if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
224 header->vce_table_offset = header->header_size;
225 else
226 header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
227
228 init_table += header->vce_table_offset;
229
230 ring = &adev->vce.ring[0];
231 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
232 lower_32_bits(ring->gpu_addr));
233 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
234 upper_32_bits(ring->gpu_addr));
235 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
236 ring->ring_size / 4);
237
238 /* BEGING OF MC_RESUME */
239 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
240 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
241 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
242 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
243 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
244
245 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
246 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
247 mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
248 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
249 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
250 mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
251 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
252 } else {
253 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
254 mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
255 adev->vce.gpu_addr >> 8);
256 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
257 mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
258 (adev->vce.gpu_addr >> 40) & 0xff);
259 }
260 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
261 mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
262 adev->vce.gpu_addr >> 8);
263 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
264 mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
265 (adev->vce.gpu_addr >> 40) & 0xff);
266 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
267 mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
268 adev->vce.gpu_addr >> 8);
269 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
270 mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
271 (adev->vce.gpu_addr >> 40) & 0xff);
272
273 offset = AMDGPU_VCE_FIRMWARE_OFFSET;
274 size = VCE_V4_0_FW_SIZE;
275 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
276 offset & ~0x0f000000);
277 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
278
279 offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
280 size = VCE_V4_0_STACK_SIZE;
281 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
282 (offset & ~0x0f000000) | (1 << 24));
283 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
284
285 offset += size;
286 size = VCE_V4_0_DATA_SIZE;
287 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
288 (offset & ~0x0f000000) | (2 << 24));
289 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
290
291 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
292 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
293 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
294 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
295
296 /* end of MC_RESUME */
297 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
298 VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
299 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
300 ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
301 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
302 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
303
304 MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
305 VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
306 VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
307
308 /* clear BUSY flag */
309 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
310 ~VCE_STATUS__JOB_BUSY_MASK, 0);
311
312 /* add end packet */
313 memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
314 table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
315 header->vce_table_size = table_size;
316 }
317
318 return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
319}
320
321/**
322 * vce_v4_0_start - start VCE block
323 *
324 * @adev: amdgpu_device pointer
325 *
326 * Setup and start the VCE block
327 */
328static int vce_v4_0_start(struct amdgpu_device *adev)
329{
330 struct amdgpu_ring *ring;
331 int r;
332
333 ring = &adev->vce.ring[0];
334
335 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
336 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
337 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
338 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
339 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
340
341 ring = &adev->vce.ring[1];
342
343 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
344 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
345 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
346 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
347 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
348
349 ring = &adev->vce.ring[2];
350
351 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
352 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
353 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
354 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
355 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
356
357 vce_v4_0_mc_resume(adev);
358 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
359 ~VCE_STATUS__JOB_BUSY_MASK);
360
361 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
362
363 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
364 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
365 mdelay(100);
366
367 r = vce_v4_0_firmware_loaded(adev);
368
369 /* clear BUSY flag */
370 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
371
372 if (r) {
373 DRM_ERROR("VCE not responding, giving up!!!\n");
374 return r;
375 }
376
377 return 0;
378}
379
380static int vce_v4_0_stop(struct amdgpu_device *adev)
381{
382
383 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
384
385 /* hold on ECPU */
386 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
387 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
388 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
389
390 /* clear BUSY flag */
391 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
392
393 /* Set Clock-Gating off */
394 /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
395 vce_v4_0_set_vce_sw_clock_gating(adev, false);
396 */
397
398 return 0;
399}
400
401static int vce_v4_0_early_init(void *handle)
402{
403 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
404
405 if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
406 adev->vce.num_rings = 1;
407 else
408 adev->vce.num_rings = 3;
409
410 vce_v4_0_set_ring_funcs(adev);
411 vce_v4_0_set_irq_funcs(adev);
412
413 return 0;
414}
415
416static int vce_v4_0_sw_init(void *handle)
417{
418 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
419 struct amdgpu_ring *ring;
420 unsigned size;
421 int r, i;
422
423 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
424 if (r)
425 return r;
426
427 size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
428 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
429 size += VCE_V4_0_FW_SIZE;
430
431 r = amdgpu_vce_sw_init(adev, size);
432 if (r)
433 return r;
434
435 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
436 const struct common_firmware_header *hdr;
437 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
438
439 adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
440 if (!adev->vce.saved_bo)
441 return -ENOMEM;
442
443 hdr = (const struct common_firmware_header *)adev->vce.fw->data;
444 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
445 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
446 adev->firmware.fw_size +=
447 ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
448 DRM_INFO("PSP loading VCE firmware\n");
449 } else {
450 r = amdgpu_vce_resume(adev);
451 if (r)
452 return r;
453 }
454
455 for (i = 0; i < adev->vce.num_rings; i++) {
456 ring = &adev->vce.ring[i];
457 sprintf(ring->name, "vce%d", i);
458 if (amdgpu_sriov_vf(adev)) {
459 /* DOORBELL only works under SRIOV */
460 ring->use_doorbell = true;
461
462 /* currently only use the first encoding ring for sriov,
463 * so set unused location for other unused rings.
464 */
465 if (i == 0)
466 ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2;
467 else
468 ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1;
469 }
470 r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
471 if (r)
472 return r;
473 }
474
475 r = amdgpu_virt_alloc_mm_table(adev);
476 if (r)
477 return r;
478
479 return r;
480}
481
482static int vce_v4_0_sw_fini(void *handle)
483{
484 int r;
485 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
486
487 /* free MM table */
488 amdgpu_virt_free_mm_table(adev);
489
490 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
491 kfree(adev->vce.saved_bo);
492 adev->vce.saved_bo = NULL;
493 }
494
495 r = amdgpu_vce_suspend(adev);
496 if (r)
497 return r;
498
499 return amdgpu_vce_sw_fini(adev);
500}
501
502static int vce_v4_0_hw_init(void *handle)
503{
504 int r, i;
505 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
506
507 if (amdgpu_sriov_vf(adev))
508 r = vce_v4_0_sriov_start(adev);
509 else
510 r = vce_v4_0_start(adev);
511 if (r)
512 return r;
513
514 for (i = 0; i < adev->vce.num_rings; i++)
515 adev->vce.ring[i].ready = false;
516
517 for (i = 0; i < adev->vce.num_rings; i++) {
518 r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
519 if (r)
520 return r;
521 else
522 adev->vce.ring[i].ready = true;
523 }
524
525 DRM_INFO("VCE initialized successfully.\n");
526
527 return 0;
528}
529
530static int vce_v4_0_hw_fini(void *handle)
531{
532 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
533 int i;
534
535 if (!amdgpu_sriov_vf(adev)) {
536 /* vce_v4_0_wait_for_idle(handle); */
537 vce_v4_0_stop(adev);
538 } else {
539 /* full access mode, so don't touch any VCE register */
540 DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
541 }
542
543 for (i = 0; i < adev->vce.num_rings; i++)
544 adev->vce.ring[i].ready = false;
545
546 return 0;
547}
548
549static int vce_v4_0_suspend(void *handle)
550{
551 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
552 int r;
553
554 if (adev->vce.vcpu_bo == NULL)
555 return 0;
556
557 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
558 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
559 void *ptr = adev->vce.cpu_addr;
560
561 memcpy_fromio(adev->vce.saved_bo, ptr, size);
562 }
563
564 r = vce_v4_0_hw_fini(adev);
565 if (r)
566 return r;
567
568 return amdgpu_vce_suspend(adev);
569}
570
571static int vce_v4_0_resume(void *handle)
572{
573 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
574 int r;
575
576 if (adev->vce.vcpu_bo == NULL)
577 return -EINVAL;
578
579 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
580 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
581 void *ptr = adev->vce.cpu_addr;
582
583 memcpy_toio(ptr, adev->vce.saved_bo, size);
584 } else {
585 r = amdgpu_vce_resume(adev);
586 if (r)
587 return r;
588 }
589
590 return vce_v4_0_hw_init(adev);
591}
592
593static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
594{
595 uint32_t offset, size;
596
597 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
598 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
599 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
600 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
601
602 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
603 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
604 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
605 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
606 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
607
608 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
609 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
610 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
611 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
612 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
613 } else {
614 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
615 (adev->vce.gpu_addr >> 8));
616 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
617 (adev->vce.gpu_addr >> 40) & 0xff);
618 }
619
620 offset = AMDGPU_VCE_FIRMWARE_OFFSET;
621 size = VCE_V4_0_FW_SIZE;
622 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
623 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
624
625 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
626 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
627 offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
628 size = VCE_V4_0_STACK_SIZE;
629 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
630 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
631
632 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
633 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
634 offset += size;
635 size = VCE_V4_0_DATA_SIZE;
636 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
637 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
638
639 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
640 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
641 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
642 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
643}
644
645static int vce_v4_0_set_clockgating_state(void *handle,
646 enum amd_clockgating_state state)
647{
648 /* needed for driver unload*/
649 return 0;
650}
651
652#if 0
653static bool vce_v4_0_is_idle(void *handle)
654{
655 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
656 u32 mask = 0;
657
658 mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
659 mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
660
661 return !(RREG32(mmSRBM_STATUS2) & mask);
662}
663
664static int vce_v4_0_wait_for_idle(void *handle)
665{
666 unsigned i;
667 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
668
669 for (i = 0; i < adev->usec_timeout; i++)
670 if (vce_v4_0_is_idle(handle))
671 return 0;
672
673 return -ETIMEDOUT;
674}
675
676#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
677#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
678#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
679#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
680 VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
681
682static bool vce_v4_0_check_soft_reset(void *handle)
683{
684 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
685 u32 srbm_soft_reset = 0;
686
687 /* According to VCE team , we should use VCE_STATUS instead
688 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
689 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
690 * instance's registers are accessed
691 * (0 for 1st instance, 10 for 2nd instance).
692 *
693 *VCE_STATUS
694 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB |
695 *|----+----+-----------+----+----+----+----------+---------+----|
696 *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0|
697 *
698 * VCE team suggest use bit 3--bit 6 for busy status check
699 */
700 mutex_lock(&adev->grbm_idx_mutex);
701 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
702 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
703 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
704 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
705 }
706 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
707 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
708 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
709 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
710 }
711 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
712 mutex_unlock(&adev->grbm_idx_mutex);
713
714 if (srbm_soft_reset) {
715 adev->vce.srbm_soft_reset = srbm_soft_reset;
716 return true;
717 } else {
718 adev->vce.srbm_soft_reset = 0;
719 return false;
720 }
721}
722
723static int vce_v4_0_soft_reset(void *handle)
724{
725 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
726 u32 srbm_soft_reset;
727
728 if (!adev->vce.srbm_soft_reset)
729 return 0;
730 srbm_soft_reset = adev->vce.srbm_soft_reset;
731
732 if (srbm_soft_reset) {
733 u32 tmp;
734
735 tmp = RREG32(mmSRBM_SOFT_RESET);
736 tmp |= srbm_soft_reset;
737 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
738 WREG32(mmSRBM_SOFT_RESET, tmp);
739 tmp = RREG32(mmSRBM_SOFT_RESET);
740
741 udelay(50);
742
743 tmp &= ~srbm_soft_reset;
744 WREG32(mmSRBM_SOFT_RESET, tmp);
745 tmp = RREG32(mmSRBM_SOFT_RESET);
746
747 /* Wait a little for things to settle down */
748 udelay(50);
749 }
750
751 return 0;
752}
753
754static int vce_v4_0_pre_soft_reset(void *handle)
755{
756 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
757
758 if (!adev->vce.srbm_soft_reset)
759 return 0;
760
761 mdelay(5);
762
763 return vce_v4_0_suspend(adev);
764}
765
766
767static int vce_v4_0_post_soft_reset(void *handle)
768{
769 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
770
771 if (!adev->vce.srbm_soft_reset)
772 return 0;
773
774 mdelay(5);
775
776 return vce_v4_0_resume(adev);
777}
778
779static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
780{
781 u32 tmp, data;
782
783 tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
784 if (override)
785 data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
786 else
787 data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
788
789 if (tmp != data)
790 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
791}
792
793static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
794 bool gated)
795{
796 u32 data;
797
798 /* Set Override to disable Clock Gating */
799 vce_v4_0_override_vce_clock_gating(adev, true);
800
801 /* This function enables MGCG which is controlled by firmware.
802 With the clocks in the gated state the core is still
803 accessible but the firmware will throttle the clocks on the
804 fly as necessary.
805 */
806 if (gated) {
807 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
808 data |= 0x1ff;
809 data &= ~0xef0000;
810 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
811
812 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
813 data |= 0x3ff000;
814 data &= ~0xffc00000;
815 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
816
817 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
818 data |= 0x2;
819 data &= ~0x00010000;
820 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
821
822 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
823 data |= 0x37f;
824 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
825
826 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
827 data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
828 VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
829 VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
830 0x8;
831 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
832 } else {
833 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
834 data &= ~0x80010;
835 data |= 0xe70008;
836 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
837
838 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
839 data |= 0xffc00000;
840 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
841
842 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
843 data |= 0x10000;
844 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
845
846 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
847 data &= ~0xffc00000;
848 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
849
850 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
851 data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
852 VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
853 VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
854 0x8);
855 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
856 }
857 vce_v4_0_override_vce_clock_gating(adev, false);
858}
859
860static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
861{
862 u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
863
864 if (enable)
865 tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
866 else
867 tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
868
869 WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
870}
871
872static int vce_v4_0_set_clockgating_state(void *handle,
873 enum amd_clockgating_state state)
874{
875 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
876 bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
877 int i;
878
879 if ((adev->asic_type == CHIP_POLARIS10) ||
880 (adev->asic_type == CHIP_TONGA) ||
881 (adev->asic_type == CHIP_FIJI))
882 vce_v4_0_set_bypass_mode(adev, enable);
883
884 if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
885 return 0;
886
887 mutex_lock(&adev->grbm_idx_mutex);
888 for (i = 0; i < 2; i++) {
889 /* Program VCE Instance 0 or 1 if not harvested */
890 if (adev->vce.harvest_config & (1 << i))
891 continue;
892
893 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
894
895 if (enable) {
896 /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
897 uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
898 data &= ~(0xf | 0xff0);
899 data |= ((0x0 << 0) | (0x04 << 4));
900 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
901
902 /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
903 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
904 data &= ~(0xf | 0xff0);
905 data |= ((0x0 << 0) | (0x04 << 4));
906 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
907 }
908
909 vce_v4_0_set_vce_sw_clock_gating(adev, enable);
910 }
911
912 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
913 mutex_unlock(&adev->grbm_idx_mutex);
914
915 return 0;
916}
917
918static int vce_v4_0_set_powergating_state(void *handle,
919 enum amd_powergating_state state)
920{
921 /* This doesn't actually powergate the VCE block.
922 * That's done in the dpm code via the SMC. This
923 * just re-inits the block as necessary. The actual
924 * gating still happens in the dpm code. We should
925 * revisit this when there is a cleaner line between
926 * the smc and the hw blocks
927 */
928 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
929
930 if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
931 return 0;
932
933 if (state == AMD_PG_STATE_GATE)
934 /* XXX do we need a vce_v4_0_stop()? */
935 return 0;
936 else
937 return vce_v4_0_start(adev);
938}
939#endif
940
941static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
942 struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
943{
944 amdgpu_ring_write(ring, VCE_CMD_IB_VM);
945 amdgpu_ring_write(ring, vmid);
946 amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
947 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
948 amdgpu_ring_write(ring, ib->length_dw);
949}
950
951static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
952 u64 seq, unsigned flags)
953{
954 WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
955
956 amdgpu_ring_write(ring, VCE_CMD_FENCE);
957 amdgpu_ring_write(ring, addr);
958 amdgpu_ring_write(ring, upper_32_bits(addr));
959 amdgpu_ring_write(ring, seq);
960 amdgpu_ring_write(ring, VCE_CMD_TRAP);
961}
962
963static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
964{
965 amdgpu_ring_write(ring, VCE_CMD_END);
966}
967
968static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
969 uint32_t val, uint32_t mask)
970{
971 amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
972 amdgpu_ring_write(ring, reg << 2);
973 amdgpu_ring_write(ring, mask);
974 amdgpu_ring_write(ring, val);
975}
976
977static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
978 unsigned int vmid, uint64_t pd_addr)
979{
980 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
981
982 pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
983
984 /* wait for reg writes */
985 vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
986 lower_32_bits(pd_addr), 0xffffffff);
987}
988
989static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
990 uint32_t reg, uint32_t val)
991{
992 amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
993 amdgpu_ring_write(ring, reg << 2);
994 amdgpu_ring_write(ring, val);
995}
996
997static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
998 struct amdgpu_irq_src *source,
999 unsigned type,
1000 enum amdgpu_interrupt_state state)
1001{
1002 uint32_t val = 0;
1003
1004 if (!amdgpu_sriov_vf(adev)) {
1005 if (state == AMDGPU_IRQ_STATE_ENABLE)
1006 val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1007
1008 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1009 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1010 }
1011 return 0;
1012}
1013
1014static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1015 struct amdgpu_irq_src *source,
1016 struct amdgpu_iv_entry *entry)
1017{
1018 DRM_DEBUG("IH: VCE\n");
1019
1020 switch (entry->src_data[0]) {
1021 case 0:
1022 case 1:
1023 case 2:
1024 amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1025 break;
1026 default:
1027 DRM_ERROR("Unhandled interrupt: %d %d\n",
1028 entry->src_id, entry->src_data[0]);
1029 break;
1030 }
1031
1032 return 0;
1033}
1034
1035const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1036 .name = "vce_v4_0",
1037 .early_init = vce_v4_0_early_init,
1038 .late_init = NULL,
1039 .sw_init = vce_v4_0_sw_init,
1040 .sw_fini = vce_v4_0_sw_fini,
1041 .hw_init = vce_v4_0_hw_init,
1042 .hw_fini = vce_v4_0_hw_fini,
1043 .suspend = vce_v4_0_suspend,
1044 .resume = vce_v4_0_resume,
1045 .is_idle = NULL /* vce_v4_0_is_idle */,
1046 .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1047 .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1048 .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1049 .soft_reset = NULL /* vce_v4_0_soft_reset */,
1050 .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1051 .set_clockgating_state = vce_v4_0_set_clockgating_state,
1052 .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
1053};
1054
1055static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1056 .type = AMDGPU_RING_TYPE_VCE,
1057 .align_mask = 0x3f,
1058 .nop = VCE_CMD_NO_OP,
1059 .support_64bit_ptrs = false,
1060 .vmhub = AMDGPU_MMHUB,
1061 .get_rptr = vce_v4_0_ring_get_rptr,
1062 .get_wptr = vce_v4_0_ring_get_wptr,
1063 .set_wptr = vce_v4_0_ring_set_wptr,
1064 .parse_cs = amdgpu_vce_ring_parse_cs_vm,
1065 .emit_frame_size =
1066 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1067 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1068 4 + /* vce_v4_0_emit_vm_flush */
1069 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1070 1, /* vce_v4_0_ring_insert_end */
1071 .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1072 .emit_ib = vce_v4_0_ring_emit_ib,
1073 .emit_vm_flush = vce_v4_0_emit_vm_flush,
1074 .emit_fence = vce_v4_0_ring_emit_fence,
1075 .test_ring = amdgpu_vce_ring_test_ring,
1076 .test_ib = amdgpu_vce_ring_test_ib,
1077 .insert_nop = amdgpu_ring_insert_nop,
1078 .insert_end = vce_v4_0_ring_insert_end,
1079 .pad_ib = amdgpu_ring_generic_pad_ib,
1080 .begin_use = amdgpu_vce_ring_begin_use,
1081 .end_use = amdgpu_vce_ring_end_use,
1082 .emit_wreg = vce_v4_0_emit_wreg,
1083 .emit_reg_wait = vce_v4_0_emit_reg_wait,
1084};
1085
1086static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1087{
1088 int i;
1089
1090 for (i = 0; i < adev->vce.num_rings; i++)
1091 adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1092 DRM_INFO("VCE enabled in VM mode\n");
1093}
1094
1095static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1096 .set = vce_v4_0_set_interrupt_state,
1097 .process = vce_v4_0_process_interrupt,
1098};
1099
1100static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1101{
1102 adev->vce.irq.num_types = 1;
1103 adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1104};
1105
1106const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1107{
1108 .type = AMD_IP_BLOCK_TYPE_VCE,
1109 .major = 4,
1110 .minor = 0,
1111 .rev = 0,
1112 .funcs = &vce_v4_0_ip_funcs,
1113};