Loading...
1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * The above copyright notice and this permission notice (including the
22 * next paragraph) shall be included in all copies or substantial portions
23 * of the Software.
24 *
25 */
26
27#include <linux/firmware.h>
28
29#include "amdgpu.h"
30#include "amdgpu_vce.h"
31#include "soc15.h"
32#include "soc15d.h"
33#include "soc15_common.h"
34#include "mmsch_v1_0.h"
35
36#include "vce/vce_4_0_offset.h"
37#include "vce/vce_4_0_default.h"
38#include "vce/vce_4_0_sh_mask.h"
39#include "mmhub/mmhub_1_0_offset.h"
40#include "mmhub/mmhub_1_0_sh_mask.h"
41
42#include "ivsrcid/vce/irqsrcs_vce_4_0.h"
43
44#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
45
46#define VCE_V4_0_FW_SIZE (384 * 1024)
47#define VCE_V4_0_STACK_SIZE (64 * 1024)
48#define VCE_V4_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
49
50static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
51static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
52static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
53
54/**
55 * vce_v4_0_ring_get_rptr - get read pointer
56 *
57 * @ring: amdgpu_ring pointer
58 *
59 * Returns the current hardware read pointer
60 */
61static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
62{
63 struct amdgpu_device *adev = ring->adev;
64
65 if (ring->me == 0)
66 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
67 else if (ring->me == 1)
68 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
69 else
70 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
71}
72
73/**
74 * vce_v4_0_ring_get_wptr - get write pointer
75 *
76 * @ring: amdgpu_ring pointer
77 *
78 * Returns the current hardware write pointer
79 */
80static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
81{
82 struct amdgpu_device *adev = ring->adev;
83
84 if (ring->use_doorbell)
85 return adev->wb.wb[ring->wptr_offs];
86
87 if (ring->me == 0)
88 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
89 else if (ring->me == 1)
90 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
91 else
92 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
93}
94
95/**
96 * vce_v4_0_ring_set_wptr - set write pointer
97 *
98 * @ring: amdgpu_ring pointer
99 *
100 * Commits the write pointer to the hardware
101 */
102static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
103{
104 struct amdgpu_device *adev = ring->adev;
105
106 if (ring->use_doorbell) {
107 /* XXX check if swapping is necessary on BE */
108 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
109 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
110 return;
111 }
112
113 if (ring->me == 0)
114 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
115 lower_32_bits(ring->wptr));
116 else if (ring->me == 1)
117 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
118 lower_32_bits(ring->wptr));
119 else
120 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
121 lower_32_bits(ring->wptr));
122}
123
/**
 * vce_v4_0_firmware_loaded - wait for the VCPU to report its firmware loaded
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS for the FW_LOADED bit, in up to 10 rounds of
 * 100 * 10ms each; after every unsuccessful round the ECPU is pulsed
 * through soft reset and polling is retried.
 *
 * Returns 0 once the firmware reports loaded, -ETIMEDOUT otherwise.
 */
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		/* firmware did not come up in time; pulse ECPU soft reset and retry */
		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);

	}

	return -ETIMEDOUT;
}
151
/**
 * vce_v4_0_mmsch_start - hand the init descriptor table to the MMSCH
 *
 * @adev: amdgpu_device pointer
 * @table: MM table containing the mmsch_v1_0 init header and command tables
 *
 * Programs the MMSCH (MM scheduler, used under SRIOV) with the GPU address,
 * VMID and size of the descriptor table, resets ring 0 bookkeeping, kicks
 * off initialization through the mailbox register and polls the response
 * register (up to 1000 * 10us) for the done bits.
 *
 * Returns 0 on success, -EBUSY if the MMSCH does not acknowledge in time.
 */
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	/* total table size in dwords: header + both engine command tables */
	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	/* reset ring 0 state so host and MMSCH agree on an empty ring */
	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}
203
/**
 * vce_v4_0_sriov_start - build the MMSCH init table and start VCE under SRIOV
 *
 * @adev: amdgpu_device pointer
 *
 * Fills the shared MM table with direct-write / read-modify-write / poll
 * commands mirroring the bare-metal MC resume and start sequence, then
 * hands the table to the MM scheduler via vce_v4_0_mmsch_start().  The
 * table is only built once (when the header's vce entries are still zero);
 * later calls just re-trigger the MMSCH handshake.
 *
 * Returns 0 on success or a negative error code from the MMSCH handshake.
 */
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	/* build the VCE command table only once; UVD may already own the front */
	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		/* ring 0 setup: base address and size */
		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN OF MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		/* firmware BAR: TMR address when PSP loads the ucode, BO address otherwise */
		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);

		}
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
						(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
						(adev->vce.gpu_addr >> 40) & 0xff);

		/* cache region 0: firmware image */
		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		/* cache region 1: stack (offset restarts at 0 for PSP-loaded fw) */
		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					    (offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		/* cache region 2: data */
		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					    (offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		/* have the MMSCH wait for the firmware-loaded flag */
		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}
327
/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the three ring buffers, restores the MC state, enables the
 * VCPU clock, releases the ECPU from soft reset and waits for the
 * firmware to report loaded.
 *
 * Returns 0 on success, or the error from vce_v4_0_firmware_loaded().
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	/* mark the block busy while we bring it up */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

	/* enable the VCPU clock */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	/* release the ECPU from soft reset and give the fw time to boot */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}
386
/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disables the VCPU clock, holds the ECPU in soft reset and clears
 * VCE_STATUS.  Always returns 0.
 */
static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	/* Disable VCPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear VCE_STATUS */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}
408
409static int vce_v4_0_early_init(void *handle)
410{
411 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
412
413 if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
414 adev->vce.num_rings = 1;
415 else
416 adev->vce.num_rings = 3;
417
418 vce_v4_0_set_ring_funcs(adev);
419 vce_v4_0_set_irq_funcs(adev);
420
421 return 0;
422}
423
/**
 * vce_v4_0_sw_init - software init
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Registers the VCE interrupt source, allocates the VCPU BO (sized for
 * stack + data, plus the firmware image unless PSP loads it), sets up the
 * firmware ucode entry or resumes the BO contents, initializes the rings
 * (doorbell-based under SRIOV) and allocates the SRIOV MM table.
 *
 * Returns 0 on success or a negative error code.
 */
static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;

	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
	/* PSP places the firmware image in the TMR, so no room needed in the BO */
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		/* NOTE(review): this inner 'size' shadows the outer one above */
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

		/* scratch buffer used by suspend/resume to preserve BO contents */
		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vce.saved_bo)
			return -ENOMEM;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;

			/* currently only use the first encoding ring for sriov,
			 * so set unused location for other unused rings.
			 */
			if (i == 0)
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
			else
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
				     AMDGPU_RING_PRIO_DEFAULT);
		if (r)
			return r;
	}


	r = amdgpu_vce_entity_init(adev);
	if (r)
		return r;

	/* MM table used to pass the init sequence to the MMSCH under SRIOV */
	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}
496
/**
 * vce_v4_0_sw_fini - software teardown
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Frees the SRIOV MM table, the suspend scratch buffer (PSP path),
 * suspends VCE state and releases the common VCE resources.
 *
 * Returns 0 on success or a negative error code.
 */
static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		kvfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}
516
/**
 * vce_v4_0_hw_init - hardware init
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Starts the block (via MMSCH under SRIOV, directly otherwise) and
 * ring-tests every enabled VCE ring.
 *
 * Returns 0 on success or a negative error code.
 */
static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
		if (r)
			return r;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}
539
/**
 * vce_v4_0_hw_fini - hardware teardown
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Stops the block on bare metal; under SRIOV the host owns the
 * registers, so nothing is touched.  Always returns 0.
 */
static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}

	/* vce_v4_0_wait_for_idle(handle); */
	vce_v4_0_stop(adev);

	return 0;
}
554
/**
 * vce_v4_0_suspend - suspend the VCE block
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * On the PSP firmware path the VCPU BO contents are copied to the
 * saved_bo scratch buffer first (the BO is not preserved across
 * suspend), then the hardware is shut down and common VCE suspend runs.
 *
 * Returns 0 on success or a negative error code.
 */
static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		/* stash the VCPU BO contents so resume can restore them */
		memcpy_fromio(adev->vce.saved_bo, ptr, size);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}
576
/**
 * vce_v4_0_resume - resume the VCE block
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Restores the VCPU BO contents from the saved_bo scratch buffer (PSP
 * path) or through the common VCE resume helper, then re-runs hw init.
 *
 * Returns 0 on success, -EINVAL if the VCPU BO was never allocated,
 * or another negative error code.
 */
static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		/* restore what vce_v4_0_suspend() stashed away */
		memcpy_toio(ptr, adev->vce.saved_bo, size);
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(adev);
}
598
/**
 * vce_v4_0_mc_resume - program the memory controller view of VCE
 *
 * @adev: amdgpu_device pointer
 *
 * Configures clock gating defaults, the LMI, and the three VCPU cache
 * regions (firmware image, stack, data).  When the firmware is loaded
 * by PSP, BAR0 points at the TMR address; otherwise all BARs point at
 * the driver-allocated VCPU BO.  Finally enables the system trap
 * interrupt.
 */
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;
	uint64_t tmr_mc_addr;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;

	/* cache region 0: firmware image (TMR for PSP, VCPU BO otherwise) */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	/* cache region 1: stack (offset restarts at 0 for PSP-loaded fw) */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	/* cache region 2: data */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}
655
/*
 * Stub: clock gating is not toggled here; the callback only exists so the
 * IP-block function table is complete during driver unload.
 */
static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload*/
	return 0;
}
662
663#if 0
/*
 * vce_v4_0_is_idle - check both VCE instances for busy status.
 *
 * Dead code (#if 0).  NOTE(review): references mmSRBM_STATUS2, a pre-SOC15
 * register name not provided by the soc15 headers included above — verify
 * before re-enabling.
 */
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	/* only consider instances that are not harvested */
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}
674
675static int vce_v4_0_wait_for_idle(void *handle)
676{
677 unsigned i;
678 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
679
680 for (i = 0; i < adev->usec_timeout; i++)
681 if (vce_v4_0_is_idle(handle))
682 return 0;
683
684 return -ETIMEDOUT;
685}
686
687#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
688#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
689#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
690#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
691 VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
692
693static bool vce_v4_0_check_soft_reset(void *handle)
694{
695 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
696 u32 srbm_soft_reset = 0;
697
698 /* According to VCE team , we should use VCE_STATUS instead
699 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
700 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
701 * instance's registers are accessed
702 * (0 for 1st instance, 10 for 2nd instance).
703 *
704 *VCE_STATUS
705 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB |
706 *|----+----+-----------+----+----+----+----------+---------+----|
707 *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0|
708 *
709 * VCE team suggest use bit 3--bit 6 for busy status check
710 */
711 mutex_lock(&adev->grbm_idx_mutex);
712 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
713 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
714 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
715 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
716 }
717 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
718 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
719 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
720 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
721 }
722 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
723 mutex_unlock(&adev->grbm_idx_mutex);
724
725 if (srbm_soft_reset) {
726 adev->vce.srbm_soft_reset = srbm_soft_reset;
727 return true;
728 } else {
729 adev->vce.srbm_soft_reset = 0;
730 return false;
731 }
732}
733
/*
 * vce_v4_0_soft_reset - execute the SRBM soft reset recorded by
 * vce_v4_0_check_soft_reset().  Dead code (#if 0).
 *
 * Asserts the reset bits, waits, de-asserts them, and waits again for
 * the hardware to settle.  Always returns 0.
 */
static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);	/* read back to post the write */

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);	/* read back to post the write */

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}
764
765static int vce_v4_0_pre_soft_reset(void *handle)
766{
767 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
768
769 if (!adev->vce.srbm_soft_reset)
770 return 0;
771
772 mdelay(5);
773
774 return vce_v4_0_suspend(adev);
775}
776
777
778static int vce_v4_0_post_soft_reset(void *handle)
779{
780 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
781
782 if (!adev->vce.srbm_soft_reset)
783 return 0;
784
785 mdelay(5);
786
787 return vce_v4_0_resume(adev);
788}
789
790static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
791{
792 u32 tmp, data;
793
794 tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
795 if (override)
796 data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
797 else
798 data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
799
800 if (tmp != data)
801 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
802}
803
/*
 * vce_v4_0_set_vce_sw_clock_gating - program the firmware-controlled MGCG
 * registers for the gated or ungated state.  Dead code (#if 0).
 *
 * @adev: amdgpu_device pointer
 * @gated: true to allow the firmware to gate clocks, false to force them on
 */
static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	/* re-enable clock gating now that the registers are programmed */
	vce_v4_0_override_vce_clock_gating(adev, false);
}
870
871static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
872{
873 u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
874
875 if (enable)
876 tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
877 else
878 tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
879
880 WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
881}
882
883static int vce_v4_0_set_clockgating_state(void *handle,
884 enum amd_clockgating_state state)
885{
886 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
887 bool enable = (state == AMD_CG_STATE_GATE);
888 int i;
889
890 if ((adev->asic_type == CHIP_POLARIS10) ||
891 (adev->asic_type == CHIP_TONGA) ||
892 (adev->asic_type == CHIP_FIJI))
893 vce_v4_0_set_bypass_mode(adev, enable);
894
895 if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
896 return 0;
897
898 mutex_lock(&adev->grbm_idx_mutex);
899 for (i = 0; i < 2; i++) {
900 /* Program VCE Instance 0 or 1 if not harvested */
901 if (adev->vce.harvest_config & (1 << i))
902 continue;
903
904 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
905
906 if (enable) {
907 /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
908 uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
909 data &= ~(0xf | 0xff0);
910 data |= ((0x0 << 0) | (0x04 << 4));
911 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
912
913 /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
914 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
915 data &= ~(0xf | 0xff0);
916 data |= ((0x0 << 0) | (0x04 << 4));
917 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
918 }
919
920 vce_v4_0_set_vce_sw_clock_gating(adev, enable);
921 }
922
923 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
924 mutex_unlock(&adev->grbm_idx_mutex);
925
926 return 0;
927}
928#endif
929
930static int vce_v4_0_set_powergating_state(void *handle,
931 enum amd_powergating_state state)
932{
933 /* This doesn't actually powergate the VCE block.
934 * That's done in the dpm code via the SMC. This
935 * just re-inits the block as necessary. The actual
936 * gating still happens in the dpm code. We should
937 * revisit this when there is a cleaner line between
938 * the smc and the hw blocks
939 */
940 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
941
942 if (state == AMD_PG_STATE_GATE)
943 return vce_v4_0_stop(adev);
944 else
945 return vce_v4_0_start(adev);
946}
947
948static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
949 struct amdgpu_ib *ib, uint32_t flags)
950{
951 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
952
953 amdgpu_ring_write(ring, VCE_CMD_IB_VM);
954 amdgpu_ring_write(ring, vmid);
955 amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
956 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
957 amdgpu_ring_write(ring, ib->length_dw);
958}
959
960static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
961 u64 seq, unsigned flags)
962{
963 WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
964
965 amdgpu_ring_write(ring, VCE_CMD_FENCE);
966 amdgpu_ring_write(ring, addr);
967 amdgpu_ring_write(ring, upper_32_bits(addr));
968 amdgpu_ring_write(ring, seq);
969 amdgpu_ring_write(ring, VCE_CMD_TRAP);
970}
971
/* Terminate the command stream with an END token. */
static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}
976
/* Emit a REG_WAIT packet: firmware polls register @reg (dword offset,
 * hence the << 2 byte conversion) until (value & @mask) == @val.
 */
static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}
985
/* Emit a GPU TLB flush for @vmid, then wait until the hub's page-table
 * base register reflects the new @pd_addr so the flush has taken effect.
 */
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
			       vmid * hub->ctx_addr_distance,
			       lower_32_bits(pd_addr), 0xffffffff);
}
998
/* Emit a register write packet (@reg is a dword offset, converted to bytes). */
static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}
1006
1007static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1008 struct amdgpu_irq_src *source,
1009 unsigned type,
1010 enum amdgpu_interrupt_state state)
1011{
1012 uint32_t val = 0;
1013
1014 if (!amdgpu_sriov_vf(adev)) {
1015 if (state == AMDGPU_IRQ_STATE_ENABLE)
1016 val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1017
1018 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1019 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1020 }
1021 return 0;
1022}
1023
1024static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1025 struct amdgpu_irq_src *source,
1026 struct amdgpu_iv_entry *entry)
1027{
1028 DRM_DEBUG("IH: VCE\n");
1029
1030 switch (entry->src_data[0]) {
1031 case 0:
1032 case 1:
1033 case 2:
1034 amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1035 break;
1036 default:
1037 DRM_ERROR("Unhandled interrupt: %d %d\n",
1038 entry->src_id, entry->src_data[0]);
1039 break;
1040 }
1041
1042 return 0;
1043}
1044
/* IP-block callback table for VCE 4.0; idle/soft-reset hooks are stubbed
 * out (the corresponding implementations live in the #if 0 block above).
 */
const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = vce_v4_0_set_powergating_state,
};
1064
/* Ring function table used by all VCE 4.0 rings (VM mode). */
static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
1097
1098static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1099{
1100 int i;
1101
1102 for (i = 0; i < adev->vce.num_rings; i++) {
1103 adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1104 adev->vce.ring[i].me = i;
1105 }
1106 DRM_INFO("VCE enabled in VM mode\n");
1107}
1108
/* Interrupt source callbacks for the VCE trap interrupt. */
static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};
1113
1114static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1115{
1116 adev->vce.irq.num_types = 1;
1117 adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1118};
1119
/* IP block version descriptor exported to the amdgpu IP discovery code. */
const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};
1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * The above copyright notice and this permission notice (including the
22 * next paragraph) shall be included in all copies or substantial portions
23 * of the Software.
24 *
25 */
26
27#include <linux/firmware.h>
28#include <drm/drm_drv.h>
29
30#include "amdgpu.h"
31#include "amdgpu_vce.h"
32#include "soc15.h"
33#include "soc15d.h"
34#include "soc15_common.h"
35#include "mmsch_v1_0.h"
36
37#include "vce/vce_4_0_offset.h"
38#include "vce/vce_4_0_default.h"
39#include "vce/vce_4_0_sh_mask.h"
40#include "mmhub/mmhub_1_0_offset.h"
41#include "mmhub/mmhub_1_0_sh_mask.h"
42
43#include "ivsrcid/vce/irqsrcs_vce_4_0.h"
44
45#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
46
47#define VCE_V4_0_FW_SIZE (384 * 1024)
48#define VCE_V4_0_STACK_SIZE (64 * 1024)
49#define VCE_V4_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
50
51static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
52static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
53static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
54
55/**
56 * vce_v4_0_ring_get_rptr - get read pointer
57 *
58 * @ring: amdgpu_ring pointer
59 *
60 * Returns the current hardware read pointer
61 */
62static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
63{
64 struct amdgpu_device *adev = ring->adev;
65
66 if (ring->me == 0)
67 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
68 else if (ring->me == 1)
69 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
70 else
71 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
72}
73
74/**
75 * vce_v4_0_ring_get_wptr - get write pointer
76 *
77 * @ring: amdgpu_ring pointer
78 *
79 * Returns the current hardware write pointer
80 */
81static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
82{
83 struct amdgpu_device *adev = ring->adev;
84
85 if (ring->use_doorbell)
86 return adev->wb.wb[ring->wptr_offs];
87
88 if (ring->me == 0)
89 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
90 else if (ring->me == 1)
91 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
92 else
93 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
94}
95
96/**
97 * vce_v4_0_ring_set_wptr - set write pointer
98 *
99 * @ring: amdgpu_ring pointer
100 *
101 * Commits the write pointer to the hardware
102 */
103static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
104{
105 struct amdgpu_device *adev = ring->adev;
106
107 if (ring->use_doorbell) {
108 /* XXX check if swapping is necessary on BE */
109 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
110 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
111 return;
112 }
113
114 if (ring->me == 0)
115 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
116 lower_32_bits(ring->wptr));
117 else if (ring->me == 1)
118 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
119 lower_32_bits(ring->wptr));
120 else
121 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
122 lower_32_bits(ring->wptr));
123}
124
/* Poll VCE_STATUS until the VCPU firmware reports itself loaded.
 *
 * Polls up to 100 x 10ms per attempt; after each failed attempt the ECPU
 * is soft-reset and the poll restarted, for at most 10 attempts.
 *
 * Returns 0 once the firmware is up, -ETIMEDOUT otherwise.
 */
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		/* pulse the ECPU soft reset and give the firmware another chance */
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);

	}

	return -ETIMEDOUT;
}
152
/* Hand the prepared init-table descriptor to the MMSCH microcontroller
 * and wait for the mailbox to acknowledge completion.
 *
 * Returns 0 on success, -EBUSY if the MMSCH never responds.
 */
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	/* total descriptor size in dwords: header + VCE table + UVD table */
	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	/* reset ring 0 bookkeeping before the MMSCH takes over */
	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	/* poll for completion, up to 1000 x 10us */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}
204
/* Build the MMSCH v1.0 init table for VCE under SRIOV and submit it.
 *
 * Instead of programming the registers directly (the VF is not allowed
 * to), every register write / read-modify-write / poll is recorded into
 * the shared mm_table, which vce_v4_0_mmsch_start() then hands to the
 * MMSCH microcontroller for execution on the host side.
 *
 * The table is only populated once (guarded by vce_table_offset/size
 * both being zero); subsequent calls just re-submit it.
 */
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		/* the VCE table follows the UVD table if one was recorded */
		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		/* ring buffer location and size for ring 0 (the only SRIOV ring) */
		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		/* firmware image location: TMR address when PSP loads the
		 * ucode, otherwise the driver-owned VCE BO
		 */
		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);

		}
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
						(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
						(adev->vce.gpu_addr >> 40) & 0xff);

		/* cache windows: fw image, stack, data */
		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					    (offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					    (offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		/* wait until the firmware reports loaded */
		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}
328
/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block: program all three ring buffers, resume
 * the memory controller interface, enable the VCPU clock, release the
 * ECPU soft reset and wait for the firmware to report loaded.
 *
 * Returns 0 on success or the error from vce_v4_0_firmware_loaded().
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	/* mark the block busy while the firmware boots */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
		~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	/* release ECPU from soft reset and give the fw time to boot */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
		~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}
387
/* Stop the VCE block: disable the VCPU, hold the ECPU in soft reset and
 * clear the status register.  Always returns 0.
 */
static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	/* Disable VCPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear VCE_STATUS */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}
409
/* Early init: decide the ring count (SRIOV exposes only the first VCE
 * encode ring) and install the ring/irq function tables.
 */
static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only the first VCE ring supports SRIOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}
424
425static int vce_v4_0_sw_init(void *handle)
426{
427 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
428 struct amdgpu_ring *ring;
429
430 unsigned size;
431 int r, i;
432
433 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
434 if (r)
435 return r;
436
437 size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
438 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
439 size += VCE_V4_0_FW_SIZE;
440
441 r = amdgpu_vce_sw_init(adev, size);
442 if (r)
443 return r;
444
445 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
446 const struct common_firmware_header *hdr;
447 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
448
449 adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
450 if (!adev->vce.saved_bo)
451 return -ENOMEM;
452
453 hdr = (const struct common_firmware_header *)adev->vce.fw->data;
454 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
455 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
456 adev->firmware.fw_size +=
457 ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
458 DRM_INFO("PSP loading VCE firmware\n");
459 } else {
460 r = amdgpu_vce_resume(adev);
461 if (r)
462 return r;
463 }
464
465 for (i = 0; i < adev->vce.num_rings; i++) {
466 ring = &adev->vce.ring[i];
467 sprintf(ring->name, "vce%d", i);
468 if (amdgpu_sriov_vf(adev)) {
469 /* DOORBELL only works under SRIOV */
470 ring->use_doorbell = true;
471
472 /* currently only use the first encoding ring for sriov,
473 * so set unused location for other unused rings.
474 */
475 if (i == 0)
476 ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
477 else
478 ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
479 }
480 r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
481 AMDGPU_RING_PRIO_DEFAULT, NULL);
482 if (r)
483 return r;
484 }
485
486
487 r = amdgpu_vce_entity_init(adev);
488 if (r)
489 return r;
490
491 r = amdgpu_virt_alloc_mm_table(adev);
492 if (r)
493 return r;
494
495 return r;
496}
497
498static int vce_v4_0_sw_fini(void *handle)
499{
500 int r;
501 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
502
503 /* free MM table */
504 amdgpu_virt_free_mm_table(adev);
505
506 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
507 kvfree(adev->vce.saved_bo);
508 adev->vce.saved_bo = NULL;
509 }
510
511 r = amdgpu_vce_suspend(adev);
512 if (r)
513 return r;
514
515 return amdgpu_vce_sw_fini(adev);
516}
517
/* Hardware init: start the block (via MMSCH under SRIOV, directly
 * otherwise) and ring-test every enabled ring.
 */
static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
		if (r)
			return r;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}
540
/* Hardware teardown: stop the block on bare metal; under SRIOV the VF
 * must not touch VCE registers, so nothing is done.
 */
static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}

	/* vce_v4_0_wait_for_idle(handle); */
	vce_v4_0_stop(adev);

	return 0;
}
555
/* Suspend: save the VCPU BO contents (PSP load path only, and only while
 * the device is still alive per drm_dev_enter), then stop the hardware
 * and the common VCE state.
 */
static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r, idx;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (drm_dev_enter(&adev->ddev, &idx)) {
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
			void *ptr = adev->vce.cpu_addr;

			/* BO is VRAM-backed, hence the _fromio copy */
			memcpy_fromio(adev->vce.saved_bo, ptr, size);
		}
		drm_dev_exit(idx);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}
580
/* Resume: restore the saved VCPU BO contents (PSP path) or re-upload the
 * firmware (direct path), then re-run hardware init.
 */
static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r, idx;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {

		if (drm_dev_enter(&adev->ddev, &idx)) {
			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
			void *ptr = adev->vce.cpu_addr;

			/* BO is VRAM-backed, hence the _toio copy */
			memcpy_toio(ptr, adev->vce.saved_bo, size);
			drm_dev_exit(idx);
		}
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(adev);
}
606
/* Program the VCE memory controller interface: clock gating defaults,
 * LMI setup, and the three VCPU cache windows (firmware image, stack,
 * data).  The firmware window points either at the PSP TMR or at the
 * driver-owned VCE BO depending on the load type.
 */
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;
	uint64_t tmr_mc_addr;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;

	/* window 0: firmware image (TMR when PSP-loaded, VCE BO otherwise) */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	/* window 1: stack */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	/* window 2: data */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}
663
/* Stub: clock gating is not programmed on this IP version, but the hook
 * must exist so the common code can invoke it during driver unload.
 */
static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload*/
	return 0;
}
670
671#if 0
/* Report whether all non-harvested VCE instances are idle.
 * NOTE(review): reads mmSRBM_STATUS2, a pre-soc15 register — this code is
 * compiled out (#if 0) and would need porting before being re-enabled.
 */
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}
682
683static int vce_v4_0_wait_for_idle(void *handle)
684{
685 unsigned i;
686 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
687
688 for (i = 0; i < adev->usec_timeout; i++)
689 if (vce_v4_0_is_idle(handle))
690 return 0;
691
692 return -ETIMEDOUT;
693}
694
695#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
696#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
697#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
698#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
699 VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
700
701static bool vce_v4_0_check_soft_reset(void *handle)
702{
703 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
704 u32 srbm_soft_reset = 0;
705
706 /* According to VCE team , we should use VCE_STATUS instead
707 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
708 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
709 * instance's registers are accessed
710 * (0 for 1st instance, 10 for 2nd instance).
711 *
712 *VCE_STATUS
713 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB |
714 *|----+----+-----------+----+----+----+----------+---------+----|
715 *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0|
716 *
717 * VCE team suggest use bit 3--bit 6 for busy status check
718 */
719 mutex_lock(&adev->grbm_idx_mutex);
720 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
721 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
722 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
723 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
724 }
725 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
726 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
727 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
728 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
729 }
730 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
731 mutex_unlock(&adev->grbm_idx_mutex);
732
733 if (srbm_soft_reset) {
734 adev->vce.srbm_soft_reset = srbm_soft_reset;
735 return true;
736 } else {
737 adev->vce.srbm_soft_reset = 0;
738 return false;
739 }
740}
741
/* Pulse the SRBM soft-reset bits latched by vce_v4_0_check_soft_reset().
 * (Compiled out via #if 0.)
 */
static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	/* NOTE(review): this check is redundant — the early return above
	 * guarantees srbm_soft_reset is non-zero here.
	 */
	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		/* deassert the reset bits again */
		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}
772
773static int vce_v4_0_pre_soft_reset(void *handle)
774{
775 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
776
777 if (!adev->vce.srbm_soft_reset)
778 return 0;
779
780 mdelay(5);
781
782 return vce_v4_0_suspend(adev);
783}
784
785
786static int vce_v4_0_post_soft_reset(void *handle)
787{
788 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
789
790 if (!adev->vce.srbm_soft_reset)
791 return 0;
792
793 mdelay(5);
794
795 return vce_v4_0_resume(adev);
796}
797
798static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
799{
800 u32 tmp, data;
801
802 tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
803 if (override)
804 data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
805 else
806 data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
807
808 if (tmp != data)
809 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
810}
811
/*
 * Program VCE software clock gating for the currently selected instance.
 * @gated: true puts the clocks into the (firmware-throttled) gated state,
 *         false forces them on.
 *
 * The CGTT override is asserted around the whole sequence so the gating
 * controller does not fight the register programming.  The magic mask
 * values below come from the hardware programming guide -- NOTE(review):
 * their individual bit meanings are not documented here; do not alter
 * them without the register spec.
 */
static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
		bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		/* Force the DMA clocks on even while gated. */
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		/* Release the DMA clock force-on bits set in the gated path. */
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	/* Hand control back to the clock-gating controller. */
	vce_v4_0_override_vce_clock_gating(adev, false);
}
878
879static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
880{
881 u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
882
883 if (enable)
884 tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
885 else
886 tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
887
888 WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
889}
890
891static int vce_v4_0_set_clockgating_state(void *handle,
892 enum amd_clockgating_state state)
893{
894 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
895 bool enable = (state == AMD_CG_STATE_GATE);
896 int i;
897
898 if ((adev->asic_type == CHIP_POLARIS10) ||
899 (adev->asic_type == CHIP_TONGA) ||
900 (adev->asic_type == CHIP_FIJI))
901 vce_v4_0_set_bypass_mode(adev, enable);
902
903 if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
904 return 0;
905
906 mutex_lock(&adev->grbm_idx_mutex);
907 for (i = 0; i < 2; i++) {
908 /* Program VCE Instance 0 or 1 if not harvested */
909 if (adev->vce.harvest_config & (1 << i))
910 continue;
911
912 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
913
914 if (enable) {
915 /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
916 uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
917 data &= ~(0xf | 0xff0);
918 data |= ((0x0 << 0) | (0x04 << 4));
919 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
920
921 /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
922 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
923 data &= ~(0xf | 0xff0);
924 data |= ((0x0 << 0) | (0x04 << 4));
925 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
926 }
927
928 vce_v4_0_set_vce_sw_clock_gating(adev, enable);
929 }
930
931 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
932 mutex_unlock(&adev->grbm_idx_mutex);
933
934 return 0;
935}
936#endif
937
938static int vce_v4_0_set_powergating_state(void *handle,
939 enum amd_powergating_state state)
940{
941 /* This doesn't actually powergate the VCE block.
942 * That's done in the dpm code via the SMC. This
943 * just re-inits the block as necessary. The actual
944 * gating still happens in the dpm code. We should
945 * revisit this when there is a cleaner line between
946 * the smc and the hw blocks
947 */
948 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
949
950 if (state == AMD_PG_STATE_GATE)
951 return vce_v4_0_stop(adev);
952 else
953 return vce_v4_0_start(adev);
954}
955
956static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
957 struct amdgpu_ib *ib, uint32_t flags)
958{
959 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
960
961 amdgpu_ring_write(ring, VCE_CMD_IB_VM);
962 amdgpu_ring_write(ring, vmid);
963 amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
964 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
965 amdgpu_ring_write(ring, ib->length_dw);
966}
967
968static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
969 u64 seq, unsigned flags)
970{
971 WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
972
973 amdgpu_ring_write(ring, VCE_CMD_FENCE);
974 amdgpu_ring_write(ring, addr);
975 amdgpu_ring_write(ring, upper_32_bits(addr));
976 amdgpu_ring_write(ring, seq);
977 amdgpu_ring_write(ring, VCE_CMD_TRAP);
978}
979
/* Terminate the current command stream with a single END packet. */
static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}
984
/* Emit a REG_WAIT packet: the engine stalls until (register & mask) == val.
 * @reg is a dword register offset; the << 2 converts it to a byte address
 * (presumably what the packet expects -- matches vce_v4_0_emit_wreg). */
static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}
993
/* Emit a GPU TLB flush for @vmid on this ring, then wait until the hub's
 * per-VMID page-table-base register reflects the new @pd_addr (low 32 bits
 * compared with a full mask), proving the flush/update has landed. */
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

	/* amdgpu_gmc_emit_flush_gpu_tlb() returns the value actually
	 * programmed into the page-table base register. */
	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
			       vmid * hub->ctx_addr_distance,
			       lower_32_bits(pd_addr), 0xffffffff);
}
1006
/* Emit a REG_WRITE packet that stores @val to register @reg.
 * @reg is a dword offset; << 2 converts it to the byte address the
 * packet format uses (same convention as vce_v4_0_emit_reg_wait). */
static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}
1014
1015static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1016 struct amdgpu_irq_src *source,
1017 unsigned type,
1018 enum amdgpu_interrupt_state state)
1019{
1020 uint32_t val = 0;
1021
1022 if (!amdgpu_sriov_vf(adev)) {
1023 if (state == AMDGPU_IRQ_STATE_ENABLE)
1024 val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1025
1026 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1027 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1028 }
1029 return 0;
1030}
1031
1032static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1033 struct amdgpu_irq_src *source,
1034 struct amdgpu_iv_entry *entry)
1035{
1036 DRM_DEBUG("IH: VCE\n");
1037
1038 switch (entry->src_data[0]) {
1039 case 0:
1040 case 1:
1041 case 2:
1042 amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1043 break;
1044 default:
1045 DRM_ERROR("Unhandled interrupt: %d %d\n",
1046 entry->src_id, entry->src_data[0]);
1047 break;
1048 }
1049
1050 return 0;
1051}
1052
/* amd_ip_funcs dispatch table for the VCE 4.0 IP block.
 * The idle and soft-reset callbacks are deliberately NULL; the commented
 * function names show what would be wired in -- NOTE(review): their
 * implementations appear to be compiled out earlier in this file, confirm
 * before re-enabling them. */
const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = vce_v4_0_set_powergating_state,
};
1072
/* Ring callbacks for VCE 4.0 encode rings running in VM mode (command
 * streams carry VMIDs and are parsed by amdgpu_vce_ring_parse_cs_vm).
 * emit_frame_size totals the worst-case dword count of one frame's
 * fixed packets, itemized in the comments below. */
static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
1105
1106static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1107{
1108 int i;
1109
1110 for (i = 0; i < adev->vce.num_rings; i++) {
1111 adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1112 adev->vce.ring[i].me = i;
1113 }
1114 DRM_INFO("VCE enabled in VM mode\n");
1115}
1116
/* Interrupt source callbacks for VCE 4.0: register-level enable/disable
 * and the IH ring handler defined above. */
static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};
1121
1122static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1123{
1124 adev->vce.irq.num_types = 1;
1125 adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1126};
1127
1128const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1129{
1130 .type = AMD_IP_BLOCK_TYPE_VCE,
1131 .major = 4,
1132 .minor = 0,
1133 .rev = 0,
1134 .funcs = &vce_v4_0_ip_funcs,
1135};