/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include "amdgpu.h"
#include "soc15_common.h"
#include "nv.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "v10_structs.h"
#include "mes_api_def.h"

#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid            0x2820
#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid_BASE_IDX   1

MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin");

static int mes_v10_1_hw_fini(void *handle);

#define MES_EOP_SIZE   2048

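/*
 * Ring callbacks for the MES scheduler queue. The MES ring is
 * doorbell-driven: the read/write pointers live in write-back memory
 * and the write pointer is published to the firmware via WDOORBELL64.
 */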
static void mes_v10_1_ring_set_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell) {
                atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs],
                             ring->wptr);
                WDOORBELL64(ring->doorbell_index, ring->wptr);
        } else {
                BUG();
        }
}

static u64 mes_v10_1_ring_get_rptr(struct amdgpu_ring *ring)
{
        return ring->adev->wb.wb[ring->rptr_offs];
}

static u64 mes_v10_1_ring_get_wptr(struct amdgpu_ring *ring)
{
        u64 wptr;

        if (ring->use_doorbell)
                wptr = atomic64_read((atomic64_t *)
                                     &ring->adev->wb.wb[ring->wptr_offs]);
        else
                BUG();
        return wptr;
}

static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = {
        .type = AMDGPU_RING_TYPE_MES,
        .align_mask = 1,
        .nop = 0,
        .support_64bit_ptrs = true,
        .get_rptr = mes_v10_1_ring_get_rptr,
        .get_wptr = mes_v10_1_ring_get_wptr,
        .set_wptr = mes_v10_1_ring_set_wptr,
        .insert_nop = amdgpu_ring_insert_nop,
};

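/*
 * Write a MES API frame to the scheduler ring and busy-wait until the
 * firmware signals the per-packet completion fence, bounded by
 * adev->usec_timeout.
 */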
static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
                                                    void *pkt, int size)
{
        int ndw = size / 4;
        signed long r;
        union MESAPI__ADD_QUEUE *x_pkt = pkt;
        struct amdgpu_device *adev = mes->adev;
        struct amdgpu_ring *ring = &mes->ring;

        BUG_ON(size % 4 != 0);

        if (amdgpu_ring_alloc(ring, ndw))
                return -ENOMEM;

        amdgpu_ring_write_multiple(ring, pkt, ndw);
        amdgpu_ring_commit(ring);

        DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);

        r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
                                      adev->usec_timeout);
        if (r < 1) {
                DRM_ERROR("MES failed to respond to msg=%d\n",
                          x_pkt->header.opcode);
                return -ETIMEDOUT;
        }

        return 0;
}

static int convert_to_mes_queue_type(int queue_type)
{
        if (queue_type == AMDGPU_RING_TYPE_GFX)
                return MES_QUEUE_TYPE_GFX;
        else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
                return MES_QUEUE_TYPE_COMPUTE;
        else if (queue_type == AMDGPU_RING_TYPE_SDMA)
                return MES_QUEUE_TYPE_SDMA;
        else
                BUG();
        return -1;
}

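/*
 * Build a SCHEDULER ADD_QUEUE frame from the caller's input: process
 * and gang context addresses, quanta, priorities, MQD/wptr addresses
 * and queue type are handed to the firmware, which schedules the
 * queue from then on.
 */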
static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
                                  struct mes_add_queue_input *input)
{
        struct amdgpu_device *adev = mes->adev;
        union MESAPI__ADD_QUEUE mes_add_queue_pkt;

        memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));

        mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
        mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
        mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

        mes_add_queue_pkt.process_id = input->process_id;
        mes_add_queue_pkt.page_table_base_addr =
                input->page_table_base_addr - adev->gmc.vram_start;
        mes_add_queue_pkt.process_va_start = input->process_va_start;
        mes_add_queue_pkt.process_va_end = input->process_va_end;
        mes_add_queue_pkt.process_quantum = input->process_quantum;
        mes_add_queue_pkt.process_context_addr = input->process_context_addr;
        mes_add_queue_pkt.gang_quantum = input->gang_quantum;
        mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
        mes_add_queue_pkt.inprocess_gang_priority =
                input->inprocess_gang_priority;
        mes_add_queue_pkt.gang_global_priority_level =
                input->gang_global_priority_level;
        mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
        mes_add_queue_pkt.mqd_addr = input->mqd_addr;
        mes_add_queue_pkt.wptr_addr = input->wptr_addr;
        mes_add_queue_pkt.queue_type =
                convert_to_mes_queue_type(input->queue_type);
        mes_add_queue_pkt.paging = input->paging;

        mes_add_queue_pkt.api_status.api_completion_fence_addr =
                mes->ring.fence_drv.gpu_addr;
        mes_add_queue_pkt.api_status.api_completion_fence_value =
                ++mes->ring.fence_drv.sync_seq;

        return mes_v10_1_submit_pkt_and_poll_completion(mes,
                        &mes_add_queue_pkt, sizeof(mes_add_queue_pkt));
}

static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
                                     struct mes_remove_queue_input *input)
{
        union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;

        memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));

        mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
        mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
        mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

        mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
        mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;

        mes_remove_queue_pkt.api_status.api_completion_fence_addr =
                mes->ring.fence_drv.gpu_addr;
        mes_remove_queue_pkt.api_status.api_completion_fence_value =
                ++mes->ring.fence_drv.sync_seq;

        return mes_v10_1_submit_pkt_and_poll_completion(mes,
                        &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
}

static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
                                  struct mes_suspend_gang_input *input)
{
        return 0;
}

static int mes_v10_1_resume_gang(struct amdgpu_mes *mes,
                                 struct mes_resume_gang_input *input)
{
        return 0;
}

static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes)
{
        union MESAPI__QUERY_MES_STATUS mes_status_pkt;

        memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));

        mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
        mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
        mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

        mes_status_pkt.api_status.api_completion_fence_addr =
                mes->ring.fence_drv.gpu_addr;
        mes_status_pkt.api_status.api_completion_fence_value =
                ++mes->ring.fence_drv.sync_seq;

        return mes_v10_1_submit_pkt_and_poll_completion(mes,
                        &mes_status_pkt, sizeof(mes_status_pkt));
}

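/*
 * Tell the firmware which hardware resources it owns: the VMID masks
 * per hub, the HQD masks per compute/gfx/SDMA pipe, the aggregated
 * doorbells and the global scheduler context buffer.
 */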
static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
{
        int i;
        struct amdgpu_device *adev = mes->adev;
        union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;

        memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));

        mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
        mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
        mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

        mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
        mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
        mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
        mes_set_hw_res_pkt.paging_vmid = 0;
        mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
        mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
                mes->query_status_fence_gpu_addr;

        for (i = 0; i < MAX_COMPUTE_PIPES; i++)
                mes_set_hw_res_pkt.compute_hqd_mask[i] =
                        mes->compute_hqd_mask[i];

        for (i = 0; i < MAX_GFX_PIPES; i++)
                mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];

        for (i = 0; i < MAX_SDMA_PIPES; i++)
                mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];

        /* note: "agreegated" is the spelling used by this version's headers */
        for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
                mes_set_hw_res_pkt.agreegated_doorbells[i] =
                        mes->agreegated_doorbells[i];

        mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
                mes->ring.fence_drv.gpu_addr;
        mes_set_hw_res_pkt.api_status.api_completion_fence_value =
                ++mes->ring.fence_drv.sync_seq;

        return mes_v10_1_submit_pkt_and_poll_completion(mes,
                        &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt));
}

static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
        .add_hw_queue = mes_v10_1_add_hw_queue,
        .remove_hw_queue = mes_v10_1_remove_hw_queue,
        .suspend_gang = mes_v10_1_suspend_gang,
        .resume_gang = mes_v10_1_resume_gang,
};

static int mes_v10_1_init_microcode(struct amdgpu_device *adev)
{
        const char *chip_name;
        char fw_name[30];
        int err;
        const struct mes_firmware_header_v1_0 *mes_hdr;
        struct amdgpu_firmware_info *info;

        switch (adev->asic_type) {
        case CHIP_NAVI10:
                chip_name = "navi10";
                break;
        case CHIP_SIENNA_CICHLID:
                chip_name = "sienna_cichlid";
                break;
        default:
                BUG();
        }

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin", chip_name);
        err = request_firmware(&adev->mes.fw, fw_name, adev->dev);
        if (err)
                return err;

        err = amdgpu_ucode_validate(adev->mes.fw);
        if (err) {
                release_firmware(adev->mes.fw);
                adev->mes.fw = NULL;
                return err;
        }

        mes_hdr = (const struct mes_firmware_header_v1_0 *)adev->mes.fw->data;
        adev->mes.ucode_fw_version = le32_to_cpu(mes_hdr->mes_ucode_version);
        adev->mes.data_fw_version =
                le32_to_cpu(mes_hdr->mes_ucode_data_version);
        adev->mes.uc_start_addr =
                le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
                ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
        adev->mes.data_start_addr =
                le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
                ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES];
                info->ucode_id = AMDGPU_UCODE_ID_CP_MES;
                info->fw = adev->mes.fw;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
                              PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES_DATA];
                info->ucode_id = AMDGPU_UCODE_ID_CP_MES_DATA;
                info->fw = adev->mes.fw;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
                              PAGE_SIZE);
        }

        return 0;
}

static void mes_v10_1_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->mes.fw);
        adev->mes.fw = NULL;
}

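/*
 * For backdoor (direct) loading, the instruction and data images are
 * copied out of the firmware blob into GTT buffer objects that the CP
 * instruction/data caches are later pointed at.
 */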
static int mes_v10_1_allocate_ucode_buffer(struct amdgpu_device *adev)
{
        int r;
        const struct mes_firmware_header_v1_0 *mes_hdr;
        const __le32 *fw_data;
        unsigned fw_size;

        mes_hdr = (const struct mes_firmware_header_v1_0 *)
                adev->mes.fw->data;

        fw_data = (const __le32 *)(adev->mes.fw->data +
                   le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
        fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

        r = amdgpu_bo_create_reserved(adev, fw_size,
                                      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
                                      &adev->mes.ucode_fw_obj,
                                      &adev->mes.ucode_fw_gpu_addr,
                                      (void **)&adev->mes.ucode_fw_ptr);
        if (r) {
                dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
                return r;
        }

        memcpy(adev->mes.ucode_fw_ptr, fw_data, fw_size);

        amdgpu_bo_kunmap(adev->mes.ucode_fw_obj);
        amdgpu_bo_unreserve(adev->mes.ucode_fw_obj);

        return 0;
}

static int mes_v10_1_allocate_ucode_data_buffer(struct amdgpu_device *adev)
{
        int r;
        const struct mes_firmware_header_v1_0 *mes_hdr;
        const __le32 *fw_data;
        unsigned fw_size;

        mes_hdr = (const struct mes_firmware_header_v1_0 *)
                adev->mes.fw->data;

        fw_data = (const __le32 *)(adev->mes.fw->data +
                   le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
        fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

        r = amdgpu_bo_create_reserved(adev, fw_size,
                                      64 * 1024, AMDGPU_GEM_DOMAIN_GTT,
                                      &adev->mes.data_fw_obj,
                                      &adev->mes.data_fw_gpu_addr,
                                      (void **)&adev->mes.data_fw_ptr);
        if (r) {
                dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
                return r;
        }

        memcpy(adev->mes.data_fw_ptr, fw_data, fw_size);

        amdgpu_bo_kunmap(adev->mes.data_fw_obj);
        amdgpu_bo_unreserve(adev->mes.data_fw_obj);

        return 0;
}

static void mes_v10_1_free_ucode_buffers(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->mes.data_fw_obj,
                              &adev->mes.data_fw_gpu_addr,
                              (void **)&adev->mes.data_fw_ptr);

        amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj,
                              &adev->mes.ucode_fw_gpu_addr,
                              (void **)&adev->mes.ucode_fw_ptr);
}

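/*
 * Bring pipe0 of the MES (me=3) out of reset: assert the pipe reset,
 * program the ucode start PC, then clear the halt bit and mark the
 * pipe active. Disabling reverses this and invalidates the i-cache.
 */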
static void mes_v10_1_enable(struct amdgpu_device *adev, bool enable)
{
        uint32_t data = 0;

        if (enable) {
                data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
                data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
                WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);

                /* set ucode start address */
                WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
                             (uint32_t)(adev->mes.uc_start_addr) >> 2);

                /* clear BYPASS_UNCACHED to avoid hangs after interrupt. */
                data = RREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL);
                data = REG_SET_FIELD(data, CP_MES_DC_OP_CNTL,
                                     BYPASS_UNCACHED, 0);
                WREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL, data);

                /* unhalt MES and activate pipe0 */
                data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
                WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
        } else {
                data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
                data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
                data = REG_SET_FIELD(data, CP_MES_CNTL,
                                     MES_INVALIDATE_ICACHE, 1);
                data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
                data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
                WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
        }
}

/* This function is for backdoor MES firmware */
static int mes_v10_1_load_microcode(struct amdgpu_device *adev)
{
        int r;
        uint32_t data;

        if (!adev->mes.fw)
                return -EINVAL;

        r = mes_v10_1_allocate_ucode_buffer(adev);
        if (r)
                return r;

        r = mes_v10_1_allocate_ucode_data_buffer(adev);
        if (r) {
                mes_v10_1_free_ucode_buffers(adev);
                return r;
        }

        mes_v10_1_enable(adev, false);

        WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_CNTL, 0);

        mutex_lock(&adev->srbm_mutex);
        /* me=3, pipe=0, queue=0 */
        nv_grbm_select(adev, 3, 0, 0, 0);

        /* set ucode start address */
        WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
                     (uint32_t)(adev->mes.uc_start_addr) >> 2);

        /* set ucode firmware address */
        WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_LO,
                     lower_32_bits(adev->mes.ucode_fw_gpu_addr));
        WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_HI,
                     upper_32_bits(adev->mes.ucode_fw_gpu_addr));

        /* set ucode instruction cache boundary to 2M-1 */
        WREG32_SOC15(GC, 0, mmCP_MES_MIBOUND_LO, 0x1FFFFF);

        /* set ucode data firmware address */
        WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_LO,
                     lower_32_bits(adev->mes.data_fw_gpu_addr));
        WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_HI,
                     upper_32_bits(adev->mes.data_fw_gpu_addr));

        /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */
        WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF);

        /* invalidate ICACHE */
        switch (adev->asic_type) {
        case CHIP_SIENNA_CICHLID:
                data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
                break;
        default:
                data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
                break;
        }
        data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
        data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
        switch (adev->asic_type) {
        case CHIP_SIENNA_CICHLID:
                WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
                break;
        default:
                WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
                break;
        }

        /* prime the ICACHE. */
        switch (adev->asic_type) {
        case CHIP_SIENNA_CICHLID:
                data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
                break;
        default:
                data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
                break;
        }
        data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
        switch (adev->asic_type) {
        case CHIP_SIENNA_CICHLID:
                WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
                break;
        default:
                WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
                break;
        }

        nv_grbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);

        return 0;
}

static int mes_v10_1_allocate_eop_buf(struct amdgpu_device *adev)
{
        int r;
        u32 *eop;

        r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
                                      AMDGPU_GEM_DOMAIN_GTT,
                                      &adev->mes.eop_gpu_obj,
                                      &adev->mes.eop_gpu_addr,
                                      (void **)&eop);
        if (r) {
                dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
                return r;
        }

        memset(eop, 0, adev->mes.eop_gpu_obj->tbo.mem.size);

        amdgpu_bo_kunmap(adev->mes.eop_gpu_obj);
        amdgpu_bo_unreserve(adev->mes.eop_gpu_obj);

        return 0;
}

static int mes_v10_1_allocate_mem_slots(struct amdgpu_device *adev)
{
        int r;

        r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
        if (r) {
                dev_err(adev->dev,
                        "(%d) mes sch_ctx_offs wb alloc failed\n", r);
                return r;
        }
        adev->mes.sch_ctx_gpu_addr =
                adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
        adev->mes.sch_ctx_ptr =
                (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];

        r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
        if (r) {
                dev_err(adev->dev,
                        "(%d) query_status_fence_offs wb alloc failed\n", r);
                return r;
        }
        adev->mes.query_status_fence_gpu_addr =
                adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
        adev->mes.query_status_fence_ptr =
                (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];

        return 0;
}

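/*
 * Initialize the v10 compute MQD that describes the MES ring to the
 * hardware queue descriptor: EOP buffer, MQD/ring base addresses,
 * rptr report and wptr poll addresses, and doorbell control.
 */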
static int mes_v10_1_mqd_init(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct v10_compute_mqd *mqd = ring->mqd_ptr;
        uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
        uint32_t tmp;

        mqd->header = 0xC0310800;
        mqd->compute_pipelinestat_enable = 0x00000001;
        mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
        mqd->compute_misc_reserved = 0x00000003;

        eop_base_addr = ring->eop_gpu_addr >> 8;
        mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
        mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

        /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
        tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
                            (order_base_2(MES_EOP_SIZE / 4) - 1));

        mqd->cp_hqd_eop_control = tmp;

        /* enable doorbell? */
        tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);

        if (ring->use_doorbell) {
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                    DOORBELL_OFFSET, ring->doorbell_index);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                    DOORBELL_EN, 1);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                    DOORBELL_SOURCE, 0);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                    DOORBELL_HIT, 0);
        }
        else
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                    DOORBELL_EN, 0);

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* disable the queue if it's active */
        ring->wptr = 0;
        mqd->cp_hqd_dequeue_request = 0;
        mqd->cp_hqd_pq_rptr = 0;
        mqd->cp_hqd_pq_wptr_lo = 0;
        mqd->cp_hqd_pq_wptr_hi = 0;

        /* set the pointer to the MQD */
        mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
        mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

        /* set MQD vmid to 0 */
        tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
        mqd->cp_mqd_control = tmp;

        /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
        hqd_gpu_addr = ring->gpu_addr >> 8;
        mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
        mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

        /* set up the HQD, this is similar to CP_RB0_CNTL */
        tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
                            (order_base_2(ring->ring_size / 4) - 1));
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
                            ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
        mqd->cp_hqd_pq_control = tmp;

        /* set the wb address whether it's enabled or not */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
        mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_rptr_report_addr_hi =
                upper_32_bits(wb_gpu_addr) & 0xffff;

        /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
        mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

        tmp = 0;
        /* enable the doorbell if requested */
        if (ring->use_doorbell) {
                tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                    DOORBELL_OFFSET, ring->doorbell_index);

                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                    DOORBELL_EN, 1);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                    DOORBELL_SOURCE, 0);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                    DOORBELL_HIT, 0);
        }

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
        ring->wptr = 0;
        mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);

        /* set the vmid for the queue */
        mqd->cp_hqd_vmid = 0;

        tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
        mqd->cp_hqd_persistent_state = tmp;

        /* set MIN_IB_AVAIL_SIZE */
        tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
        mqd->cp_hqd_ib_control = tmp;

        /* activate the queue */
        mqd->cp_hqd_active = 1;
        return 0;
}

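/*
 * Program the HQD registers for the MES queue directly over MMIO
 * (rather than through the KIQ), with GRBM steered to me=3 pipe0.
 */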
static void mes_v10_1_queue_init_register(struct amdgpu_ring *ring)
{
        struct v10_compute_mqd *mqd = ring->mqd_ptr;
        struct amdgpu_device *adev = ring->adev;
        uint32_t data = 0;

        mutex_lock(&adev->srbm_mutex);
        nv_grbm_select(adev, 3, 0, 0, 0);

        /* set CP_HQD_VMID.VMID = 0. */
        data = RREG32_SOC15(GC, 0, mmCP_HQD_VMID);
        data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
        WREG32_SOC15(GC, 0, mmCP_HQD_VMID, data);

        /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
        data = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
        data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
                             DOORBELL_EN, 0);
        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);

        /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
        WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
        WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

        /* set CP_MQD_CONTROL.VMID=0 */
        data = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
        data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
        WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, data);

        /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

        /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
                     mqd->cp_hqd_pq_rptr_report_addr_lo);
        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
                     mqd->cp_hqd_pq_rptr_report_addr_hi);

        /* set CP_HQD_PQ_CONTROL */
        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

        /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
                     mqd->cp_hqd_pq_wptr_poll_addr_lo);
        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
                     mqd->cp_hqd_pq_wptr_poll_addr_hi);

        /* set CP_HQD_PQ_DOORBELL_CONTROL */
        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
                     mqd->cp_hqd_pq_doorbell_control);

        /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
        WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

        /* set CP_HQD_ACTIVE.ACTIVE=1 */
        WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

        nv_grbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);
}

#if 0
static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev)
{
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
        int r;

        if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
                return -EINVAL;

        r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
        if (r) {
                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
                return r;
        }

        kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);

        r = amdgpu_ring_test_ring(kiq_ring);
        if (r) {
                DRM_ERROR("KIQ enable failed\n");
                kiq_ring->sched.ready = false;
        }
        return r;
}
#endif

static int mes_v10_1_queue_init(struct amdgpu_device *adev)
{
        int r;

        r = mes_v10_1_mqd_init(&adev->mes.ring);
        if (r)
                return r;

#if 0
        r = mes_v10_1_kiq_enable_queue(adev);
        if (r)
                return r;
#else
        mes_v10_1_queue_init_register(&adev->mes.ring);
#endif

        return 0;
}

static int mes_v10_1_ring_init(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        int r;

        ring = &adev->mes.ring;

        ring->funcs = &mes_v10_1_ring_funcs;

        ring->me = 3;
        ring->pipe = 0;
        ring->queue = 0;

        ring->ring_obj = NULL;
        ring->use_doorbell = true;
        ring->doorbell_index = adev->doorbell_index.mes_ring << 1;
        ring->eop_gpu_addr = adev->mes.eop_gpu_addr;
        ring->no_scheduler = true;
        sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);

        r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT);
        if (r)
                return r;

        return 0;
}

static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev)
{
        int r, mqd_size = sizeof(struct v10_compute_mqd);
        struct amdgpu_ring *ring = &adev->mes.ring;

        if (ring->mqd_obj)
                return 0;

        r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
                                    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
                                    &ring->mqd_gpu_addr, &ring->mqd_ptr);
        if (r) {
                dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
                return r;
        }

        /* prepare MQD backup */
        adev->mes.mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
        if (!adev->mes.mqd_backup)
                dev_warn(adev->dev,
                         "no memory to create MQD backup for ring %s\n",
                         ring->name);

        return 0;
}

static int mes_v10_1_sw_init(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        adev->mes.adev = adev;
        adev->mes.funcs = &mes_v10_1_funcs;

        r = mes_v10_1_init_microcode(adev);
        if (r)
                return r;

        r = mes_v10_1_allocate_eop_buf(adev);
        if (r)
                return r;

        r = mes_v10_1_mqd_sw_init(adev);
        if (r)
                return r;

        r = mes_v10_1_ring_init(adev);
        if (r)
                return r;

        r = mes_v10_1_allocate_mem_slots(adev);
        if (r)
                return r;

        return 0;
}

static int mes_v10_1_sw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
        amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);

        kfree(adev->mes.mqd_backup);

        amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
                              &adev->mes.ring.mqd_gpu_addr,
                              &adev->mes.ring.mqd_ptr);

        amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj,
                              &adev->mes.eop_gpu_addr,
                              NULL);

        mes_v10_1_free_microcode(adev);

        return 0;
}

static int mes_v10_1_hw_init(void *handle)
{
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
                r = mes_v10_1_load_microcode(adev);
                if (r) {
                        DRM_ERROR("failed to load MES fw, r=%d\n", r);
                        return r;
                }
        }

        mes_v10_1_enable(adev, true);

        r = mes_v10_1_queue_init(adev);
        if (r)
                goto failure;

        r = mes_v10_1_set_hw_resources(&adev->mes);
        if (r)
                goto failure;

        r = mes_v10_1_query_sched_status(&adev->mes);
        if (r) {
                DRM_ERROR("MES is busy\n");
                goto failure;
        }

        return 0;

failure:
        mes_v10_1_hw_fini(adev);
        return r;
}

static int mes_v10_1_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        mes_v10_1_enable(adev, false);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
                mes_v10_1_free_ucode_buffers(adev);

        return 0;
}

static int mes_v10_1_suspend(void *handle)
{
        return 0;
}

static int mes_v10_1_resume(void *handle)
{
        return 0;
}

static const struct amd_ip_funcs mes_v10_1_ip_funcs = {
        .name = "mes_v10_1",
        .sw_init = mes_v10_1_sw_init,
        .sw_fini = mes_v10_1_sw_fini,
        .hw_init = mes_v10_1_hw_init,
        .hw_fini = mes_v10_1_hw_fini,
        .suspend = mes_v10_1_suspend,
        .resume = mes_v10_1_resume,
};

const struct amdgpu_ip_block_version mes_v10_1_ip_block = {
        .type = AMD_IP_BLOCK_TYPE_MES,
        .major = 10,
        .minor = 1,
        .rev = 0,
        .funcs = &mes_v10_1_ip_funcs,
};
/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include "amdgpu.h"
#include "soc15_common.h"
#include "nv.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "gc/gc_10_1_0_default.h"
#include "v10_structs.h"
#include "mes_api_def.h"

#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid            0x2820
#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid_BASE_IDX   1
#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid            0x4ca1
#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid_BASE_IDX   1

MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes1.bin");

static int mes_v10_1_hw_fini(void *handle);
static int mes_v10_1_kiq_hw_init(struct amdgpu_device *adev);

#define MES_EOP_SIZE   2048

static void mes_v10_1_ring_set_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (ring->use_doorbell) {
                atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
                             ring->wptr);
                WDOORBELL64(ring->doorbell_index, ring->wptr);
        } else {
                BUG();
        }
}

static u64 mes_v10_1_ring_get_rptr(struct amdgpu_ring *ring)
{
        return *ring->rptr_cpu_addr;
}

static u64 mes_v10_1_ring_get_wptr(struct amdgpu_ring *ring)
{
        u64 wptr;

        if (ring->use_doorbell)
                wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
        else
                BUG();
        return wptr;
}

static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = {
        .type = AMDGPU_RING_TYPE_MES,
        .align_mask = 1,
        .nop = 0,
        .support_64bit_ptrs = true,
        .get_rptr = mes_v10_1_ring_get_rptr,
        .get_wptr = mes_v10_1_ring_get_wptr,
        .set_wptr = mes_v10_1_ring_set_wptr,
        .insert_nop = amdgpu_ring_insert_nop,
};

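/*
 * Write a MES API frame to the scheduler ring under mes->ring_lock and
 * poll its completion fence. api_status_off gives the offset of the
 * MES_API_STATUS field, which can sit at a different offset in each
 * packet type; the fence address/value pair is patched in there before
 * submission.
 */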
static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
                                                    void *pkt, int size,
                                                    int api_status_off)
{
        int ndw = size / 4;
        signed long r;
        union MESAPI__ADD_QUEUE *x_pkt = pkt;
        struct MES_API_STATUS *api_status;
        struct amdgpu_device *adev = mes->adev;
        struct amdgpu_ring *ring = &mes->ring;
        unsigned long flags;

        BUG_ON(size % 4 != 0);

        spin_lock_irqsave(&mes->ring_lock, flags);
        if (amdgpu_ring_alloc(ring, ndw)) {
                spin_unlock_irqrestore(&mes->ring_lock, flags);
                return -ENOMEM;
        }

        api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
        api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr;
        api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq;

        amdgpu_ring_write_multiple(ring, pkt, ndw);
        amdgpu_ring_commit(ring);
        spin_unlock_irqrestore(&mes->ring_lock, flags);

        DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);

        r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
                                      adev->usec_timeout);
        if (r < 1) {
                DRM_ERROR("MES failed to respond to msg=%d\n",
                          x_pkt->header.opcode);

                while (halt_if_hws_hang)
                        schedule();

                return -ETIMEDOUT;
        }

        return 0;
}

static int convert_to_mes_queue_type(int queue_type)
{
        if (queue_type == AMDGPU_RING_TYPE_GFX)
                return MES_QUEUE_TYPE_GFX;
        else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
                return MES_QUEUE_TYPE_COMPUTE;
        else if (queue_type == AMDGPU_RING_TYPE_SDMA)
                return MES_QUEUE_TYPE_SDMA;
        else
                BUG();
        return -1;
}

static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
                                  struct mes_add_queue_input *input)
{
        struct amdgpu_device *adev = mes->adev;
        union MESAPI__ADD_QUEUE mes_add_queue_pkt;
        struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
        uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;

        memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));

        mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
        mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
        mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

        mes_add_queue_pkt.process_id = input->process_id;
        mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
        mes_add_queue_pkt.process_va_start = input->process_va_start;
        mes_add_queue_pkt.process_va_end = input->process_va_end;
        mes_add_queue_pkt.process_quantum = input->process_quantum;
        mes_add_queue_pkt.process_context_addr = input->process_context_addr;
        mes_add_queue_pkt.gang_quantum = input->gang_quantum;
        mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
        mes_add_queue_pkt.inprocess_gang_priority =
                input->inprocess_gang_priority;
        mes_add_queue_pkt.gang_global_priority_level =
                input->gang_global_priority_level;
        mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
        mes_add_queue_pkt.mqd_addr = input->mqd_addr;
        mes_add_queue_pkt.wptr_addr = input->wptr_addr;
        mes_add_queue_pkt.queue_type =
                convert_to_mes_queue_type(input->queue_type);
        mes_add_queue_pkt.paging = input->paging;
        mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
        mes_add_queue_pkt.gws_base = input->gws_base;
        mes_add_queue_pkt.gws_size = input->gws_size;
        mes_add_queue_pkt.trap_handler_addr = input->tba_addr;

        return mes_v10_1_submit_pkt_and_poll_completion(mes,
                        &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
                        offsetof(union MESAPI__ADD_QUEUE, api_status));
}

static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
                                     struct mes_remove_queue_input *input)
{
        union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;

        memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));

        mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
        mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
        mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

        mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
        mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;

        return mes_v10_1_submit_pkt_and_poll_completion(mes,
                        &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
                        offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}

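/*
 * Unmap (or preempt without unmapping) a kernel queue that was mapped
 * outside the scheduler, reusing the REMOVE_QUEUE frame with the
 * legacy-queue bits set.
 */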
static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes,
                                        struct mes_unmap_legacy_queue_input *input)
{
        union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;

        memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));

        mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
        mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
        mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

        mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
        mes_remove_queue_pkt.gang_context_addr = 0;

        mes_remove_queue_pkt.pipe_id = input->pipe_id;
        mes_remove_queue_pkt.queue_id = input->queue_id;

        if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
                mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
                mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
                mes_remove_queue_pkt.tf_data =
                        lower_32_bits(input->trail_fence_data);
        } else {
                if (input->queue_type == AMDGPU_RING_TYPE_GFX)
                        mes_remove_queue_pkt.unmap_legacy_gfx_queue = 1;
                else
                        mes_remove_queue_pkt.unmap_kiq_utility_queue = 1;
        }

        return mes_v10_1_submit_pkt_and_poll_completion(mes,
                        &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
                        offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}

static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
                                  struct mes_suspend_gang_input *input)
{
        return 0;
}

static int mes_v10_1_resume_gang(struct amdgpu_mes *mes,
                                 struct mes_resume_gang_input *input)
{
        return 0;
}

static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes)
{
        union MESAPI__QUERY_MES_STATUS mes_status_pkt;

        memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));

        mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
        mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
        mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

        return mes_v10_1_submit_pkt_and_poll_completion(mes,
                        &mes_status_pkt, sizeof(mes_status_pkt),
                        offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
}

static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
{
        int i;
        struct amdgpu_device *adev = mes->adev;
        union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;

        memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));

        mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
        mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
        mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

        mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
        mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
        mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
        mes_set_hw_res_pkt.paging_vmid = 0;
        mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
        mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
                mes->query_status_fence_gpu_addr;

        for (i = 0; i < MAX_COMPUTE_PIPES; i++)
                mes_set_hw_res_pkt.compute_hqd_mask[i] =
                        mes->compute_hqd_mask[i];

        for (i = 0; i < MAX_GFX_PIPES; i++)
                mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];

        for (i = 0; i < MAX_SDMA_PIPES; i++)
                mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];

        for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
                mes_set_hw_res_pkt.aggregated_doorbells[i] =
                        mes->aggregated_doorbells[i];

        for (i = 0; i < 5; i++) {
                mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
                mes_set_hw_res_pkt.mmhub_base[i] =
                        adev->reg_offset[MMHUB_HWIP][0][i];
                mes_set_hw_res_pkt.osssys_base[i] =
                        adev->reg_offset[OSSSYS_HWIP][0][i];
        }

        mes_set_hw_res_pkt.disable_reset = 1;
        mes_set_hw_res_pkt.disable_mes_log = 1;
        mes_set_hw_res_pkt.use_different_vmid_compute = 1;

        return mes_v10_1_submit_pkt_and_poll_completion(mes,
                        &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
                        offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
}

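/*
 * Route one aggregated doorbell to the firmware for each priority
 * level (low/normal/medium/high/realtime) via the five
 * CP_MES_DOORBELL_CONTROL registers, and enable doorbell-updated
 * messages for the gfx HQD.
 */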
static void mes_v10_1_init_aggregated_doorbell(struct amdgpu_mes *mes)
{
        struct amdgpu_device *adev = mes->adev;
        uint32_t data;

        data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1);
        data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
                  CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
                  CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
        data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
                CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
        data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
        WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1, data);

        data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2);
        data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
                  CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
                  CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
        data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
                CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
        data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
        WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2, data);

        data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3);
        data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
                  CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
                  CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
        data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
                CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
        data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
        WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3, data);

        data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4);
        data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
                  CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
                  CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
        data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
                CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
        data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
        WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4, data);

        data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5);
        data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
                  CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
                  CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
        data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
                CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
        data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
        WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5, data);

        data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
        WREG32_SOC15(GC, 0, mmCP_HQD_GFX_CONTROL, data);
}

static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
        .add_hw_queue = mes_v10_1_add_hw_queue,
        .remove_hw_queue = mes_v10_1_remove_hw_queue,
        .unmap_legacy_queue = mes_v10_1_unmap_legacy_queue,
        .suspend_gang = mes_v10_1_suspend_gang,
        .resume_gang = mes_v10_1_resume_gang,
};

static int mes_v10_1_init_microcode(struct amdgpu_device *adev,
                                    enum admgpu_mes_pipe pipe)
{
        const char *chip_name;
        char fw_name[30];
        int err;
        const struct mes_firmware_header_v1_0 *mes_hdr;
        struct amdgpu_firmware_info *info;

        switch (adev->ip_versions[GC_HWIP][0]) {
        case IP_VERSION(10, 1, 10):
                chip_name = "navi10";
                break;
        case IP_VERSION(10, 3, 0):
                chip_name = "sienna_cichlid";
                break;
        default:
                BUG();
        }

        if (pipe == AMDGPU_MES_SCHED_PIPE)
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin",
                         chip_name);
        else
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes1.bin",
                         chip_name);

        err = request_firmware(&adev->mes.fw[pipe], fw_name, adev->dev);
        if (err)
                return err;

        err = amdgpu_ucode_validate(adev->mes.fw[pipe]);
        if (err) {
                release_firmware(adev->mes.fw[pipe]);
                adev->mes.fw[pipe] = NULL;
                return err;
        }

        mes_hdr = (const struct mes_firmware_header_v1_0 *)
                adev->mes.fw[pipe]->data;
        adev->mes.uc_start_addr[pipe] =
                le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
                ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
        adev->mes.data_start_addr[pipe] =
                le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
                ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
                int ucode, ucode_data;

                if (pipe == AMDGPU_MES_SCHED_PIPE) {
                        ucode = AMDGPU_UCODE_ID_CP_MES;
                        ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
                } else {
                        ucode = AMDGPU_UCODE_ID_CP_MES1;
                        ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
                }

                info = &adev->firmware.ucode[ucode];
                info->ucode_id = ucode;
                info->fw = adev->mes.fw[pipe];
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
                              PAGE_SIZE);

                info = &adev->firmware.ucode[ucode_data];
                info->ucode_id = ucode_data;
                info->fw = adev->mes.fw[pipe];
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
                              PAGE_SIZE);
        }

        return 0;
}

static void mes_v10_1_free_microcode(struct amdgpu_device *adev,
                                     enum admgpu_mes_pipe pipe)
{
        release_firmware(adev->mes.fw[pipe]);
        adev->mes.fw[pipe] = NULL;
}

static int mes_v10_1_allocate_ucode_buffer(struct amdgpu_device *adev,
                                           enum admgpu_mes_pipe pipe)
{
        int r;
        const struct mes_firmware_header_v1_0 *mes_hdr;
        const __le32 *fw_data;
        unsigned fw_size;

        mes_hdr = (const struct mes_firmware_header_v1_0 *)
                adev->mes.fw[pipe]->data;

        fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
                   le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
        fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

        r = amdgpu_bo_create_reserved(adev, fw_size,
                                      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
                                      &adev->mes.ucode_fw_obj[pipe],
                                      &adev->mes.ucode_fw_gpu_addr[pipe],
                                      (void **)&adev->mes.ucode_fw_ptr[pipe]);
        if (r) {
                dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
                return r;
        }

        memcpy(adev->mes.ucode_fw_ptr[pipe], fw_data, fw_size);

        amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[pipe]);
        amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[pipe]);

        return 0;
}

static int mes_v10_1_allocate_ucode_data_buffer(struct amdgpu_device *adev,
                                                enum admgpu_mes_pipe pipe)
{
        int r;
        const struct mes_firmware_header_v1_0 *mes_hdr;
        const __le32 *fw_data;
        unsigned fw_size;

        mes_hdr = (const struct mes_firmware_header_v1_0 *)
                adev->mes.fw[pipe]->data;

        fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
                   le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
        fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

        r = amdgpu_bo_create_reserved(adev, fw_size,
                                      64 * 1024, AMDGPU_GEM_DOMAIN_GTT,
                                      &adev->mes.data_fw_obj[pipe],
                                      &adev->mes.data_fw_gpu_addr[pipe],
                                      (void **)&adev->mes.data_fw_ptr[pipe]);
        if (r) {
                dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
                return r;
        }

        memcpy(adev->mes.data_fw_ptr[pipe], fw_data, fw_size);

        amdgpu_bo_kunmap(adev->mes.data_fw_obj[pipe]);
        amdgpu_bo_unreserve(adev->mes.data_fw_obj[pipe]);

        return 0;
}

static void mes_v10_1_free_ucode_buffers(struct amdgpu_device *adev,
                                         enum admgpu_mes_pipe pipe)
{
        amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe],
                              &adev->mes.data_fw_gpu_addr[pipe],
                              (void **)&adev->mes.data_fw_ptr[pipe]);

        amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[pipe],
                              &adev->mes.ucode_fw_gpu_addr[pipe],
                              (void **)&adev->mes.ucode_fw_ptr[pipe]);
}

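/*
 * Reset, program the start PC for, and activate each MES pipe: pipe0
 * always runs the scheduler firmware, while pipe1 is only brought up
 * when enable_mes_kiq selects the KIQ-on-MES path.
 */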
static void mes_v10_1_enable(struct amdgpu_device *adev, bool enable)
{
        uint32_t pipe, data = 0;

        if (enable) {
                data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
                data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
                data = REG_SET_FIELD(data, CP_MES_CNTL,
                                     MES_PIPE1_RESET, adev->enable_mes_kiq ? 1 : 0);
                WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);

                mutex_lock(&adev->srbm_mutex);
                for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
                        if (!adev->enable_mes_kiq &&
                            pipe == AMDGPU_MES_KIQ_PIPE)
                                continue;

                        nv_grbm_select(adev, 3, pipe, 0, 0);
                        WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
                                     (uint32_t)(adev->mes.uc_start_addr[pipe]) >> 2);
                }
                nv_grbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                /* clear BYPASS_UNCACHED to avoid hangs after interrupt. */
                data = RREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL);
                data = REG_SET_FIELD(data, CP_MES_DC_OP_CNTL,
                                     BYPASS_UNCACHED, 0);
                WREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL, data);

                /* unhalt MES and activate pipe0 (and pipe1 for the KIQ) */
                data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
                data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE,
                                     adev->enable_mes_kiq ? 1 : 0);
                WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
                udelay(100);
        } else {
                data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
                data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
                data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
                data = REG_SET_FIELD(data, CP_MES_CNTL,
                                     MES_INVALIDATE_ICACHE, 1);
                data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
                data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET,
                                     adev->enable_mes_kiq ? 1 : 0);
                data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
                WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
        }
}

/* This function is for backdoor MES firmware */
static int mes_v10_1_load_microcode(struct amdgpu_device *adev,
                                    enum admgpu_mes_pipe pipe)
{
        int r;
        uint32_t data;

        mes_v10_1_enable(adev, false);

        if (!adev->mes.fw[pipe])
                return -EINVAL;

        r = mes_v10_1_allocate_ucode_buffer(adev, pipe);
        if (r)
                return r;

        r = mes_v10_1_allocate_ucode_data_buffer(adev, pipe);
        if (r) {
                mes_v10_1_free_ucode_buffers(adev, pipe);
                return r;
        }

        WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_CNTL, 0);

        mutex_lock(&adev->srbm_mutex);
        /* me=3, queue=0 */
        nv_grbm_select(adev, 3, pipe, 0, 0);

        /* set ucode start address */
        WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
                     (uint32_t)(adev->mes.uc_start_addr[pipe]) >> 2);

        /* set ucode firmware address */
        WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_LO,
                     lower_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
        WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_HI,
                     upper_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));

        /* set ucode instruction cache boundary to 2M-1 */
        WREG32_SOC15(GC, 0, mmCP_MES_MIBOUND_LO, 0x1FFFFF);

        /* set ucode data firmware address */
        WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_LO,
                     lower_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
        WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_HI,
                     upper_32_bits(adev->mes.data_fw_gpu_addr[pipe]));

        /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */
        WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF);

        /* invalidate ICACHE */
        switch (adev->ip_versions[GC_HWIP][0]) {
        case IP_VERSION(10, 3, 0):
                data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
                break;
        default:
                data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
                break;
        }
        data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
        data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
        switch (adev->ip_versions[GC_HWIP][0]) {
        case IP_VERSION(10, 3, 0):
                WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
                break;
        default:
                WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
                break;
        }

        /* prime the ICACHE. */
        switch (adev->ip_versions[GC_HWIP][0]) {
        case IP_VERSION(10, 3, 0):
                data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
                break;
        default:
                data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
                break;
        }
        data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
        switch (adev->ip_versions[GC_HWIP][0]) {
        case IP_VERSION(10, 3, 0):
                WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
                break;
        default:
                WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
                break;
        }

        nv_grbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);

        return 0;
}

static int mes_v10_1_allocate_eop_buf(struct amdgpu_device *adev,
                                      enum admgpu_mes_pipe pipe)
{
        int r;
        u32 *eop;

        r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
                                      AMDGPU_GEM_DOMAIN_GTT,
                                      &adev->mes.eop_gpu_obj[pipe],
                                      &adev->mes.eop_gpu_addr[pipe],
                                      (void **)&eop);
        if (r) {
                dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
                return r;
        }

        memset(eop, 0, adev->mes.eop_gpu_obj[pipe]->tbo.base.size);

        amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[pipe]);
        amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[pipe]);

        return 0;
}

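/*
 * Initialize the v10 compute MQD for a MES pipe; register defaults are
 * taken from the gc_10_1_0 default tables instead of being read back
 * from live registers.
 */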
712static int mes_v10_1_mqd_init(struct amdgpu_ring *ring)
713{
714 struct v10_compute_mqd *mqd = ring->mqd_ptr;
715 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
716 uint32_t tmp;
717
718 mqd->header = 0xC0310800;
719 mqd->compute_pipelinestat_enable = 0x00000001;
720 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
721 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
722 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
723 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
724 mqd->compute_misc_reserved = 0x00000003;
725
726 eop_base_addr = ring->eop_gpu_addr >> 8;
727
728 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
729 tmp = mmCP_HQD_EOP_CONTROL_DEFAULT;
730 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
731 (order_base_2(MES_EOP_SIZE / 4) - 1));

	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_control = tmp;

	/* disable the queue if it's active */
	ring->wptr = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = mmCP_MQD_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = ring->rptr_gpu_addr;
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = ring->wptr_gpu_addr;
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = mmCP_HQD_PQ_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	/*
	 * REG_SET_FIELD() already shifts the value into the field, so
	 * RPTR_BLOCK_SIZE must be passed unshifted; pre-shifting it by 8
	 * would double-shift the value out of the field mask.
	 */
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* enable doorbell? */
	tmp = 0;
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}
	mqd->cp_hqd_pq_doorbell_control = tmp;

	mqd->cp_hqd_vmid = 0;
	/* activate the queue */
	mqd->cp_hqd_active = 1;
	mqd->cp_hqd_persistent_state = mmCP_HQD_PERSISTENT_STATE_DEFAULT;
	mqd->cp_hqd_ib_control = mmCP_HQD_IB_CONTROL_DEFAULT;
	mqd->cp_hqd_iq_timer = mmCP_HQD_IQ_TIMER_DEFAULT;
	mqd->cp_hqd_quantum = mmCP_HQD_QUANTUM_DEFAULT;

	tmp = mmCP_HQD_GFX_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_GFX_CONTROL, DB_UPDATED_MSG_EN, 1);
	/* offset: 184 - this is used for CP_HQD_GFX_CONTROL */
	mqd->cp_hqd_suspend_cntl_stack_offset = tmp;

	return 0;
}

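/*
 * Reference-only path that programs the HQD registers directly instead of
 * going through the KIQ; kept under #if 0 since the driver maps the MES
 * queue via kiq_map_queues() below.
 */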
#if 0
static void mes_v10_1_queue_init_register(struct amdgpu_ring *ring)
{
	struct v10_compute_mqd *mqd = ring->mqd_ptr;
	struct amdgpu_device *adev = ring->adev;
	uint32_t data = 0;

	mutex_lock(&adev->srbm_mutex);
	nv_grbm_select(adev, 3, ring->pipe, 0, 0);

	/* set CP_HQD_VMID.VMID = 0. */
	data = RREG32_SOC15(GC, 0, mmCP_HQD_VMID);
	data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, data);

	/* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
	data = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
			     DOORBELL_EN, 0);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);

	/* set CP_MQD_BASE_ADDR/HI with the MQD base address */
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set CP_MQD_CONTROL.VMID=0 */
	data = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
	data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, data);

	/* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		     mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		     mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* set CP_HQD_PQ_CONTROL */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

	/* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
		     mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		     mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* set CP_HQD_PQ_DOORBELL_CONTROL */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
		     mqd->cp_hqd_pq_doorbell_control);

	/* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

	/* set CP_HQD_ACTIVE.ACTIVE=1 */
	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

	nv_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
#endif

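/*
 * Map the MES ring as a hardware queue by emitting the KIQ MAP_QUEUES
 * packet, then ring-test the KIQ to make sure the packet was consumed.
 */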
static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	int r;

	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
		return -EINVAL;

	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
	if (r) {
		DRM_ERROR("Failed to allocate space on the KIQ ring (%d).\n", r);
		return r;
	}

	kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);

	r = amdgpu_ring_test_ring(kiq_ring);
	if (r) {
		DRM_ERROR("KIQ enable failed\n");
		kiq_ring->sched.ready = false;
	}

	return r;
}

static int mes_v10_1_queue_init(struct amdgpu_device *adev)
{
	int r;

	r = mes_v10_1_mqd_init(&adev->mes.ring);
	if (r)
		return r;

	r = mes_v10_1_kiq_enable_queue(adev);
	if (r)
		return r;

	return 0;
}

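/*
 * The MES scheduler ring lives on ME 3, pipe 0, queue 0. The doorbell
 * index is shifted left by one because doorbell indices are dword-based
 * while MES uses 64-bit doorbells, so each ring takes two dword slots.
 */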
static int mes_v10_1_ring_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;

	ring = &adev->mes.ring;

	ring->funcs = &mes_v10_1_ring_funcs;

	ring->me = 3;
	ring->pipe = 0;
	ring->queue = 0;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE];
	ring->no_scheduler = true;
	sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}

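/*
 * The MES KIQ ring shares ME 3 but runs on pipe 1. ring->adev is left
 * NULL here; amdgpu_ring_init() is expected to fill it in along with the
 * ring buffer itself.
 */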
static int mes_v10_1_kiq_ring_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;

	spin_lock_init(&adev->gfx.kiq.ring_lock);

	ring = &adev->gfx.kiq.ring;

	ring->me = 3;
	ring->pipe = 1;
	ring->queue = 0;

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_KIQ_PIPE];
	ring->no_scheduler = true;
	sprintf(ring->name, "mes_kiq_%d.%d.%d",
		ring->me, ring->pipe, ring->queue);

	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}

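/*
 * Create the GTT-backed MQD buffer for the given pipe's ring (KIQ or
 * scheduler) plus a CPU-side backup copy, presumably so the MQD contents
 * can be restored later (e.g. across reset); failure to allocate the
 * backup is only a warning, not fatal.
 */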
static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev,
				 enum admgpu_mes_pipe pipe)
{
	int r, mqd_size = sizeof(struct v10_compute_mqd);
	struct amdgpu_ring *ring;

	if (pipe == AMDGPU_MES_KIQ_PIPE)
		ring = &adev->gfx.kiq.ring;
	else if (pipe == AMDGPU_MES_SCHED_PIPE)
		ring = &adev->mes.ring;
	else
		BUG();

	if (ring->mqd_obj)
		return 0;

	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
				    &ring->mqd_gpu_addr, &ring->mqd_ptr);
	if (r) {
		dev_warn(adev->dev, "failed to create ring mqd bo (%d)\n", r);
		return r;
	}
	memset(ring->mqd_ptr, 0, mqd_size);

	/* prepare MQD backup */
	adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL);
	if (!adev->mes.mqd_backup[pipe])
		dev_warn(adev->dev,
			 "no memory to create MQD backup for ring %s\n",
			 ring->name);

	return 0;
}

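/*
 * IP-block sw_init: hook up the MES function tables, then, for each
 * enabled pipe, load the microcode and allocate the EOP and MQD buffers,
 * and finally create the KIQ (if used) and scheduler rings.
 */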
static int mes_v10_1_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int pipe, r;

	adev->mes.adev = adev;
	adev->mes.funcs = &mes_v10_1_funcs;
	adev->mes.kiq_hw_init = &mes_v10_1_kiq_hw_init;

	r = amdgpu_mes_init(adev);
	if (r)
		return r;

	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
			continue;

		r = mes_v10_1_init_microcode(adev, pipe);
		if (r)
			return r;

		r = mes_v10_1_allocate_eop_buf(adev, pipe);
		if (r)
			return r;

		r = mes_v10_1_mqd_sw_init(adev, pipe);
		if (r)
			return r;
	}

	if (adev->enable_mes_kiq) {
		r = mes_v10_1_kiq_ring_init(adev);
		if (r)
			return r;
	}

	r = mes_v10_1_ring_init(adev);
	if (r)
		return r;

	return 0;
}

static int mes_v10_1_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int pipe;

	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);

	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		kfree(adev->mes.mqd_backup[pipe]);

		amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
				      &adev->mes.eop_gpu_addr[pipe],
				      NULL);

		mes_v10_1_free_microcode(adev, pipe);
	}

	amdgpu_bo_free_kernel(&adev->gfx.kiq.ring.mqd_obj,
			      &adev->gfx.kiq.ring.mqd_gpu_addr,
			      &adev->gfx.kiq.ring.mqd_ptr);

	amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
			      &adev->mes.ring.mqd_gpu_addr,
			      &adev->mes.ring.mqd_ptr);

	amdgpu_ring_fini(&adev->gfx.kiq.ring);
	amdgpu_ring_fini(&adev->mes.ring);

	amdgpu_mes_fini(adev);
	return 0;
}

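/*
 * Tell the RLC which queue is the KIQ. The low byte of RLC_CP_SCHEDULERS
 * selects the queue (me/pipe/queue packed at bits 5/3/0); setting bit 7
 * in a second write then appears to latch/activate the selection.
 */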
static void mes_v10_1_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which queue is the KIQ */
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(10, 3, 0):
	case IP_VERSION(10, 3, 2):
	case IP_VERSION(10, 3, 1):
	case IP_VERSION(10, 3, 4):
		tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
		tmp &= 0xffffff00;
		tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
		WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
		tmp |= 0x80;
		WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
		break;
	default:
		tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
		tmp &= 0xffffff00;
		tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
		WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
		tmp |= 0x80;
		WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
		break;
	}
}

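/*
 * Bring up MES through the KIQ pipe: with direct (non-PSP) firmware
 * loading, load both the KIQ and scheduler pipe microcode, then enable
 * MES, point the RLC at the KIQ and map the MES queue through it.
 */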
static int mes_v10_1_kiq_hw_init(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		r = mes_v10_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE);
		if (r) {
			DRM_ERROR("failed to load MES KIQ fw, r=%d\n", r);
			return r;
		}

		r = mes_v10_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE);
		if (r) {
			DRM_ERROR("failed to load MES fw, r=%d\n", r);
			return r;
		}
	}

	mes_v10_1_enable(adev, true);

	mes_v10_1_kiq_setting(&adev->gfx.kiq.ring);

	r = mes_v10_1_queue_init(adev);
	if (r)
		goto failure;

	return r;

failure:
	mes_v10_1_hw_fini(adev);
	return r;
}

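/*
 * IP-block hw_init: when the KIQ pipe is not used, load and enable the
 * scheduler pipe directly. Either way, map the MES queue, hand the
 * hardware resources over to MES, set up the aggregated doorbell and
 * verify the scheduler responds before marking the ring ready.
 */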
static int mes_v10_1_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->enable_mes_kiq) {
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			r = mes_v10_1_load_microcode(adev,
						     AMDGPU_MES_SCHED_PIPE);
			if (r) {
				DRM_ERROR("failed to load MES fw, r=%d\n", r);
				return r;
			}
		}

		mes_v10_1_enable(adev, true);
	}

	r = mes_v10_1_queue_init(adev);
	if (r)
		goto failure;

	r = mes_v10_1_set_hw_resources(&adev->mes);
	if (r)
		goto failure;

	mes_v10_1_init_aggregated_doorbell(&adev->mes);

	r = mes_v10_1_query_sched_status(&adev->mes);
	if (r) {
		DRM_ERROR("MES is busy\n");
		goto failure;
	}

	/*
	 * Disable KIQ ring usage from the driver once MES is enabled.
	 * MES uses KIQ ring exclusively so driver cannot access KIQ ring
	 * with MES enabled.
	 */
	adev->gfx.kiq.ring.sched.ready = false;
	adev->mes.ring.sched.ready = true;

	return 0;

failure:
	mes_v10_1_hw_fini(adev);
	return r;
}

static int mes_v10_1_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->mes.ring.sched.ready = false;

	mes_v10_1_enable(adev, false);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		mes_v10_1_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
		mes_v10_1_free_ucode_buffers(adev, AMDGPU_MES_SCHED_PIPE);
	}

	return 0;
}

static int mes_v10_1_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_mes_suspend(adev);
	if (r)
		return r;

	return mes_v10_1_hw_fini(adev);
}

static int mes_v10_1_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = mes_v10_1_hw_init(adev);
	if (r)
		return r;

	return amdgpu_mes_resume(adev);
}

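/*
 * Kick off the MES self test after init, but skip it when re-initializing
 * after a GPU reset.
 */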
static int mes_v10_1_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!amdgpu_in_reset(adev))
		amdgpu_mes_self_test(adev);

	return 0;
}

static const struct amd_ip_funcs mes_v10_1_ip_funcs = {
	.name = "mes_v10_1",
	.late_init = mes_v10_1_late_init,
	.sw_init = mes_v10_1_sw_init,
	.sw_fini = mes_v10_1_sw_fini,
	.hw_init = mes_v10_1_hw_init,
	.hw_fini = mes_v10_1_hw_fini,
	.suspend = mes_v10_1_suspend,
	.resume = mes_v10_1_resume,
};

const struct amdgpu_ip_block_version mes_v10_1_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_MES,
	.major = 10,
	.minor = 1,
	.rev = 0,
	.funcs = &mes_v10_1_ip_funcs,
};