/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include "amdgpu.h"
#include "soc15_common.h"
#include "nv.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "v10_structs.h"
#include "mes_api_def.h"

#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid 0x2820
#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid_BASE_IDX 1

MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin");

static int mes_v10_1_hw_fini(void *handle);

#define MES_EOP_SIZE 2048

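/*
 * Ring accessors for the MES ring. The ring is driven purely through its
 * doorbell: rptr and wptr live in the writeback (wb) buffer, and the BUG()
 * paths below enforce that a doorbell is always in use.
 */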
static void mes_v10_1_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs],
			     ring->wptr);
		WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
		BUG();
	}
}

static u64 mes_v10_1_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}

static u64 mes_v10_1_ring_get_wptr(struct amdgpu_ring *ring)
{
	u64 wptr;

	if (ring->use_doorbell)
		wptr = atomic64_read((atomic64_t *)
				     &ring->adev->wb.wb[ring->wptr_offs]);
	else
		BUG();
	return wptr;
}

static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = {
	.type = AMDGPU_RING_TYPE_MES,
	.align_mask = 1,
	.nop = 0,
	.support_64bit_ptrs = true,
	.get_rptr = mes_v10_1_ring_get_rptr,
	.get_wptr = mes_v10_1_ring_get_wptr,
	.set_wptr = mes_v10_1_ring_set_wptr,
	.insert_nop = amdgpu_ring_insert_nop,
};

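/*
 * Write an MES API packet to the MES ring and busy-wait for the firmware to
 * signal completion. Each packet carries an api_status fence address/value
 * pair that the MES firmware writes back once the request is processed, so
 * polling the ring fence up to adev->usec_timeout is sufficient.
 */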
static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
						    void *pkt, int size)
{
	int ndw = size / 4;
	signed long r;
	union MESAPI__ADD_QUEUE *x_pkt = pkt;
	struct amdgpu_device *adev = mes->adev;
	struct amdgpu_ring *ring = &mes->ring;

	BUG_ON(size % 4 != 0);

	if (amdgpu_ring_alloc(ring, ndw))
		return -ENOMEM;

	amdgpu_ring_write_multiple(ring, pkt, ndw);
	amdgpu_ring_commit(ring);

	DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);

	r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
				      adev->usec_timeout);
	if (r < 1) {
		DRM_ERROR("MES failed to respond to msg=%d\n",
			  x_pkt->header.opcode);
		return -ETIMEDOUT;
	}

	return 0;
}

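/* Map an amdgpu ring type onto the queue type expected by the MES API. */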
static int convert_to_mes_queue_type(int queue_type)
{
	if (queue_type == AMDGPU_RING_TYPE_GFX)
		return MES_QUEUE_TYPE_GFX;
	else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
		return MES_QUEUE_TYPE_COMPUTE;
	else if (queue_type == AMDGPU_RING_TYPE_SDMA)
		return MES_QUEUE_TYPE_SDMA;
	else
		BUG();
	return -1;
}

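/*
 * Ask the MES firmware to map a new hardware queue. The packet describes
 * the owning process and gang (context addresses, quanta, priorities) as
 * well as the queue itself (MQD address, wptr address, doorbell offset,
 * queue type). The page table base is passed as an offset from vram_start.
 */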
static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
				  struct mes_add_queue_input *input)
{
	struct amdgpu_device *adev = mes->adev;
	union MESAPI__ADD_QUEUE mes_add_queue_pkt;

	memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));

	mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
	mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_add_queue_pkt.process_id = input->process_id;
	mes_add_queue_pkt.page_table_base_addr =
		input->page_table_base_addr - adev->gmc.vram_start;
	mes_add_queue_pkt.process_va_start = input->process_va_start;
	mes_add_queue_pkt.process_va_end = input->process_va_end;
	mes_add_queue_pkt.process_quantum = input->process_quantum;
	mes_add_queue_pkt.process_context_addr = input->process_context_addr;
	mes_add_queue_pkt.gang_quantum = input->gang_quantum;
	mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
	mes_add_queue_pkt.inprocess_gang_priority =
		input->inprocess_gang_priority;
	mes_add_queue_pkt.gang_global_priority_level =
		input->gang_global_priority_level;
	mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
	mes_add_queue_pkt.mqd_addr = input->mqd_addr;
	mes_add_queue_pkt.wptr_addr = input->wptr_addr;
	mes_add_queue_pkt.queue_type =
		convert_to_mes_queue_type(input->queue_type);
	mes_add_queue_pkt.paging = input->paging;

	mes_add_queue_pkt.api_status.api_completion_fence_addr =
		mes->ring.fence_drv.gpu_addr;
	mes_add_queue_pkt.api_status.api_completion_fence_value =
		++mes->ring.fence_drv.sync_seq;

	return mes_v10_1_submit_pkt_and_poll_completion(mes,
			&mes_add_queue_pkt, sizeof(mes_add_queue_pkt));
}

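/*
 * Ask the MES firmware to unmap a hardware queue, identified by its
 * doorbell offset and gang context address.
 */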
static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
				     struct mes_remove_queue_input *input)
{
	union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;

	memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));

	mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
	mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
	mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;

	mes_remove_queue_pkt.api_status.api_completion_fence_addr =
		mes->ring.fence_drv.gpu_addr;
	mes_remove_queue_pkt.api_status.api_completion_fence_value =
		++mes->ring.fence_drv.sync_seq;

	return mes_v10_1_submit_pkt_and_poll_completion(mes,
			&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
}

static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
				  struct mes_suspend_gang_input *input)
{
	return 0;
}

static int mes_v10_1_resume_gang(struct amdgpu_mes *mes,
				 struct mes_resume_gang_input *input)
{
	return 0;
}

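/*
 * Query the scheduler status; used after hw_init as a sanity check that
 * the MES firmware is alive and answering API packets.
 */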
static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes)
{
	union MESAPI__QUERY_MES_STATUS mes_status_pkt;

	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));

	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_status_pkt.api_status.api_completion_fence_addr =
		mes->ring.fence_drv.gpu_addr;
	mes_status_pkt.api_status.api_completion_fence_value =
		++mes->ring.fence_drv.sync_seq;

	return mes_v10_1_submit_pkt_and_poll_completion(mes,
			&mes_status_pkt, sizeof(mes_status_pkt));
}

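/*
 * Hand over to the firmware the hardware resources MES may manage: VMID
 * masks for both hubs, per-pipe HQD masks for compute/gfx/SDMA, aggregated
 * doorbells per priority level, and the GPU addresses of the scheduler
 * context and the query-status fence.
 */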
static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
{
	int i;
	struct amdgpu_device *adev = mes->adev;
	union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;

	memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));

	mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
	mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
	mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
	mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
	mes_set_hw_res_pkt.paging_vmid = 0;
	mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
	mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
		mes->query_status_fence_gpu_addr;

	for (i = 0; i < MAX_COMPUTE_PIPES; i++)
		mes_set_hw_res_pkt.compute_hqd_mask[i] =
			mes->compute_hqd_mask[i];

	for (i = 0; i < MAX_GFX_PIPES; i++)
		mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];

	for (i = 0; i < MAX_SDMA_PIPES; i++)
		mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];

	for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
		mes_set_hw_res_pkt.agreegated_doorbells[i] =
			mes->agreegated_doorbells[i];

	mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
		mes->ring.fence_drv.gpu_addr;
	mes_set_hw_res_pkt.api_status.api_completion_fence_value =
		++mes->ring.fence_drv.sync_seq;

	return mes_v10_1_submit_pkt_and_poll_completion(mes,
			&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt));
}

static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
	.add_hw_queue = mes_v10_1_add_hw_queue,
	.remove_hw_queue = mes_v10_1_remove_hw_queue,
	.suspend_gang = mes_v10_1_suspend_gang,
	.resume_gang = mes_v10_1_resume_gang,
};

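/*
 * Fetch and validate the MES firmware for this ASIC, cache the ucode/data
 * versions and start addresses from the firmware header, and register both
 * images with the common loader when the PSP front-door path is used.
 */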
static int mes_v10_1_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	struct amdgpu_firmware_info *info;

	switch (adev->asic_type) {
	case CHIP_NAVI10:
		chip_name = "navi10";
		break;
	case CHIP_SIENNA_CICHLID:
		chip_name = "sienna_cichlid";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin", chip_name);
	err = request_firmware(&adev->mes.fw, fw_name, adev->dev);
	if (err)
		return err;

	err = amdgpu_ucode_validate(adev->mes.fw);
	if (err) {
		release_firmware(adev->mes.fw);
		adev->mes.fw = NULL;
		return err;
	}

	mes_hdr = (const struct mes_firmware_header_v1_0 *)adev->mes.fw->data;
	adev->mes.ucode_fw_version = le32_to_cpu(mes_hdr->mes_ucode_version);
	adev->mes.data_fw_version =
		le32_to_cpu(mes_hdr->mes_ucode_data_version);
	adev->mes.uc_start_addr =
		le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
		((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
	adev->mes.data_start_addr =
		le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
		((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MES;
		info->fw = adev->mes.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
			      PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES_DATA];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MES_DATA;
		info->fw = adev->mes.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
			      PAGE_SIZE);
	}

	return 0;
}

static void mes_v10_1_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->mes.fw);
	adev->mes.fw = NULL;
}

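/*
 * For backdoor (direct register) loading: copy the MES instruction image
 * out of the firmware blob into a GTT buffer object the CP can fetch from.
 * The data image below gets the same treatment with 64K alignment.
 */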
static int mes_v10_1_allocate_ucode_buffer(struct amdgpu_device *adev)
{
	int r;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw->data;

	fw_data = (const __le32 *)(adev->mes.fw->data +
		   le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_size,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->mes.ucode_fw_obj,
				      &adev->mes.ucode_fw_gpu_addr,
				      (void **)&adev->mes.ucode_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.ucode_fw_ptr, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->mes.ucode_fw_obj);
	amdgpu_bo_unreserve(adev->mes.ucode_fw_obj);

	return 0;
}

static int mes_v10_1_allocate_ucode_data_buffer(struct amdgpu_device *adev)
{
	int r;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw->data;

	fw_data = (const __le32 *)(adev->mes.fw->data +
		   le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_size,
				      64 * 1024, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->mes.data_fw_obj,
				      &adev->mes.data_fw_gpu_addr,
				      (void **)&adev->mes.data_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.data_fw_ptr, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->mes.data_fw_obj);
	amdgpu_bo_unreserve(adev->mes.data_fw_obj);

	return 0;
}

static void mes_v10_1_free_ucode_buffers(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->mes.data_fw_obj,
			      &adev->mes.data_fw_gpu_addr,
			      (void **)&adev->mes.data_fw_ptr);

	amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj,
			      &adev->mes.ucode_fw_gpu_addr,
			      (void **)&adev->mes.ucode_fw_ptr);
}

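/*
 * Halt or start the MES. Enabling resets pipe0, programs the ucode start
 * address, clears BYPASS_UNCACHED (per the inline comment, to avoid hangs
 * after an interrupt), then releases reset/halt with pipe0 active.
 * Disabling halts the MES, invalidates its icache and asserts pipe0 reset.
 */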
static void mes_v10_1_enable(struct amdgpu_device *adev, bool enable)
{
	uint32_t data = 0;

	if (enable) {
		data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);

		/* set ucode start address */
		WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
			     (uint32_t)(adev->mes.uc_start_addr) >> 2);

		/* clear BYPASS_UNCACHED to avoid hangs after interrupt. */
		data = RREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL);
		data = REG_SET_FIELD(data, CP_MES_DC_OP_CNTL,
				     BYPASS_UNCACHED, 0);
		WREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL, data);

		/* unhalt MES and activate pipe0 */
		data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
		WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
	} else {
		data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL,
				     MES_INVALIDATE_ICACHE, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
		WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
	}
}

/* This function is for backdoor loading of the MES firmware */
static int mes_v10_1_load_microcode(struct amdgpu_device *adev)
{
	int r;
	uint32_t data;

	if (!adev->mes.fw)
		return -EINVAL;

	r = mes_v10_1_allocate_ucode_buffer(adev);
	if (r)
		return r;

	r = mes_v10_1_allocate_ucode_data_buffer(adev);
	if (r) {
		mes_v10_1_free_ucode_buffers(adev);
		return r;
	}

	mes_v10_1_enable(adev, false);

	WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_CNTL, 0);

	mutex_lock(&adev->srbm_mutex);
	/* me=3, pipe=0, queue=0 */
	nv_grbm_select(adev, 3, 0, 0, 0);

	/* set ucode start address */
	WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
		     (uint32_t)(adev->mes.uc_start_addr) >> 2);

	/* set ucode firmware address */
	WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_LO,
		     lower_32_bits(adev->mes.ucode_fw_gpu_addr));
	WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_HI,
		     upper_32_bits(adev->mes.ucode_fw_gpu_addr));

	/* set ucode instruction cache boundary to 2M-1 */
	WREG32_SOC15(GC, 0, mmCP_MES_MIBOUND_LO, 0x1FFFFF);

	/* set ucode data firmware address */
	WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_LO,
		     lower_32_bits(adev->mes.data_fw_gpu_addr));
	WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_HI,
		     upper_32_bits(adev->mes.data_fw_gpu_addr));

	/* set CP_MES_MDBOUND_LO to 0x3FFFF (256K-1) */
	WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF);

	/* invalidate ICACHE */
	switch (adev->asic_type) {
	case CHIP_SIENNA_CICHLID:
		data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
		break;
	default:
		data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
		break;
	}
	data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
	data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	switch (adev->asic_type) {
	case CHIP_SIENNA_CICHLID:
		WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
		break;
	default:
		WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
		break;
	}

	/* prime the ICACHE. */
	switch (adev->asic_type) {
	case CHIP_SIENNA_CICHLID:
		data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
		break;
	default:
		data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
		break;
	}
	data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
	switch (adev->asic_type) {
	case CHIP_SIENNA_CICHLID:
		WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
		break;
	default:
		WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
		break;
	}

	nv_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	return 0;
}

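/* Allocate and zero the end-of-pipe (EOP) buffer for the MES queue. */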
static int mes_v10_1_allocate_eop_buf(struct amdgpu_device *adev)
{
	int r;
	u32 *eop;

	r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->mes.eop_gpu_obj,
				      &adev->mes.eop_gpu_addr,
				      (void **)&eop);
	if (r) {
		dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
		return r;
	}

	memset(eop, 0, adev->mes.eop_gpu_obj->tbo.mem.size);

	amdgpu_bo_kunmap(adev->mes.eop_gpu_obj);
	amdgpu_bo_unreserve(adev->mes.eop_gpu_obj);

	return 0;
}

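/*
 * Reserve two writeback slots: the scheduler context, whose GPU address is
 * handed to the firmware via SET_HW_RSRC, and the query-status fence.
 */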
static int mes_v10_1_allocate_mem_slots(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
	if (r) {
		dev_err(adev->dev,
			"(%d) mes sch_ctx_offs wb alloc failed\n", r);
		return r;
	}
	adev->mes.sch_ctx_gpu_addr =
		adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
	adev->mes.sch_ctx_ptr =
		(uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];

	r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
	if (r) {
		dev_err(adev->dev,
			"(%d) query_status_fence_offs wb alloc failed\n", r);
		return r;
	}
	adev->mes.query_status_fence_gpu_addr =
		adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
	adev->mes.query_status_fence_ptr =
		(uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];

	return 0;
}

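/*
 * Fill in the memory queue descriptor (MQD) for the MES ring. The layout
 * is that of a v10 compute MQD: EOP buffer, doorbell control, MQD and ring
 * base addresses, rptr-report/wptr-poll addresses and queue control bits.
 */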
static int mes_v10_1_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v10_compute_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			    (order_base_2(MES_EOP_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	ring->wptr = 0;
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MIN_IB_AVAIL_SIZE */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	mqd->cp_hqd_ib_control = tmp;

	/* activate the queue */
	mqd->cp_hqd_active = 1;
	return 0;
}

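/*
 * Program the MQD contents into the HQD registers directly, under
 * srbm_mutex with me=3/pipe=0/queue=0 selected, and finally set
 * CP_HQD_ACTIVE to start the queue. This is the register-write fallback
 * for the (currently disabled) KIQ map path below.
 */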
static void mes_v10_1_queue_init_register(struct amdgpu_ring *ring)
{
	struct v10_compute_mqd *mqd = ring->mqd_ptr;
	struct amdgpu_device *adev = ring->adev;
	uint32_t data = 0;

	mutex_lock(&adev->srbm_mutex);
	nv_grbm_select(adev, 3, 0, 0, 0);

	/* set CP_HQD_VMID.VMID = 0. */
	data = RREG32_SOC15(GC, 0, mmCP_HQD_VMID);
	data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, data);

	/* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
	data = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
			     DOORBELL_EN, 0);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);

	/* set CP_MQD_BASE_ADDR/HI with the MQD base address */
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set CP_MQD_CONTROL.VMID=0 */
	data = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
	data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, data);

	/* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		     mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		     mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* set CP_HQD_PQ_CONTROL */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

	/* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
		     mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		     mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* set CP_HQD_PQ_DOORBELL_CONTROL */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
		     mqd->cp_hqd_pq_doorbell_control);

	/* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
		     mqd->cp_hqd_persistent_state);

	/* set CP_HQD_ACTIVE.ACTIVE=1 */
	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

	nv_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

#if 0
static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	int r;

	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
		return -EINVAL;

	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}

	kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);

	r = amdgpu_ring_test_ring(kiq_ring);
	if (r) {
		DRM_ERROR("kiq enable failed\n");
		kiq_ring->sched.ready = false;
	}
	return r;
}
#endif

static int mes_v10_1_queue_init(struct amdgpu_device *adev)
{
	int r;

	r = mes_v10_1_mqd_init(&adev->mes.ring);
	if (r)
		return r;

#if 0
	r = mes_v10_1_kiq_enable_queue(adev);
	if (r)
		return r;
#else
	mes_v10_1_queue_init_register(&adev->mes.ring);
#endif

	return 0;
}

static int mes_v10_1_ring_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->mes.ring;

	ring->funcs = &mes_v10_1_ring_funcs;

	ring->me = 3;
	ring->pipe = 0;
	ring->queue = 0;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = adev->doorbell_index.mes_ring << 1;
	ring->eop_gpu_addr = adev->mes.eop_gpu_addr;
	ring->no_scheduler = true;
	sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
			     AMDGPU_RING_PRIO_DEFAULT);
	if (r)
		return r;

	return 0;
}

static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev)
{
	int r, mqd_size = sizeof(struct v10_compute_mqd);
	struct amdgpu_ring *ring = &adev->mes.ring;

	if (ring->mqd_obj)
		return 0;

	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
				    &ring->mqd_gpu_addr, &ring->mqd_ptr);
	if (r) {
		dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
		return r;
	}

	/* prepare MQD backup */
	adev->mes.mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
	if (!adev->mes.mqd_backup)
		dev_warn(adev->dev,
			 "no memory to create MQD backup for ring %s\n",
			 ring->name);

	return 0;
}

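/*
 * IP block hooks. sw_init pulls in the firmware and allocates the EOP
 * buffer, MQD, ring and writeback slots; sw_fini releases them again.
 */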
static int mes_v10_1_sw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->mes.adev = adev;
	adev->mes.funcs = &mes_v10_1_funcs;

	r = mes_v10_1_init_microcode(adev);
	if (r)
		return r;

	r = mes_v10_1_allocate_eop_buf(adev);
	if (r)
		return r;

	r = mes_v10_1_mqd_sw_init(adev);
	if (r)
		return r;

	r = mes_v10_1_ring_init(adev);
	if (r)
		return r;

	r = mes_v10_1_allocate_mem_slots(adev);
	if (r)
		return r;

	return 0;
}

static int mes_v10_1_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);

	kfree(adev->mes.mqd_backup);

	amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
			      &adev->mes.ring.mqd_gpu_addr,
			      &adev->mes.ring.mqd_ptr);

	amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj,
			      &adev->mes.eop_gpu_addr,
			      NULL);

	mes_v10_1_free_microcode(adev);

	return 0;
}

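/*
 * Bring the MES up: backdoor-load the firmware when direct loading is in
 * use (PSP loads it front-door otherwise), start the MES, program the MES
 * queue, hand over hardware resources, and verify that the scheduler
 * answers a status query.
 */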
static int mes_v10_1_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		r = mes_v10_1_load_microcode(adev);
		if (r) {
			DRM_ERROR("failed to load MES fw, r=%d\n", r);
			return r;
		}
	}

	mes_v10_1_enable(adev, true);

	r = mes_v10_1_queue_init(adev);
	if (r)
		goto failure;

	r = mes_v10_1_set_hw_resources(&adev->mes);
	if (r)
		goto failure;

	r = mes_v10_1_query_sched_status(&adev->mes);
	if (r) {
		DRM_ERROR("MES is busy\n");
		goto failure;
	}

	return 0;

failure:
	mes_v10_1_hw_fini(adev);
	return r;
}

static int mes_v10_1_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	mes_v10_1_enable(adev, false);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
		mes_v10_1_free_ucode_buffers(adev);

	return 0;
}

static int mes_v10_1_suspend(void *handle)
{
	return 0;
}

static int mes_v10_1_resume(void *handle)
{
	return 0;
}

static const struct amd_ip_funcs mes_v10_1_ip_funcs = {
	.name = "mes_v10_1",
	.sw_init = mes_v10_1_sw_init,
	.sw_fini = mes_v10_1_sw_fini,
	.hw_init = mes_v10_1_hw_init,
	.hw_fini = mes_v10_1_hw_fini,
	.suspend = mes_v10_1_suspend,
	.resume = mes_v10_1_resume,
};

const struct amdgpu_ip_block_version mes_v10_1_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_MES,
	.major = 10,
	.minor = 1,
	.rev = 0,
	.funcs = &mes_v10_1_ip_funcs,
};