Loading...
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#include <linux/types.h>
25#include <linux/kernel.h>
26#include <linux/log2.h>
27#include <linux/sched.h>
28#include <linux/slab.h>
29#include <linux/mutex.h>
30#include <linux/device.h>
31
32#include "kfd_pm4_headers.h"
33#include "kfd_pm4_headers_diq.h"
34#include "kfd_kernel_queue.h"
35#include "kfd_priv.h"
36#include "kfd_pm4_opcodes.h"
37#include "cik_regs.h"
38#include "kfd_dbgmgr.h"
39#include "kfd_dbgdev.h"
40#include "kfd_device_queue_manager.h"
41
42static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
43{
44 dev->kfd2kgd->address_watch_disable(dev->kgd);
45}
46
47static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
48 unsigned int pasid, uint64_t vmid0_address,
49 uint32_t *packet_buff, size_t size_in_bytes)
50{
51 struct pm4__release_mem *rm_packet;
52 struct pm4__indirect_buffer_pasid *ib_packet;
53 struct kfd_mem_obj *mem_obj;
54 size_t pq_packets_size_in_bytes;
55 union ULARGE_INTEGER *largep;
56 union ULARGE_INTEGER addr;
57 struct kernel_queue *kq;
58 uint64_t *rm_state;
59 unsigned int *ib_packet_buff;
60 int status;
61
62 if (WARN_ON(!size_in_bytes))
63 return -EINVAL;
64
65 kq = dbgdev->kq;
66
67 pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
68 sizeof(struct pm4__indirect_buffer_pasid);
69
70 /*
71 * We acquire a buffer from DIQ
72 * The receive packet buff will be sitting on the Indirect Buffer
73 * and in the PQ we put the IB packet + sync packet(s).
74 */
75 status = kq_acquire_packet_buffer(kq,
76 pq_packets_size_in_bytes / sizeof(uint32_t),
77 &ib_packet_buff);
78 if (status) {
79 pr_err("kq_acquire_packet_buffer failed\n");
80 return status;
81 }
82
83 memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
84
85 ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
86
87 ib_packet->header.count = 3;
88 ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
89 ib_packet->header.type = PM4_TYPE_3;
90
91 largep = (union ULARGE_INTEGER *) &vmid0_address;
92
93 ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
94 ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
95
96 ib_packet->control = (1 << 23) | (1 << 31) |
97 ((size_in_bytes / 4) & 0xfffff);
98
99 ib_packet->bitfields5.pasid = pasid;
100
101 /*
102 * for now we use release mem for GPU-CPU synchronization
103 * Consider WaitRegMem + WriteData as a better alternative
104 * we get a GART allocations ( gpu/cpu mapping),
105 * for the sync variable, and wait until:
106 * (a) Sync with HW
107 * (b) Sync var is written by CP to mem.
108 */
109 rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
110 (sizeof(struct pm4__indirect_buffer_pasid) /
111 sizeof(unsigned int)));
112
113 status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
114 &mem_obj);
115
116 if (status) {
117 pr_err("Failed to allocate GART memory\n");
118 kq_rollback_packet(kq);
119 return status;
120 }
121
122 rm_state = (uint64_t *) mem_obj->cpu_ptr;
123
124 *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
125
126 rm_packet->header.opcode = IT_RELEASE_MEM;
127 rm_packet->header.type = PM4_TYPE_3;
128 rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;
129
130 rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
131 rm_packet->bitfields2.event_index =
132 event_index___release_mem__end_of_pipe;
133
134 rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
135 rm_packet->bitfields2.atc = 0;
136 rm_packet->bitfields2.tc_wb_action_ena = 1;
137
138 addr.quad_part = mem_obj->gpu_addr;
139
140 rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
141 rm_packet->address_hi = addr.u.high_part;
142
143 rm_packet->bitfields3.data_sel =
144 data_sel___release_mem__send_64_bit_data;
145
146 rm_packet->bitfields3.int_sel =
147 int_sel___release_mem__send_data_after_write_confirm;
148
149 rm_packet->bitfields3.dst_sel =
150 dst_sel___release_mem__memory_controller;
151
152 rm_packet->data_lo = QUEUESTATE__ACTIVE;
153
154 kq_submit_packet(kq);
155
156 /* Wait till CP writes sync code: */
157 status = amdkfd_fence_wait_timeout(
158 (unsigned int *) rm_state,
159 QUEUESTATE__ACTIVE, 1500);
160
161 kfd_gtt_sa_free(dbgdev->dev, mem_obj);
162
163 return status;
164}
165
166static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
167{
168 /*
169 * no action is needed in this case,
170 * just make sure diq will not be used
171 */
172
173 dbgdev->kq = NULL;
174
175 return 0;
176}
177
178static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
179{
180 struct queue_properties properties;
181 unsigned int qid;
182 struct kernel_queue *kq = NULL;
183 int status;
184
185 properties.type = KFD_QUEUE_TYPE_DIQ;
186
187 status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
188 &properties, &qid, NULL);
189
190 if (status) {
191 pr_err("Failed to create DIQ\n");
192 return status;
193 }
194
195 pr_debug("DIQ Created with queue id: %d\n", qid);
196
197 kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
198
199 if (!kq) {
200 pr_err("Error getting DIQ\n");
201 pqm_destroy_queue(dbgdev->pqm, qid);
202 return -EFAULT;
203 }
204
205 dbgdev->kq = kq;
206
207 return status;
208}
209
210static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
211{
212 /* disable watch address */
213 dbgdev_address_watch_disable_nodiq(dbgdev->dev);
214 return 0;
215}
216
217static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
218{
219 /* todo - disable address watch */
220 int status;
221
222 status = pqm_destroy_queue(dbgdev->pqm,
223 dbgdev->kq->queue->properties.queue_id);
224 dbgdev->kq = NULL;
225
226 return status;
227}
228
229static void dbgdev_address_watch_set_registers(
230 const struct dbg_address_watch_info *adw_info,
231 union TCP_WATCH_ADDR_H_BITS *addrHi,
232 union TCP_WATCH_ADDR_L_BITS *addrLo,
233 union TCP_WATCH_CNTL_BITS *cntl,
234 unsigned int index, unsigned int vmid)
235{
236 union ULARGE_INTEGER addr;
237
238 addr.quad_part = 0;
239 addrHi->u32All = 0;
240 addrLo->u32All = 0;
241 cntl->u32All = 0;
242
243 if (adw_info->watch_mask)
244 cntl->bitfields.mask =
245 (uint32_t) (adw_info->watch_mask[index] &
246 ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
247 else
248 cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
249
250 addr.quad_part = (unsigned long long) adw_info->watch_address[index];
251
252 addrHi->bitfields.addr = addr.u.high_part &
253 ADDRESS_WATCH_REG_ADDHIGH_MASK;
254 addrLo->bitfields.addr =
255 (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
256
257 cntl->bitfields.mode = adw_info->watch_mode[index];
258 cntl->bitfields.vmid = (uint32_t) vmid;
259 /* for now assume it is an ATC address */
260 cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
261
262 pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
263 pr_debug("\t\t%20s %08x\n", "set reg add high :",
264 addrHi->bitfields.addr);
265 pr_debug("\t\t%20s %08x\n", "set reg add low :",
266 addrLo->bitfields.addr);
267}
268
269static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
270 struct dbg_address_watch_info *adw_info)
271{
272 union TCP_WATCH_ADDR_H_BITS addrHi;
273 union TCP_WATCH_ADDR_L_BITS addrLo;
274 union TCP_WATCH_CNTL_BITS cntl;
275 struct kfd_process_device *pdd;
276 unsigned int i;
277
278 /* taking the vmid for that process on the safe way using pdd */
279 pdd = kfd_get_process_device_data(dbgdev->dev,
280 adw_info->process);
281 if (!pdd) {
282 pr_err("Failed to get pdd for wave control no DIQ\n");
283 return -EFAULT;
284 }
285
286 addrHi.u32All = 0;
287 addrLo.u32All = 0;
288 cntl.u32All = 0;
289
290 if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
291 (adw_info->num_watch_points == 0)) {
292 pr_err("num_watch_points is invalid\n");
293 return -EINVAL;
294 }
295
296 if (!adw_info->watch_mode || !adw_info->watch_address) {
297 pr_err("adw_info fields are not valid\n");
298 return -EINVAL;
299 }
300
301 for (i = 0; i < adw_info->num_watch_points; i++) {
302 dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
303 &cntl, i, pdd->qpd.vmid);
304
305 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
306 pr_debug("\t\t%20s %08x\n", "register index :", i);
307 pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
308 pr_debug("\t\t%20s %08x\n", "Address Low is :",
309 addrLo.bitfields.addr);
310 pr_debug("\t\t%20s %08x\n", "Address high is :",
311 addrHi.bitfields.addr);
312 pr_debug("\t\t%20s %08x\n", "Address high is :",
313 addrHi.bitfields.addr);
314 pr_debug("\t\t%20s %08x\n", "Control Mask is :",
315 cntl.bitfields.mask);
316 pr_debug("\t\t%20s %08x\n", "Control Mode is :",
317 cntl.bitfields.mode);
318 pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
319 cntl.bitfields.vmid);
320 pr_debug("\t\t%20s %08x\n", "Control atc is :",
321 cntl.bitfields.atc);
322 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
323
324 pdd->dev->kfd2kgd->address_watch_execute(
325 dbgdev->dev->kgd,
326 i,
327 cntl.u32All,
328 addrHi.u32All,
329 addrLo.u32All);
330 }
331
332 return 0;
333}
334
335static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
336 struct dbg_address_watch_info *adw_info)
337{
338 struct pm4__set_config_reg *packets_vec;
339 union TCP_WATCH_ADDR_H_BITS addrHi;
340 union TCP_WATCH_ADDR_L_BITS addrLo;
341 union TCP_WATCH_CNTL_BITS cntl;
342 struct kfd_mem_obj *mem_obj;
343 unsigned int aw_reg_add_dword;
344 uint32_t *packet_buff_uint;
345 unsigned int i;
346 int status;
347 size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
348 /* we do not control the vmid in DIQ mode, just a place holder */
349 unsigned int vmid = 0;
350
351 addrHi.u32All = 0;
352 addrLo.u32All = 0;
353 cntl.u32All = 0;
354
355 if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
356 (adw_info->num_watch_points == 0)) {
357 pr_err("num_watch_points is invalid\n");
358 return -EINVAL;
359 }
360
361 if (!adw_info->watch_mode || !adw_info->watch_address) {
362 pr_err("adw_info fields are not valid\n");
363 return -EINVAL;
364 }
365
366 status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
367
368 if (status) {
369 pr_err("Failed to allocate GART memory\n");
370 return status;
371 }
372
373 packet_buff_uint = mem_obj->cpu_ptr;
374
375 memset(packet_buff_uint, 0, ib_size);
376
377 packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
378
379 packets_vec[0].header.count = 1;
380 packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
381 packets_vec[0].header.type = PM4_TYPE_3;
382 packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
383 packets_vec[0].bitfields2.insert_vmid = 1;
384 packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
385 packets_vec[1].bitfields2.insert_vmid = 0;
386 packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
387 packets_vec[2].bitfields2.insert_vmid = 0;
388 packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
389 packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
390 packets_vec[3].bitfields2.insert_vmid = 1;
391
392 for (i = 0; i < adw_info->num_watch_points; i++) {
393 dbgdev_address_watch_set_registers(adw_info,
394 &addrHi,
395 &addrLo,
396 &cntl,
397 i,
398 vmid);
399
400 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
401 pr_debug("\t\t%20s %08x\n", "register index :", i);
402 pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
403 pr_debug("\t\t%20s %p\n", "Add ptr is :",
404 adw_info->watch_address);
405 pr_debug("\t\t%20s %08llx\n", "Add is :",
406 adw_info->watch_address[i]);
407 pr_debug("\t\t%20s %08x\n", "Address Low is :",
408 addrLo.bitfields.addr);
409 pr_debug("\t\t%20s %08x\n", "Address high is :",
410 addrHi.bitfields.addr);
411 pr_debug("\t\t%20s %08x\n", "Control Mask is :",
412 cntl.bitfields.mask);
413 pr_debug("\t\t%20s %08x\n", "Control Mode is :",
414 cntl.bitfields.mode);
415 pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
416 cntl.bitfields.vmid);
417 pr_debug("\t\t%20s %08x\n", "Control atc is :",
418 cntl.bitfields.atc);
419 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
420
421 aw_reg_add_dword =
422 dbgdev->dev->kfd2kgd->address_watch_get_offset(
423 dbgdev->dev->kgd,
424 i,
425 ADDRESS_WATCH_REG_CNTL);
426
427 packets_vec[0].bitfields2.reg_offset =
428 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
429
430 packets_vec[0].reg_data[0] = cntl.u32All;
431
432 aw_reg_add_dword =
433 dbgdev->dev->kfd2kgd->address_watch_get_offset(
434 dbgdev->dev->kgd,
435 i,
436 ADDRESS_WATCH_REG_ADDR_HI);
437
438 packets_vec[1].bitfields2.reg_offset =
439 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
440 packets_vec[1].reg_data[0] = addrHi.u32All;
441
442 aw_reg_add_dword =
443 dbgdev->dev->kfd2kgd->address_watch_get_offset(
444 dbgdev->dev->kgd,
445 i,
446 ADDRESS_WATCH_REG_ADDR_LO);
447
448 packets_vec[2].bitfields2.reg_offset =
449 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
450 packets_vec[2].reg_data[0] = addrLo.u32All;
451
452 /* enable watch flag if address is not zero*/
453 if (adw_info->watch_address[i] > 0)
454 cntl.bitfields.valid = 1;
455 else
456 cntl.bitfields.valid = 0;
457
458 aw_reg_add_dword =
459 dbgdev->dev->kfd2kgd->address_watch_get_offset(
460 dbgdev->dev->kgd,
461 i,
462 ADDRESS_WATCH_REG_CNTL);
463
464 packets_vec[3].bitfields2.reg_offset =
465 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
466 packets_vec[3].reg_data[0] = cntl.u32All;
467
468 status = dbgdev_diq_submit_ib(
469 dbgdev,
470 adw_info->process->pasid,
471 mem_obj->gpu_addr,
472 packet_buff_uint,
473 ib_size);
474
475 if (status) {
476 pr_err("Failed to submit IB to DIQ\n");
477 break;
478 }
479 }
480
481 kfd_gtt_sa_free(dbgdev->dev, mem_obj);
482 return status;
483}
484
485static int dbgdev_wave_control_set_registers(
486 struct dbg_wave_control_info *wac_info,
487 union SQ_CMD_BITS *in_reg_sq_cmd,
488 union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
489{
490 int status = 0;
491 union SQ_CMD_BITS reg_sq_cmd;
492 union GRBM_GFX_INDEX_BITS reg_gfx_index;
493 struct HsaDbgWaveMsgAMDGen2 *pMsg;
494
495 reg_sq_cmd.u32All = 0;
496 reg_gfx_index.u32All = 0;
497 pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
498
499 switch (wac_info->mode) {
500 /* Send command to single wave */
501 case HSA_DBG_WAVEMODE_SINGLE:
502 /*
503 * Limit access to the process waves only,
504 * by setting vmid check
505 */
506 reg_sq_cmd.bits.check_vmid = 1;
507 reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
508 reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
509 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
510
511 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
512 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
513 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
514
515 break;
516
517 /* Send command to all waves with matching VMID */
518 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
519
520 reg_gfx_index.bits.sh_broadcast_writes = 1;
521 reg_gfx_index.bits.se_broadcast_writes = 1;
522 reg_gfx_index.bits.instance_broadcast_writes = 1;
523
524 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
525
526 break;
527
528 /* Send command to all CU waves with matching VMID */
529 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
530
531 reg_sq_cmd.bits.check_vmid = 1;
532 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
533
534 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
535 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
536 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
537
538 break;
539
540 default:
541 return -EINVAL;
542 }
543
544 switch (wac_info->operand) {
545 case HSA_DBG_WAVEOP_HALT:
546 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
547 break;
548
549 case HSA_DBG_WAVEOP_RESUME:
550 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
551 break;
552
553 case HSA_DBG_WAVEOP_KILL:
554 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
555 break;
556
557 case HSA_DBG_WAVEOP_DEBUG:
558 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
559 break;
560
561 case HSA_DBG_WAVEOP_TRAP:
562 if (wac_info->trapId < MAX_TRAPID) {
563 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
564 reg_sq_cmd.bits.trap_id = wac_info->trapId;
565 } else {
566 status = -EINVAL;
567 }
568 break;
569
570 default:
571 status = -EINVAL;
572 break;
573 }
574
575 if (status == 0) {
576 *in_reg_sq_cmd = reg_sq_cmd;
577 *in_reg_gfx_index = reg_gfx_index;
578 }
579
580 return status;
581}
582
583static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
584 struct dbg_wave_control_info *wac_info)
585{
586
587 int status;
588 union SQ_CMD_BITS reg_sq_cmd;
589 union GRBM_GFX_INDEX_BITS reg_gfx_index;
590 struct kfd_mem_obj *mem_obj;
591 uint32_t *packet_buff_uint;
592 struct pm4__set_config_reg *packets_vec;
593 size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
594
595 reg_sq_cmd.u32All = 0;
596
597 status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd,
598 ®_gfx_index);
599 if (status) {
600 pr_err("Failed to set wave control registers\n");
601 return status;
602 }
603
604 /* we do not control the VMID in DIQ, so reset it to a known value */
605 reg_sq_cmd.bits.vm_id = 0;
606
607 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
608
609 pr_debug("\t\t mode is: %u\n", wac_info->mode);
610 pr_debug("\t\t operand is: %u\n", wac_info->operand);
611 pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
612 pr_debug("\t\t msg value is: %u\n",
613 wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
614 pr_debug("\t\t vmid is: N/A\n");
615
616 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
617 pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
618 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
619 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
620 pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
621 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
622 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
623
624 pr_debug("\t\t ibw is : %u\n",
625 reg_gfx_index.bitfields.instance_broadcast_writes);
626 pr_debug("\t\t ii is : %u\n",
627 reg_gfx_index.bitfields.instance_index);
628 pr_debug("\t\t sebw is : %u\n",
629 reg_gfx_index.bitfields.se_broadcast_writes);
630 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
631 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
632 pr_debug("\t\t sbw is : %u\n",
633 reg_gfx_index.bitfields.sh_broadcast_writes);
634
635 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
636
637 status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
638
639 if (status != 0) {
640 pr_err("Failed to allocate GART memory\n");
641 return status;
642 }
643
644 packet_buff_uint = mem_obj->cpu_ptr;
645
646 memset(packet_buff_uint, 0, ib_size);
647
648 packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;
649 packets_vec[0].header.count = 1;
650 packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
651 packets_vec[0].header.type = PM4_TYPE_3;
652 packets_vec[0].bitfields2.reg_offset =
653 GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
654
655 packets_vec[0].bitfields2.insert_vmid = 0;
656 packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
657
658 packets_vec[1].header.count = 1;
659 packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
660 packets_vec[1].header.type = PM4_TYPE_3;
661 packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;
662
663 packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
664 packets_vec[1].bitfields2.insert_vmid = 1;
665 packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
666
667 /* Restore the GRBM_GFX_INDEX register */
668
669 reg_gfx_index.u32All = 0;
670 reg_gfx_index.bits.sh_broadcast_writes = 1;
671 reg_gfx_index.bits.instance_broadcast_writes = 1;
672 reg_gfx_index.bits.se_broadcast_writes = 1;
673
674
675 packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
676 packets_vec[2].bitfields2.reg_offset =
677 GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
678
679 packets_vec[2].bitfields2.insert_vmid = 0;
680 packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
681
682 status = dbgdev_diq_submit_ib(
683 dbgdev,
684 wac_info->process->pasid,
685 mem_obj->gpu_addr,
686 packet_buff_uint,
687 ib_size);
688
689 if (status)
690 pr_err("Failed to submit IB to DIQ\n");
691
692 kfd_gtt_sa_free(dbgdev->dev, mem_obj);
693
694 return status;
695}
696
697static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
698 struct dbg_wave_control_info *wac_info)
699{
700 int status;
701 union SQ_CMD_BITS reg_sq_cmd;
702 union GRBM_GFX_INDEX_BITS reg_gfx_index;
703 struct kfd_process_device *pdd;
704
705 reg_sq_cmd.u32All = 0;
706
707 /* taking the VMID for that process on the safe way using PDD */
708 pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
709
710 if (!pdd) {
711 pr_err("Failed to get pdd for wave control no DIQ\n");
712 return -EFAULT;
713 }
714 status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd,
715 ®_gfx_index);
716 if (status) {
717 pr_err("Failed to set wave control registers\n");
718 return status;
719 }
720
721 /* for non DIQ we need to patch the VMID: */
722
723 reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
724
725 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
726
727 pr_debug("\t\t mode is: %u\n", wac_info->mode);
728 pr_debug("\t\t operand is: %u\n", wac_info->operand);
729 pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
730 pr_debug("\t\t msg value is: %u\n",
731 wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
732 pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid);
733
734 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
735 pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
736 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
737 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
738 pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
739 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
740 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
741
742 pr_debug("\t\t ibw is : %u\n",
743 reg_gfx_index.bitfields.instance_broadcast_writes);
744 pr_debug("\t\t ii is : %u\n",
745 reg_gfx_index.bitfields.instance_index);
746 pr_debug("\t\t sebw is : %u\n",
747 reg_gfx_index.bitfields.se_broadcast_writes);
748 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
749 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
750 pr_debug("\t\t sbw is : %u\n",
751 reg_gfx_index.bitfields.sh_broadcast_writes);
752
753 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
754
755 return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
756 reg_gfx_index.u32All,
757 reg_sq_cmd.u32All);
758}
759
760int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
761{
762 int status = 0;
763 unsigned int vmid;
764 uint16_t queried_pasid;
765 union SQ_CMD_BITS reg_sq_cmd;
766 union GRBM_GFX_INDEX_BITS reg_gfx_index;
767 struct kfd_process_device *pdd;
768 struct dbg_wave_control_info wac_info;
769 int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
770 int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
771
772 reg_sq_cmd.u32All = 0;
773 status = 0;
774
775 wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
776 wac_info.operand = HSA_DBG_WAVEOP_KILL;
777
778 pr_debug("Killing all process wavefronts\n");
779
780 /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
781 * ATC_VMID15_PASID_MAPPING
782 * to check which VMID the current process is mapped to.
783 */
784
785 for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
786 status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
787 (dev->kgd, vmid, &queried_pasid);
788
789 if (status && queried_pasid == p->pasid) {
790 pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
791 vmid, p->pasid);
792 break;
793 }
794 }
795
796 if (vmid > last_vmid_to_scan) {
797 pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
798 return -EFAULT;
799 }
800
801 /* taking the VMID for that process on the safe way using PDD */
802 pdd = kfd_get_process_device_data(dev, p);
803 if (!pdd)
804 return -EFAULT;
805
806 status = dbgdev_wave_control_set_registers(&wac_info, ®_sq_cmd,
807 ®_gfx_index);
808 if (status != 0)
809 return -EINVAL;
810
811 /* for non DIQ we need to patch the VMID: */
812 reg_sq_cmd.bits.vm_id = vmid;
813
814 dev->kfd2kgd->wave_control_execute(dev->kgd,
815 reg_gfx_index.u32All,
816 reg_sq_cmd.u32All);
817
818 return 0;
819}
820
821void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
822 enum DBGDEV_TYPE type)
823{
824 pdbgdev->dev = pdev;
825 pdbgdev->kq = NULL;
826 pdbgdev->type = type;
827 pdbgdev->pqm = NULL;
828
829 switch (type) {
830 case DBGDEV_TYPE_NODIQ:
831 pdbgdev->dbgdev_register = dbgdev_register_nodiq;
832 pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
833 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
834 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
835 break;
836 case DBGDEV_TYPE_DIQ:
837 default:
838 pdbgdev->dbgdev_register = dbgdev_register_diq;
839 pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
840 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq;
841 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
842 break;
843 }
844
845}
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#include <linux/types.h>
25#include <linux/kernel.h>
26#include <linux/log2.h>
27#include <linux/sched.h>
28#include <linux/slab.h>
29#include <linux/mutex.h>
30#include <linux/device.h>
31
32#include "kfd_pm4_headers.h"
33#include "kfd_pm4_headers_diq.h"
34#include "kfd_kernel_queue.h"
35#include "kfd_priv.h"
36#include "kfd_pm4_opcodes.h"
37#include "cik_regs.h"
38#include "kfd_dbgmgr.h"
39#include "kfd_dbgdev.h"
40#include "kfd_device_queue_manager.h"
41#include "../../radeon/cik_reg.h"
42
43static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
44{
45 BUG_ON(!dev || !dev->kfd2kgd);
46
47 dev->kfd2kgd->address_watch_disable(dev->kgd);
48}
49
50static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
51 unsigned int pasid, uint64_t vmid0_address,
52 uint32_t *packet_buff, size_t size_in_bytes)
53{
54 struct pm4__release_mem *rm_packet;
55 struct pm4__indirect_buffer_pasid *ib_packet;
56 struct kfd_mem_obj *mem_obj;
57 size_t pq_packets_size_in_bytes;
58 union ULARGE_INTEGER *largep;
59 union ULARGE_INTEGER addr;
60 struct kernel_queue *kq;
61 uint64_t *rm_state;
62 unsigned int *ib_packet_buff;
63 int status;
64
65 BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes);
66
67 kq = dbgdev->kq;
68
69 pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
70 sizeof(struct pm4__indirect_buffer_pasid);
71
72 /*
73 * We acquire a buffer from DIQ
74 * The receive packet buff will be sitting on the Indirect Buffer
75 * and in the PQ we put the IB packet + sync packet(s).
76 */
77 status = kq->ops.acquire_packet_buffer(kq,
78 pq_packets_size_in_bytes / sizeof(uint32_t),
79 &ib_packet_buff);
80 if (status != 0) {
81 pr_err("amdkfd: acquire_packet_buffer failed\n");
82 return status;
83 }
84
85 memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
86
87 ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
88
89 ib_packet->header.count = 3;
90 ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
91 ib_packet->header.type = PM4_TYPE_3;
92
93 largep = (union ULARGE_INTEGER *) &vmid0_address;
94
95 ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
96 ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
97
98 ib_packet->control = (1 << 23) | (1 << 31) |
99 ((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
100
101 ib_packet->bitfields5.pasid = pasid;
102
103 /*
104 * for now we use release mem for GPU-CPU synchronization
105 * Consider WaitRegMem + WriteData as a better alternative
106 * we get a GART allocations ( gpu/cpu mapping),
107 * for the sync variable, and wait until:
108 * (a) Sync with HW
109 * (b) Sync var is written by CP to mem.
110 */
111 rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
112 (sizeof(struct pm4__indirect_buffer_pasid) /
113 sizeof(unsigned int)));
114
115 status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
116 &mem_obj);
117
118 if (status != 0) {
119 pr_err("amdkfd: Failed to allocate GART memory\n");
120 kq->ops.rollback_packet(kq);
121 return status;
122 }
123
124 rm_state = (uint64_t *) mem_obj->cpu_ptr;
125
126 *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
127
128 rm_packet->header.opcode = IT_RELEASE_MEM;
129 rm_packet->header.type = PM4_TYPE_3;
130 rm_packet->header.count = sizeof(struct pm4__release_mem) /
131 sizeof(unsigned int) - 2;
132
133 rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
134 rm_packet->bitfields2.event_index =
135 event_index___release_mem__end_of_pipe;
136
137 rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
138 rm_packet->bitfields2.atc = 0;
139 rm_packet->bitfields2.tc_wb_action_ena = 1;
140
141 addr.quad_part = mem_obj->gpu_addr;
142
143 rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
144 rm_packet->address_hi = addr.u.high_part;
145
146 rm_packet->bitfields3.data_sel =
147 data_sel___release_mem__send_64_bit_data;
148
149 rm_packet->bitfields3.int_sel =
150 int_sel___release_mem__send_data_after_write_confirm;
151
152 rm_packet->bitfields3.dst_sel =
153 dst_sel___release_mem__memory_controller;
154
155 rm_packet->data_lo = QUEUESTATE__ACTIVE;
156
157 kq->ops.submit_packet(kq);
158
159 /* Wait till CP writes sync code: */
160 status = amdkfd_fence_wait_timeout(
161 (unsigned int *) rm_state,
162 QUEUESTATE__ACTIVE, 1500);
163
164 kfd_gtt_sa_free(dbgdev->dev, mem_obj);
165
166 return status;
167}
168
169static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
170{
171 BUG_ON(!dbgdev);
172
173 /*
174 * no action is needed in this case,
175 * just make sure diq will not be used
176 */
177
178 dbgdev->kq = NULL;
179
180 return 0;
181}
182
183static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
184{
185 struct queue_properties properties;
186 unsigned int qid;
187 struct kernel_queue *kq = NULL;
188 int status;
189
190 BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev);
191
192 status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
193 &properties, 0, KFD_QUEUE_TYPE_DIQ,
194 &qid);
195
196 if (status) {
197 pr_err("amdkfd: Failed to create DIQ\n");
198 return status;
199 }
200
201 pr_debug("DIQ Created with queue id: %d\n", qid);
202
203 kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
204
205 if (kq == NULL) {
206 pr_err("amdkfd: Error getting DIQ\n");
207 pqm_destroy_queue(dbgdev->pqm, qid);
208 return -EFAULT;
209 }
210
211 dbgdev->kq = kq;
212
213 return status;
214}
215
216static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
217{
218 BUG_ON(!dbgdev || !dbgdev->dev);
219
220 /* disable watch address */
221 dbgdev_address_watch_disable_nodiq(dbgdev->dev);
222 return 0;
223}
224
225static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
226{
227 /* todo - disable address watch */
228 int status;
229
230 BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq);
231
232 status = pqm_destroy_queue(dbgdev->pqm,
233 dbgdev->kq->queue->properties.queue_id);
234 dbgdev->kq = NULL;
235
236 return status;
237}
238
239static void dbgdev_address_watch_set_registers(
240 const struct dbg_address_watch_info *adw_info,
241 union TCP_WATCH_ADDR_H_BITS *addrHi,
242 union TCP_WATCH_ADDR_L_BITS *addrLo,
243 union TCP_WATCH_CNTL_BITS *cntl,
244 unsigned int index, unsigned int vmid)
245{
246 union ULARGE_INTEGER addr;
247
248 BUG_ON(!adw_info || !addrHi || !addrLo || !cntl);
249
250 addr.quad_part = 0;
251 addrHi->u32All = 0;
252 addrLo->u32All = 0;
253 cntl->u32All = 0;
254
255 if (adw_info->watch_mask != NULL)
256 cntl->bitfields.mask =
257 (uint32_t) (adw_info->watch_mask[index] &
258 ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
259 else
260 cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
261
262 addr.quad_part = (unsigned long long) adw_info->watch_address[index];
263
264 addrHi->bitfields.addr = addr.u.high_part &
265 ADDRESS_WATCH_REG_ADDHIGH_MASK;
266 addrLo->bitfields.addr =
267 (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
268
269 cntl->bitfields.mode = adw_info->watch_mode[index];
270 cntl->bitfields.vmid = (uint32_t) vmid;
271 /* for now assume it is an ATC address */
272 cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
273
274 pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
275 pr_debug("\t\t%20s %08x\n", "set reg add high :",
276 addrHi->bitfields.addr);
277 pr_debug("\t\t%20s %08x\n", "set reg add low :",
278 addrLo->bitfields.addr);
279}
280
281static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
282 struct dbg_address_watch_info *adw_info)
283{
284 union TCP_WATCH_ADDR_H_BITS addrHi;
285 union TCP_WATCH_ADDR_L_BITS addrLo;
286 union TCP_WATCH_CNTL_BITS cntl;
287 struct kfd_process_device *pdd;
288 unsigned int i;
289
290 BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
291
292 /* taking the vmid for that process on the safe way using pdd */
293 pdd = kfd_get_process_device_data(dbgdev->dev,
294 adw_info->process);
295 if (!pdd) {
296 pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
297 return -EFAULT;
298 }
299
300 addrHi.u32All = 0;
301 addrLo.u32All = 0;
302 cntl.u32All = 0;
303
304 if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
305 (adw_info->num_watch_points == 0)) {
306 pr_err("amdkfd: num_watch_points is invalid\n");
307 return -EINVAL;
308 }
309
310 if ((adw_info->watch_mode == NULL) ||
311 (adw_info->watch_address == NULL)) {
312 pr_err("amdkfd: adw_info fields are not valid\n");
313 return -EINVAL;
314 }
315
316 for (i = 0 ; i < adw_info->num_watch_points ; i++) {
317 dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
318 &cntl, i, pdd->qpd.vmid);
319
320 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
321 pr_debug("\t\t%20s %08x\n", "register index :", i);
322 pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
323 pr_debug("\t\t%20s %08x\n", "Address Low is :",
324 addrLo.bitfields.addr);
325 pr_debug("\t\t%20s %08x\n", "Address high is :",
326 addrHi.bitfields.addr);
327 pr_debug("\t\t%20s %08x\n", "Address high is :",
328 addrHi.bitfields.addr);
329 pr_debug("\t\t%20s %08x\n", "Control Mask is :",
330 cntl.bitfields.mask);
331 pr_debug("\t\t%20s %08x\n", "Control Mode is :",
332 cntl.bitfields.mode);
333 pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
334 cntl.bitfields.vmid);
335 pr_debug("\t\t%20s %08x\n", "Control atc is :",
336 cntl.bitfields.atc);
337 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
338
339 pdd->dev->kfd2kgd->address_watch_execute(
340 dbgdev->dev->kgd,
341 i,
342 cntl.u32All,
343 addrHi.u32All,
344 addrLo.u32All);
345 }
346
347 return 0;
348}
349
350static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
351 struct dbg_address_watch_info *adw_info)
352{
353 struct pm4__set_config_reg *packets_vec;
354 union TCP_WATCH_ADDR_H_BITS addrHi;
355 union TCP_WATCH_ADDR_L_BITS addrLo;
356 union TCP_WATCH_CNTL_BITS cntl;
357 struct kfd_mem_obj *mem_obj;
358 unsigned int aw_reg_add_dword;
359 uint32_t *packet_buff_uint;
360 unsigned int i;
361 int status;
362 size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
363 /* we do not control the vmid in DIQ mode, just a place holder */
364 unsigned int vmid = 0;
365
366 BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
367
368 addrHi.u32All = 0;
369 addrLo.u32All = 0;
370 cntl.u32All = 0;
371
372 if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
373 (adw_info->num_watch_points == 0)) {
374 pr_err("amdkfd: num_watch_points is invalid\n");
375 return -EINVAL;
376 }
377
378 if ((NULL == adw_info->watch_mode) ||
379 (NULL == adw_info->watch_address)) {
380 pr_err("amdkfd: adw_info fields are not valid\n");
381 return -EINVAL;
382 }
383
384 status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
385
386 if (status != 0) {
387 pr_err("amdkfd: Failed to allocate GART memory\n");
388 return status;
389 }
390
391 packet_buff_uint = mem_obj->cpu_ptr;
392
393 memset(packet_buff_uint, 0, ib_size);
394
395 packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
396
397 packets_vec[0].header.count = 1;
398 packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
399 packets_vec[0].header.type = PM4_TYPE_3;
400 packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
401 packets_vec[0].bitfields2.insert_vmid = 1;
402 packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
403 packets_vec[1].bitfields2.insert_vmid = 0;
404 packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
405 packets_vec[2].bitfields2.insert_vmid = 0;
406 packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
407 packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
408 packets_vec[3].bitfields2.insert_vmid = 1;
409
410 for (i = 0; i < adw_info->num_watch_points; i++) {
411 dbgdev_address_watch_set_registers(adw_info,
412 &addrHi,
413 &addrLo,
414 &cntl,
415 i,
416 vmid);
417
418 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
419 pr_debug("\t\t%20s %08x\n", "register index :", i);
420 pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
421 pr_debug("\t\t%20s %p\n", "Add ptr is :",
422 adw_info->watch_address);
423 pr_debug("\t\t%20s %08llx\n", "Add is :",
424 adw_info->watch_address[i]);
425 pr_debug("\t\t%20s %08x\n", "Address Low is :",
426 addrLo.bitfields.addr);
427 pr_debug("\t\t%20s %08x\n", "Address high is :",
428 addrHi.bitfields.addr);
429 pr_debug("\t\t%20s %08x\n", "Control Mask is :",
430 cntl.bitfields.mask);
431 pr_debug("\t\t%20s %08x\n", "Control Mode is :",
432 cntl.bitfields.mode);
433 pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
434 cntl.bitfields.vmid);
435 pr_debug("\t\t%20s %08x\n", "Control atc is :",
436 cntl.bitfields.atc);
437 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
438
439 aw_reg_add_dword =
440 dbgdev->dev->kfd2kgd->address_watch_get_offset(
441 dbgdev->dev->kgd,
442 i,
443 ADDRESS_WATCH_REG_CNTL);
444
445 aw_reg_add_dword /= sizeof(uint32_t);
446
447 packets_vec[0].bitfields2.reg_offset =
448 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
449
450 packets_vec[0].reg_data[0] = cntl.u32All;
451
452 aw_reg_add_dword =
453 dbgdev->dev->kfd2kgd->address_watch_get_offset(
454 dbgdev->dev->kgd,
455 i,
456 ADDRESS_WATCH_REG_ADDR_HI);
457
458 aw_reg_add_dword /= sizeof(uint32_t);
459
460 packets_vec[1].bitfields2.reg_offset =
461 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
462 packets_vec[1].reg_data[0] = addrHi.u32All;
463
464 aw_reg_add_dword =
465 dbgdev->dev->kfd2kgd->address_watch_get_offset(
466 dbgdev->dev->kgd,
467 i,
468 ADDRESS_WATCH_REG_ADDR_LO);
469
470 aw_reg_add_dword /= sizeof(uint32_t);
471
472 packets_vec[2].bitfields2.reg_offset =
473 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
474 packets_vec[2].reg_data[0] = addrLo.u32All;
475
476 /* enable watch flag if address is not zero*/
477 if (adw_info->watch_address[i] > 0)
478 cntl.bitfields.valid = 1;
479 else
480 cntl.bitfields.valid = 0;
481
482 aw_reg_add_dword =
483 dbgdev->dev->kfd2kgd->address_watch_get_offset(
484 dbgdev->dev->kgd,
485 i,
486 ADDRESS_WATCH_REG_CNTL);
487
488 aw_reg_add_dword /= sizeof(uint32_t);
489
490 packets_vec[3].bitfields2.reg_offset =
491 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
492 packets_vec[3].reg_data[0] = cntl.u32All;
493
494 status = dbgdev_diq_submit_ib(
495 dbgdev,
496 adw_info->process->pasid,
497 mem_obj->gpu_addr,
498 packet_buff_uint,
499 ib_size);
500
501 if (status != 0) {
502 pr_err("amdkfd: Failed to submit IB to DIQ\n");
503 break;
504 }
505 }
506
507 kfd_gtt_sa_free(dbgdev->dev, mem_obj);
508 return status;
509}
510
511static int dbgdev_wave_control_set_registers(
512 struct dbg_wave_control_info *wac_info,
513 union SQ_CMD_BITS *in_reg_sq_cmd,
514 union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
515{
516 int status = 0;
517 union SQ_CMD_BITS reg_sq_cmd;
518 union GRBM_GFX_INDEX_BITS reg_gfx_index;
519 struct HsaDbgWaveMsgAMDGen2 *pMsg;
520
521 BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index);
522
523 reg_sq_cmd.u32All = 0;
524 reg_gfx_index.u32All = 0;
525 pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
526
527 switch (wac_info->mode) {
528 /* Send command to single wave */
529 case HSA_DBG_WAVEMODE_SINGLE:
530 /*
531 * Limit access to the process waves only,
532 * by setting vmid check
533 */
534 reg_sq_cmd.bits.check_vmid = 1;
535 reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
536 reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
537 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
538
539 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
540 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
541 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
542
543 break;
544
545 /* Send command to all waves with matching VMID */
546 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
547
548 reg_gfx_index.bits.sh_broadcast_writes = 1;
549 reg_gfx_index.bits.se_broadcast_writes = 1;
550 reg_gfx_index.bits.instance_broadcast_writes = 1;
551
552 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
553
554 break;
555
556 /* Send command to all CU waves with matching VMID */
557 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
558
559 reg_sq_cmd.bits.check_vmid = 1;
560 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
561
562 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
563 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
564 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
565
566 break;
567
568 default:
569 return -EINVAL;
570 }
571
572 switch (wac_info->operand) {
573 case HSA_DBG_WAVEOP_HALT:
574 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
575 break;
576
577 case HSA_DBG_WAVEOP_RESUME:
578 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
579 break;
580
581 case HSA_DBG_WAVEOP_KILL:
582 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
583 break;
584
585 case HSA_DBG_WAVEOP_DEBUG:
586 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
587 break;
588
589 case HSA_DBG_WAVEOP_TRAP:
590 if (wac_info->trapId < MAX_TRAPID) {
591 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
592 reg_sq_cmd.bits.trap_id = wac_info->trapId;
593 } else {
594 status = -EINVAL;
595 }
596 break;
597
598 default:
599 status = -EINVAL;
600 break;
601 }
602
603 if (status == 0) {
604 *in_reg_sq_cmd = reg_sq_cmd;
605 *in_reg_gfx_index = reg_gfx_index;
606 }
607
608 return status;
609}
610
611static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
612 struct dbg_wave_control_info *wac_info)
613{
614
615 int status;
616 union SQ_CMD_BITS reg_sq_cmd;
617 union GRBM_GFX_INDEX_BITS reg_gfx_index;
618 struct kfd_mem_obj *mem_obj;
619 uint32_t *packet_buff_uint;
620 struct pm4__set_config_reg *packets_vec;
621 size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
622
623 BUG_ON(!dbgdev || !wac_info);
624
625 reg_sq_cmd.u32All = 0;
626
627 status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd,
628 ®_gfx_index);
629 if (status) {
630 pr_err("amdkfd: Failed to set wave control registers\n");
631 return status;
632 }
633
634 /* we do not control the VMID in DIQ,so reset it to a known value */
635 reg_sq_cmd.bits.vm_id = 0;
636
637 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
638
639 pr_debug("\t\t mode is: %u\n", wac_info->mode);
640 pr_debug("\t\t operand is: %u\n", wac_info->operand);
641 pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
642 pr_debug("\t\t msg value is: %u\n",
643 wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
644 pr_debug("\t\t vmid is: N/A\n");
645
646 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
647 pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
648 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
649 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
650 pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
651 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
652 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
653
654 pr_debug("\t\t ibw is : %u\n",
655 reg_gfx_index.bitfields.instance_broadcast_writes);
656 pr_debug("\t\t ii is : %u\n",
657 reg_gfx_index.bitfields.instance_index);
658 pr_debug("\t\t sebw is : %u\n",
659 reg_gfx_index.bitfields.se_broadcast_writes);
660 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
661 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
662 pr_debug("\t\t sbw is : %u\n",
663 reg_gfx_index.bitfields.sh_broadcast_writes);
664
665 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
666
667 status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
668
669 if (status != 0) {
670 pr_err("amdkfd: Failed to allocate GART memory\n");
671 return status;
672 }
673
674 packet_buff_uint = mem_obj->cpu_ptr;
675
676 memset(packet_buff_uint, 0, ib_size);
677
678 packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;
679 packets_vec[0].header.count = 1;
680 packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
681 packets_vec[0].header.type = PM4_TYPE_3;
682 packets_vec[0].bitfields2.reg_offset =
683 GRBM_GFX_INDEX / (sizeof(uint32_t)) -
684 USERCONFIG_REG_BASE;
685
686 packets_vec[0].bitfields2.insert_vmid = 0;
687 packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
688
689 packets_vec[1].header.count = 1;
690 packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
691 packets_vec[1].header.type = PM4_TYPE_3;
692 packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
693 AMD_CONFIG_REG_BASE;
694
695 packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
696 packets_vec[1].bitfields2.insert_vmid = 1;
697 packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
698
699 /* Restore the GRBM_GFX_INDEX register */
700
701 reg_gfx_index.u32All = 0;
702 reg_gfx_index.bits.sh_broadcast_writes = 1;
703 reg_gfx_index.bits.instance_broadcast_writes = 1;
704 reg_gfx_index.bits.se_broadcast_writes = 1;
705
706
707 packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
708 packets_vec[2].bitfields2.reg_offset =
709 GRBM_GFX_INDEX / (sizeof(uint32_t)) -
710 USERCONFIG_REG_BASE;
711
712 packets_vec[2].bitfields2.insert_vmid = 0;
713 packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
714
715 status = dbgdev_diq_submit_ib(
716 dbgdev,
717 wac_info->process->pasid,
718 mem_obj->gpu_addr,
719 packet_buff_uint,
720 ib_size);
721
722 if (status != 0)
723 pr_err("amdkfd: Failed to submit IB to DIQ\n");
724
725 kfd_gtt_sa_free(dbgdev->dev, mem_obj);
726
727 return status;
728}
729
730static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
731 struct dbg_wave_control_info *wac_info)
732{
733 int status;
734 union SQ_CMD_BITS reg_sq_cmd;
735 union GRBM_GFX_INDEX_BITS reg_gfx_index;
736 struct kfd_process_device *pdd;
737
738 BUG_ON(!dbgdev || !dbgdev->dev || !wac_info);
739
740 reg_sq_cmd.u32All = 0;
741
742 /* taking the VMID for that process on the safe way using PDD */
743 pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
744
745 if (!pdd) {
746 pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
747 return -EFAULT;
748 }
749 status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd,
750 ®_gfx_index);
751 if (status) {
752 pr_err("amdkfd: Failed to set wave control registers\n");
753 return status;
754 }
755
756 /* for non DIQ we need to patch the VMID: */
757
758 reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
759
760 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
761
762 pr_debug("\t\t mode is: %u\n", wac_info->mode);
763 pr_debug("\t\t operand is: %u\n", wac_info->operand);
764 pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
765 pr_debug("\t\t msg value is: %u\n",
766 wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
767 pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid);
768
769 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
770 pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
771 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
772 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
773 pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
774 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
775 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
776
777 pr_debug("\t\t ibw is : %u\n",
778 reg_gfx_index.bitfields.instance_broadcast_writes);
779 pr_debug("\t\t ii is : %u\n",
780 reg_gfx_index.bitfields.instance_index);
781 pr_debug("\t\t sebw is : %u\n",
782 reg_gfx_index.bitfields.se_broadcast_writes);
783 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
784 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
785 pr_debug("\t\t sbw is : %u\n",
786 reg_gfx_index.bitfields.sh_broadcast_writes);
787
788 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
789
790 return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
791 reg_gfx_index.u32All,
792 reg_sq_cmd.u32All);
793}
794
795int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
796{
797 int status = 0;
798 unsigned int vmid;
799 union SQ_CMD_BITS reg_sq_cmd;
800 union GRBM_GFX_INDEX_BITS reg_gfx_index;
801 struct kfd_process_device *pdd;
802 struct dbg_wave_control_info wac_info;
803 int temp;
804 int first_vmid_to_scan = 8;
805 int last_vmid_to_scan = 15;
806
807 first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1;
808 temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan;
809 last_vmid_to_scan = first_vmid_to_scan + ffz(temp);
810
811 reg_sq_cmd.u32All = 0;
812 status = 0;
813
814 wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
815 wac_info.operand = HSA_DBG_WAVEOP_KILL;
816
817 pr_debug("Killing all process wavefronts\n");
818
819 /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
820 * ATC_VMID15_PASID_MAPPING
821 * to check which VMID the current process is mapped to. */
822
823 for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
824 if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
825 (dev->kgd, vmid)) {
826 if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
827 (dev->kgd, vmid) == p->pasid) {
828 pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
829 vmid, p->pasid);
830 break;
831 }
832 }
833 }
834
835 if (vmid > last_vmid_to_scan) {
836 pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid);
837 return -EFAULT;
838 }
839
840 /* taking the VMID for that process on the safe way using PDD */
841 pdd = kfd_get_process_device_data(dev, p);
842 if (!pdd)
843 return -EFAULT;
844
845 status = dbgdev_wave_control_set_registers(&wac_info, ®_sq_cmd,
846 ®_gfx_index);
847 if (status != 0)
848 return -EINVAL;
849
850 /* for non DIQ we need to patch the VMID: */
851 reg_sq_cmd.bits.vm_id = vmid;
852
853 dev->kfd2kgd->wave_control_execute(dev->kgd,
854 reg_gfx_index.u32All,
855 reg_sq_cmd.u32All);
856
857 return 0;
858}
859
860void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
861 enum DBGDEV_TYPE type)
862{
863 BUG_ON(!pdbgdev || !pdev);
864
865 pdbgdev->dev = pdev;
866 pdbgdev->kq = NULL;
867 pdbgdev->type = type;
868 pdbgdev->pqm = NULL;
869
870 switch (type) {
871 case DBGDEV_TYPE_NODIQ:
872 pdbgdev->dbgdev_register = dbgdev_register_nodiq;
873 pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
874 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
875 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
876 break;
877 case DBGDEV_TYPE_DIQ:
878 default:
879 pdbgdev->dbgdev_register = dbgdev_register_diq;
880 pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
881 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq;
882 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
883 break;
884 }
885
886}