Linux Audio

Check our new training course

Loading...
v5.9
  1/*
  2 * Copyright 2014 Advanced Micro Devices, Inc.
  3 *
  4 * Permission is hereby granted, free of charge, to any person obtaining a
  5 * copy of this software and associated documentation files (the "Software"),
  6 * to deal in the Software without restriction, including without limitation
  7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 * and/or sell copies of the Software, and to permit persons to whom the
  9 * Software is furnished to do so, subject to the following conditions:
 10 *
 11 * The above copyright notice and this permission notice shall be included in
 12 * all copies or substantial portions of the Software.
 13 *
 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 20 * OTHER DEALINGS IN THE SOFTWARE.
 21 *
 22 */
 23
 24#include <linux/types.h>
 25#include <linux/kernel.h>
 26#include <linux/log2.h>
 27#include <linux/sched.h>
 28#include <linux/slab.h>
 29#include <linux/mutex.h>
 30#include <linux/device.h>
 31
 32#include "kfd_pm4_headers.h"
 33#include "kfd_pm4_headers_diq.h"
 34#include "kfd_kernel_queue.h"
 35#include "kfd_priv.h"
 36#include "kfd_pm4_opcodes.h"
 37#include "cik_regs.h"
 38#include "kfd_dbgmgr.h"
 39#include "kfd_dbgdev.h"
 40#include "kfd_device_queue_manager.h"
 
 41
/* Disable all TCP address-watch points on @dev via the kfd2kgd interface. */
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{
	dev->kfd2kgd->address_watch_disable(dev->kgd);
}
 46
/*
 * dbgdev_diq_submit_ib - submit an indirect buffer (IB) through the DIQ
 * and synchronously wait for the CP to consume it.
 * @dbgdev:        debug device whose DIQ kernel queue is used
 * @pasid:         PASID of the debugged process, stamped into the IB packet
 * @vmid0_address: GPU virtual address from which the CP fetches the IB
 * @packet_buff:   CPU pointer to the IB contents (not dereferenced here;
 *                 the GPU fetches through @vmid0_address)
 * @size_in_bytes: IB size in bytes; must be non-zero
 *
 * Places two packets on the DIQ: an INDIRECT_BUFFER_PASID packet pointing
 * at the caller's IB, followed by a RELEASE_MEM packet that writes a sync
 * word into a GART allocation, which the CPU then polls for completion.
 *
 * Return: 0 on success, negative errno on failure or fence timeout.
 */
static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
				unsigned int pasid, uint64_t vmid0_address,
				uint32_t *packet_buff, size_t size_in_bytes)
{
	struct pm4__release_mem *rm_packet;
	struct pm4__indirect_buffer_pasid *ib_packet;
	struct kfd_mem_obj *mem_obj;
	size_t pq_packets_size_in_bytes;
	union ULARGE_INTEGER *largep;
	union ULARGE_INTEGER addr;
	struct kernel_queue *kq;
	uint64_t *rm_state;
	unsigned int *ib_packet_buff;
	int status;

	if (WARN_ON(!size_in_bytes))
		return -EINVAL;

	kq = dbgdev->kq;

	/* Room on the DIQ for both the IB packet and the sync packet */
	pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
				sizeof(struct pm4__indirect_buffer_pasid);

	/*
	 * We acquire a buffer from DIQ
	 * The receive packet buff will be sitting on the Indirect Buffer
	 * and in the PQ we put the IB packet + sync packet(s).
	 */
	status = kq_acquire_packet_buffer(kq,
				pq_packets_size_in_bytes / sizeof(uint32_t),
				&ib_packet_buff);
	if (status) {
		pr_err("kq_acquire_packet_buffer failed\n");
		return status;
	}

	memset(ib_packet_buff, 0, pq_packets_size_in_bytes);

	ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);

	ib_packet->header.count = 3;
	ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
	ib_packet->header.type = PM4_TYPE_3;

	/* Split the 64-bit IB address into the packet's lo/hi fields */
	largep = (union ULARGE_INTEGER *) &vmid0_address;

	/* low 32 bits are stored dword-aligned (bits [1:0] dropped) */
	ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;

	/*
	 * NOTE(review): bits 23 and 31 are control flags from the
	 * INDIRECT_BUFFER_PASID packet layout (confirm against the PM4
	 * spec); the low 20 bits carry the IB size in dwords.
	 */
	ib_packet->control = (1 << 23) | (1 << 31) |
			((size_in_bytes / 4) & 0xfffff);

	ib_packet->bitfields5.pasid = pasid;

	/*
	 * for now we use release mem for GPU-CPU synchronization
	 * Consider WaitRegMem + WriteData as a better alternative
	 * we get a GART allocations ( gpu/cpu mapping),
	 * for the sync variable, and wait until:
	 * (a) Sync with HW
	 * (b) Sync var is written by CP to mem.
	 */
	rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
			(sizeof(struct pm4__indirect_buffer_pasid) /
					sizeof(unsigned int)));

	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
					&mem_obj);

	if (status) {
		pr_err("Failed to allocate GART memory\n");
		/* Return the unsubmitted DIQ space we acquired above */
		kq_rollback_packet(kq);
		return status;
	}

	rm_state = (uint64_t *) mem_obj->cpu_ptr;

	/* Pre-set the sync word; the CP overwrites it with QUEUESTATE__ACTIVE */
	*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;

	rm_packet->header.opcode = IT_RELEASE_MEM;
	rm_packet->header.type = PM4_TYPE_3;
	rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;

	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	rm_packet->bitfields2.event_index =
				event_index___release_mem__end_of_pipe;

	rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	rm_packet->bitfields2.atc = 0;
	rm_packet->bitfields2.tc_wb_action_ena = 1;

	/* Destination of the sync write: lo is dword-aligned, as above */
	addr.quad_part = mem_obj->gpu_addr;

	rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
	rm_packet->address_hi = addr.u.high_part;

	rm_packet->bitfields3.data_sel =
				data_sel___release_mem__send_64_bit_data;

	rm_packet->bitfields3.int_sel =
			int_sel___release_mem__send_data_after_write_confirm;

	rm_packet->bitfields3.dst_sel =
			dst_sel___release_mem__memory_controller;

	rm_packet->data_lo = QUEUESTATE__ACTIVE;

	kq_submit_packet(kq);

	/* Wait till CP writes sync code: */
	/* NOTE(review): 1500 is presumably a millisecond timeout — confirm
	 * against amdkfd_fence_wait_timeout().
	 */
	status = amdkfd_fence_wait_timeout(
			(unsigned int *) rm_state,
			QUEUESTATE__ACTIVE, 1500);

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
165
/*
 * dbgdev_register_nodiq - register a debug device in no-DIQ mode.
 *
 * Return: always 0.
 */
static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
{
	/*
	 * no action is needed in this case,
	 * just make sure diq will not be used
	 */

	dbgdev->kq = NULL;

	return 0;
}
177
178static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
179{
180	struct queue_properties properties;
181	unsigned int qid;
182	struct kernel_queue *kq = NULL;
183	int status;
184
185	properties.type = KFD_QUEUE_TYPE_DIQ;
186
187	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
188				&properties, &qid, NULL);
189
190	if (status) {
191		pr_err("Failed to create DIQ\n");
192		return status;
193	}
194
195	pr_debug("DIQ Created with queue id: %d\n", qid);
196
197	kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
198
199	if (!kq) {
200		pr_err("Error getting DIQ\n");
201		pqm_destroy_queue(dbgdev->pqm, qid);
202		return -EFAULT;
203	}
204
205	dbgdev->kq = kq;
206
207	return status;
208}
209
/*
 * dbgdev_unregister_nodiq - unregister a no-DIQ debug device by clearing
 * any address-watch points it may have programmed.
 *
 * Return: always 0.
 */
static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
{
	/* disable watch address */
	dbgdev_address_watch_disable_nodiq(dbgdev->dev);
	return 0;
}
216
/*
 * dbgdev_unregister_diq - destroy the DIQ created at registration time.
 *
 * Return: result of pqm_destroy_queue() (0 on success, negative errno).
 */
static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
{
	/* todo - disable address watch */
	int status;

	status = pqm_destroy_queue(dbgdev->pqm,
			dbgdev->kq->queue->properties.queue_id);
	dbgdev->kq = NULL;

	return status;
}
228
/*
 * dbgdev_address_watch_set_registers - compose the TCP watch register
 * values (CNTL, ADDR_HI, ADDR_LO) for one watch point.
 * @adw_info: caller-supplied watch addresses/modes/masks
 * @addrHi:   output: high address register value
 * @addrLo:   output: low address register value
 * @cntl:     output: control register value
 * @index:    which watch point of @adw_info to encode
 * @vmid:     VMID stamped into the control register
 *
 * Only computes the register images; programming them into hardware is
 * done by the caller.
 */
static void dbgdev_address_watch_set_registers(
			const struct dbg_address_watch_info *adw_info,
			union TCP_WATCH_ADDR_H_BITS *addrHi,
			union TCP_WATCH_ADDR_L_BITS *addrLo,
			union TCP_WATCH_CNTL_BITS *cntl,
			unsigned int index, unsigned int vmid)
{
	union ULARGE_INTEGER addr;

	addr.quad_part = 0;
	addrHi->u32All = 0;
	addrLo->u32All = 0;
	cntl->u32All = 0;

	/* watch_mask is optional; fall back to the default mask */
	if (adw_info->watch_mask)
		cntl->bitfields.mask =
			(uint32_t) (adw_info->watch_mask[index] &
					ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
	else
		cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;

	addr.quad_part = (unsigned long long) adw_info->watch_address[index];

	/* Split the 64-bit watch address across the two address registers */
	addrHi->bitfields.addr = addr.u.high_part &
					ADDRESS_WATCH_REG_ADDHIGH_MASK;
	addrLo->bitfields.addr =
			(addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);

	cntl->bitfields.mode = adw_info->watch_mode[index];
	cntl->bitfields.vmid = (uint32_t) vmid;
	/* for now assume it is an ATC address */
	cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;

	pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
	pr_debug("\t\t%20s %08x\n", "set reg add high :",
			addrHi->bitfields.addr);
	pr_debug("\t\t%20s %08x\n", "set reg add low :",
			addrLo->bitfields.addr);
}
268
269static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
270				      struct dbg_address_watch_info *adw_info)
271{
272	union TCP_WATCH_ADDR_H_BITS addrHi;
273	union TCP_WATCH_ADDR_L_BITS addrLo;
274	union TCP_WATCH_CNTL_BITS cntl;
275	struct kfd_process_device *pdd;
276	unsigned int i;
277
278	/* taking the vmid for that process on the safe way using pdd */
279	pdd = kfd_get_process_device_data(dbgdev->dev,
280					adw_info->process);
281	if (!pdd) {
282		pr_err("Failed to get pdd for wave control no DIQ\n");
283		return -EFAULT;
284	}
285
286	addrHi.u32All = 0;
287	addrLo.u32All = 0;
288	cntl.u32All = 0;
289
290	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
291			(adw_info->num_watch_points == 0)) {
292		pr_err("num_watch_points is invalid\n");
293		return -EINVAL;
294	}
295
296	if (!adw_info->watch_mode || !adw_info->watch_address) {
297		pr_err("adw_info fields are not valid\n");
298		return -EINVAL;
299	}
300
301	for (i = 0; i < adw_info->num_watch_points; i++) {
302		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
303						&cntl, i, pdd->qpd.vmid);
304
305		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
306		pr_debug("\t\t%20s %08x\n", "register index :", i);
307		pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
308		pr_debug("\t\t%20s %08x\n", "Address Low is :",
309				addrLo.bitfields.addr);
310		pr_debug("\t\t%20s %08x\n", "Address high is :",
311				addrHi.bitfields.addr);
312		pr_debug("\t\t%20s %08x\n", "Address high is :",
313				addrHi.bitfields.addr);
314		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
315				cntl.bitfields.mask);
316		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
317				cntl.bitfields.mode);
318		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
319				cntl.bitfields.vmid);
320		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
321				cntl.bitfields.atc);
322		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
323
324		pdd->dev->kfd2kgd->address_watch_execute(
325						dbgdev->dev->kgd,
326						i,
327						cntl.u32All,
328						addrHi.u32All,
329						addrLo.u32All);
330	}
331
332	return 0;
333}
334
/*
 * dbgdev_address_watch_diq - program address-watch points through the
 * debug interface queue (DIQ) using SET_CONFIG_REG packets.
 * @dbgdev:   debug device whose DIQ is used
 * @adw_info: watch point descriptions; must contain 1..MAX_WATCH_ADDRESSES
 *            points with valid watch_mode and watch_address arrays
 *
 * For each watch point an IB of four SET_CONFIG_REG packets is built and
 * submitted: [0] CNTL (valid bit clear), [1] ADDR_HI, [2] ADDR_LO,
 * [3] CNTL again with the valid bit set for a non-zero address.  The CP
 * inserts the VMID for packets [0] and [3] (insert_vmid = 1).
 *
 * Return: 0 on success, negative errno on invalid input, GART
 * allocation failure, or IB submission failure.
 */
static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
				    struct dbg_address_watch_info *adw_info)
{
	struct pm4__set_config_reg *packets_vec;
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_mem_obj *mem_obj;
	unsigned int aw_reg_add_dword;
	uint32_t *packet_buff_uint;
	unsigned int i;
	int status;
	/* one SET_CONFIG_REG packet per register write, four per point */
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
	/* we do not control the vmid in DIQ mode, just a place holder */
	unsigned int vmid = 0;

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("num_watch_points is invalid\n");
		return -EINVAL;
	}

	if (!adw_info->watch_mode || !adw_info->watch_address) {
		pr_err("adw_info fields are not valid\n");
		return -EINVAL;
	}

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);

	/*
	 * Pre-fill the packet headers; only reg_offset/reg_data change
	 * per watch point inside the loop below.
	 */
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[0].bitfields2.insert_vmid = 1;
	packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[1].bitfields2.insert_vmid = 0;
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[3].bitfields2.insert_vmid = 1;

	for (i = 0; i < adw_info->num_watch_points; i++) {
		dbgdev_address_watch_set_registers(adw_info,
						&addrHi,
						&addrLo,
						&cntl,
						i,
						vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
		pr_debug("\t\t%20s %p\n", "Add ptr is :",
				adw_info->watch_address);
		pr_debug("\t\t%20s %08llx\n", "Add     is :",
				adw_info->watch_address[i]);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		/* Packet 0: CNTL write (valid bit still clear) */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[0].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;

		packets_vec[0].reg_data[0] = cntl.u32All;

		/* Packet 1: high address word */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_HI);

		packets_vec[1].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[1].reg_data[0] = addrHi.u32All;

		/* Packet 2: low address word */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_LO);

		packets_vec[2].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[2].reg_data[0] = addrLo.u32All;

		/* enable watch flag if address is not zero*/
		if (adw_info->watch_address[i] > 0)
			cntl.bitfields.valid = 1;
		else
			cntl.bitfields.valid = 0;

		/* Packet 3: final CNTL write, now with the valid bit */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[3].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[3].reg_data[0] = cntl.u32All;

		status = dbgdev_diq_submit_ib(
					dbgdev,
					adw_info->process->pasid,
					mem_obj->gpu_addr,
					packet_buff_uint,
					ib_size);

		if (status) {
			pr_err("Failed to submit IB to DIQ\n");
			break;
		}
	}

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
	return status;
}
484
/*
 * dbgdev_wave_control_set_registers - translate a wave-control request
 * into SQ_CMD and GRBM_GFX_INDEX register values.
 * @wac_info:         requested mode, operand, trap id and wave location
 * @in_reg_sq_cmd:    output: SQ_CMD register value
 * @in_reg_gfx_index: output: GRBM_GFX_INDEX register value
 *
 * The outputs are written only when the whole request validates; on
 * error they are left untouched.  The VMID field of SQ_CMD is NOT set
 * here — callers patch it (nodiq) or let the CP insert it (diq).
 *
 * Return: 0 on success, -EINVAL for an unknown mode/operand or an
 * out-of-range trap id.
 */
static int dbgdev_wave_control_set_registers(
				struct dbg_wave_control_info *wac_info,
				union SQ_CMD_BITS *in_reg_sq_cmd,
				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
{
	int status = 0;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct HsaDbgWaveMsgAMDGen2 *pMsg;

	reg_sq_cmd.u32All = 0;
	reg_gfx_index.u32All = 0;
	pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;

	switch (wac_info->mode) {
	/* Send command to single wave */
	case HSA_DBG_WAVEMODE_SINGLE:
		/*
		 * Limit access to the process waves only,
		 * by setting vmid check
		 */
		reg_sq_cmd.bits.check_vmid = 1;
		reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
		reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;

		/* Address the specific SE/SH/CU holding the wave */
		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;

		break;

	/* Send command to all waves with matching VMID */
	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:

		reg_gfx_index.bits.sh_broadcast_writes = 1;
		reg_gfx_index.bits.se_broadcast_writes = 1;
		reg_gfx_index.bits.instance_broadcast_writes = 1;

		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;

		break;

	/* Send command to all CU waves with matching VMID */
	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:

		reg_sq_cmd.bits.check_vmid = 1;
		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;

		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;

		break;

	default:
		return -EINVAL;
	}

	/* Map the requested operation onto an SQ indirect command */
	switch (wac_info->operand) {
	case HSA_DBG_WAVEOP_HALT:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
		break;

	case HSA_DBG_WAVEOP_RESUME:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
		break;

	case HSA_DBG_WAVEOP_KILL:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
		break;

	case HSA_DBG_WAVEOP_DEBUG:
		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
		break;

	case HSA_DBG_WAVEOP_TRAP:
		if (wac_info->trapId < MAX_TRAPID) {
			reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
			reg_sq_cmd.bits.trap_id = wac_info->trapId;
		} else {
			status = -EINVAL;
		}
		break;

	default:
		status = -EINVAL;
		break;
	}

	/* Commit outputs only when the request fully validated */
	if (status == 0) {
		*in_reg_sq_cmd = reg_sq_cmd;
		*in_reg_gfx_index = reg_gfx_index;
	}

	return status;
}
582
/*
 * dbgdev_wave_control_diq - execute a wave-control command through the
 * DIQ by submitting an IB of three register-write packets.
 * @dbgdev:   debug device whose DIQ is used
 * @wac_info: requested wave-control operation
 *
 * IB layout: [0] SET_UCONFIG_REG of GRBM_GFX_INDEX to select the target
 * shader engine/array/CU, [1] SET_CONFIG_REG of SQ_CMD with the CP
 * inserting the VMID, [2] restore GRBM_GFX_INDEX to broadcast mode.
 *
 * Return: 0 on success, negative errno on register composition, GART
 * allocation, or IB submission failure.
 */
static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{

	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_mem_obj *mem_obj;
	uint32_t *packet_buff_uint;
	struct pm4__set_config_reg *packets_vec;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;

	reg_sq_cmd.u32All = 0;

	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* we do not control the VMID in DIQ, so reset it to a known value */
	reg_sq_cmd.bits.vm_id = 0;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid      is: N/A\n");

	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw       is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii        is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw      is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw       is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status != 0) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;

	/* Packet 0: select the target via GRBM_GFX_INDEX (userconfig reg) */
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.reg_offset =
			GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;

	packets_vec[0].bitfields2.insert_vmid = 0;
	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;

	/* Packet 1: issue SQ_CMD with the CP inserting the process VMID */
	packets_vec[1].header.count = 1;
	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[1].header.type = PM4_TYPE_3;
	packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;

	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
	packets_vec[1].bitfields2.insert_vmid = 1;
	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;

	/* Restore the GRBM_GFX_INDEX register */

	/* Packet 2: back to full broadcast so later writes hit everything */
	reg_gfx_index.u32All = 0;
	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;


	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.reg_offset =
				GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;

	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;

	status = dbgdev_diq_submit_ib(
			dbgdev,
			wac_info->process->pasid,
			mem_obj->gpu_addr,
			packet_buff_uint,
			ib_size);

	if (status)
		pr_err("Failed to submit IB to DIQ\n");

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
696
/*
 * dbgdev_wave_control_nodiq - execute a wave-control command directly
 * through the kfd2kgd register interface (no DIQ).
 * @dbgdev:   debug device
 * @wac_info: requested wave-control operation
 *
 * Unlike the DIQ path, the VMID must be patched in here, taken from the
 * process's per-device data.
 *
 * Return: 0 on success, -EFAULT if no pdd exists for the process,
 * negative errno from register composition, or the result of
 * wave_control_execute().
 */
static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;

	reg_sq_cmd.u32All = 0;

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);

	if (!pdd) {
		pr_err("Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}
	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* for non DIQ we need to patch the VMID: */

	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);

	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw       is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii        is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw      is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw       is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
							reg_gfx_index.u32All,
							reg_sq_cmd.u32All);
}
759
/*
 * dbgdev_wave_reset_wavefronts - kill all wavefronts of process @p on @dev.
 * @dev: device to act on
 * @p:   process whose waves are to be killed
 *
 * Scans the KFD-owned VMID range for the VMID currently mapped to @p's
 * PASID, then broadcasts a KILL wave command patched with that VMID.
 *
 * Return: 0 on success, -EFAULT if no VMID mapping or pdd is found,
 * -EINVAL if the wave-control registers could not be composed.
 */
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
{
	int status = 0;
	unsigned int vmid;
	uint16_t queried_pasid;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;
	struct dbg_wave_control_info wac_info;
	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;

	reg_sq_cmd.u32All = 0;
	status = 0;

	wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
	wac_info.operand = HSA_DBG_WAVEOP_KILL;

	pr_debug("Killing all process wavefronts\n");

	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
	 * ATC_VMID15_PASID_MAPPING
	 * to check which VMID the current process is mapped to.
	 */

	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
		/* status here is "mapping valid", not an errno */
		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
				(dev->kgd, vmid, &queried_pasid);

		if (status && queried_pasid == p->pasid) {
			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
					vmid, p->pasid);
			break;

		}
	}

	/* Loop ran to completion without a break: process not mapped */
	if (vmid > last_vmid_to_scan) {
		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
		return -EFAULT;
	}

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd)
		return -EFAULT;

	status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
			&reg_gfx_index);
	if (status != 0)
		return -EINVAL;

	/* for non DIQ we need to patch the VMID: */
	reg_sq_cmd.bits.vm_id = vmid;

	dev->kfd2kgd->wave_control_execute(dev->kgd,
					reg_gfx_index.u32All,
					reg_sq_cmd.u32All);

	return 0;
}
820
821void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
822			enum DBGDEV_TYPE type)
823{
824	pdbgdev->dev = pdev;
825	pdbgdev->kq = NULL;
826	pdbgdev->type = type;
827	pdbgdev->pqm = NULL;
828
829	switch (type) {
830	case DBGDEV_TYPE_NODIQ:
831		pdbgdev->dbgdev_register = dbgdev_register_nodiq;
832		pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
833		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
834		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
835		break;
836	case DBGDEV_TYPE_DIQ:
837	default:
838		pdbgdev->dbgdev_register = dbgdev_register_diq;
839		pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
840		pdbgdev->dbgdev_wave_control =  dbgdev_wave_control_diq;
841		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
842		break;
843	}
844
845}
v4.17
  1/*
  2 * Copyright 2014 Advanced Micro Devices, Inc.
  3 *
  4 * Permission is hereby granted, free of charge, to any person obtaining a
  5 * copy of this software and associated documentation files (the "Software"),
  6 * to deal in the Software without restriction, including without limitation
  7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 * and/or sell copies of the Software, and to permit persons to whom the
  9 * Software is furnished to do so, subject to the following conditions:
 10 *
 11 * The above copyright notice and this permission notice shall be included in
 12 * all copies or substantial portions of the Software.
 13 *
 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 20 * OTHER DEALINGS IN THE SOFTWARE.
 21 *
 22 */
 23
 24#include <linux/types.h>
 25#include <linux/kernel.h>
 26#include <linux/log2.h>
 27#include <linux/sched.h>
 28#include <linux/slab.h>
 29#include <linux/mutex.h>
 30#include <linux/device.h>
 31
 32#include "kfd_pm4_headers.h"
 33#include "kfd_pm4_headers_diq.h"
 34#include "kfd_kernel_queue.h"
 35#include "kfd_priv.h"
 36#include "kfd_pm4_opcodes.h"
 37#include "cik_regs.h"
 38#include "kfd_dbgmgr.h"
 39#include "kfd_dbgdev.h"
 40#include "kfd_device_queue_manager.h"
 41#include "../../radeon/cik_reg.h"
 42
/* Disable all TCP address-watch points on @dev via the kfd2kgd interface. */
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{
	dev->kfd2kgd->address_watch_disable(dev->kgd);
}
 47
/*
 * dbgdev_diq_submit_ib - submit an indirect buffer (IB) through the DIQ
 * and synchronously wait for the CP to consume it.
 * @dbgdev:        debug device whose DIQ kernel queue is used
 * @pasid:         PASID of the debugged process, stamped into the IB packet
 * @vmid0_address: GPU virtual address from which the CP fetches the IB
 * @packet_buff:   CPU pointer to the IB contents (not dereferenced here)
 * @size_in_bytes: IB size in bytes; must be non-zero
 *
 * Places an INDIRECT_BUFFER_PASID packet plus a RELEASE_MEM fence packet
 * on the DIQ; the CPU polls the fence word in GART memory for completion.
 *
 * Return: 0 on success, negative errno on failure or fence timeout.
 */
static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
				unsigned int pasid, uint64_t vmid0_address,
				uint32_t *packet_buff, size_t size_in_bytes)
{
	struct pm4__release_mem *rm_packet;
	struct pm4__indirect_buffer_pasid *ib_packet;
	struct kfd_mem_obj *mem_obj;
	size_t pq_packets_size_in_bytes;
	union ULARGE_INTEGER *largep;
	union ULARGE_INTEGER addr;
	struct kernel_queue *kq;
	uint64_t *rm_state;
	unsigned int *ib_packet_buff;
	int status;

	if (WARN_ON(!size_in_bytes))
		return -EINVAL;

	kq = dbgdev->kq;

	/* Room on the DIQ for both the IB packet and the sync packet */
	pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
				sizeof(struct pm4__indirect_buffer_pasid);

	/*
	 * We acquire a buffer from DIQ
	 * The receive packet buff will be sitting on the Indirect Buffer
	 * and in the PQ we put the IB packet + sync packet(s).
	 */
	status = kq->ops.acquire_packet_buffer(kq,
				pq_packets_size_in_bytes / sizeof(uint32_t),
				&ib_packet_buff);
	if (status) {
		pr_err("acquire_packet_buffer failed\n");
		return status;
	}

	memset(ib_packet_buff, 0, pq_packets_size_in_bytes);

	ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);

	ib_packet->header.count = 3;
	ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
	ib_packet->header.type = PM4_TYPE_3;

	/* Split the 64-bit IB address into the packet's lo/hi fields */
	largep = (union ULARGE_INTEGER *) &vmid0_address;

	/* low 32 bits are stored dword-aligned (bits [1:0] dropped) */
	ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;

	/*
	 * NOTE(review): bits 23 and 31 are control flags from the
	 * INDIRECT_BUFFER_PASID packet layout; low 20 bits carry the IB
	 * size in dwords.
	 */
	ib_packet->control = (1 << 23) | (1 << 31) |
			((size_in_bytes / 4) & 0xfffff);

	ib_packet->bitfields5.pasid = pasid;

	/*
	 * for now we use release mem for GPU-CPU synchronization
	 * Consider WaitRegMem + WriteData as a better alternative
	 * we get a GART allocations ( gpu/cpu mapping),
	 * for the sync variable, and wait until:
	 * (a) Sync with HW
	 * (b) Sync var is written by CP to mem.
	 */
	rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
			(sizeof(struct pm4__indirect_buffer_pasid) /
					sizeof(unsigned int)));

	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
					&mem_obj);

	if (status) {
		pr_err("Failed to allocate GART memory\n");
		/* Return the unsubmitted DIQ space we acquired above */
		kq->ops.rollback_packet(kq);
		return status;
	}

	rm_state = (uint64_t *) mem_obj->cpu_ptr;

	/* Pre-set the sync word; the CP overwrites it with QUEUESTATE__ACTIVE */
	*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;

	rm_packet->header.opcode = IT_RELEASE_MEM;
	rm_packet->header.type = PM4_TYPE_3;
	rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;

	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	rm_packet->bitfields2.event_index =
				event_index___release_mem__end_of_pipe;

	rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	rm_packet->bitfields2.atc = 0;
	rm_packet->bitfields2.tc_wb_action_ena = 1;

	/* Destination of the sync write: lo is dword-aligned, as above */
	addr.quad_part = mem_obj->gpu_addr;

	rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
	rm_packet->address_hi = addr.u.high_part;

	rm_packet->bitfields3.data_sel =
				data_sel___release_mem__send_64_bit_data;

	rm_packet->bitfields3.int_sel =
			int_sel___release_mem__send_data_after_write_confirm;

	rm_packet->bitfields3.dst_sel =
			dst_sel___release_mem__memory_controller;

	rm_packet->data_lo = QUEUESTATE__ACTIVE;

	kq->ops.submit_packet(kq);

	/* Wait till CP writes sync code: */
	/* NOTE(review): 1500 is presumably a millisecond timeout — confirm
	 * against amdkfd_fence_wait_timeout().
	 */
	status = amdkfd_fence_wait_timeout(
			(unsigned int *) rm_state,
			QUEUESTATE__ACTIVE, 1500);

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
166
167static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
168{
169	/*
170	 * no action is needed in this case,
171	 * just make sure diq will not be used
172	 */
173
174	dbgdev->kq = NULL;
175
176	return 0;
177}
178
179static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
180{
181	struct queue_properties properties;
182	unsigned int qid;
183	struct kernel_queue *kq = NULL;
184	int status;
185
186	properties.type = KFD_QUEUE_TYPE_DIQ;
187
188	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
189				&properties, &qid);
190
191	if (status) {
192		pr_err("Failed to create DIQ\n");
193		return status;
194	}
195
196	pr_debug("DIQ Created with queue id: %d\n", qid);
197
198	kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
199
200	if (!kq) {
201		pr_err("Error getting DIQ\n");
202		pqm_destroy_queue(dbgdev->pqm, qid);
203		return -EFAULT;
204	}
205
206	dbgdev->kq = kq;
207
208	return status;
209}
210
211static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
212{
213	/* disable watch address */
214	dbgdev_address_watch_disable_nodiq(dbgdev->dev);
215	return 0;
216}
217
218static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
219{
220	/* todo - disable address watch */
221	int status;
222
223	status = pqm_destroy_queue(dbgdev->pqm,
224			dbgdev->kq->queue->properties.queue_id);
225	dbgdev->kq = NULL;
226
227	return status;
228}
229
230static void dbgdev_address_watch_set_registers(
231			const struct dbg_address_watch_info *adw_info,
232			union TCP_WATCH_ADDR_H_BITS *addrHi,
233			union TCP_WATCH_ADDR_L_BITS *addrLo,
234			union TCP_WATCH_CNTL_BITS *cntl,
235			unsigned int index, unsigned int vmid)
236{
237	union ULARGE_INTEGER addr;
238
239	addr.quad_part = 0;
240	addrHi->u32All = 0;
241	addrLo->u32All = 0;
242	cntl->u32All = 0;
243
244	if (adw_info->watch_mask)
245		cntl->bitfields.mask =
246			(uint32_t) (adw_info->watch_mask[index] &
247					ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
248	else
249		cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
250
251	addr.quad_part = (unsigned long long) adw_info->watch_address[index];
252
253	addrHi->bitfields.addr = addr.u.high_part &
254					ADDRESS_WATCH_REG_ADDHIGH_MASK;
255	addrLo->bitfields.addr =
256			(addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
257
258	cntl->bitfields.mode = adw_info->watch_mode[index];
259	cntl->bitfields.vmid = (uint32_t) vmid;
260	/* for now assume it is an ATC address */
261	cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
262
263	pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
264	pr_debug("\t\t%20s %08x\n", "set reg add high :",
265			addrHi->bitfields.addr);
266	pr_debug("\t\t%20s %08x\n", "set reg add low :",
267			addrLo->bitfields.addr);
268}
269
270static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
271				      struct dbg_address_watch_info *adw_info)
272{
273	union TCP_WATCH_ADDR_H_BITS addrHi;
274	union TCP_WATCH_ADDR_L_BITS addrLo;
275	union TCP_WATCH_CNTL_BITS cntl;
276	struct kfd_process_device *pdd;
277	unsigned int i;
278
279	/* taking the vmid for that process on the safe way using pdd */
280	pdd = kfd_get_process_device_data(dbgdev->dev,
281					adw_info->process);
282	if (!pdd) {
283		pr_err("Failed to get pdd for wave control no DIQ\n");
284		return -EFAULT;
285	}
286
287	addrHi.u32All = 0;
288	addrLo.u32All = 0;
289	cntl.u32All = 0;
290
291	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
292			(adw_info->num_watch_points == 0)) {
293		pr_err("num_watch_points is invalid\n");
294		return -EINVAL;
295	}
296
297	if (!adw_info->watch_mode || !adw_info->watch_address) {
298		pr_err("adw_info fields are not valid\n");
299		return -EINVAL;
300	}
301
302	for (i = 0; i < adw_info->num_watch_points; i++) {
303		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
304						&cntl, i, pdd->qpd.vmid);
305
306		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
307		pr_debug("\t\t%20s %08x\n", "register index :", i);
308		pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
309		pr_debug("\t\t%20s %08x\n", "Address Low is :",
310				addrLo.bitfields.addr);
311		pr_debug("\t\t%20s %08x\n", "Address high is :",
312				addrHi.bitfields.addr);
313		pr_debug("\t\t%20s %08x\n", "Address high is :",
314				addrHi.bitfields.addr);
315		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
316				cntl.bitfields.mask);
317		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
318				cntl.bitfields.mode);
319		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
320				cntl.bitfields.vmid);
321		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
322				cntl.bitfields.atc);
323		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
324
325		pdd->dev->kfd2kgd->address_watch_execute(
326						dbgdev->dev->kgd,
327						i,
328						cntl.u32All,
329						addrHi.u32All,
330						addrLo.u32All);
331	}
332
333	return 0;
334}
335
/*
 * Program address watch points through the Debug Interface Queue.
 *
 * Builds an indirect buffer (IB) of four SET_CONFIG_REG PM4 packets in
 * GART memory and submits it once per watch point:
 *   [0] watch CNTL  (with VMID inserted by the CP, valid bit clear)
 *   [1] watch ADDR_HI
 *   [2] watch ADDR_LO
 *   [3] watch CNTL again, now with the valid bit reflecting whether the
 *       address is non-zero
 * The VMID argument passed to the register builder is a placeholder:
 * in DIQ mode the CP patches the VMID into the CNTL writes
 * (insert_vmid = 1).
 *
 * Returns 0 on success, -EINVAL on malformed @adw_info, or the error
 * from GART allocation / IB submission.
 */
static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
				    struct dbg_address_watch_info *adw_info)
{
	struct pm4__set_config_reg *packets_vec;
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_mem_obj *mem_obj;
	unsigned int aw_reg_add_dword;
	uint32_t *packet_buff_uint;
	unsigned int i;
	int status;
	/* IB holds exactly the four packets described above */
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
	/* we do not control the vmid in DIQ mode, just a place holder */
	unsigned int vmid = 0;

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("num_watch_points is invalid\n");
		return -EINVAL;
	}

	if (!adw_info->watch_mode || !adw_info->watch_address) {
		pr_err("adw_info fields are not valid\n");
		return -EINVAL;
	}

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);

	/*
	 * Pre-build the fixed parts of all four packet headers.  Packets
	 * 0 and 3 (CNTL writes) have the CP insert the VMID at the CNTL
	 * offset; packets 1 and 2 (address writes) do not.
	 */
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[0].bitfields2.insert_vmid = 1;
	packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[1].bitfields2.insert_vmid = 0;
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[3].bitfields2.insert_vmid = 1;

	for (i = 0; i < adw_info->num_watch_points; i++) {
		/* vmid here is a placeholder; the CP inserts the real one */
		dbgdev_address_watch_set_registers(adw_info,
						&addrHi,
						&addrLo,
						&cntl,
						i,
						vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
		pr_debug("\t\t%20s %p\n", "Add ptr is :",
				adw_info->watch_address);
		pr_debug("\t\t%20s %08llx\n", "Add     is :",
				adw_info->watch_address[i]);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		/* packet 0: CNTL (valid bit still clear at this point) */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[0].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;

		packets_vec[0].reg_data[0] = cntl.u32All;

		/* packet 1: high half of the watch address */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_HI);

		packets_vec[1].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[1].reg_data[0] = addrHi.u32All;

		/* packet 2: low half of the watch address */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_LO);

		packets_vec[2].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[2].reg_data[0] = addrLo.u32All;

		/* enable watch flag if address is not zero*/
		if (adw_info->watch_address[i] > 0)
			cntl.bitfields.valid = 1;
		else
			cntl.bitfields.valid = 0;

		/* packet 3: rewrite CNTL with the final valid bit */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[3].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[3].reg_data[0] = cntl.u32All;

		/* submit the four-packet IB for this watch point */
		status = dbgdev_diq_submit_ib(
					dbgdev,
					adw_info->process->pasid,
					mem_obj->gpu_addr,
					packet_buff_uint,
					ib_size);

		if (status) {
			pr_err("Failed to submit IB to DIQ\n");
			break;
		}
	}

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
	return status;
}
485
486static int dbgdev_wave_control_set_registers(
487				struct dbg_wave_control_info *wac_info,
488				union SQ_CMD_BITS *in_reg_sq_cmd,
489				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
490{
491	int status = 0;
492	union SQ_CMD_BITS reg_sq_cmd;
493	union GRBM_GFX_INDEX_BITS reg_gfx_index;
494	struct HsaDbgWaveMsgAMDGen2 *pMsg;
495
496	reg_sq_cmd.u32All = 0;
497	reg_gfx_index.u32All = 0;
498	pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
499
500	switch (wac_info->mode) {
501	/* Send command to single wave */
502	case HSA_DBG_WAVEMODE_SINGLE:
503		/*
504		 * Limit access to the process waves only,
505		 * by setting vmid check
506		 */
507		reg_sq_cmd.bits.check_vmid = 1;
508		reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
509		reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
510		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
511
512		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
513		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
514		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
515
516		break;
517
518	/* Send command to all waves with matching VMID */
519	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
520
521		reg_gfx_index.bits.sh_broadcast_writes = 1;
522		reg_gfx_index.bits.se_broadcast_writes = 1;
523		reg_gfx_index.bits.instance_broadcast_writes = 1;
524
525		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
526
527		break;
528
529	/* Send command to all CU waves with matching VMID */
530	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
531
532		reg_sq_cmd.bits.check_vmid = 1;
533		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
534
535		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
536		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
537		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
538
539		break;
540
541	default:
542		return -EINVAL;
543	}
544
545	switch (wac_info->operand) {
546	case HSA_DBG_WAVEOP_HALT:
547		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
548		break;
549
550	case HSA_DBG_WAVEOP_RESUME:
551		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
552		break;
553
554	case HSA_DBG_WAVEOP_KILL:
555		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
556		break;
557
558	case HSA_DBG_WAVEOP_DEBUG:
559		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
560		break;
561
562	case HSA_DBG_WAVEOP_TRAP:
563		if (wac_info->trapId < MAX_TRAPID) {
564			reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
565			reg_sq_cmd.bits.trap_id = wac_info->trapId;
566		} else {
567			status = -EINVAL;
568		}
569		break;
570
571	default:
572		status = -EINVAL;
573		break;
574	}
575
576	if (status == 0) {
577		*in_reg_sq_cmd = reg_sq_cmd;
578		*in_reg_gfx_index = reg_gfx_index;
579	}
580
581	return status;
582}
583
/*
 * Execute a wave-control command through the Debug Interface Queue.
 *
 * Builds a three-packet indirect buffer in GART memory and submits it
 * with the process' PASID:
 *   [0] SET_UCONFIG_REG  GRBM_GFX_INDEX  - select the target SE/SH/CU
 *   [1] SET_CONFIG_REG   SQ_CMD          - issue the wave command, with
 *                                          the CP inserting the VMID
 *   [2] SET_UCONFIG_REG  GRBM_GFX_INDEX  - restore broadcast mode
 *
 * Returns 0 on success, or the error from register setup, GART
 * allocation, or IB submission.
 */
static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{

	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_mem_obj *mem_obj;
	uint32_t *packet_buff_uint;
	struct pm4__set_config_reg *packets_vec;
	/* IB holds exactly the three packets described above */
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;

	reg_sq_cmd.u32All = 0;

	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* we do not control the VMID in DIQ, so reset it to a known value */
	reg_sq_cmd.bits.vm_id = 0;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid      is: N/A\n");

	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw       is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii        is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw      is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw       is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status != 0) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	/* packet 0: point GRBM_GFX_INDEX at the selected SE/SH/CU */
	packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.reg_offset =
			GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;

	packets_vec[0].bitfields2.insert_vmid = 0;
	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;

	/* packet 1: write SQ_CMD; the CP patches in the VMID */
	packets_vec[1].header.count = 1;
	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[1].header.type = PM4_TYPE_3;
	packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;

	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
	packets_vec[1].bitfields2.insert_vmid = 1;
	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;

	/* Restore the GRBM_GFX_INDEX register */

	reg_gfx_index.u32All = 0;
	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;


	/* packet 2: same header as packet 0, broadcast payload */
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.reg_offset =
				GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;

	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;

	status = dbgdev_diq_submit_ib(
			dbgdev,
			wac_info->process->pasid,
			mem_obj->gpu_addr,
			packet_buff_uint,
			ib_size);

	if (status)
		pr_err("Failed to submit IB to DIQ\n");

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
697
/*
 * Execute a wave-control command directly through the kfd2kgd
 * interface (no DIQ).  Unlike the DIQ path, the VMID must be patched
 * into SQ_CMD here, taken from the process device data.
 *
 * Returns -EFAULT if no pdd exists for the process, an error from
 * register setup, or the result of wave_control_execute().
 */
static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;

	reg_sq_cmd.u32All = 0;

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);

	if (!pdd) {
		pr_err("Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}
	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* for non DIQ we need to patch the VMID: */

	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);

	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw       is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii        is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw      is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw       is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
							reg_gfx_index.u32All,
							reg_sq_cmd.u32All);
}
760
761int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
762{
763	int status = 0;
764	unsigned int vmid;
 
765	union SQ_CMD_BITS reg_sq_cmd;
766	union GRBM_GFX_INDEX_BITS reg_gfx_index;
767	struct kfd_process_device *pdd;
768	struct dbg_wave_control_info wac_info;
769	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
770	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
771
772	reg_sq_cmd.u32All = 0;
773	status = 0;
774
775	wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
776	wac_info.operand = HSA_DBG_WAVEOP_KILL;
777
778	pr_debug("Killing all process wavefronts\n");
779
780	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
781	 * ATC_VMID15_PASID_MAPPING
782	 * to check which VMID the current process is mapped to.
783	 */
784
785	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
786		if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
787				(dev->kgd, vmid)) {
788			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
789					(dev->kgd, vmid) == p->pasid) {
790				pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
791						vmid, p->pasid);
792				break;
793			}
794		}
795	}
796
797	if (vmid > last_vmid_to_scan) {
798		pr_err("Didn't find vmid for pasid %d\n", p->pasid);
799		return -EFAULT;
800	}
801
802	/* taking the VMID for that process on the safe way using PDD */
803	pdd = kfd_get_process_device_data(dev, p);
804	if (!pdd)
805		return -EFAULT;
806
807	status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
808			&reg_gfx_index);
809	if (status != 0)
810		return -EINVAL;
811
812	/* for non DIQ we need to patch the VMID: */
813	reg_sq_cmd.bits.vm_id = vmid;
814
815	dev->kfd2kgd->wave_control_execute(dev->kgd,
816					reg_gfx_index.u32All,
817					reg_sq_cmd.u32All);
818
819	return 0;
820}
821
822void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
823			enum DBGDEV_TYPE type)
824{
825	pdbgdev->dev = pdev;
826	pdbgdev->kq = NULL;
827	pdbgdev->type = type;
828	pdbgdev->pqm = NULL;
829
830	switch (type) {
831	case DBGDEV_TYPE_NODIQ:
832		pdbgdev->dbgdev_register = dbgdev_register_nodiq;
833		pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
834		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
835		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
836		break;
837	case DBGDEV_TYPE_DIQ:
838	default:
839		pdbgdev->dbgdev_register = dbgdev_register_diq;
840		pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
841		pdbgdev->dbgdev_wave_control =  dbgdev_wave_control_diq;
842		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
843		break;
844	}
845
846}