Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
  1/*
  2 * Copyright 2014 Advanced Micro Devices, Inc.
  3 *
  4 * Permission is hereby granted, free of charge, to any person obtaining a
  5 * copy of this software and associated documentation files (the "Software"),
  6 * to deal in the Software without restriction, including without limitation
  7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 * and/or sell copies of the Software, and to permit persons to whom the
  9 * Software is furnished to do so, subject to the following conditions:
 10 *
 11 * The above copyright notice and this permission notice shall be included in
 12 * all copies or substantial portions of the Software.
 13 *
 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 20 * OTHER DEALINGS IN THE SOFTWARE.
 21 *
 22 */
 23
 24#include <linux/types.h>
 25#include <linux/kernel.h>
 26#include <linux/log2.h>
 27#include <linux/sched.h>
 28#include <linux/slab.h>
 29#include <linux/mutex.h>
 30#include <linux/device.h>
 31
 32#include "kfd_pm4_headers.h"
 33#include "kfd_pm4_headers_diq.h"
 34#include "kfd_kernel_queue.h"
 35#include "kfd_priv.h"
 36#include "kfd_pm4_opcodes.h"
 37#include "cik_regs.h"
 38#include "kfd_dbgmgr.h"
 39#include "kfd_dbgdev.h"
 40#include "kfd_device_queue_manager.h"
 41#include "../../radeon/cik_reg.h"
 42
/* Clear all HW address-watch points by calling straight into the kgd layer. */
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{
	dev->kfd2kgd->address_watch_disable(dev->kgd);
}
 47
/*
 * Submit a caller-built PM4 packet buffer to the debug interface queue (DIQ)
 * as an indirect buffer, then block until the CP signals completion.
 *
 * The DIQ primary queue receives two packets: an INDIRECT_BUFFER_PASID packet
 * pointing at the caller's buffer (at GPU VA @vmid0_address, @size_in_bytes
 * long) and a RELEASE_MEM packet used as a CPU-visible fence.
 *
 * NOTE(review): the @packet_buff parameter is never read here — the IB is
 * addressed purely through @vmid0_address; presumably callers pass the CPU
 * mapping of the same buffer. TODO confirm whether the parameter can go.
 *
 * Returns 0 on success, negative errno on allocation/queue failure, or the
 * fence-wait result on timeout.
 */
static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
				unsigned int pasid, uint64_t vmid0_address,
				uint32_t *packet_buff, size_t size_in_bytes)
{
	struct pm4__release_mem *rm_packet;
	struct pm4__indirect_buffer_pasid *ib_packet;
	struct kfd_mem_obj *mem_obj;
	size_t pq_packets_size_in_bytes;
	union ULARGE_INTEGER *largep;
	union ULARGE_INTEGER addr;
	struct kernel_queue *kq;
	uint64_t *rm_state;	/* CPU mapping of the fence variable */
	unsigned int *ib_packet_buff;
	int status;

	if (WARN_ON(!size_in_bytes))
		return -EINVAL;

	kq = dbgdev->kq;

	/* Room in the primary queue for IB packet + sync (RELEASE_MEM) packet */
	pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
				sizeof(struct pm4__indirect_buffer_pasid);

	/*
	 * We acquire a buffer from DIQ
	 * The receive packet buff will be sitting on the Indirect Buffer
	 * and in the PQ we put the IB packet + sync packet(s).
	 */
	status = kq->ops.acquire_packet_buffer(kq,
				pq_packets_size_in_bytes / sizeof(uint32_t),
				&ib_packet_buff);
	if (status) {
		pr_err("acquire_packet_buffer failed\n");
		return status;
	}

	memset(ib_packet_buff, 0, pq_packets_size_in_bytes);

	ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);

	ib_packet->header.count = 3;
	ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
	ib_packet->header.type = PM4_TYPE_3;

	/* Split the 64-bit IB address into the packet's lo/hi fields */
	largep = (union ULARGE_INTEGER *) &vmid0_address;

	/* low part is stored shifted: the IB base must be 4-byte aligned */
	ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;

	/*
	 * IB size in dwords, masked to the 20-bit size field.
	 * NOTE(review): bits 23 and 31 are raw control flags — presumably
	 * valid/privilege bits; TODO confirm against the PM4 IB packet spec.
	 */
	ib_packet->control = (1 << 23) | (1 << 31) |
			((size_in_bytes / 4) & 0xfffff);

	ib_packet->bitfields5.pasid = pasid;

	/*
	 * for now we use release mem for GPU-CPU synchronization
	 * Consider WaitRegMem + WriteData as a better alternative
	 * we get a GART allocations ( gpu/cpu mapping),
	 * for the sync variable, and wait until:
	 * (a) Sync with HW
	 * (b) Sync var is written by CP to mem.
	 */
	rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
			(sizeof(struct pm4__indirect_buffer_pasid) /
					sizeof(unsigned int)));

	/* GART-backed 64-bit fence variable, visible to both CPU and GPU */
	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
					&mem_obj);

	if (status) {
		pr_err("Failed to allocate GART memory\n");
		kq->ops.rollback_packet(kq);
		return status;
	}

	rm_state = (uint64_t *) mem_obj->cpu_ptr;

	/* Pre-set the fence to "pending"; the CP overwrites it on completion */
	*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;

	rm_packet->header.opcode = IT_RELEASE_MEM;
	rm_packet->header.type = PM4_TYPE_3;
	/* PM4 count field: packet dwords minus the 2-dword header convention */
	rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;

	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	rm_packet->bitfields2.event_index =
				event_index___release_mem__end_of_pipe;

	rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	rm_packet->bitfields2.atc = 0;
	rm_packet->bitfields2.tc_wb_action_ena = 1;

	addr.quad_part = mem_obj->gpu_addr;

	/* fence write address, low part stored 4-byte-aligned (>> 2) */
	rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
	rm_packet->address_hi = addr.u.high_part;

	rm_packet->bitfields3.data_sel =
				data_sel___release_mem__send_64_bit_data;

	rm_packet->bitfields3.int_sel =
			int_sel___release_mem__send_data_after_write_confirm;

	rm_packet->bitfields3.dst_sel =
			dst_sel___release_mem__memory_controller;

	/* data_hi stays 0 from the memset; CP writes 64-bit QUEUESTATE__ACTIVE */
	rm_packet->data_lo = QUEUESTATE__ACTIVE;

	kq->ops.submit_packet(kq);

	/* Wait till CP writes sync code: */
	status = amdkfd_fence_wait_timeout(
			(unsigned int *) rm_state,
			QUEUESTATE__ACTIVE, 1500);

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
166
167static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
168{
169	/*
170	 * no action is needed in this case,
171	 * just make sure diq will not be used
172	 */
173
174	dbgdev->kq = NULL;
175
176	return 0;
177}
178
179static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
180{
181	struct queue_properties properties;
182	unsigned int qid;
183	struct kernel_queue *kq = NULL;
184	int status;
185
186	properties.type = KFD_QUEUE_TYPE_DIQ;
187
188	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
189				&properties, &qid);
190
191	if (status) {
192		pr_err("Failed to create DIQ\n");
193		return status;
194	}
195
196	pr_debug("DIQ Created with queue id: %d\n", qid);
197
198	kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
199
200	if (!kq) {
201		pr_err("Error getting DIQ\n");
202		pqm_destroy_queue(dbgdev->pqm, qid);
203		return -EFAULT;
204	}
205
206	dbgdev->kq = kq;
207
208	return status;
209}
210
211static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
212{
213	/* disable watch address */
214	dbgdev_address_watch_disable_nodiq(dbgdev->dev);
215	return 0;
216}
217
218static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
219{
220	/* todo - disable address watch */
221	int status;
222
223	status = pqm_destroy_queue(dbgdev->pqm,
224			dbgdev->kq->queue->properties.queue_id);
225	dbgdev->kq = NULL;
226
227	return status;
228}
229
230static void dbgdev_address_watch_set_registers(
231			const struct dbg_address_watch_info *adw_info,
232			union TCP_WATCH_ADDR_H_BITS *addrHi,
233			union TCP_WATCH_ADDR_L_BITS *addrLo,
234			union TCP_WATCH_CNTL_BITS *cntl,
235			unsigned int index, unsigned int vmid)
236{
237	union ULARGE_INTEGER addr;
238
239	addr.quad_part = 0;
240	addrHi->u32All = 0;
241	addrLo->u32All = 0;
242	cntl->u32All = 0;
243
244	if (adw_info->watch_mask)
245		cntl->bitfields.mask =
246			(uint32_t) (adw_info->watch_mask[index] &
247					ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
248	else
249		cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
250
251	addr.quad_part = (unsigned long long) adw_info->watch_address[index];
252
253	addrHi->bitfields.addr = addr.u.high_part &
254					ADDRESS_WATCH_REG_ADDHIGH_MASK;
255	addrLo->bitfields.addr =
256			(addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
257
258	cntl->bitfields.mode = adw_info->watch_mode[index];
259	cntl->bitfields.vmid = (uint32_t) vmid;
260	/* for now assume it is an ATC address */
261	cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
262
263	pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
264	pr_debug("\t\t%20s %08x\n", "set reg add high :",
265			addrHi->bitfields.addr);
266	pr_debug("\t\t%20s %08x\n", "set reg add low :",
267			addrLo->bitfields.addr);
268}
269
270static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
271				      struct dbg_address_watch_info *adw_info)
272{
273	union TCP_WATCH_ADDR_H_BITS addrHi;
274	union TCP_WATCH_ADDR_L_BITS addrLo;
275	union TCP_WATCH_CNTL_BITS cntl;
276	struct kfd_process_device *pdd;
277	unsigned int i;
278
279	/* taking the vmid for that process on the safe way using pdd */
280	pdd = kfd_get_process_device_data(dbgdev->dev,
281					adw_info->process);
282	if (!pdd) {
283		pr_err("Failed to get pdd for wave control no DIQ\n");
284		return -EFAULT;
285	}
286
287	addrHi.u32All = 0;
288	addrLo.u32All = 0;
289	cntl.u32All = 0;
290
291	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
292			(adw_info->num_watch_points == 0)) {
293		pr_err("num_watch_points is invalid\n");
294		return -EINVAL;
295	}
296
297	if (!adw_info->watch_mode || !adw_info->watch_address) {
298		pr_err("adw_info fields are not valid\n");
299		return -EINVAL;
300	}
301
302	for (i = 0; i < adw_info->num_watch_points; i++) {
303		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
304						&cntl, i, pdd->qpd.vmid);
305
306		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
307		pr_debug("\t\t%20s %08x\n", "register index :", i);
308		pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
309		pr_debug("\t\t%20s %08x\n", "Address Low is :",
310				addrLo.bitfields.addr);
311		pr_debug("\t\t%20s %08x\n", "Address high is :",
312				addrHi.bitfields.addr);
313		pr_debug("\t\t%20s %08x\n", "Address high is :",
314				addrHi.bitfields.addr);
315		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
316				cntl.bitfields.mask);
317		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
318				cntl.bitfields.mode);
319		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
320				cntl.bitfields.vmid);
321		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
322				cntl.bitfields.atc);
323		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
324
325		pdd->dev->kfd2kgd->address_watch_execute(
326						dbgdev->dev->kgd,
327						i,
328						cntl.u32All,
329						addrHi.u32All,
330						addrLo.u32All);
331	}
332
333	return 0;
334}
335
/*
 * Program the HW address-watch registers through the debug interface queue.
 * For each watch point an IB with four SET_CONFIG_REG packets is submitted:
 *   [0] CNTL   — written first with the valid bit clear (HW VMID inserted),
 *   [1] ADDR_HI,
 *   [2] ADDR_LO,
 *   [3] CNTL   — rewritten last with the valid bit set, arming the watch.
 *
 * Returns 0 on success, -EINVAL on malformed @adw_info, or the error from
 * GART allocation / IB submission.
 */
static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
				    struct dbg_address_watch_info *adw_info)
{
	struct pm4__set_config_reg *packets_vec;
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_mem_obj *mem_obj;
	unsigned int aw_reg_add_dword;
	uint32_t *packet_buff_uint;
	unsigned int i;
	int status;
	/* one SET_CONFIG_REG packet per register write, four per watch point */
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
	/* we do not control the vmid in DIQ mode, just a place holder */
	unsigned int vmid = 0;

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("num_watch_points is invalid\n");
		return -EINVAL;
	}

	if (!adw_info->watch_mode || !adw_info->watch_address) {
		pr_err("adw_info fields are not valid\n");
		return -EINVAL;
	}

	/* GART-backed IB, reused for every watch point */
	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);

	/*
	 * Pre-build the packet headers.  Packets 0 and 3 (CNTL writes) let
	 * the CP insert the process's HW VMID at ADDRESS_WATCH_CNTL_OFFSET;
	 * packets 1 and 2 copy the same header dword but disable insertion.
	 */
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[0].bitfields2.insert_vmid = 1;
	packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[1].bitfields2.insert_vmid = 0;
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[3].bitfields2.insert_vmid = 1;

	for (i = 0; i < adw_info->num_watch_points; i++) {
		/* vmid here is the 0 placeholder; HW inserts the real one */
		dbgdev_address_watch_set_registers(adw_info,
						&addrHi,
						&addrLo,
						&cntl,
						i,
						vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
		pr_debug("\t\t%20s %p\n", "Add ptr is :",
				adw_info->watch_address);
		pr_debug("\t\t%20s %08llx\n", "Add     is :",
				adw_info->watch_address[i]);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		/* packet 0: CNTL, still without the valid bit */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[0].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;

		packets_vec[0].reg_data[0] = cntl.u32All;

		/* packet 1: ADDR_HI */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_HI);

		packets_vec[1].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[1].reg_data[0] = addrHi.u32All;

		/* packet 2: ADDR_LO */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_LO);

		packets_vec[2].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[2].reg_data[0] = addrLo.u32All;

		/* enable watch flag if address is not zero*/
		if (adw_info->watch_address[i] > 0)
			cntl.bitfields.valid = 1;
		else
			cntl.bitfields.valid = 0;

		/* packet 3: CNTL again, now (possibly) armed via valid bit */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[3].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[3].reg_data[0] = cntl.u32All;

		status = dbgdev_diq_submit_ib(
					dbgdev,
					adw_info->process->pasid,
					mem_obj->gpu_addr,
					packet_buff_uint,
					ib_size);

		if (status) {
			pr_err("Failed to submit IB to DIQ\n");
			break;
		}
	}

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
	return status;
}
485
486static int dbgdev_wave_control_set_registers(
487				struct dbg_wave_control_info *wac_info,
488				union SQ_CMD_BITS *in_reg_sq_cmd,
489				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
490{
491	int status = 0;
492	union SQ_CMD_BITS reg_sq_cmd;
493	union GRBM_GFX_INDEX_BITS reg_gfx_index;
494	struct HsaDbgWaveMsgAMDGen2 *pMsg;
495
496	reg_sq_cmd.u32All = 0;
497	reg_gfx_index.u32All = 0;
498	pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
499
500	switch (wac_info->mode) {
501	/* Send command to single wave */
502	case HSA_DBG_WAVEMODE_SINGLE:
503		/*
504		 * Limit access to the process waves only,
505		 * by setting vmid check
506		 */
507		reg_sq_cmd.bits.check_vmid = 1;
508		reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
509		reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
510		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
511
512		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
513		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
514		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
515
516		break;
517
518	/* Send command to all waves with matching VMID */
519	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
520
521		reg_gfx_index.bits.sh_broadcast_writes = 1;
522		reg_gfx_index.bits.se_broadcast_writes = 1;
523		reg_gfx_index.bits.instance_broadcast_writes = 1;
524
525		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
526
527		break;
528
529	/* Send command to all CU waves with matching VMID */
530	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
531
532		reg_sq_cmd.bits.check_vmid = 1;
533		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
534
535		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
536		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
537		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
538
539		break;
540
541	default:
542		return -EINVAL;
543	}
544
545	switch (wac_info->operand) {
546	case HSA_DBG_WAVEOP_HALT:
547		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
548		break;
549
550	case HSA_DBG_WAVEOP_RESUME:
551		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
552		break;
553
554	case HSA_DBG_WAVEOP_KILL:
555		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
556		break;
557
558	case HSA_DBG_WAVEOP_DEBUG:
559		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
560		break;
561
562	case HSA_DBG_WAVEOP_TRAP:
563		if (wac_info->trapId < MAX_TRAPID) {
564			reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
565			reg_sq_cmd.bits.trap_id = wac_info->trapId;
566		} else {
567			status = -EINVAL;
568		}
569		break;
570
571	default:
572		status = -EINVAL;
573		break;
574	}
575
576	if (status == 0) {
577		*in_reg_sq_cmd = reg_sq_cmd;
578		*in_reg_gfx_index = reg_gfx_index;
579	}
580
581	return status;
582}
583
/*
 * Execute a wave-control command through the debug interface queue.
 * Builds an IB of three packets:
 *   [0] SET_UCONFIG_REG  GRBM_GFX_INDEX — select the target SE/SH/CU,
 *   [1] SET_CONFIG_REG   SQ_CMD         — issue the command, with the CP
 *                                         inserting the process's HW VMID,
 *   [2] SET_UCONFIG_REG  GRBM_GFX_INDEX — restore full broadcast selection.
 *
 * Returns 0 on success, or the error from register setup, GART allocation,
 * or IB submission.
 */
static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{

	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_mem_obj *mem_obj;
	uint32_t *packet_buff_uint;
	struct pm4__set_config_reg *packets_vec;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;

	reg_sq_cmd.u32All = 0;

	/* reg_gfx_index is an out-param, fully written on success */
	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* we do not control the VMID in DIQ, so reset it to a known value */
	reg_sq_cmd.bits.vm_id = 0;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid      is: N/A\n");

	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw       is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii        is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw      is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw       is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status != 0) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;

	/* packet 0: select target via GRBM_GFX_INDEX (a user-config reg) */
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.reg_offset =
			GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;

	packets_vec[0].bitfields2.insert_vmid = 0;
	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;

	/* packet 1: write SQ_CMD; CP patches in the HW VMID at the offset */
	packets_vec[1].header.count = 1;
	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[1].header.type = PM4_TYPE_3;
	packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;

	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
	packets_vec[1].bitfields2.insert_vmid = 1;
	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;

	/* Restore the GRBM_GFX_INDEX register */

	/* packet 2: reset GRBM_GFX_INDEX to broadcast-to-all */
	reg_gfx_index.u32All = 0;
	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;


	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.reg_offset =
				GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;

	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;

	status = dbgdev_diq_submit_ib(
			dbgdev,
			wac_info->process->pasid,
			mem_obj->gpu_addr,
			packet_buff_uint,
			ib_size);

	if (status)
		pr_err("Failed to submit IB to DIQ\n");

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
697
/*
 * Execute a wave-control command by writing the registers directly through
 * the kgd interface (no DIQ).  Unlike the DIQ path, the VMID must be patched
 * in here, taken from the process's per-device data.
 *
 * Returns -EFAULT if no pdd exists, the register-setup error, or the result
 * of kfd2kgd->wave_control_execute().
 */
static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;

	reg_sq_cmd.u32All = 0;

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);

	if (!pdd) {
		pr_err("Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}
	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* for non DIQ we need to patch the VMID: */

	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);

	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw       is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii        is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw      is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw       is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
							reg_gfx_index.u32All,
							reg_sq_cmd.u32All);
}
760
761int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
762{
763	int status = 0;
764	unsigned int vmid;
765	union SQ_CMD_BITS reg_sq_cmd;
766	union GRBM_GFX_INDEX_BITS reg_gfx_index;
767	struct kfd_process_device *pdd;
768	struct dbg_wave_control_info wac_info;
769	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
770	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
771
772	reg_sq_cmd.u32All = 0;
773	status = 0;
774
775	wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
776	wac_info.operand = HSA_DBG_WAVEOP_KILL;
777
778	pr_debug("Killing all process wavefronts\n");
779
780	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
781	 * ATC_VMID15_PASID_MAPPING
782	 * to check which VMID the current process is mapped to.
783	 */
784
785	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
786		if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
787				(dev->kgd, vmid)) {
788			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
789					(dev->kgd, vmid) == p->pasid) {
790				pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
791						vmid, p->pasid);
792				break;
793			}
794		}
795	}
796
797	if (vmid > last_vmid_to_scan) {
798		pr_err("Didn't find vmid for pasid %d\n", p->pasid);
799		return -EFAULT;
800	}
801
802	/* taking the VMID for that process on the safe way using PDD */
803	pdd = kfd_get_process_device_data(dev, p);
804	if (!pdd)
805		return -EFAULT;
806
807	status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
808			&reg_gfx_index);
809	if (status != 0)
810		return -EINVAL;
811
812	/* for non DIQ we need to patch the VMID: */
813	reg_sq_cmd.bits.vm_id = vmid;
814
815	dev->kfd2kgd->wave_control_execute(dev->kgd,
816					reg_gfx_index.u32All,
817					reg_sq_cmd.u32All);
818
819	return 0;
820}
821
822void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
823			enum DBGDEV_TYPE type)
824{
825	pdbgdev->dev = pdev;
826	pdbgdev->kq = NULL;
827	pdbgdev->type = type;
828	pdbgdev->pqm = NULL;
829
830	switch (type) {
831	case DBGDEV_TYPE_NODIQ:
832		pdbgdev->dbgdev_register = dbgdev_register_nodiq;
833		pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
834		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
835		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
836		break;
837	case DBGDEV_TYPE_DIQ:
838	default:
839		pdbgdev->dbgdev_register = dbgdev_register_diq;
840		pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
841		pdbgdev->dbgdev_wave_control =  dbgdev_wave_control_diq;
842		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
843		break;
844	}
845
846}