Linux Audio

Check our new training course

Loading...
Note: File does not exist in v6.9.4.
  1/*
  2 * Copyright 2014 Advanced Micro Devices, Inc.
  3 *
  4 * Permission is hereby granted, free of charge, to any person obtaining a
  5 * copy of this software and associated documentation files (the "Software"),
  6 * to deal in the Software without restriction, including without limitation
  7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 * and/or sell copies of the Software, and to permit persons to whom the
  9 * Software is furnished to do so, subject to the following conditions:
 10 *
 11 * The above copyright notice and this permission notice shall be included in
 12 * all copies or substantial portions of the Software.
 13 *
 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 20 * OTHER DEALINGS IN THE SOFTWARE.
 21 *
 22 */
 23
 24#include <linux/types.h>
 25#include <linux/kernel.h>
 26#include <linux/log2.h>
 27#include <linux/sched.h>
 28#include <linux/slab.h>
 29#include <linux/mutex.h>
 30#include <linux/device.h>
 31
 32#include "kfd_pm4_headers.h"
 33#include "kfd_pm4_headers_diq.h"
 34#include "kfd_kernel_queue.h"
 35#include "kfd_priv.h"
 36#include "kfd_pm4_opcodes.h"
 37#include "cik_regs.h"
 38#include "kfd_dbgmgr.h"
 39#include "kfd_dbgdev.h"
 40#include "kfd_device_queue_manager.h"
 41#include "../../radeon/cik_reg.h"
 42
 43static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
 44{
 45	BUG_ON(!dev || !dev->kfd2kgd);
 46
 47	dev->kfd2kgd->address_watch_disable(dev->kgd);
 48}
 49
 50static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
 51				unsigned int pasid, uint64_t vmid0_address,
 52				uint32_t *packet_buff, size_t size_in_bytes)
 53{
 54	struct pm4__release_mem *rm_packet;
 55	struct pm4__indirect_buffer_pasid *ib_packet;
 56	struct kfd_mem_obj *mem_obj;
 57	size_t pq_packets_size_in_bytes;
 58	union ULARGE_INTEGER *largep;
 59	union ULARGE_INTEGER addr;
 60	struct kernel_queue *kq;
 61	uint64_t *rm_state;
 62	unsigned int *ib_packet_buff;
 63	int status;
 64
 65	BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes);
 66
 67	kq = dbgdev->kq;
 68
 69	pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
 70				sizeof(struct pm4__indirect_buffer_pasid);
 71
 72	/*
 73	 * We acquire a buffer from DIQ
 74	 * The receive packet buff will be sitting on the Indirect Buffer
 75	 * and in the PQ we put the IB packet + sync packet(s).
 76	 */
 77	status = kq->ops.acquire_packet_buffer(kq,
 78				pq_packets_size_in_bytes / sizeof(uint32_t),
 79				&ib_packet_buff);
 80	if (status != 0) {
 81		pr_err("amdkfd: acquire_packet_buffer failed\n");
 82		return status;
 83	}
 84
 85	memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
 86
 87	ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
 88
 89	ib_packet->header.count = 3;
 90	ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
 91	ib_packet->header.type = PM4_TYPE_3;
 92
 93	largep = (union ULARGE_INTEGER *) &vmid0_address;
 94
 95	ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
 96	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
 97
 98	ib_packet->control = (1 << 23) | (1 << 31) |
 99			((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
100
101	ib_packet->bitfields5.pasid = pasid;
102
103	/*
104	 * for now we use release mem for GPU-CPU synchronization
105	 * Consider WaitRegMem + WriteData as a better alternative
106	 * we get a GART allocations ( gpu/cpu mapping),
107	 * for the sync variable, and wait until:
108	 * (a) Sync with HW
109	 * (b) Sync var is written by CP to mem.
110	 */
111	rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
112			(sizeof(struct pm4__indirect_buffer_pasid) /
113					sizeof(unsigned int)));
114
115	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
116					&mem_obj);
117
118	if (status != 0) {
119		pr_err("amdkfd: Failed to allocate GART memory\n");
120		kq->ops.rollback_packet(kq);
121		return status;
122	}
123
124	rm_state = (uint64_t *) mem_obj->cpu_ptr;
125
126	*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
127
128	rm_packet->header.opcode = IT_RELEASE_MEM;
129	rm_packet->header.type = PM4_TYPE_3;
130	rm_packet->header.count = sizeof(struct pm4__release_mem) /
131					sizeof(unsigned int) - 2;
132
133	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
134	rm_packet->bitfields2.event_index =
135				event_index___release_mem__end_of_pipe;
136
137	rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
138	rm_packet->bitfields2.atc = 0;
139	rm_packet->bitfields2.tc_wb_action_ena = 1;
140
141	addr.quad_part = mem_obj->gpu_addr;
142
143	rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
144	rm_packet->address_hi = addr.u.high_part;
145
146	rm_packet->bitfields3.data_sel =
147				data_sel___release_mem__send_64_bit_data;
148
149	rm_packet->bitfields3.int_sel =
150			int_sel___release_mem__send_data_after_write_confirm;
151
152	rm_packet->bitfields3.dst_sel =
153			dst_sel___release_mem__memory_controller;
154
155	rm_packet->data_lo = QUEUESTATE__ACTIVE;
156
157	kq->ops.submit_packet(kq);
158
159	/* Wait till CP writes sync code: */
160	status = amdkfd_fence_wait_timeout(
161			(unsigned int *) rm_state,
162			QUEUESTATE__ACTIVE, 1500);
163
164	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
165
166	return status;
167}
168
169static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
170{
171	BUG_ON(!dbgdev);
172
173	/*
174	 * no action is needed in this case,
175	 * just make sure diq will not be used
176	 */
177
178	dbgdev->kq = NULL;
179
180	return 0;
181}
182
183static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
184{
185	struct queue_properties properties;
186	unsigned int qid;
187	struct kernel_queue *kq = NULL;
188	int status;
189
190	BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev);
191
192	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
193				&properties, 0, KFD_QUEUE_TYPE_DIQ,
194				&qid);
195
196	if (status) {
197		pr_err("amdkfd: Failed to create DIQ\n");
198		return status;
199	}
200
201	pr_debug("DIQ Created with queue id: %d\n", qid);
202
203	kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
204
205	if (kq == NULL) {
206		pr_err("amdkfd: Error getting DIQ\n");
207		pqm_destroy_queue(dbgdev->pqm, qid);
208		return -EFAULT;
209	}
210
211	dbgdev->kq = kq;
212
213	return status;
214}
215
216static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
217{
218	BUG_ON(!dbgdev || !dbgdev->dev);
219
220	/* disable watch address */
221	dbgdev_address_watch_disable_nodiq(dbgdev->dev);
222	return 0;
223}
224
225static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
226{
227	/* todo - disable address watch */
228	int status;
229
230	BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq);
231
232	status = pqm_destroy_queue(dbgdev->pqm,
233			dbgdev->kq->queue->properties.queue_id);
234	dbgdev->kq = NULL;
235
236	return status;
237}
238
239static void dbgdev_address_watch_set_registers(
240			const struct dbg_address_watch_info *adw_info,
241			union TCP_WATCH_ADDR_H_BITS *addrHi,
242			union TCP_WATCH_ADDR_L_BITS *addrLo,
243			union TCP_WATCH_CNTL_BITS *cntl,
244			unsigned int index, unsigned int vmid)
245{
246	union ULARGE_INTEGER addr;
247
248	BUG_ON(!adw_info || !addrHi || !addrLo || !cntl);
249
250	addr.quad_part = 0;
251	addrHi->u32All = 0;
252	addrLo->u32All = 0;
253	cntl->u32All = 0;
254
255	if (adw_info->watch_mask != NULL)
256		cntl->bitfields.mask =
257			(uint32_t) (adw_info->watch_mask[index] &
258					ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
259	else
260		cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
261
262	addr.quad_part = (unsigned long long) adw_info->watch_address[index];
263
264	addrHi->bitfields.addr = addr.u.high_part &
265					ADDRESS_WATCH_REG_ADDHIGH_MASK;
266	addrLo->bitfields.addr =
267			(addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
268
269	cntl->bitfields.mode = adw_info->watch_mode[index];
270	cntl->bitfields.vmid = (uint32_t) vmid;
271	/* for now assume it is an ATC address */
272	cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
273
274	pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
275	pr_debug("\t\t%20s %08x\n", "set reg add high :",
276			addrHi->bitfields.addr);
277	pr_debug("\t\t%20s %08x\n", "set reg add low :",
278			addrLo->bitfields.addr);
279}
280
281static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
282					struct dbg_address_watch_info *adw_info)
283{
284	union TCP_WATCH_ADDR_H_BITS addrHi;
285	union TCP_WATCH_ADDR_L_BITS addrLo;
286	union TCP_WATCH_CNTL_BITS cntl;
287	struct kfd_process_device *pdd;
288	unsigned int i;
289
290	BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
291
292	/* taking the vmid for that process on the safe way using pdd */
293	pdd = kfd_get_process_device_data(dbgdev->dev,
294					adw_info->process);
295	if (!pdd) {
296		pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
297		return -EFAULT;
298	}
299
300	addrHi.u32All = 0;
301	addrLo.u32All = 0;
302	cntl.u32All = 0;
303
304	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
305			(adw_info->num_watch_points == 0)) {
306		pr_err("amdkfd: num_watch_points is invalid\n");
307		return -EINVAL;
308	}
309
310	if ((adw_info->watch_mode == NULL) ||
311		(adw_info->watch_address == NULL)) {
312		pr_err("amdkfd: adw_info fields are not valid\n");
313		return -EINVAL;
314	}
315
316	for (i = 0 ; i < adw_info->num_watch_points ; i++) {
317		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
318						&cntl, i, pdd->qpd.vmid);
319
320		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
321		pr_debug("\t\t%20s %08x\n", "register index :", i);
322		pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
323		pr_debug("\t\t%20s %08x\n", "Address Low is :",
324				addrLo.bitfields.addr);
325		pr_debug("\t\t%20s %08x\n", "Address high is :",
326				addrHi.bitfields.addr);
327		pr_debug("\t\t%20s %08x\n", "Address high is :",
328				addrHi.bitfields.addr);
329		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
330				cntl.bitfields.mask);
331		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
332				cntl.bitfields.mode);
333		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
334				cntl.bitfields.vmid);
335		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
336				cntl.bitfields.atc);
337		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
338
339		pdd->dev->kfd2kgd->address_watch_execute(
340						dbgdev->dev->kgd,
341						i,
342						cntl.u32All,
343						addrHi.u32All,
344						addrLo.u32All);
345	}
346
347	return 0;
348}
349
350static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
351					struct dbg_address_watch_info *adw_info)
352{
353	struct pm4__set_config_reg *packets_vec;
354	union TCP_WATCH_ADDR_H_BITS addrHi;
355	union TCP_WATCH_ADDR_L_BITS addrLo;
356	union TCP_WATCH_CNTL_BITS cntl;
357	struct kfd_mem_obj *mem_obj;
358	unsigned int aw_reg_add_dword;
359	uint32_t *packet_buff_uint;
360	unsigned int i;
361	int status;
362	size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
363	/* we do not control the vmid in DIQ mode, just a place holder */
364	unsigned int vmid = 0;
365
366	BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
367
368	addrHi.u32All = 0;
369	addrLo.u32All = 0;
370	cntl.u32All = 0;
371
372	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
373			(adw_info->num_watch_points == 0)) {
374		pr_err("amdkfd: num_watch_points is invalid\n");
375		return -EINVAL;
376	}
377
378	if ((NULL == adw_info->watch_mode) ||
379			(NULL == adw_info->watch_address)) {
380		pr_err("amdkfd: adw_info fields are not valid\n");
381		return -EINVAL;
382	}
383
384	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
385
386	if (status != 0) {
387		pr_err("amdkfd: Failed to allocate GART memory\n");
388		return status;
389	}
390
391	packet_buff_uint = mem_obj->cpu_ptr;
392
393	memset(packet_buff_uint, 0, ib_size);
394
395	packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
396
397	packets_vec[0].header.count = 1;
398	packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
399	packets_vec[0].header.type = PM4_TYPE_3;
400	packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
401	packets_vec[0].bitfields2.insert_vmid = 1;
402	packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
403	packets_vec[1].bitfields2.insert_vmid = 0;
404	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
405	packets_vec[2].bitfields2.insert_vmid = 0;
406	packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
407	packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
408	packets_vec[3].bitfields2.insert_vmid = 1;
409
410	for (i = 0; i < adw_info->num_watch_points; i++) {
411		dbgdev_address_watch_set_registers(adw_info,
412						&addrHi,
413						&addrLo,
414						&cntl,
415						i,
416						vmid);
417
418		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
419		pr_debug("\t\t%20s %08x\n", "register index :", i);
420		pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
421		pr_debug("\t\t%20s %p\n", "Add ptr is :",
422				adw_info->watch_address);
423		pr_debug("\t\t%20s %08llx\n", "Add     is :",
424				adw_info->watch_address[i]);
425		pr_debug("\t\t%20s %08x\n", "Address Low is :",
426				addrLo.bitfields.addr);
427		pr_debug("\t\t%20s %08x\n", "Address high is :",
428				addrHi.bitfields.addr);
429		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
430				cntl.bitfields.mask);
431		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
432				cntl.bitfields.mode);
433		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
434				cntl.bitfields.vmid);
435		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
436				cntl.bitfields.atc);
437		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
438
439		aw_reg_add_dword =
440				dbgdev->dev->kfd2kgd->address_watch_get_offset(
441					dbgdev->dev->kgd,
442					i,
443					ADDRESS_WATCH_REG_CNTL);
444
445		aw_reg_add_dword /= sizeof(uint32_t);
446
447		packets_vec[0].bitfields2.reg_offset =
448					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
449
450		packets_vec[0].reg_data[0] = cntl.u32All;
451
452		aw_reg_add_dword =
453				dbgdev->dev->kfd2kgd->address_watch_get_offset(
454					dbgdev->dev->kgd,
455					i,
456					ADDRESS_WATCH_REG_ADDR_HI);
457
458		aw_reg_add_dword /= sizeof(uint32_t);
459
460		packets_vec[1].bitfields2.reg_offset =
461					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
462		packets_vec[1].reg_data[0] = addrHi.u32All;
463
464		aw_reg_add_dword =
465				dbgdev->dev->kfd2kgd->address_watch_get_offset(
466					dbgdev->dev->kgd,
467					i,
468					ADDRESS_WATCH_REG_ADDR_LO);
469
470		aw_reg_add_dword /= sizeof(uint32_t);
471
472		packets_vec[2].bitfields2.reg_offset =
473				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
474		packets_vec[2].reg_data[0] = addrLo.u32All;
475
476		/* enable watch flag if address is not zero*/
477		if (adw_info->watch_address[i] > 0)
478			cntl.bitfields.valid = 1;
479		else
480			cntl.bitfields.valid = 0;
481
482		aw_reg_add_dword =
483				dbgdev->dev->kfd2kgd->address_watch_get_offset(
484					dbgdev->dev->kgd,
485					i,
486					ADDRESS_WATCH_REG_CNTL);
487
488		aw_reg_add_dword /= sizeof(uint32_t);
489
490		packets_vec[3].bitfields2.reg_offset =
491					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
492		packets_vec[3].reg_data[0] = cntl.u32All;
493
494		status = dbgdev_diq_submit_ib(
495					dbgdev,
496					adw_info->process->pasid,
497					mem_obj->gpu_addr,
498					packet_buff_uint,
499					ib_size);
500
501		if (status != 0) {
502			pr_err("amdkfd: Failed to submit IB to DIQ\n");
503			break;
504		}
505	}
506
507	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
508	return status;
509}
510
511static int dbgdev_wave_control_set_registers(
512				struct dbg_wave_control_info *wac_info,
513				union SQ_CMD_BITS *in_reg_sq_cmd,
514				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
515{
516	int status = 0;
517	union SQ_CMD_BITS reg_sq_cmd;
518	union GRBM_GFX_INDEX_BITS reg_gfx_index;
519	struct HsaDbgWaveMsgAMDGen2 *pMsg;
520
521	BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index);
522
523	reg_sq_cmd.u32All = 0;
524	reg_gfx_index.u32All = 0;
525	pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
526
527	switch (wac_info->mode) {
528	/* Send command to single wave */
529	case HSA_DBG_WAVEMODE_SINGLE:
530		/*
531		 * Limit access to the process waves only,
532		 * by setting vmid check
533		 */
534		reg_sq_cmd.bits.check_vmid = 1;
535		reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
536		reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
537		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
538
539		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
540		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
541		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
542
543		break;
544
545	/* Send command to all waves with matching VMID */
546	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
547
548		reg_gfx_index.bits.sh_broadcast_writes = 1;
549		reg_gfx_index.bits.se_broadcast_writes = 1;
550		reg_gfx_index.bits.instance_broadcast_writes = 1;
551
552		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
553
554		break;
555
556	/* Send command to all CU waves with matching VMID */
557	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
558
559		reg_sq_cmd.bits.check_vmid = 1;
560		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
561
562		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
563		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
564		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
565
566		break;
567
568	default:
569		return -EINVAL;
570	}
571
572	switch (wac_info->operand) {
573	case HSA_DBG_WAVEOP_HALT:
574		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
575		break;
576
577	case HSA_DBG_WAVEOP_RESUME:
578		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
579		break;
580
581	case HSA_DBG_WAVEOP_KILL:
582		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
583		break;
584
585	case HSA_DBG_WAVEOP_DEBUG:
586		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
587		break;
588
589	case HSA_DBG_WAVEOP_TRAP:
590		if (wac_info->trapId < MAX_TRAPID) {
591			reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
592			reg_sq_cmd.bits.trap_id = wac_info->trapId;
593		} else {
594			status = -EINVAL;
595		}
596		break;
597
598	default:
599		status = -EINVAL;
600		break;
601	}
602
603	if (status == 0) {
604		*in_reg_sq_cmd = reg_sq_cmd;
605		*in_reg_gfx_index = reg_gfx_index;
606	}
607
608	return status;
609}
610
611static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
612					struct dbg_wave_control_info *wac_info)
613{
614
615	int status;
616	union SQ_CMD_BITS reg_sq_cmd;
617	union GRBM_GFX_INDEX_BITS reg_gfx_index;
618	struct kfd_mem_obj *mem_obj;
619	uint32_t *packet_buff_uint;
620	struct pm4__set_config_reg *packets_vec;
621	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
622
623	BUG_ON(!dbgdev || !wac_info);
624
625	reg_sq_cmd.u32All = 0;
626
627	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
628							&reg_gfx_index);
629	if (status) {
630		pr_err("amdkfd: Failed to set wave control registers\n");
631		return status;
632	}
633
634	/* we do not control the VMID in DIQ,so reset it to a known value */
635	reg_sq_cmd.bits.vm_id = 0;
636
637	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
638
639	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
640	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
641	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
642	pr_debug("\t\t msg value is: %u\n",
643			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
644	pr_debug("\t\t vmid      is: N/A\n");
645
646	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
647	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
648	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
649	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
650	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
651	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
652	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
653
654	pr_debug("\t\t ibw       is : %u\n",
655			reg_gfx_index.bitfields.instance_broadcast_writes);
656	pr_debug("\t\t ii        is : %u\n",
657			reg_gfx_index.bitfields.instance_index);
658	pr_debug("\t\t sebw      is : %u\n",
659			reg_gfx_index.bitfields.se_broadcast_writes);
660	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
661	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
662	pr_debug("\t\t sbw       is : %u\n",
663			reg_gfx_index.bitfields.sh_broadcast_writes);
664
665	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
666
667	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
668
669	if (status != 0) {
670		pr_err("amdkfd: Failed to allocate GART memory\n");
671		return status;
672	}
673
674	packet_buff_uint = mem_obj->cpu_ptr;
675
676	memset(packet_buff_uint, 0, ib_size);
677
678	packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;
679	packets_vec[0].header.count = 1;
680	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
681	packets_vec[0].header.type = PM4_TYPE_3;
682	packets_vec[0].bitfields2.reg_offset =
683			GRBM_GFX_INDEX / (sizeof(uint32_t)) -
684				USERCONFIG_REG_BASE;
685
686	packets_vec[0].bitfields2.insert_vmid = 0;
687	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
688
689	packets_vec[1].header.count = 1;
690	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
691	packets_vec[1].header.type = PM4_TYPE_3;
692	packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
693						AMD_CONFIG_REG_BASE;
694
695	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
696	packets_vec[1].bitfields2.insert_vmid = 1;
697	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
698
699	/* Restore the GRBM_GFX_INDEX register */
700
701	reg_gfx_index.u32All = 0;
702	reg_gfx_index.bits.sh_broadcast_writes = 1;
703	reg_gfx_index.bits.instance_broadcast_writes = 1;
704	reg_gfx_index.bits.se_broadcast_writes = 1;
705
706
707	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
708	packets_vec[2].bitfields2.reg_offset =
709				GRBM_GFX_INDEX / (sizeof(uint32_t)) -
710					USERCONFIG_REG_BASE;
711
712	packets_vec[2].bitfields2.insert_vmid = 0;
713	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
714
715	status = dbgdev_diq_submit_ib(
716			dbgdev,
717			wac_info->process->pasid,
718			mem_obj->gpu_addr,
719			packet_buff_uint,
720			ib_size);
721
722	if (status != 0)
723		pr_err("amdkfd: Failed to submit IB to DIQ\n");
724
725	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
726
727	return status;
728}
729
730static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
731					struct dbg_wave_control_info *wac_info)
732{
733	int status;
734	union SQ_CMD_BITS reg_sq_cmd;
735	union GRBM_GFX_INDEX_BITS reg_gfx_index;
736	struct kfd_process_device *pdd;
737
738	BUG_ON(!dbgdev || !dbgdev->dev || !wac_info);
739
740	reg_sq_cmd.u32All = 0;
741
742	/* taking the VMID for that process on the safe way using PDD */
743	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
744
745	if (!pdd) {
746		pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
747		return -EFAULT;
748	}
749	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
750							&reg_gfx_index);
751	if (status) {
752		pr_err("amdkfd: Failed to set wave control registers\n");
753		return status;
754	}
755
756	/* for non DIQ we need to patch the VMID: */
757
758	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
759
760	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
761
762	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
763	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
764	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
765	pr_debug("\t\t msg value is: %u\n",
766			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
767	pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);
768
769	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
770	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
771	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
772	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
773	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
774	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
775	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
776
777	pr_debug("\t\t ibw       is : %u\n",
778			reg_gfx_index.bitfields.instance_broadcast_writes);
779	pr_debug("\t\t ii        is : %u\n",
780			reg_gfx_index.bitfields.instance_index);
781	pr_debug("\t\t sebw      is : %u\n",
782			reg_gfx_index.bitfields.se_broadcast_writes);
783	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
784	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
785	pr_debug("\t\t sbw       is : %u\n",
786			reg_gfx_index.bitfields.sh_broadcast_writes);
787
788	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
789
790	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
791							reg_gfx_index.u32All,
792							reg_sq_cmd.u32All);
793}
794
795int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
796{
797	int status = 0;
798	unsigned int vmid;
799	union SQ_CMD_BITS reg_sq_cmd;
800	union GRBM_GFX_INDEX_BITS reg_gfx_index;
801	struct kfd_process_device *pdd;
802	struct dbg_wave_control_info wac_info;
803	int temp;
804	int first_vmid_to_scan = 8;
805	int last_vmid_to_scan = 15;
806
807	first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1;
808	temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan;
809	last_vmid_to_scan = first_vmid_to_scan + ffz(temp);
810
811	reg_sq_cmd.u32All = 0;
812	status = 0;
813
814	wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
815	wac_info.operand = HSA_DBG_WAVEOP_KILL;
816
817	pr_debug("Killing all process wavefronts\n");
818
819	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
820	 * ATC_VMID15_PASID_MAPPING
821	 * to check which VMID the current process is mapped to. */
822
823	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
824		if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
825				(dev->kgd, vmid)) {
826			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
827					(dev->kgd, vmid) == p->pasid) {
828				pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
829						vmid, p->pasid);
830				break;
831			}
832		}
833	}
834
835	if (vmid > last_vmid_to_scan) {
836		pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid);
837		return -EFAULT;
838	}
839
840	/* taking the VMID for that process on the safe way using PDD */
841	pdd = kfd_get_process_device_data(dev, p);
842	if (!pdd)
843		return -EFAULT;
844
845	status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
846			&reg_gfx_index);
847	if (status != 0)
848		return -EINVAL;
849
850	/* for non DIQ we need to patch the VMID: */
851	reg_sq_cmd.bits.vm_id = vmid;
852
853	dev->kfd2kgd->wave_control_execute(dev->kgd,
854					reg_gfx_index.u32All,
855					reg_sq_cmd.u32All);
856
857	return 0;
858}
859
860void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
861			enum DBGDEV_TYPE type)
862{
863	BUG_ON(!pdbgdev || !pdev);
864
865	pdbgdev->dev = pdev;
866	pdbgdev->kq = NULL;
867	pdbgdev->type = type;
868	pdbgdev->pqm = NULL;
869
870	switch (type) {
871	case DBGDEV_TYPE_NODIQ:
872		pdbgdev->dbgdev_register = dbgdev_register_nodiq;
873		pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
874		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
875		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
876		break;
877	case DBGDEV_TYPE_DIQ:
878	default:
879		pdbgdev->dbgdev_register = dbgdev_register_diq;
880		pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
881		pdbgdev->dbgdev_wave_control =  dbgdev_wave_control_diq;
882		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
883		break;
884	}
885
886}