v5.9
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 */
  22
  23#include <linux/device.h>
  24#include <linux/export.h>
  25#include <linux/err.h>
  26#include <linux/fs.h>
  27#include <linux/file.h>
  28#include <linux/sched.h>
  29#include <linux/slab.h>
  30#include <linux/uaccess.h>
  31#include <linux/compat.h>
  32#include <uapi/linux/kfd_ioctl.h>
  33#include <linux/time.h>
  34#include <linux/mm.h>
  35#include <linux/mman.h>
  36#include <linux/dma-buf.h>
  37#include <asm/processor.h>
  38#include "kfd_priv.h"
  39#include "kfd_device_queue_manager.h"
  40#include "kfd_dbgmgr.h"
  41#include "amdgpu_amdkfd.h"
  42#include "kfd_smi_events.h"
  43
  44static long kfd_ioctl(struct file *, unsigned int, unsigned long);
  45static int kfd_open(struct inode *, struct file *);
  46static int kfd_release(struct inode *, struct file *);
  47static int kfd_mmap(struct file *, struct vm_area_struct *);
  48
  49static const char kfd_dev_name[] = "kfd";
  50
  51static const struct file_operations kfd_fops = {
  52	.owner = THIS_MODULE,
  53	.unlocked_ioctl = kfd_ioctl,
  54	.compat_ioctl = compat_ptr_ioctl,
  55	.open = kfd_open,
  56	.release = kfd_release,
  57	.mmap = kfd_mmap,
  58};
  59
  60static int kfd_char_dev_major = -1;
  61static struct class *kfd_class;
  62struct device *kfd_device;
  63
  64int kfd_chardev_init(void)
  65{
  66	int err = 0;
  67
  68	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
  69	err = kfd_char_dev_major;
  70	if (err < 0)
  71		goto err_register_chrdev;
  72
  73	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
  74	err = PTR_ERR(kfd_class);
  75	if (IS_ERR(kfd_class))
  76		goto err_class_create;
  77
  78	kfd_device = device_create(kfd_class, NULL,
  79					MKDEV(kfd_char_dev_major, 0),
  80					NULL, kfd_dev_name);
  81	err = PTR_ERR(kfd_device);
  82	if (IS_ERR(kfd_device))
  83		goto err_device_create;
  84
  85	return 0;
  86
  87err_device_create:
  88	class_destroy(kfd_class);
  89err_class_create:
  90	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
  91err_register_chrdev:
  92	return err;
  93}
  94
  95void kfd_chardev_exit(void)
  96{
  97	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
  98	class_destroy(kfd_class);
  99	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
 100}
 101
 102struct device *kfd_chardev(void)
 103{
 104	return kfd_device;
 105}
 106
 107
 108static int kfd_open(struct inode *inode, struct file *filep)
 109{
 110	struct kfd_process *process;
 111	bool is_32bit_user_mode;
 112
 113	if (iminor(inode) != 0)
 114		return -ENODEV;
 115
 116	is_32bit_user_mode = in_compat_syscall();
 117
 118	if (is_32bit_user_mode) {
 119		dev_warn(kfd_device,
 120			"Process %d (32-bit) failed to open /dev/kfd\n"
 121			"32-bit processes are not supported by amdkfd\n",
 122			current->pid);
 123		return -EPERM;
 124	}
 125
 126	process = kfd_create_process(filep);
 127	if (IS_ERR(process))
 128		return PTR_ERR(process);
 129
 130	if (kfd_is_locked()) {
 131		dev_dbg(kfd_device, "kfd is locked!\n"
 132				"process %d unreferenced", process->pasid);
 133		kfd_unref_process(process);
 134		return -EAGAIN;
 135	}
 136
 137	/* filep now owns the reference returned by kfd_create_process */
 138	filep->private_data = process;
 139
 140	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
 141		process->pasid, process->is_32bit_user_mode);
 142
 143	return 0;
 144}
 145
 146static int kfd_release(struct inode *inode, struct file *filep)
 147{
 148	struct kfd_process *process = filep->private_data;
 149
 150	if (process)
 151		kfd_unref_process(process);
 152
 153	return 0;
 154}
 155
 156static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
 157					void *data)
 158{
 159	struct kfd_ioctl_get_version_args *args = data;
 160
 161	args->major_version = KFD_IOCTL_MAJOR_VERSION;
 162	args->minor_version = KFD_IOCTL_MINOR_VERSION;
 163
 164	return 0;
 165}
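
A minimal user-space sketch of exercising this ioctl through the character device registered above. It assumes only the exported uapi header <linux/kfd_ioctl.h> and that /dev/kfd exists on the running system; it is illustrative, not part of the kernel source.

/* Hypothetical user-space example, not part of the kernel source. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kfd_ioctl.h>

int main(void)
{
	struct kfd_ioctl_get_version_args args = {0};
	int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);

	if (fd < 0) {
		perror("open /dev/kfd");
		return 1;
	}
	if (ioctl(fd, AMDKFD_IOC_GET_VERSION, &args) == 0)
		printf("KFD ioctl interface %u.%u\n",
		       args.major_version, args.minor_version);
	close(fd);
	return 0;
}
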
 166
 167static int set_queue_properties_from_user(struct queue_properties *q_properties,
 168				struct kfd_ioctl_create_queue_args *args)
 169{
 170	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
 171		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
 172		return -EINVAL;
 173	}
 174
 175	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
 176		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
 177		return -EINVAL;
 178	}
 179
 180	if ((args->ring_base_address) &&
 181		(!access_ok((const void __user *) args->ring_base_address,
 182			sizeof(uint64_t)))) {
 183		pr_err("Can't access ring base address\n");
 184		return -EFAULT;
 185	}
 186
 187	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
 188		pr_err("Ring size must be a power of 2 or 0\n");
 189		return -EINVAL;
 190	}
 191
 192	if (!access_ok((const void __user *) args->read_pointer_address,
 193			sizeof(uint32_t))) {
 194		pr_err("Can't access read pointer\n");
 195		return -EFAULT;
 196	}
 197
 198	if (!access_ok((const void __user *) args->write_pointer_address,
 199			sizeof(uint32_t))) {
 200		pr_err("Can't access write pointer\n");
 201		return -EFAULT;
 202	}
 203
 204	if (args->eop_buffer_address &&
 205		!access_ok((const void __user *) args->eop_buffer_address,
 206			sizeof(uint32_t))) {
 207		pr_debug("Can't access eop buffer");
 208		return -EFAULT;
 209	}
 210
 211	if (args->ctx_save_restore_address &&
 212		!access_ok((const void __user *) args->ctx_save_restore_address,
 213			sizeof(uint32_t))) {
 214		pr_debug("Can't access ctx save restore buffer");
 215		return -EFAULT;
 216	}
 217
 218	q_properties->is_interop = false;
 219	q_properties->is_gws = false;
 220	q_properties->queue_percent = args->queue_percentage;
 221	q_properties->priority = args->queue_priority;
 222	q_properties->queue_address = args->ring_base_address;
 223	q_properties->queue_size = args->ring_size;
 224	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
 225	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
 226	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
 227	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
 228	q_properties->ctx_save_restore_area_address =
 229			args->ctx_save_restore_address;
 230	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
 231	q_properties->ctl_stack_size = args->ctl_stack_size;
 232	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
 233		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
 234		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
 235	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
 236		q_properties->type = KFD_QUEUE_TYPE_SDMA;
 237	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
 238		q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
 239	else
 240		return -ENOTSUPP;
 241
 242	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
 243		q_properties->format = KFD_QUEUE_FORMAT_AQL;
 244	else
 245		q_properties->format = KFD_QUEUE_FORMAT_PM4;
 246
 247	pr_debug("Queue Percentage: %d, %d\n",
 248			q_properties->queue_percent, args->queue_percentage);
 249
 250	pr_debug("Queue Priority: %d, %d\n",
 251			q_properties->priority, args->queue_priority);
 252
 253	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
 254			q_properties->queue_address, args->ring_base_address);
 255
 256	pr_debug("Queue Size: 0x%llX, %u\n",
 257			q_properties->queue_size, args->ring_size);
 258
 259	pr_debug("Queue r/w Pointers: %px, %px\n",
 260			q_properties->read_ptr,
 261			q_properties->write_ptr);
 262
 263	pr_debug("Queue Format: %d\n", q_properties->format);
 264
 265	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
 266
 267	pr_debug("Queue CTX save area: 0x%llX\n",
 268			q_properties->ctx_save_restore_area_address);
 269
 270	return 0;
 271}
 272
 273static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 274					void *data)
 275{
 276	struct kfd_ioctl_create_queue_args *args = data;
 277	struct kfd_dev *dev;
 278	int err = 0;
 279	unsigned int queue_id;
 280	struct kfd_process_device *pdd;
 281	struct queue_properties q_properties;
 282	uint32_t doorbell_offset_in_process = 0;
 283
 284	memset(&q_properties, 0, sizeof(struct queue_properties));
 285
 286	pr_debug("Creating queue ioctl\n");
 287
 288	err = set_queue_properties_from_user(&q_properties, args);
 289	if (err)
 290		return err;
 291
 292	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
 293	dev = kfd_device_by_id(args->gpu_id);
 294	if (!dev) {
 295		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
 296		return -EINVAL;
 297	}
 298
 299	mutex_lock(&p->mutex);
 300
 301	pdd = kfd_bind_process_to_device(dev, p);
 302	if (IS_ERR(pdd)) {
 303		err = -ESRCH;
 304		goto err_bind_process;
 305	}
 306
 307	pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
 308			p->pasid,
 309			dev->id);
 310
 311	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
 312			&doorbell_offset_in_process);
 313	if (err != 0)
 314		goto err_create_queue;
 315
 316	args->queue_id = queue_id;
 317
 318
 319	/* Return gpu_id as doorbell offset for mmap usage */
 320	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
 321	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
 322	if (KFD_IS_SOC15(dev->device_info->asic_family))
 323		/* On SOC15 ASICs, include the doorbell offset within the
 324		 * process doorbell frame, which is 2 pages.
 325		 */
 326		args->doorbell_offset |= doorbell_offset_in_process;
 327
 328	mutex_unlock(&p->mutex);
 329
 330	pr_debug("Queue id %d was created successfully\n", args->queue_id);
 331
 332	pr_debug("Ring buffer address == 0x%016llX\n",
 333			args->ring_base_address);
 334
 335	pr_debug("Read ptr address    == 0x%016llX\n",
 336			args->read_pointer_address);
 337
 338	pr_debug("Write ptr address   == 0x%016llX\n",
 339			args->write_pointer_address);
 340
 341	return 0;
 342
 343err_create_queue:
 344err_bind_process:
 345	mutex_unlock(&p->mutex);
 346	return err;
 347}
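
A hedged user-space sketch of filling kfd_ioctl_create_queue_args so that set_queue_properties_from_user above accepts it: a power-of-two ring size and accessible read/write pointer addresses. The helper name and its parameters are illustrative; the caller is assumed to have allocated the ring buffer and pointers and to have read gpu_id from the sysfs topology.

/* Hypothetical user-space sketch, not part of the kernel source. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static int create_aql_queue(int kfd_fd, uint32_t gpu_id,
			    void *ring_buf, uint32_t ring_bytes, /* power of 2 */
			    uint64_t *read_ptr, uint64_t *write_ptr,
			    uint32_t *queue_id, uint64_t *doorbell_offset)
{
	struct kfd_ioctl_create_queue_args args = {0};

	args.gpu_id = gpu_id;
	args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE_AQL;
	args.ring_base_address = (uintptr_t)ring_buf;
	args.ring_size = ring_bytes;
	args.read_pointer_address = (uintptr_t)read_ptr;
	args.write_pointer_address = (uintptr_t)write_ptr;
	args.queue_percentage = KFD_MAX_QUEUE_PERCENTAGE;
	args.queue_priority = KFD_MAX_QUEUE_PRIORITY / 2;

	if (ioctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args))
		return -1;

	*queue_id = args.queue_id;
	*doorbell_offset = args.doorbell_offset;	/* used with mmap() later */
	return 0;
}
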
 348
 349static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
 350					void *data)
 351{
 352	int retval;
 353	struct kfd_ioctl_destroy_queue_args *args = data;
 354
 355	pr_debug("Destroying queue id %d for pasid 0x%x\n",
 356				args->queue_id,
 357				p->pasid);
 358
 359	mutex_lock(&p->mutex);
 360
 361	retval = pqm_destroy_queue(&p->pqm, args->queue_id);
 362
 363	mutex_unlock(&p->mutex);
 364	return retval;
 365}
 366
 367static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
 368					void *data)
 369{
 370	int retval;
 371	struct kfd_ioctl_update_queue_args *args = data;
 372	struct queue_properties properties;
 373
 374	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
 375		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
 376		return -EINVAL;
 377	}
 378
 379	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
 380		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
 381		return -EINVAL;
 382	}
 383
 384	if ((args->ring_base_address) &&
 385		(!access_ok((const void __user *) args->ring_base_address,
 386			sizeof(uint64_t)))) {
 387		pr_err("Can't access ring base address\n");
 388		return -EFAULT;
 389	}
 390
 391	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
 392		pr_err("Ring size must be a power of 2 or 0\n");
 393		return -EINVAL;
 394	}
 395
 396	properties.queue_address = args->ring_base_address;
 397	properties.queue_size = args->ring_size;
 398	properties.queue_percent = args->queue_percentage;
 399	properties.priority = args->queue_priority;
 400
 401	pr_debug("Updating queue id %d for pasid 0x%x\n",
 402			args->queue_id, p->pasid);
 403
 404	mutex_lock(&p->mutex);
 405
 406	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
 407
 408	mutex_unlock(&p->mutex);
 409
 410	return retval;
 411}
 412
 413static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
 414					void *data)
 415{
 416	int retval;
 417	const int max_num_cus = 1024;
 418	struct kfd_ioctl_set_cu_mask_args *args = data;
 419	struct queue_properties properties;
 420	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
 421	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
 422
 423	if ((args->num_cu_mask % 32) != 0) {
 424		pr_debug("num_cu_mask 0x%x must be a multiple of 32",
 425				args->num_cu_mask);
 426		return -EINVAL;
 427	}
 428
 429	properties.cu_mask_count = args->num_cu_mask;
 430	if (properties.cu_mask_count == 0) {
 431		pr_debug("CU mask cannot be 0");
 432		return -EINVAL;
 433	}
 434
 435	/* To prevent an unreasonably large CU mask size, set an arbitrary
  436	 * limit of max_num_cus bits.  Any CU mask bits past max_num_cus are
  437	 * dropped and only the first max_num_cus bits are used.
 438	 */
 439	if (properties.cu_mask_count > max_num_cus) {
 440		pr_debug("CU mask cannot be greater than 1024 bits");
 441		properties.cu_mask_count = max_num_cus;
 442		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
 443	}
 444
 445	properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
 446	if (!properties.cu_mask)
 447		return -ENOMEM;
 448
 449	retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
 450	if (retval) {
 451		pr_debug("Could not copy CU mask from userspace");
 452		kfree(properties.cu_mask);
 453		return -EFAULT;
 454	}
 455
 456	mutex_lock(&p->mutex);
 457
 458	retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);
 459
 460	mutex_unlock(&p->mutex);
 461
 462	if (retval)
 463		kfree(properties.cu_mask);
 464
 465	return retval;
 466}
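
A hedged user-space sketch of building a CU mask that satisfies the checks above: a non-zero bit count that is a multiple of 32, capped at 1024 bits. The helper and its num_cus input are illustrative (real runtimes would take the CU count from the topology).

/* Hypothetical user-space sketch, not part of the kernel source. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static int set_cu_mask(int kfd_fd, uint32_t queue_id, uint32_t num_cus)
{
	/* 1024 bits matches the max_num_cus cap enforced above. */
	uint32_t mask[1024 / 32] = {0};
	struct kfd_ioctl_set_cu_mask_args args = {0};
	uint32_t i;

	if (!num_cus || num_cus > 1024)
		return -1;

	/* Enable the first num_cus compute units. */
	for (i = 0; i < num_cus; i++)
		mask[i / 32] |= 1u << (i % 32);

	args.queue_id = queue_id;
	/* Must be a non-zero multiple of 32; round up to cover num_cus. */
	args.num_cu_mask = (num_cus + 31) & ~31u;
	args.cu_mask_ptr = (uintptr_t)mask;

	return ioctl(kfd_fd, AMDKFD_IOC_SET_CU_MASK, &args);
}
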
 467
 468static int kfd_ioctl_get_queue_wave_state(struct file *filep,
 469					  struct kfd_process *p, void *data)
 470{
 471	struct kfd_ioctl_get_queue_wave_state_args *args = data;
 472	int r;
 473
 474	mutex_lock(&p->mutex);
 475
 476	r = pqm_get_wave_state(&p->pqm, args->queue_id,
 477			       (void __user *)args->ctl_stack_address,
 478			       &args->ctl_stack_used_size,
 479			       &args->save_area_used_size);
 480
 481	mutex_unlock(&p->mutex);
 482
 483	return r;
 484}
 485
 486static int kfd_ioctl_set_memory_policy(struct file *filep,
 487					struct kfd_process *p, void *data)
 488{
 489	struct kfd_ioctl_set_memory_policy_args *args = data;
 490	struct kfd_dev *dev;
 491	int err = 0;
 492	struct kfd_process_device *pdd;
 493	enum cache_policy default_policy, alternate_policy;
 494
 495	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
 496	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
 497		return -EINVAL;
 498	}
 499
 500	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
 501	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
 502		return -EINVAL;
 503	}
 504
 505	dev = kfd_device_by_id(args->gpu_id);
 506	if (!dev)
 507		return -EINVAL;
 508
 509	mutex_lock(&p->mutex);
 510
 511	pdd = kfd_bind_process_to_device(dev, p);
 512	if (IS_ERR(pdd)) {
 513		err = -ESRCH;
 514		goto out;
 515	}
 516
 517	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
 518			 ? cache_policy_coherent : cache_policy_noncoherent;
 519
 520	alternate_policy =
 521		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
 522		   ? cache_policy_coherent : cache_policy_noncoherent;
 523
 524	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
 525				&pdd->qpd,
 526				default_policy,
 527				alternate_policy,
 528				(void __user *)args->alternate_aperture_base,
 529				args->alternate_aperture_size))
 530		err = -EINVAL;
 531
 532out:
 533	mutex_unlock(&p->mutex);
 534
 535	return err;
 536}
 537
 538static int kfd_ioctl_set_trap_handler(struct file *filep,
 539					struct kfd_process *p, void *data)
 540{
 541	struct kfd_ioctl_set_trap_handler_args *args = data;
 542	struct kfd_dev *dev;
 543	int err = 0;
 544	struct kfd_process_device *pdd;
 545
 546	dev = kfd_device_by_id(args->gpu_id);
 547	if (!dev)
 548		return -EINVAL;
 549
 550	mutex_lock(&p->mutex);
 551
 552	pdd = kfd_bind_process_to_device(dev, p);
 553	if (IS_ERR(pdd)) {
 554		err = -ESRCH;
 555		goto out;
 556	}
 557
 558	if (dev->dqm->ops.set_trap_handler(dev->dqm,
 559					&pdd->qpd,
 560					args->tba_addr,
 561					args->tma_addr))
 562		err = -EINVAL;
 563
 564out:
 565	mutex_unlock(&p->mutex);
 566
 567	return err;
 568}
 569
 570static int kfd_ioctl_dbg_register(struct file *filep,
 571				struct kfd_process *p, void *data)
 572{
 573	struct kfd_ioctl_dbg_register_args *args = data;
 574	struct kfd_dev *dev;
 575	struct kfd_dbgmgr *dbgmgr_ptr;
 576	struct kfd_process_device *pdd;
 577	bool create_ok;
 578	long status = 0;
 579
 580	dev = kfd_device_by_id(args->gpu_id);
 581	if (!dev)
 582		return -EINVAL;
 583
 584	if (dev->device_info->asic_family == CHIP_CARRIZO) {
 585		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
 586		return -EINVAL;
 587	}
 588
 589	mutex_lock(&p->mutex);
 590	mutex_lock(kfd_get_dbgmgr_mutex());
 591
 592	/*
  593	 * make sure that we have a pdd, if this is the first queue created for
 594	 * this process
 595	 */
 596	pdd = kfd_bind_process_to_device(dev, p);
 597	if (IS_ERR(pdd)) {
 598		status = PTR_ERR(pdd);
 599		goto out;
 600	}
 601
 602	if (!dev->dbgmgr) {
 603		/* In case of a legal call, we have no dbgmgr yet */
 604		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
 605		if (create_ok) {
 606			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
 607			if (status != 0)
 608				kfd_dbgmgr_destroy(dbgmgr_ptr);
 609			else
 610				dev->dbgmgr = dbgmgr_ptr;
 611		}
 612	} else {
 613		pr_debug("debugger already registered\n");
 614		status = -EINVAL;
 615	}
 616
 617out:
 618	mutex_unlock(kfd_get_dbgmgr_mutex());
 619	mutex_unlock(&p->mutex);
 620
 621	return status;
 622}
 623
 624static int kfd_ioctl_dbg_unregister(struct file *filep,
 625				struct kfd_process *p, void *data)
 626{
 627	struct kfd_ioctl_dbg_unregister_args *args = data;
 628	struct kfd_dev *dev;
 629	long status;
 630
 631	dev = kfd_device_by_id(args->gpu_id);
 632	if (!dev || !dev->dbgmgr)
 633		return -EINVAL;
 634
 635	if (dev->device_info->asic_family == CHIP_CARRIZO) {
 636		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
 637		return -EINVAL;
 638	}
 639
 640	mutex_lock(kfd_get_dbgmgr_mutex());
 641
 642	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
 643	if (!status) {
 644		kfd_dbgmgr_destroy(dev->dbgmgr);
 645		dev->dbgmgr = NULL;
 646	}
 647
 648	mutex_unlock(kfd_get_dbgmgr_mutex());
 649
 650	return status;
 651}
 652
 653/*
 654 * Parse and generate variable size data structure for address watch.
 655 * Total size of the buffer and # watch points is limited in order
  656 * to prevent kernel abuse. (no bearing on the much smaller HW limitation
  657 * which is enforced by the dbgdev module)
  658 * Please also note that the watch addresses themselves are not copied from
  659 * user space, since they are set into the HW with user-mode values.
 660 *
 661 */
 662static int kfd_ioctl_dbg_address_watch(struct file *filep,
 663					struct kfd_process *p, void *data)
 664{
 665	struct kfd_ioctl_dbg_address_watch_args *args = data;
 666	struct kfd_dev *dev;
 667	struct dbg_address_watch_info aw_info;
 668	unsigned char *args_buff;
 669	long status;
 670	void __user *cmd_from_user;
 671	uint64_t watch_mask_value = 0;
 672	unsigned int args_idx = 0;
 673
 674	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
 675
 676	dev = kfd_device_by_id(args->gpu_id);
 677	if (!dev)
 678		return -EINVAL;
 679
 680	if (dev->device_info->asic_family == CHIP_CARRIZO) {
 681		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
 682		return -EINVAL;
 683	}
 684
 685	cmd_from_user = (void __user *) args->content_ptr;
 686
 687	/* Validate arguments */
 688
 689	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
 690		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
 691		(cmd_from_user == NULL))
 692		return -EINVAL;
 693
 694	/* this is the actual buffer to work with */
 695	args_buff = memdup_user(cmd_from_user,
 696				args->buf_size_in_bytes - sizeof(*args));
 697	if (IS_ERR(args_buff))
 698		return PTR_ERR(args_buff);
 699
 700	aw_info.process = p;
 701
 702	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
 703	args_idx += sizeof(aw_info.num_watch_points);
 704
 705	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
 706	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
 707
 708	/*
 709	 * set watch address base pointer to point on the array base
 710	 * within args_buff
 711	 */
 712	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
 713
 714	/* skip over the addresses buffer */
 715	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
 716
 717	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
 718		status = -EINVAL;
 719		goto out;
 720	}
 721
 722	watch_mask_value = (uint64_t) args_buff[args_idx];
 723
 724	if (watch_mask_value > 0) {
 725		/*
 726		 * There is an array of masks.
 727		 * set watch mask base pointer to point on the array base
 728		 * within args_buff
 729		 */
 730		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
 731
 732		/* skip over the masks buffer */
 733		args_idx += sizeof(aw_info.watch_mask) *
 734				aw_info.num_watch_points;
 735	} else {
 736		/* just the NULL mask, set to NULL and skip over it */
 737		aw_info.watch_mask = NULL;
 738		args_idx += sizeof(aw_info.watch_mask);
 739	}
 740
 741	if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
 742		status = -EINVAL;
 743		goto out;
 744	}
 745
 746	/* Currently HSA Event is not supported for DBG */
 747	aw_info.watch_event = NULL;
 748
 749	mutex_lock(kfd_get_dbgmgr_mutex());
 750
 751	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
 752
 753	mutex_unlock(kfd_get_dbgmgr_mutex());
 754
 755out:
 756	kfree(args_buff);
 757
 758	return status;
 759}
 760
 761/* Parse and generate fixed size data structure for wave control */
 762static int kfd_ioctl_dbg_wave_control(struct file *filep,
 763					struct kfd_process *p, void *data)
 764{
 765	struct kfd_ioctl_dbg_wave_control_args *args = data;
 766	struct kfd_dev *dev;
 767	struct dbg_wave_control_info wac_info;
 768	unsigned char *args_buff;
 769	uint32_t computed_buff_size;
 770	long status;
 771	void __user *cmd_from_user;
 772	unsigned int args_idx = 0;
 773
 774	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
 775
 776	/* we use compact form, independent of the packing attribute value */
 777	computed_buff_size = sizeof(*args) +
 778				sizeof(wac_info.mode) +
 779				sizeof(wac_info.operand) +
 780				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
 781				sizeof(wac_info.dbgWave_msg.MemoryVA) +
 782				sizeof(wac_info.trapId);
 783
 784	dev = kfd_device_by_id(args->gpu_id);
 785	if (!dev)
 786		return -EINVAL;
 787
 788	if (dev->device_info->asic_family == CHIP_CARRIZO) {
 789		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
 790		return -EINVAL;
 791	}
 792
 793	/* input size must match the computed "compact" size */
 794	if (args->buf_size_in_bytes != computed_buff_size) {
 795		pr_debug("size mismatch, computed : actual %u : %u\n",
 796				args->buf_size_in_bytes, computed_buff_size);
 797		return -EINVAL;
 798	}
 799
 800	cmd_from_user = (void __user *) args->content_ptr;
 801
 802	if (cmd_from_user == NULL)
 803		return -EINVAL;
 804
 805	/* copy the entire buffer from user */
 806
 807	args_buff = memdup_user(cmd_from_user,
 808				args->buf_size_in_bytes - sizeof(*args));
 809	if (IS_ERR(args_buff))
 810		return PTR_ERR(args_buff);
 811
  812	/* move ptr to the start of the payload area */
 813	wac_info.process = p;
 814
 815	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
 816	args_idx += sizeof(wac_info.operand);
 817
 818	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
 819	args_idx += sizeof(wac_info.mode);
 820
 821	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
 822	args_idx += sizeof(wac_info.trapId);
 823
 824	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
 825					*((uint32_t *)(&args_buff[args_idx]));
 826	wac_info.dbgWave_msg.MemoryVA = NULL;
 827
 828	mutex_lock(kfd_get_dbgmgr_mutex());
 829
 830	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
 831			wac_info.process, wac_info.operand,
 832			wac_info.mode, wac_info.trapId,
 833			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
 834
 835	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
 836
 837	pr_debug("Returned status of dbg manager is %ld\n", status);
 838
 839	mutex_unlock(kfd_get_dbgmgr_mutex());
 840
 841	kfree(args_buff);
 842
 843	return status;
 844}
 845
 846static int kfd_ioctl_get_clock_counters(struct file *filep,
 847				struct kfd_process *p, void *data)
 848{
 849	struct kfd_ioctl_get_clock_counters_args *args = data;
 850	struct kfd_dev *dev;
 851
 852	dev = kfd_device_by_id(args->gpu_id);
 853	if (dev)
 854		/* Reading GPU clock counter from KGD */
 855		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
 856	else
 857		/* Node without GPU resource */
 858		args->gpu_clock_counter = 0;
 859
 860	/* No access to rdtsc. Using raw monotonic time */
 861	args->cpu_clock_counter = ktime_get_raw_ns();
 862	args->system_clock_counter = ktime_get_boottime_ns();
 863
 864	/* Since the counter is in nano-seconds we use 1GHz frequency */
 865	args->system_clock_freq = 1000000000;
 866
 867	return 0;
 868}
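
A hedged user-space sketch of sampling the correlated GPU/CPU/system timestamps returned by this ioctl; since the kernel fixes system_clock_freq at 1 GHz, the system counter can be read directly as nanoseconds. The helper name and the gpu_id input are illustrative.

/* Hypothetical user-space sketch, not part of the kernel source. */
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static void sample_clocks(int kfd_fd, uint32_t gpu_id)
{
	struct kfd_ioctl_get_clock_counters_args args = {0};

	args.gpu_id = gpu_id;
	if (ioctl(kfd_fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &args))
		return;

	/* system_clock_counter ticks at system_clock_freq (1 GHz above). */
	printf("gpu %llu cpu %llu sys %llu ns\n",
	       (unsigned long long)args.gpu_clock_counter,
	       (unsigned long long)args.cpu_clock_counter,
	       (unsigned long long)args.system_clock_counter);
}
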
 869
 870
 871static int kfd_ioctl_get_process_apertures(struct file *filp,
 872				struct kfd_process *p, void *data)
 873{
 874	struct kfd_ioctl_get_process_apertures_args *args = data;
 875	struct kfd_process_device_apertures *pAperture;
 876	struct kfd_process_device *pdd;
 877
 878	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
 879
 880	args->num_of_nodes = 0;
 881
 882	mutex_lock(&p->mutex);
 883
  884	/* if the process-device list isn't empty */
 885	if (kfd_has_process_device_data(p)) {
 886		/* Run over all pdd of the process */
 887		pdd = kfd_get_first_process_device_data(p);
 888		do {
 889			pAperture =
 890				&args->process_apertures[args->num_of_nodes];
 891			pAperture->gpu_id = pdd->dev->id;
 892			pAperture->lds_base = pdd->lds_base;
 893			pAperture->lds_limit = pdd->lds_limit;
 894			pAperture->gpuvm_base = pdd->gpuvm_base;
 895			pAperture->gpuvm_limit = pdd->gpuvm_limit;
 896			pAperture->scratch_base = pdd->scratch_base;
 897			pAperture->scratch_limit = pdd->scratch_limit;
 898
 899			dev_dbg(kfd_device,
 900				"node id %u\n", args->num_of_nodes);
 901			dev_dbg(kfd_device,
 902				"gpu id %u\n", pdd->dev->id);
 903			dev_dbg(kfd_device,
 904				"lds_base %llX\n", pdd->lds_base);
 905			dev_dbg(kfd_device,
 906				"lds_limit %llX\n", pdd->lds_limit);
 907			dev_dbg(kfd_device,
 908				"gpuvm_base %llX\n", pdd->gpuvm_base);
 909			dev_dbg(kfd_device,
 910				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
 911			dev_dbg(kfd_device,
 912				"scratch_base %llX\n", pdd->scratch_base);
 913			dev_dbg(kfd_device,
 914				"scratch_limit %llX\n", pdd->scratch_limit);
 915
 916			args->num_of_nodes++;
 917
 918			pdd = kfd_get_next_process_device_data(p, pdd);
 919		} while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
 920	}
 921
 922	mutex_unlock(&p->mutex);
 923
 924	return 0;
 925}
 926
 927static int kfd_ioctl_get_process_apertures_new(struct file *filp,
 928				struct kfd_process *p, void *data)
 929{
 930	struct kfd_ioctl_get_process_apertures_new_args *args = data;
 931	struct kfd_process_device_apertures *pa;
 932	struct kfd_process_device *pdd;
 933	uint32_t nodes = 0;
 934	int ret;
 935
 936	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
 937
 938	if (args->num_of_nodes == 0) {
  939		/* Return number of nodes, so that user space can allocate
 940		 * sufficient memory
 941		 */
 942		mutex_lock(&p->mutex);
 943
 944		if (!kfd_has_process_device_data(p))
 945			goto out_unlock;
 946
 947		/* Run over all pdd of the process */
 948		pdd = kfd_get_first_process_device_data(p);
 949		do {
 950			args->num_of_nodes++;
 951			pdd = kfd_get_next_process_device_data(p, pdd);
 952		} while (pdd);
 953
 954		goto out_unlock;
 955	}
 956
 957	/* Fill in process-aperture information for all available
 958	 * nodes, but not more than args->num_of_nodes as that is
 959	 * the amount of memory allocated by user
 960	 */
 961	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
 962				args->num_of_nodes), GFP_KERNEL);
 963	if (!pa)
 964		return -ENOMEM;
 965
 966	mutex_lock(&p->mutex);
 967
 968	if (!kfd_has_process_device_data(p)) {
 969		args->num_of_nodes = 0;
 970		kfree(pa);
 971		goto out_unlock;
 972	}
 973
 974	/* Run over all pdd of the process */
 975	pdd = kfd_get_first_process_device_data(p);
 976	do {
 977		pa[nodes].gpu_id = pdd->dev->id;
 978		pa[nodes].lds_base = pdd->lds_base;
 979		pa[nodes].lds_limit = pdd->lds_limit;
 980		pa[nodes].gpuvm_base = pdd->gpuvm_base;
 981		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
 982		pa[nodes].scratch_base = pdd->scratch_base;
 983		pa[nodes].scratch_limit = pdd->scratch_limit;
 984
 985		dev_dbg(kfd_device,
 986			"gpu id %u\n", pdd->dev->id);
 987		dev_dbg(kfd_device,
 988			"lds_base %llX\n", pdd->lds_base);
 989		dev_dbg(kfd_device,
 990			"lds_limit %llX\n", pdd->lds_limit);
 991		dev_dbg(kfd_device,
 992			"gpuvm_base %llX\n", pdd->gpuvm_base);
 993		dev_dbg(kfd_device,
 994			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
 995		dev_dbg(kfd_device,
 996			"scratch_base %llX\n", pdd->scratch_base);
 997		dev_dbg(kfd_device,
 998			"scratch_limit %llX\n", pdd->scratch_limit);
 999		nodes++;
1000
1001		pdd = kfd_get_next_process_device_data(p, pdd);
1002	} while (pdd && (nodes < args->num_of_nodes));
1003	mutex_unlock(&p->mutex);
1004
1005	args->num_of_nodes = nodes;
1006	ret = copy_to_user(
1007			(void __user *)args->kfd_process_device_apertures_ptr,
1008			pa,
1009			(nodes * sizeof(struct kfd_process_device_apertures)));
1010	kfree(pa);
1011	return ret ? -EFAULT : 0;
1012
1013out_unlock:
1014	mutex_unlock(&p->mutex);
1015	return 0;
1016}
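
A hedged user-space sketch of the two-call pattern this ioctl expects: call once with num_of_nodes == 0 to learn the node count, allocate the aperture array, then call again to have it filled in. The helper name is illustrative.

/* Hypothetical user-space sketch, not part of the kernel source. */
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static struct kfd_process_device_apertures *
get_apertures(int kfd_fd, uint32_t *count)
{
	struct kfd_ioctl_get_process_apertures_new_args args = {0};
	struct kfd_process_device_apertures *pa;

	/* First call: num_of_nodes == 0 only returns the node count. */
	if (ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &args) ||
	    !args.num_of_nodes)
		return NULL;

	pa = calloc(args.num_of_nodes, sizeof(*pa));
	if (!pa)
		return NULL;

	/* Second call: the kernel fills at most num_of_nodes entries. */
	args.kfd_process_device_apertures_ptr = (uintptr_t)pa;
	if (ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &args)) {
		free(pa);
		return NULL;
	}

	*count = args.num_of_nodes;
	return pa;
}
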
1017
1018static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
1019					void *data)
1020{
1021	struct kfd_ioctl_create_event_args *args = data;
1022	int err;
1023
1024	/* For dGPUs the event page is allocated in user mode. The
1025	 * handle is passed to KFD with the first call to this IOCTL
1026	 * through the event_page_offset field.
1027	 */
1028	if (args->event_page_offset) {
1029		struct kfd_dev *kfd;
1030		struct kfd_process_device *pdd;
1031		void *mem, *kern_addr;
1032		uint64_t size;
1033
1034		if (p->signal_page) {
1035			pr_err("Event page is already set\n");
1036			return -EINVAL;
1037		}
1038
1039		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
1040		if (!kfd) {
1041			pr_err("Getting device by id failed in %s\n", __func__);
1042			return -EINVAL;
1043		}
1044
1045		mutex_lock(&p->mutex);
1046		pdd = kfd_bind_process_to_device(kfd, p);
1047		if (IS_ERR(pdd)) {
1048			err = PTR_ERR(pdd);
1049			goto out_unlock;
1050		}
1051
1052		mem = kfd_process_device_translate_handle(pdd,
1053				GET_IDR_HANDLE(args->event_page_offset));
1054		if (!mem) {
1055			pr_err("Can't find BO, offset is 0x%llx\n",
1056			       args->event_page_offset);
1057			err = -EINVAL;
1058			goto out_unlock;
1059		}
1060		mutex_unlock(&p->mutex);
1061
1062		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
1063						mem, &kern_addr, &size);
1064		if (err) {
1065			pr_err("Failed to map event page to kernel\n");
1066			return err;
1067		}
1068
1069		err = kfd_event_page_set(p, kern_addr, size);
1070		if (err) {
1071			pr_err("Failed to set event page\n");
1072			return err;
1073		}
1074	}
1075
1076	err = kfd_event_create(filp, p, args->event_type,
1077				args->auto_reset != 0, args->node_id,
1078				&args->event_id, &args->event_trigger_data,
1079				&args->event_page_offset,
1080				&args->event_slot_index);
1081
1082	return err;
1083
1084out_unlock:
1085	mutex_unlock(&p->mutex);
1086	return err;
1087}
1088
1089static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
1090					void *data)
1091{
1092	struct kfd_ioctl_destroy_event_args *args = data;
1093
1094	return kfd_event_destroy(p, args->event_id);
1095}
1096
1097static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
1098				void *data)
1099{
1100	struct kfd_ioctl_set_event_args *args = data;
1101
1102	return kfd_set_event(p, args->event_id);
1103}
1104
1105static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
1106				void *data)
1107{
1108	struct kfd_ioctl_reset_event_args *args = data;
1109
1110	return kfd_reset_event(p, args->event_id);
1111}
1112
1113static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
1114				void *data)
1115{
1116	struct kfd_ioctl_wait_events_args *args = data;
1117	int err;
1118
1119	err = kfd_wait_on_events(p, args->num_events,
1120			(void __user *)args->events_ptr,
1121			(args->wait_for_all != 0),
1122			args->timeout, &args->wait_result);
1123
1124	return err;
1125}
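
A hedged user-space sketch tying the event ioctls together: create an auto-reset signal event, then block on it with AMDKFD_IOC_WAIT_EVENTS. Something else (a GPU kernel, or AMDKFD_IOC_SET_EVENT from another thread) is assumed to signal it; the helper name is illustrative.

/* Hypothetical user-space sketch, not part of the kernel source. */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static int wait_for_signal_event(int kfd_fd)
{
	struct kfd_ioctl_create_event_args create = {0};
	struct kfd_ioctl_wait_events_args wait = {0};
	struct kfd_event_data ev;

	create.event_type = KFD_IOC_EVENT_SIGNAL;
	create.auto_reset = 1;
	if (ioctl(kfd_fd, AMDKFD_IOC_CREATE_EVENT, &create))
		return -1;

	memset(&ev, 0, sizeof(ev));
	ev.event_id = create.event_id;

	wait.events_ptr = (uintptr_t)&ev;
	wait.num_events = 1;
	wait.wait_for_all = 1;
	wait.timeout = KFD_EVENT_TIMEOUT_INFINITE;
	if (ioctl(kfd_fd, AMDKFD_IOC_WAIT_EVENTS, &wait))
		return -1;

	return wait.wait_result == KFD_IOC_WAIT_RESULT_COMPLETE ? 0 : -1;
}
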
1126static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
1127					struct kfd_process *p, void *data)
1128{
1129	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
1130	struct kfd_process_device *pdd;
1131	struct kfd_dev *dev;
1132	long err;
1133
1134	dev = kfd_device_by_id(args->gpu_id);
1135	if (!dev)
1136		return -EINVAL;
1137
1138	mutex_lock(&p->mutex);
1139
1140	pdd = kfd_bind_process_to_device(dev, p);
1141	if (IS_ERR(pdd)) {
1142		err = PTR_ERR(pdd);
1143		goto bind_process_to_device_fail;
1144	}
1145
1146	pdd->qpd.sh_hidden_private_base = args->va_addr;
1147
1148	mutex_unlock(&p->mutex);
1149
1150	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
1151	    pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
1152		dev->kfd2kgd->set_scratch_backing_va(
1153			dev->kgd, args->va_addr, pdd->qpd.vmid);
1154
1155	return 0;
1156
1157bind_process_to_device_fail:
1158	mutex_unlock(&p->mutex);
1159	return err;
1160}
1161
1162static int kfd_ioctl_get_tile_config(struct file *filep,
1163		struct kfd_process *p, void *data)
1164{
1165	struct kfd_ioctl_get_tile_config_args *args = data;
1166	struct kfd_dev *dev;
1167	struct tile_config config;
1168	int err = 0;
1169
1170	dev = kfd_device_by_id(args->gpu_id);
1171	if (!dev)
1172		return -EINVAL;
1173
1174	amdgpu_amdkfd_get_tile_config(dev->kgd, &config);
1175
1176	args->gb_addr_config = config.gb_addr_config;
1177	args->num_banks = config.num_banks;
1178	args->num_ranks = config.num_ranks;
1179
1180	if (args->num_tile_configs > config.num_tile_configs)
1181		args->num_tile_configs = config.num_tile_configs;
1182	err = copy_to_user((void __user *)args->tile_config_ptr,
1183			config.tile_config_ptr,
1184			args->num_tile_configs * sizeof(uint32_t));
1185	if (err) {
1186		args->num_tile_configs = 0;
1187		return -EFAULT;
1188	}
1189
1190	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
1191		args->num_macro_tile_configs =
1192				config.num_macro_tile_configs;
1193	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
1194			config.macro_tile_config_ptr,
1195			args->num_macro_tile_configs * sizeof(uint32_t));
1196	if (err) {
1197		args->num_macro_tile_configs = 0;
1198		return -EFAULT;
1199	}
1200
1201	return 0;
1202}
1203
1204static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
1205				void *data)
1206{
1207	struct kfd_ioctl_acquire_vm_args *args = data;
1208	struct kfd_process_device *pdd;
1209	struct kfd_dev *dev;
1210	struct file *drm_file;
1211	int ret;
1212
1213	dev = kfd_device_by_id(args->gpu_id);
1214	if (!dev)
1215		return -EINVAL;
1216
1217	drm_file = fget(args->drm_fd);
1218	if (!drm_file)
1219		return -EINVAL;
1220
1221	mutex_lock(&p->mutex);
1222
1223	pdd = kfd_get_process_device_data(dev, p);
1224	if (!pdd) {
1225		ret = -EINVAL;
1226		goto err_unlock;
1227	}
1228
1229	if (pdd->drm_file) {
1230		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
1231		goto err_unlock;
1232	}
1233
1234	ret = kfd_process_device_init_vm(pdd, drm_file);
1235	if (ret)
1236		goto err_unlock;
1237	/* On success, the PDD keeps the drm_file reference */
1238	mutex_unlock(&p->mutex);
1239
1240	return 0;
1241
1242err_unlock:
1243	mutex_unlock(&p->mutex);
1244	fput(drm_file);
1245	return ret;
1246}
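
A hedged user-space sketch of pairing a DRM render node with the KFD process device, which must happen before GPU memory can be allocated for that device. The render-node path is an assumption; the ROCm runtime derives it from the topology's drm_render_minor property.

/* Hypothetical user-space sketch, not part of the kernel source. */
#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static int acquire_vm(int kfd_fd, uint32_t gpu_id, const char *render_node)
{
	struct kfd_ioctl_acquire_vm_args args = {0};
	int drm_fd = open(render_node, O_RDWR | O_CLOEXEC);

	if (drm_fd < 0)
		return -1;

	args.gpu_id = gpu_id;
	args.drm_fd = drm_fd;
	/* On success KFD keeps its own reference to the DRM file; the
	 * caller can keep drm_fd open for its own use.
	 */
	return ioctl(kfd_fd, AMDKFD_IOC_ACQUIRE_VM, &args);
}
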
1247
1248bool kfd_dev_is_large_bar(struct kfd_dev *dev)
1249{
1250	struct kfd_local_mem_info mem_info;
1251
1252	if (debug_largebar) {
1253		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
1254		return true;
1255	}
1256
1257	if (dev->device_info->needs_iommu_device)
1258		return false;
1259
1260	amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
1261	if (mem_info.local_mem_size_private == 0 &&
1262			mem_info.local_mem_size_public > 0)
1263		return true;
1264	return false;
1265}
1266
1267static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1268					struct kfd_process *p, void *data)
1269{
1270	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
1271	struct kfd_process_device *pdd;
1272	void *mem;
1273	struct kfd_dev *dev;
1274	int idr_handle;
1275	long err;
1276	uint64_t offset = args->mmap_offset;
1277	uint32_t flags = args->flags;
1278
1279	if (args->size == 0)
1280		return -EINVAL;
1281
1282	dev = kfd_device_by_id(args->gpu_id);
1283	if (!dev)
1284		return -EINVAL;
1285
1286	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
1287		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
1288		!kfd_dev_is_large_bar(dev)) {
1289		pr_err("Alloc host visible vram on small bar is not allowed\n");
1290		return -EINVAL;
1291	}
1292
1293	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
1294		if (args->size != kfd_doorbell_process_slice(dev))
1295			return -EINVAL;
1296		offset = kfd_get_process_doorbells(dev, p);
1297	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
1298		if (args->size != PAGE_SIZE)
1299			return -EINVAL;
1300		offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
1301		if (!offset)
1302			return -ENOMEM;
1303	}
1304
1305	mutex_lock(&p->mutex);
1306
1307	pdd = kfd_bind_process_to_device(dev, p);
1308	if (IS_ERR(pdd)) {
1309		err = PTR_ERR(pdd);
1310		goto err_unlock;
1311	}
1312
1313	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1314		dev->kgd, args->va_addr, args->size,
1315		pdd->vm, (struct kgd_mem **) &mem, &offset,
1316		flags);
1317
1318	if (err)
1319		goto err_unlock;
1320
1321	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1322	if (idr_handle < 0) {
1323		err = -EFAULT;
1324		goto err_free;
1325	}
1326
1327	/* Update the VRAM usage count */
1328	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
1329		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
1330
1331	mutex_unlock(&p->mutex);
1332
1333	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1334	args->mmap_offset = offset;
1335
1336	/* MMIO is mapped through kfd device
1337	 * Generate a kfd mmap offset
1338	 */
1339	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
1340		args->mmap_offset = KFD_MMAP_TYPE_MMIO
1341					| KFD_MMAP_GPU_ID(args->gpu_id);
1342
1343	return 0;
1344
1345err_free:
1346	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
1347err_unlock:
1348	mutex_unlock(&p->mutex);
1349	return err;
1350}
1351
1352static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
1353					struct kfd_process *p, void *data)
1354{
1355	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
1356	struct kfd_process_device *pdd;
1357	void *mem;
1358	struct kfd_dev *dev;
1359	int ret;
1360	uint64_t size = 0;
1361
1362	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1363	if (!dev)
1364		return -EINVAL;
1365
1366	mutex_lock(&p->mutex);
1367
1368	pdd = kfd_get_process_device_data(dev, p);
1369	if (!pdd) {
1370		pr_err("Process device data doesn't exist\n");
1371		ret = -EINVAL;
1372		goto err_unlock;
1373	}
1374
1375	mem = kfd_process_device_translate_handle(
1376		pdd, GET_IDR_HANDLE(args->handle));
1377	if (!mem) {
1378		ret = -EINVAL;
1379		goto err_unlock;
1380	}
1381
1382	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
1383						(struct kgd_mem *)mem, &size);
1384
1385	/* If freeing the buffer failed, leave the handle in place for
1386	 * clean-up during process tear-down.
1387	 */
1388	if (!ret)
1389		kfd_process_device_remove_obj_handle(
1390			pdd, GET_IDR_HANDLE(args->handle));
1391
1392	WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
1393
1394err_unlock:
1395	mutex_unlock(&p->mutex);
1396	return ret;
1397}
1398
1399static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
1400					struct kfd_process *p, void *data)
1401{
1402	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
1403	struct kfd_process_device *pdd, *peer_pdd;
1404	void *mem;
1405	struct kfd_dev *dev, *peer;
1406	long err = 0;
1407	int i;
1408	uint32_t *devices_arr = NULL;
1409
1410	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1411	if (!dev)
1412		return -EINVAL;
1413
1414	if (!args->n_devices) {
1415		pr_debug("Device IDs array empty\n");
1416		return -EINVAL;
1417	}
1418	if (args->n_success > args->n_devices) {
1419		pr_debug("n_success exceeds n_devices\n");
1420		return -EINVAL;
1421	}
1422
1423	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1424				    GFP_KERNEL);
1425	if (!devices_arr)
1426		return -ENOMEM;
1427
1428	err = copy_from_user(devices_arr,
1429			     (void __user *)args->device_ids_array_ptr,
1430			     args->n_devices * sizeof(*devices_arr));
1431	if (err != 0) {
1432		err = -EFAULT;
1433		goto copy_from_user_failed;
1434	}
1435
1436	mutex_lock(&p->mutex);
1437
1438	pdd = kfd_bind_process_to_device(dev, p);
1439	if (IS_ERR(pdd)) {
1440		err = PTR_ERR(pdd);
1441		goto bind_process_to_device_failed;
1442	}
1443
1444	mem = kfd_process_device_translate_handle(pdd,
1445						GET_IDR_HANDLE(args->handle));
1446	if (!mem) {
1447		err = -ENOMEM;
1448		goto get_mem_obj_from_handle_failed;
1449	}
1450
1451	for (i = args->n_success; i < args->n_devices; i++) {
1452		peer = kfd_device_by_id(devices_arr[i]);
1453		if (!peer) {
1454			pr_debug("Getting device by id failed for 0x%x\n",
1455				 devices_arr[i]);
1456			err = -EINVAL;
1457			goto get_mem_obj_from_handle_failed;
1458		}
1459
1460		peer_pdd = kfd_bind_process_to_device(peer, p);
1461		if (IS_ERR(peer_pdd)) {
1462			err = PTR_ERR(peer_pdd);
1463			goto get_mem_obj_from_handle_failed;
1464		}
1465		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1466			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1467		if (err) {
1468			pr_err("Failed to map to gpu %d/%d\n",
1469			       i, args->n_devices);
1470			goto map_memory_to_gpu_failed;
1471		}
1472		args->n_success = i+1;
1473	}
1474
1475	mutex_unlock(&p->mutex);
1476
1477	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
1478	if (err) {
1479		pr_debug("Sync memory failed, wait interrupted by user signal\n");
1480		goto sync_memory_failed;
1481	}
1482
1483	/* Flush TLBs after waiting for the page table updates to complete */
1484	for (i = 0; i < args->n_devices; i++) {
1485		peer = kfd_device_by_id(devices_arr[i]);
1486		if (WARN_ON_ONCE(!peer))
1487			continue;
1488		peer_pdd = kfd_get_process_device_data(peer, p);
1489		if (WARN_ON_ONCE(!peer_pdd))
1490			continue;
1491		kfd_flush_tlb(peer_pdd);
1492	}
1493
1494	kfree(devices_arr);
1495
1496	return err;
1497
1498bind_process_to_device_failed:
1499get_mem_obj_from_handle_failed:
1500map_memory_to_gpu_failed:
1501	mutex_unlock(&p->mutex);
1502copy_from_user_failed:
1503sync_memory_failed:
1504	kfree(devices_arr);
1505
1506	return err;
1507}
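
A hedged user-space sketch combining the allocate and map ioctls above: allocate VRAM at a caller-chosen virtual address inside the GPUVM aperture and map it on the same GPU. The helper, its parameters and the chosen flags are illustrative; a real runtime would pick the address from the apertures reported earlier.

/* Hypothetical user-space sketch, not part of the kernel source. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static int alloc_and_map_vram(int kfd_fd, uint32_t gpu_id,
			      uint64_t va_addr, uint64_t size,
			      uint64_t *handle)
{
	struct kfd_ioctl_alloc_memory_of_gpu_args alloc = {0};
	struct kfd_ioctl_map_memory_to_gpu_args map = {0};

	alloc.gpu_id = gpu_id;
	alloc.va_addr = va_addr;	/* inside the gpuvm aperture */
	alloc.size = size;
	alloc.flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM |
		      KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE;
	if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &alloc))
		return -1;

	map.handle = alloc.handle;
	map.device_ids_array_ptr = (uintptr_t)&gpu_id;
	map.n_devices = 1;
	map.n_success = 0;
	if (ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &map))
		return -1;	/* handle stays valid; caller may retry or free */

	*handle = alloc.handle;
	return 0;
}
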
1508
1509static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
1510					struct kfd_process *p, void *data)
1511{
1512	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
1513	struct kfd_process_device *pdd, *peer_pdd;
1514	void *mem;
1515	struct kfd_dev *dev, *peer;
1516	long err = 0;
1517	uint32_t *devices_arr = NULL, i;
1518
1519	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1520	if (!dev)
1521		return -EINVAL;
1522
1523	if (!args->n_devices) {
1524		pr_debug("Device IDs array empty\n");
1525		return -EINVAL;
1526	}
1527	if (args->n_success > args->n_devices) {
1528		pr_debug("n_success exceeds n_devices\n");
1529		return -EINVAL;
1530	}
1531
1532	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1533				    GFP_KERNEL);
1534	if (!devices_arr)
1535		return -ENOMEM;
1536
1537	err = copy_from_user(devices_arr,
1538			     (void __user *)args->device_ids_array_ptr,
1539			     args->n_devices * sizeof(*devices_arr));
1540	if (err != 0) {
1541		err = -EFAULT;
1542		goto copy_from_user_failed;
1543	}
1544
1545	mutex_lock(&p->mutex);
1546
1547	pdd = kfd_get_process_device_data(dev, p);
1548	if (!pdd) {
1549		err = -EINVAL;
1550		goto bind_process_to_device_failed;
1551	}
1552
1553	mem = kfd_process_device_translate_handle(pdd,
1554						GET_IDR_HANDLE(args->handle));
1555	if (!mem) {
1556		err = -ENOMEM;
1557		goto get_mem_obj_from_handle_failed;
1558	}
1559
1560	for (i = args->n_success; i < args->n_devices; i++) {
1561		peer = kfd_device_by_id(devices_arr[i]);
1562		if (!peer) {
1563			err = -EINVAL;
1564			goto get_mem_obj_from_handle_failed;
1565		}
1566
1567		peer_pdd = kfd_get_process_device_data(peer, p);
1568		if (!peer_pdd) {
1569			err = -ENODEV;
1570			goto get_mem_obj_from_handle_failed;
1571		}
1572		err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1573			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1574		if (err) {
1575			pr_err("Failed to unmap from gpu %d/%d\n",
1576			       i, args->n_devices);
1577			goto unmap_memory_from_gpu_failed;
1578		}
1579		args->n_success = i+1;
1580	}
1581	kfree(devices_arr);
1582
1583	mutex_unlock(&p->mutex);
1584
1585	return 0;
1586
1587bind_process_to_device_failed:
1588get_mem_obj_from_handle_failed:
1589unmap_memory_from_gpu_failed:
1590	mutex_unlock(&p->mutex);
1591copy_from_user_failed:
1592	kfree(devices_arr);
1593	return err;
1594}
1595
1596static int kfd_ioctl_alloc_queue_gws(struct file *filep,
1597		struct kfd_process *p, void *data)
1598{
1599	int retval;
1600	struct kfd_ioctl_alloc_queue_gws_args *args = data;
1601	struct queue *q;
1602	struct kfd_dev *dev;
1603
1604	mutex_lock(&p->mutex);
1605	q = pqm_get_user_queue(&p->pqm, args->queue_id);
1606
1607	if (q) {
1608		dev = q->device;
1609	} else {
1610		retval = -EINVAL;
1611		goto out_unlock;
1612	}
1613
1614	if (!dev->gws) {
1615		retval = -ENODEV;
1616		goto out_unlock;
1617	}
1618
1619	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
1620		retval = -ENODEV;
1621		goto out_unlock;
1622	}
1623
1624	retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
1625	mutex_unlock(&p->mutex);
1626
1627	args->first_gws = 0;
1628	return retval;
1629
1630out_unlock:
1631	mutex_unlock(&p->mutex);
1632	return retval;
1633}
1634
1635static int kfd_ioctl_get_dmabuf_info(struct file *filep,
1636		struct kfd_process *p, void *data)
1637{
1638	struct kfd_ioctl_get_dmabuf_info_args *args = data;
1639	struct kfd_dev *dev = NULL;
1640	struct kgd_dev *dma_buf_kgd;
1641	void *metadata_buffer = NULL;
1642	uint32_t flags;
1643	unsigned int i;
1644	int r;
1645
1646	/* Find a KFD GPU device that supports the get_dmabuf_info query */
1647	for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
1648		if (dev)
1649			break;
1650	if (!dev)
1651		return -EINVAL;
1652
1653	if (args->metadata_ptr) {
1654		metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
1655		if (!metadata_buffer)
1656			return -ENOMEM;
1657	}
1658
1659	/* Get dmabuf info from KGD */
1660	r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
1661					  &dma_buf_kgd, &args->size,
1662					  metadata_buffer, args->metadata_size,
1663					  &args->metadata_size, &flags);
1664	if (r)
1665		goto exit;
1666
1667	/* Reverse-lookup gpu_id from kgd pointer */
1668	dev = kfd_device_by_kgd(dma_buf_kgd);
1669	if (!dev) {
1670		r = -EINVAL;
1671		goto exit;
1672	}
1673	args->gpu_id = dev->id;
1674	args->flags = flags;
1675
1676	/* Copy metadata buffer to user mode */
1677	if (metadata_buffer) {
1678		r = copy_to_user((void __user *)args->metadata_ptr,
1679				 metadata_buffer, args->metadata_size);
1680		if (r != 0)
1681			r = -EFAULT;
1682	}
1683
1684exit:
1685	kfree(metadata_buffer);
1686
1687	return r;
1688}
1689
1690static int kfd_ioctl_import_dmabuf(struct file *filep,
1691				   struct kfd_process *p, void *data)
1692{
1693	struct kfd_ioctl_import_dmabuf_args *args = data;
1694	struct kfd_process_device *pdd;
1695	struct dma_buf *dmabuf;
1696	struct kfd_dev *dev;
1697	int idr_handle;
1698	uint64_t size;
1699	void *mem;
1700	int r;
1701
1702	dev = kfd_device_by_id(args->gpu_id);
1703	if (!dev)
1704		return -EINVAL;
1705
1706	dmabuf = dma_buf_get(args->dmabuf_fd);
1707	if (IS_ERR(dmabuf))
1708		return PTR_ERR(dmabuf);
1709
1710	mutex_lock(&p->mutex);
1711
1712	pdd = kfd_bind_process_to_device(dev, p);
1713	if (IS_ERR(pdd)) {
1714		r = PTR_ERR(pdd);
1715		goto err_unlock;
1716	}
1717
1718	r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
1719					      args->va_addr, pdd->vm,
1720					      (struct kgd_mem **)&mem, &size,
1721					      NULL);
1722	if (r)
1723		goto err_unlock;
1724
1725	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1726	if (idr_handle < 0) {
1727		r = -EFAULT;
1728		goto err_free;
1729	}
1730
1731	mutex_unlock(&p->mutex);
1732
1733	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1734
1735	return 0;
1736
1737err_free:
1738	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
1739err_unlock:
1740	mutex_unlock(&p->mutex);
1741	return r;
1742}
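
A hedged user-space sketch of importing a dma-buf fd (for example one exported by a DRM driver) into this process's GPUVM on a given GPU, at a caller-chosen virtual address. The helper name is illustrative; the returned handle is then used with the MAP/UNMAP/FREE_MEMORY_OF_GPU ioctls.

/* Hypothetical user-space sketch, not part of the kernel source. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static int import_dmabuf(int kfd_fd, uint32_t gpu_id, int dmabuf_fd,
			 uint64_t va_addr, uint64_t *handle)
{
	struct kfd_ioctl_import_dmabuf_args args = {0};

	args.gpu_id = gpu_id;
	args.dmabuf_fd = dmabuf_fd;
	args.va_addr = va_addr;
	if (ioctl(kfd_fd, AMDKFD_IOC_IMPORT_DMABUF, &args))
		return -1;

	*handle = args.handle;
	return 0;
}
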
1743
1744/* Handle requests for watching SMI events */
1745static int kfd_ioctl_smi_events(struct file *filep,
1746				struct kfd_process *p, void *data)
1747{
1748	struct kfd_ioctl_smi_events_args *args = data;
1749	struct kfd_dev *dev;
1750
1751	dev = kfd_device_by_id(args->gpuid);
1752	if (!dev)
1753		return -EINVAL;
1754
1755	return kfd_smi_event_open(dev, &args->anon_fd);
1756}
1757
1758#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
1759	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
1760			    .cmd_drv = 0, .name = #ioctl}
1761
1762/** Ioctl table */
1763static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
1764	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
1765			kfd_ioctl_get_version, 0),
1766
1767	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
1768			kfd_ioctl_create_queue, 0),
1769
1770	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
1771			kfd_ioctl_destroy_queue, 0),
1772
1773	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
1774			kfd_ioctl_set_memory_policy, 0),
1775
1776	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
1777			kfd_ioctl_get_clock_counters, 0),
1778
1779	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
1780			kfd_ioctl_get_process_apertures, 0),
1781
1782	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
1783			kfd_ioctl_update_queue, 0),
1784
1785	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
1786			kfd_ioctl_create_event, 0),
1787
1788	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
1789			kfd_ioctl_destroy_event, 0),
1790
1791	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
1792			kfd_ioctl_set_event, 0),
1793
1794	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
1795			kfd_ioctl_reset_event, 0),
1796
1797	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
1798			kfd_ioctl_wait_events, 0),
1799
1800	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
1801			kfd_ioctl_dbg_register, 0),
1802
1803	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
1804			kfd_ioctl_dbg_unregister, 0),
1805
1806	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
1807			kfd_ioctl_dbg_address_watch, 0),
1808
1809	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
1810			kfd_ioctl_dbg_wave_control, 0),
1811
1812	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
1813			kfd_ioctl_set_scratch_backing_va, 0),
1814
1815	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
1816			kfd_ioctl_get_tile_config, 0),
1817
1818	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
1819			kfd_ioctl_set_trap_handler, 0),
1820
1821	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
1822			kfd_ioctl_get_process_apertures_new, 0),
1823
1824	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
1825			kfd_ioctl_acquire_vm, 0),
1826
1827	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
1828			kfd_ioctl_alloc_memory_of_gpu, 0),
1829
1830	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
1831			kfd_ioctl_free_memory_of_gpu, 0),
1832
1833	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
1834			kfd_ioctl_map_memory_to_gpu, 0),
1835
1836	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
1837			kfd_ioctl_unmap_memory_from_gpu, 0),
1838
1839	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
1840			kfd_ioctl_set_cu_mask, 0),
1841
1842	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
1843			kfd_ioctl_get_queue_wave_state, 0),
1844
1845	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
1846				kfd_ioctl_get_dmabuf_info, 0),
1847
1848	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
1849				kfd_ioctl_import_dmabuf, 0),
1850
1851	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
1852			kfd_ioctl_alloc_queue_gws, 0),
1853
1854	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
1855			kfd_ioctl_smi_events, 0),
1856};
1857
1858#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
1859
1860static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
1861{
1862	struct kfd_process *process;
1863	amdkfd_ioctl_t *func;
1864	const struct amdkfd_ioctl_desc *ioctl = NULL;
1865	unsigned int nr = _IOC_NR(cmd);
1866	char stack_kdata[128];
1867	char *kdata = NULL;
1868	unsigned int usize, asize;
1869	int retcode = -EINVAL;
1870
1871	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
1872		goto err_i1;
1873
1874	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
1875		u32 amdkfd_size;
1876
1877		ioctl = &amdkfd_ioctls[nr];
1878
1879		amdkfd_size = _IOC_SIZE(ioctl->cmd);
1880		usize = asize = _IOC_SIZE(cmd);
1881		if (amdkfd_size > asize)
1882			asize = amdkfd_size;
1883
1884		cmd = ioctl->cmd;
1885	} else
1886		goto err_i1;
1887
1888	dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
1889
1890	/* Get the process struct from the filep. Only the process
1891	 * that opened /dev/kfd can use the file descriptor. Child
1892	 * processes need to create their own KFD device context.
1893	 */
1894	process = filep->private_data;
1895	if (process->lead_thread != current->group_leader) {
1896		dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
1897		retcode = -EBADF;
1898		goto err_i1;
1899	}
1900
1901	/* Do not trust userspace, use our own definition */
1902	func = ioctl->func;
1903
1904	if (unlikely(!func)) {
1905		dev_dbg(kfd_device, "no function\n");
1906		retcode = -EINVAL;
1907		goto err_i1;
1908	}
1909
1910	if (cmd & (IOC_IN | IOC_OUT)) {
1911		if (asize <= sizeof(stack_kdata)) {
1912			kdata = stack_kdata;
1913		} else {
1914			kdata = kmalloc(asize, GFP_KERNEL);
1915			if (!kdata) {
1916				retcode = -ENOMEM;
1917				goto err_i1;
1918			}
1919		}
1920		if (asize > usize)
1921			memset(kdata + usize, 0, asize - usize);
1922	}
1923
1924	if (cmd & IOC_IN) {
1925		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
1926			retcode = -EFAULT;
1927			goto err_i1;
1928		}
1929	} else if (cmd & IOC_OUT) {
1930		memset(kdata, 0, usize);
1931	}
1932
1933	retcode = func(filep, process, kdata);
1934
1935	if (cmd & IOC_OUT)
1936		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
1937			retcode = -EFAULT;
1938
1939err_i1:
1940	if (!ioctl)
1941		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
1942			  task_pid_nr(current), cmd, nr);
1943
1944	if (kdata != stack_kdata)
1945		kfree(kdata);
1946
1947	if (retcode)
1948		dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
1949				nr, arg, retcode);
1950
1951	return retcode;
1952}
1953
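/*
 * Map the single MMIO remap page exposed by amdgpu into the process
 * address space: exactly one page, uncached, and marked VM_DONTCOPY so
 * it is not inherited across fork().
 */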
1954static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
1955		      struct vm_area_struct *vma)
1956{
1957	phys_addr_t address;
1958	int ret;
1959
1960	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1961		return -EINVAL;
1962
1963	address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
1964
1965	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
1966				VM_DONTDUMP | VM_PFNMAP;
1967
1968	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1969
1970	pr_debug("pasid 0x%x mapping mmio page\n"
1971		 "     target user address == 0x%08llX\n"
1972		 "     physical address    == 0x%08llX\n"
1973		 "     vm_flags            == 0x%04lX\n"
1974		 "     size                == 0x%04lX\n",
1975		 process->pasid, (unsigned long long) vma->vm_start,
1976		 address, vma->vm_flags, PAGE_SIZE);
1977
1978	ret = io_remap_pfn_range(vma,
1979				vma->vm_start,
1980				address >> PAGE_SHIFT,
1981				PAGE_SIZE,
1982				vma->vm_page_prot);
1983	return ret;
1984}
1985
1986
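/*
 * The mmap offset encodes what is being mapped: the high bits select
 * the mapping type (doorbells, events, reserved memory or MMIO) and,
 * where a device is involved, carry the GPU id of the target device.
 */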
1987static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
1988{
1989	struct kfd_process *process;
1990	struct kfd_dev *dev = NULL;
1991	unsigned long mmap_offset;
1992	unsigned int gpu_id;
1993
1994	process = kfd_get_process(current);
1995	if (IS_ERR(process))
1996		return PTR_ERR(process);
1997
1998	mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
1999	gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
2000	if (gpu_id)
2001		dev = kfd_device_by_id(gpu_id);
2002
2003	switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
2004	case KFD_MMAP_TYPE_DOORBELL:
2005		if (!dev)
2006			return -ENODEV;
2007		return kfd_doorbell_mmap(dev, process, vma);
2008
2009	case KFD_MMAP_TYPE_EVENTS:
2010		return kfd_event_mmap(process, vma);
2011
2012	case KFD_MMAP_TYPE_RESERVED_MEM:
2013		if (!dev)
2014			return -ENODEV;
2015		return kfd_reserved_mem_mmap(dev, process, vma);
2016	case KFD_MMAP_TYPE_MMIO:
2017		if (!dev)
2018			return -ENODEV;
2019		return kfd_mmio_mmap(dev, process, vma);
2020	}
2021
2022	return -EFAULT;
2023}
v4.6
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 */
  22
  23#include <linux/device.h>
  24#include <linux/export.h>
  25#include <linux/err.h>
  26#include <linux/fs.h>
  27#include <linux/sched.h>
  28#include <linux/slab.h>
  29#include <linux/uaccess.h>
  30#include <linux/compat.h>
  31#include <uapi/linux/kfd_ioctl.h>
  32#include <linux/time.h>
  33#include <linux/mm.h>
  34#include <linux/mman.h>
  35#include <asm/processor.h>
  36#include "kfd_priv.h"
  37#include "kfd_device_queue_manager.h"
  38#include "kfd_dbgmgr.h"
  39
  40static long kfd_ioctl(struct file *, unsigned int, unsigned long);
  41static int kfd_open(struct inode *, struct file *);
  42static int kfd_mmap(struct file *, struct vm_area_struct *);
  43
  44static const char kfd_dev_name[] = "kfd";
  45
  46static const struct file_operations kfd_fops = {
  47	.owner = THIS_MODULE,
  48	.unlocked_ioctl = kfd_ioctl,
  49	.compat_ioctl = kfd_ioctl,
  50	.open = kfd_open,
  51	.mmap = kfd_mmap,
  52};
  53
  54static int kfd_char_dev_major = -1;
  55static struct class *kfd_class;
  56struct device *kfd_device;
  57
  58int kfd_chardev_init(void)
  59{
  60	int err = 0;
  61
  62	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
  63	err = kfd_char_dev_major;
  64	if (err < 0)
  65		goto err_register_chrdev;
  66
  67	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
  68	err = PTR_ERR(kfd_class);
  69	if (IS_ERR(kfd_class))
  70		goto err_class_create;
  71
  72	kfd_device = device_create(kfd_class, NULL,
  73					MKDEV(kfd_char_dev_major, 0),
  74					NULL, kfd_dev_name);
  75	err = PTR_ERR(kfd_device);
  76	if (IS_ERR(kfd_device))
  77		goto err_device_create;
  78
  79	return 0;
  80
  81err_device_create:
  82	class_destroy(kfd_class);
  83err_class_create:
  84	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
  85err_register_chrdev:
  86	return err;
  87}
  88
  89void kfd_chardev_exit(void)
  90{
  91	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
  92	class_destroy(kfd_class);
  93	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
  94}
  95
  96struct device *kfd_chardev(void)
  97{
  98	return kfd_device;
  99}
 100
 101
 102static int kfd_open(struct inode *inode, struct file *filep)
 103{
 104	struct kfd_process *process;
 105	bool is_32bit_user_mode;
 106
 107	if (iminor(inode) != 0)
 108		return -ENODEV;
 109
 110	is_32bit_user_mode = in_compat_syscall();
 111
  112	if (is_32bit_user_mode) {
 113		dev_warn(kfd_device,
 114			"Process %d (32-bit) failed to open /dev/kfd\n"
 115			"32-bit processes are not supported by amdkfd\n",
 116			current->pid);
 117		return -EPERM;
 118	}
 119
 120	process = kfd_create_process(current);
 121	if (IS_ERR(process))
 122		return PTR_ERR(process);
 123
 124	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
 125		process->pasid, process->is_32bit_user_mode);
 126
 127	return 0;
 128}
 129
 130static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
 131					void *data)
 132{
 133	struct kfd_ioctl_get_version_args *args = data;
 134	int err = 0;
 135
 136	args->major_version = KFD_IOCTL_MAJOR_VERSION;
 137	args->minor_version = KFD_IOCTL_MINOR_VERSION;
 138
 139	return err;
 140}
 141
 142static int set_queue_properties_from_user(struct queue_properties *q_properties,
 143				struct kfd_ioctl_create_queue_args *args)
 144{
 145	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
  146		pr_err("kfd: queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
 147		return -EINVAL;
 148	}
 149
 150	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
  151		pr_err("kfd: queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
 152		return -EINVAL;
 153	}
 154
 155	if ((args->ring_base_address) &&
 156		(!access_ok(VERIFY_WRITE,
 157			(const void __user *) args->ring_base_address,
 158			sizeof(uint64_t)))) {
 159		pr_err("kfd: can't access ring base address\n");
 160		return -EFAULT;
 161	}
 162
 163	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
 164		pr_err("kfd: ring size must be a power of 2 or 0\n");
 165		return -EINVAL;
 166	}
 167
 168	if (!access_ok(VERIFY_WRITE,
 169			(const void __user *) args->read_pointer_address,
 170			sizeof(uint32_t))) {
 171		pr_err("kfd: can't access read pointer\n");
 172		return -EFAULT;
 173	}
 174
 175	if (!access_ok(VERIFY_WRITE,
 176			(const void __user *) args->write_pointer_address,
 177			sizeof(uint32_t))) {
 178		pr_err("kfd: can't access write pointer\n");
 179		return -EFAULT;
 180	}
 181
 182	if (args->eop_buffer_address &&
 183		!access_ok(VERIFY_WRITE,
 184			(const void __user *) args->eop_buffer_address,
 185			sizeof(uint32_t))) {
  186		pr_debug("kfd: can't access eop buffer\n");
 187		return -EFAULT;
 188	}
 189
 190	if (args->ctx_save_restore_address &&
 191		!access_ok(VERIFY_WRITE,
 192			(const void __user *) args->ctx_save_restore_address,
 193			sizeof(uint32_t))) {
  194		pr_debug("kfd: can't access ctx save restore buffer\n");
 195		return -EFAULT;
 196	}
 197
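	/*
	 * Only basic access_ok() validation is done above; the ring base,
	 * read/write pointer and context save/restore addresses are stored
	 * as user-space addresses in the queue properties and consumed when
	 * the queue is actually created.
	 */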
 198	q_properties->is_interop = false;
 199	q_properties->queue_percent = args->queue_percentage;
 200	q_properties->priority = args->queue_priority;
 201	q_properties->queue_address = args->ring_base_address;
 202	q_properties->queue_size = args->ring_size;
 203	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
 204	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
 205	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
 206	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
 207	q_properties->ctx_save_restore_area_address =
 208			args->ctx_save_restore_address;
 209	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
 210	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
 211		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
 212		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
 213	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
 214		q_properties->type = KFD_QUEUE_TYPE_SDMA;
 215	else
 216		return -ENOTSUPP;
 217
 218	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
 219		q_properties->format = KFD_QUEUE_FORMAT_AQL;
 220	else
 221		q_properties->format = KFD_QUEUE_FORMAT_PM4;
 222
 223	pr_debug("Queue Percentage (%d, %d)\n",
 224			q_properties->queue_percent, args->queue_percentage);
 225
 226	pr_debug("Queue Priority (%d, %d)\n",
 227			q_properties->priority, args->queue_priority);
 228
 229	pr_debug("Queue Address (0x%llX, 0x%llX)\n",
 230			q_properties->queue_address, args->ring_base_address);
 231
 232	pr_debug("Queue Size (0x%llX, %u)\n",
 233			q_properties->queue_size, args->ring_size);
 234
 235	pr_debug("Queue r/w Pointers (0x%llX, 0x%llX)\n",
 236			(uint64_t) q_properties->read_ptr,
 237			(uint64_t) q_properties->write_ptr);
 238
 239	pr_debug("Queue Format (%d)\n", q_properties->format);
 240
 241	pr_debug("Queue EOP (0x%llX)\n", q_properties->eop_ring_buffer_address);
 242
  243	pr_debug("Queue CTX save area (0x%llX)\n",
 244			q_properties->ctx_save_restore_area_address);
 245
 246	return 0;
 247}
 248
 249static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 250					void *data)
 251{
 252	struct kfd_ioctl_create_queue_args *args = data;
 253	struct kfd_dev *dev;
 254	int err = 0;
 255	unsigned int queue_id;
 256	struct kfd_process_device *pdd;
 257	struct queue_properties q_properties;
 258
 259	memset(&q_properties, 0, sizeof(struct queue_properties));
 260
 261	pr_debug("kfd: creating queue ioctl\n");
 262
 263	err = set_queue_properties_from_user(&q_properties, args);
 264	if (err)
 265		return err;
 266
 267	pr_debug("kfd: looking for gpu id 0x%x\n", args->gpu_id);
 268	dev = kfd_device_by_id(args->gpu_id);
 269	if (dev == NULL) {
 270		pr_debug("kfd: gpu id 0x%x was not found\n", args->gpu_id);
 271		return -EINVAL;
 272	}
 273
 274	mutex_lock(&p->mutex);
 275
 276	pdd = kfd_bind_process_to_device(dev, p);
 277	if (IS_ERR(pdd)) {
 278		err = -ESRCH;
 279		goto err_bind_process;
 280	}
 281
 282	pr_debug("kfd: creating queue for PASID %d on GPU 0x%x\n",
 283			p->pasid,
 284			dev->id);
 285
 286	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties,
 287				0, q_properties.type, &queue_id);
 288	if (err != 0)
 289		goto err_create_queue;
 290
 291	args->queue_id = queue_id;
 292
 293
 294	/* Return gpu_id as doorbell offset for mmap usage */
 295	args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id);
 296	args->doorbell_offset <<= PAGE_SHIFT;
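	/*
	 * User space is expected to pass this offset back to mmap() on the
	 * /dev/kfd file descriptor; kfd_mmap() recognises the doorbell mask
	 * in vm_pgoff and maps the corresponding doorbell page.
	 */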
 297
 298	mutex_unlock(&p->mutex);
 299
 300	pr_debug("kfd: queue id %d was created successfully\n", args->queue_id);
 301
 302	pr_debug("ring buffer address == 0x%016llX\n",
 303			args->ring_base_address);
 304
 305	pr_debug("read ptr address    == 0x%016llX\n",
 306			args->read_pointer_address);
 307
 308	pr_debug("write ptr address   == 0x%016llX\n",
 309			args->write_pointer_address);
 310
 311	return 0;
 312
 313err_create_queue:
 314err_bind_process:
 315	mutex_unlock(&p->mutex);
 316	return err;
 317}
 318
 319static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
 320					void *data)
 321{
 322	int retval;
 323	struct kfd_ioctl_destroy_queue_args *args = data;
 324
 325	pr_debug("kfd: destroying queue id %d for PASID %d\n",
 326				args->queue_id,
 327				p->pasid);
 328
 329	mutex_lock(&p->mutex);
 330
 331	retval = pqm_destroy_queue(&p->pqm, args->queue_id);
 332
 333	mutex_unlock(&p->mutex);
 334	return retval;
 335}
 336
 337static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
 338					void *data)
 339{
 340	int retval;
 341	struct kfd_ioctl_update_queue_args *args = data;
 342	struct queue_properties properties;
 343
 344	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
  345		pr_err("kfd: queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
 346		return -EINVAL;
 347	}
 348
 349	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
  350		pr_err("kfd: queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
 351		return -EINVAL;
 352	}
 353
 354	if ((args->ring_base_address) &&
 355		(!access_ok(VERIFY_WRITE,
 356			(const void __user *) args->ring_base_address,
 357			sizeof(uint64_t)))) {
 358		pr_err("kfd: can't access ring base address\n");
 359		return -EFAULT;
 360	}
 361
 362	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
 363		pr_err("kfd: ring size must be a power of 2 or 0\n");
 364		return -EINVAL;
 365	}
 366
 367	properties.queue_address = args->ring_base_address;
 368	properties.queue_size = args->ring_size;
 369	properties.queue_percent = args->queue_percentage;
 370	properties.priority = args->queue_priority;
 371
 372	pr_debug("kfd: updating queue id %d for PASID %d\n",
 373			args->queue_id, p->pasid);
 374
 375	mutex_lock(&p->mutex);
 376
 377	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
 378
 379	mutex_unlock(&p->mutex);
 380
 381	return retval;
 382}
 383
 384static int kfd_ioctl_set_memory_policy(struct file *filep,
 385					struct kfd_process *p, void *data)
 386{
 387	struct kfd_ioctl_set_memory_policy_args *args = data;
 388	struct kfd_dev *dev;
 389	int err = 0;
 390	struct kfd_process_device *pdd;
 391	enum cache_policy default_policy, alternate_policy;
 392
 393	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
 394	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
 395		return -EINVAL;
 396	}
 397
 398	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
 399	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
 400		return -EINVAL;
 401	}
 402
 403	dev = kfd_device_by_id(args->gpu_id);
 404	if (dev == NULL)
 405		return -EINVAL;
 406
 407	mutex_lock(&p->mutex);
 408
 409	pdd = kfd_bind_process_to_device(dev, p);
 410	if (IS_ERR(pdd)) {
 411		err = -ESRCH;
 412		goto out;
 413	}
 414
 415	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
 416			 ? cache_policy_coherent : cache_policy_noncoherent;
 417
 418	alternate_policy =
 419		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
 420		   ? cache_policy_coherent : cache_policy_noncoherent;
 421
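	/*
	 * The alternate aperture (base + size) is the address range to which
	 * the alternate cache policy is meant to apply; the default policy
	 * covers everything else. The device queue manager applies the
	 * setting to this process's per-device queue data (&pdd->qpd).
	 */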
 422	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
 423				&pdd->qpd,
 424				default_policy,
 425				alternate_policy,
 426				(void __user *)args->alternate_aperture_base,
 427				args->alternate_aperture_size))
 428		err = -EINVAL;
 429
 430out:
 431	mutex_unlock(&p->mutex);
 432
 433	return err;
 434}
 435
 436static int kfd_ioctl_dbg_register(struct file *filep,
 437				struct kfd_process *p, void *data)
 438{
 439	struct kfd_ioctl_dbg_register_args *args = data;
 440	struct kfd_dev *dev;
 441	struct kfd_dbgmgr *dbgmgr_ptr;
 442	struct kfd_process_device *pdd;
 443	bool create_ok;
 444	long status = 0;
 445
 446	dev = kfd_device_by_id(args->gpu_id);
 447	if (dev == NULL)
 448		return -EINVAL;
 449
 450	if (dev->device_info->asic_family == CHIP_CARRIZO) {
 451		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
 452		return -EINVAL;
 453	}
 454
 455	mutex_lock(kfd_get_dbgmgr_mutex());
 456	mutex_lock(&p->mutex);
 457
 458	/*
  459	 * make sure that we have pdd, if this is the first queue created
  460	 * for this process
 461	 */
 462	pdd = kfd_bind_process_to_device(dev, p);
 463	if (IS_ERR(pdd)) {
 464		mutex_unlock(&p->mutex);
 465		mutex_unlock(kfd_get_dbgmgr_mutex());
 466		return PTR_ERR(pdd);
 467	}
 468
 469	if (dev->dbgmgr == NULL) {
 470		/* In case of a legal call, we have no dbgmgr yet */
 471		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
 472		if (create_ok) {
 473			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
 474			if (status != 0)
 475				kfd_dbgmgr_destroy(dbgmgr_ptr);
 476			else
 477				dev->dbgmgr = dbgmgr_ptr;
 478		}
 479	} else {
 480		pr_debug("debugger already registered\n");
 481		status = -EINVAL;
 482	}
 483
 484	mutex_unlock(&p->mutex);
 485	mutex_unlock(kfd_get_dbgmgr_mutex());
 486
 487	return status;
 488}
 489
  490static int kfd_ioctl_dbg_unregister(struct file *filep,
 491				struct kfd_process *p, void *data)
 492{
 493	struct kfd_ioctl_dbg_unregister_args *args = data;
 494	struct kfd_dev *dev;
 495	long status;
 496
 497	dev = kfd_device_by_id(args->gpu_id);
 498	if (dev == NULL)
 499		return -EINVAL;
 500
 501	if (dev->device_info->asic_family == CHIP_CARRIZO) {
  502		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
 503		return -EINVAL;
 504	}
 505
 506	mutex_lock(kfd_get_dbgmgr_mutex());
 507
 508	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
 509	if (status == 0) {
 510		kfd_dbgmgr_destroy(dev->dbgmgr);
 511		dev->dbgmgr = NULL;
 512	}
 513
 514	mutex_unlock(kfd_get_dbgmgr_mutex());
 515
 516	return status;
 517}
 518
  519/*
  520 * Parse and generate a variable size data structure for address watch.
  521 * The total buffer size and the number of watch points are limited in
  522 * order to prevent kernel abuse. (This has no bearing on the much smaller
  523 * HW limitation, which is enforced by the dbgdev module.)
  524 * Please also note that the watch addresses themselves are not "copied
  525 * from user", since they are set into the HW using the user mode values.
  526 *
  527 */
 528static int kfd_ioctl_dbg_address_watch(struct file *filep,
 529					struct kfd_process *p, void *data)
 530{
 531	struct kfd_ioctl_dbg_address_watch_args *args = data;
 532	struct kfd_dev *dev;
 533	struct dbg_address_watch_info aw_info;
 534	unsigned char *args_buff;
 535	long status;
 536	void __user *cmd_from_user;
 537	uint64_t watch_mask_value = 0;
 538	unsigned int args_idx = 0;
 539
 540	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
 541
 542	dev = kfd_device_by_id(args->gpu_id);
 543	if (dev == NULL)
 544		return -EINVAL;
 545
 546	if (dev->device_info->asic_family == CHIP_CARRIZO) {
 547		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
 548		return -EINVAL;
 549	}
 550
 551	cmd_from_user = (void __user *) args->content_ptr;
 552
 553	/* Validate arguments */
 554
 555	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
 556		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
 557		(cmd_from_user == NULL))
 558		return -EINVAL;
 559
 560	/* this is the actual buffer to work with */
 561	args_buff = memdup_user(cmd_from_user,
 562				args->buf_size_in_bytes - sizeof(*args));
 563	if (IS_ERR(args_buff))
 564		return PTR_ERR(args_buff);
 565
 566	aw_info.process = p;
 567
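	/*
	 * The payload after the args header is parsed as a packed sequence:
	 *   u32                 num_watch_points
	 *   HSA_DBG_WATCH_MODE  watch_mode[num_watch_points]
	 *   u64                 watch_address[num_watch_points]
	 *   u64                 watch_mask[num_watch_points] (or a single 0)
	 */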
 568	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
 569	args_idx += sizeof(aw_info.num_watch_points);
 570
 571	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
 572	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
 573
 574	/*
 575	 * set watch address base pointer to point on the array base
 576	 * within args_buff
 577	 */
 578	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
 579
 580	/* skip over the addresses buffer */
 581	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
 582
 583	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
 584		kfree(args_buff);
 585		return -EINVAL;
 586	}
 587
 588	watch_mask_value = (uint64_t) args_buff[args_idx];
 589
 590	if (watch_mask_value > 0) {
 591		/*
 592		 * There is an array of masks.
 593		 * set watch mask base pointer to point on the array base
 594		 * within args_buff
 595		 */
 596		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
 597
 598		/* skip over the masks buffer */
 599		args_idx += sizeof(aw_info.watch_mask) *
 600				aw_info.num_watch_points;
 601	} else {
 602		/* just the NULL mask, set to NULL and skip over it */
 603		aw_info.watch_mask = NULL;
 604		args_idx += sizeof(aw_info.watch_mask);
 605	}
 606
  607	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
 608		kfree(args_buff);
 609		return -EINVAL;
 610	}
 611
 612	/* Currently HSA Event is not supported for DBG */
 613	aw_info.watch_event = NULL;
 614
 615	mutex_lock(kfd_get_dbgmgr_mutex());
 616
 617	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
 618
 619	mutex_unlock(kfd_get_dbgmgr_mutex());
 620
 621	kfree(args_buff);
 622
 623	return status;
 624}
 625
 626/* Parse and generate fixed size data structure for wave control */
 627static int kfd_ioctl_dbg_wave_control(struct file *filep,
 628					struct kfd_process *p, void *data)
 629{
 630	struct kfd_ioctl_dbg_wave_control_args *args = data;
 631	struct kfd_dev *dev;
 632	struct dbg_wave_control_info wac_info;
 633	unsigned char *args_buff;
 634	uint32_t computed_buff_size;
 635	long status;
 636	void __user *cmd_from_user;
 637	unsigned int args_idx = 0;
 638
 639	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
 640
 641	/* we use compact form, independent of the packing attribute value */
 642	computed_buff_size = sizeof(*args) +
 643				sizeof(wac_info.mode) +
 644				sizeof(wac_info.operand) +
 645				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
 646				sizeof(wac_info.dbgWave_msg.MemoryVA) +
 647				sizeof(wac_info.trapId);
 648
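	/*
	 * The payload that follows the args header is expected in this
	 * order: operand, mode, trapId, then the DbgWaveMsg value; it is
	 * unpacked field by field below.
	 */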
 649	dev = kfd_device_by_id(args->gpu_id);
 650	if (dev == NULL)
 651		return -EINVAL;
 652
 653	if (dev->device_info->asic_family == CHIP_CARRIZO) {
 654		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
 655		return -EINVAL;
 656	}
 657
 658	/* input size must match the computed "compact" size */
 659	if (args->buf_size_in_bytes != computed_buff_size) {
 660		pr_debug("size mismatch, computed : actual %u : %u\n",
  661				computed_buff_size, args->buf_size_in_bytes);
 662		return -EINVAL;
 663	}
 664
 665	cmd_from_user = (void __user *) args->content_ptr;
 666
 667	if (cmd_from_user == NULL)
 668		return -EINVAL;
 669
 670	/* copy the entire buffer from user */
 671
 672	args_buff = memdup_user(cmd_from_user,
 673				args->buf_size_in_bytes - sizeof(*args));
 674	if (IS_ERR(args_buff))
 675		return PTR_ERR(args_buff);
 676
  677	/* move ptr to the start of the "payload" area */
 678	wac_info.process = p;
 679
 680	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
 681	args_idx += sizeof(wac_info.operand);
 682
 683	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
 684	args_idx += sizeof(wac_info.mode);
 685
 686	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
 687	args_idx += sizeof(wac_info.trapId);
 688
 689	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
 690					*((uint32_t *)(&args_buff[args_idx]));
 691	wac_info.dbgWave_msg.MemoryVA = NULL;
 692
 693	mutex_lock(kfd_get_dbgmgr_mutex());
 694
 695	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
 696			wac_info.process, wac_info.operand,
 697			wac_info.mode, wac_info.trapId,
 698			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
 699
 700	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
 701
 702	pr_debug("Returned status of dbg manager is %ld\n", status);
 703
 704	mutex_unlock(kfd_get_dbgmgr_mutex());
 705
 706	kfree(args_buff);
 707
 708	return status;
 709}
 710
 711static int kfd_ioctl_get_clock_counters(struct file *filep,
 712				struct kfd_process *p, void *data)
 713{
 714	struct kfd_ioctl_get_clock_counters_args *args = data;
 715	struct kfd_dev *dev;
 716	struct timespec64 time;
 717
 718	dev = kfd_device_by_id(args->gpu_id);
 719	if (dev == NULL)
 720		return -EINVAL;
 721
 722	/* Reading GPU clock counter from KGD */
 723	args->gpu_clock_counter =
 724		dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
 725
 726	/* No access to rdtsc. Using raw monotonic time */
 727	getrawmonotonic64(&time);
 728	args->cpu_clock_counter = (uint64_t)timespec64_to_ns(&time);
 729
 730	get_monotonic_boottime64(&time);
 731	args->system_clock_counter = (uint64_t)timespec64_to_ns(&time);
 732
 733	/* Since the counter is in nano-seconds we use 1GHz frequency */
 734	args->system_clock_freq = 1000000000;
 735
 736	return 0;
 737}
 738
 739
 740static int kfd_ioctl_get_process_apertures(struct file *filp,
 741				struct kfd_process *p, void *data)
 742{
 743	struct kfd_ioctl_get_process_apertures_args *args = data;
 744	struct kfd_process_device_apertures *pAperture;
 745	struct kfd_process_device *pdd;
 746
  747	dev_dbg(kfd_device, "get apertures for PASID %d\n", p->pasid);
 748
 749	args->num_of_nodes = 0;
 750
 751	mutex_lock(&p->mutex);
 752
  753	/* if the process-device list isn't empty */
 754	if (kfd_has_process_device_data(p)) {
 755		/* Run over all pdd of the process */
 756		pdd = kfd_get_first_process_device_data(p);
 757		do {
 758			pAperture =
 759				&args->process_apertures[args->num_of_nodes];
 760			pAperture->gpu_id = pdd->dev->id;
 761			pAperture->lds_base = pdd->lds_base;
 762			pAperture->lds_limit = pdd->lds_limit;
 763			pAperture->gpuvm_base = pdd->gpuvm_base;
 764			pAperture->gpuvm_limit = pdd->gpuvm_limit;
 765			pAperture->scratch_base = pdd->scratch_base;
 766			pAperture->scratch_limit = pdd->scratch_limit;
 767
 768			dev_dbg(kfd_device,
 769				"node id %u\n", args->num_of_nodes);
 770			dev_dbg(kfd_device,
 771				"gpu id %u\n", pdd->dev->id);
 772			dev_dbg(kfd_device,
 773				"lds_base %llX\n", pdd->lds_base);
 774			dev_dbg(kfd_device,
 775				"lds_limit %llX\n", pdd->lds_limit);
 776			dev_dbg(kfd_device,
 777				"gpuvm_base %llX\n", pdd->gpuvm_base);
 778			dev_dbg(kfd_device,
 779				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
 780			dev_dbg(kfd_device,
 781				"scratch_base %llX\n", pdd->scratch_base);
 782			dev_dbg(kfd_device,
 783				"scratch_limit %llX\n", pdd->scratch_limit);
 784
 785			args->num_of_nodes++;
 786		} while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL &&
 787				(args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
 788	}
 789
 790	mutex_unlock(&p->mutex);
 791
 792	return 0;
 793}
 794
 795static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
 796					void *data)
 797{
 798	struct kfd_ioctl_create_event_args *args = data;
 799	int err;
 800
 801	err = kfd_event_create(filp, p, args->event_type,
 802				args->auto_reset != 0, args->node_id,
 803				&args->event_id, &args->event_trigger_data,
 804				&args->event_page_offset,
 805				&args->event_slot_index);
 806
 807	return err;
 808}
 809
 810static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
 811					void *data)
 812{
 813	struct kfd_ioctl_destroy_event_args *args = data;
 814
 815	return kfd_event_destroy(p, args->event_id);
 816}
 817
 818static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
 819				void *data)
 820{
 821	struct kfd_ioctl_set_event_args *args = data;
 822
 823	return kfd_set_event(p, args->event_id);
 824}
 825
 826static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
 827				void *data)
 828{
 829	struct kfd_ioctl_reset_event_args *args = data;
 830
 831	return kfd_reset_event(p, args->event_id);
 832}
 833
 834static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
 835				void *data)
 836{
 837	struct kfd_ioctl_wait_events_args *args = data;
 838	enum kfd_event_wait_result wait_result;
 839	int err;
 840
 841	err = kfd_wait_on_events(p, args->num_events,
 842			(void __user *)args->events_ptr,
 843			(args->wait_for_all != 0),
 844			args->timeout, &wait_result);
 845
 846	args->wait_result = wait_result;
 847
 848	return err;
 849}
 850
 851#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
 852	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl}
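/*
 * Each entry is placed at the index given by its ioctl number, so
 * kfd_ioctl() can dispatch with a single array lookup on _IOC_NR(cmd).
 */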
 853
 854/** Ioctl table */
 855static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 856	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
 857			kfd_ioctl_get_version, 0),
 858
 859	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
 860			kfd_ioctl_create_queue, 0),
 861
 862	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
 863			kfd_ioctl_destroy_queue, 0),
 864
 865	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
 866			kfd_ioctl_set_memory_policy, 0),
 867
 868	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
 869			kfd_ioctl_get_clock_counters, 0),
 870
 871	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
 872			kfd_ioctl_get_process_apertures, 0),
 873
 874	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
 875			kfd_ioctl_update_queue, 0),
 876
 877	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
 878			kfd_ioctl_create_event, 0),
 879
 880	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
 881			kfd_ioctl_destroy_event, 0),
 882
 883	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
 884			kfd_ioctl_set_event, 0),
 885
 886	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
 887			kfd_ioctl_reset_event, 0),
 888
 889	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
 890			kfd_ioctl_wait_events, 0),
 891
 892	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
 893			kfd_ioctl_dbg_register, 0),
 894
 895	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
  896			kfd_ioctl_dbg_unregister, 0),
 897
 898	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
 899			kfd_ioctl_dbg_address_watch, 0),
 900
 901	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
 902			kfd_ioctl_dbg_wave_control, 0),
 903};
 904
 905#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
 906
 907static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 908{
 909	struct kfd_process *process;
 910	amdkfd_ioctl_t *func;
 911	const struct amdkfd_ioctl_desc *ioctl = NULL;
 912	unsigned int nr = _IOC_NR(cmd);
 913	char stack_kdata[128];
 914	char *kdata = NULL;
 915	unsigned int usize, asize;
 916	int retcode = -EINVAL;
 917
 918	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
 919		goto err_i1;
 920
 921	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
 922		u32 amdkfd_size;
 923
 924		ioctl = &amdkfd_ioctls[nr];
 925
 926		amdkfd_size = _IOC_SIZE(ioctl->cmd);
 927		usize = asize = _IOC_SIZE(cmd);
 928		if (amdkfd_size > asize)
 929			asize = amdkfd_size;
 930
 931		cmd = ioctl->cmd;
 932	} else
 933		goto err_i1;
 934
 935	dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);
 936
 937	process = kfd_get_process(current);
 938	if (IS_ERR(process)) {
 939		dev_dbg(kfd_device, "no process\n");
 940		goto err_i1;
 941	}
 942
 943	/* Do not trust userspace, use our own definition */
 944	func = ioctl->func;
 945
 946	if (unlikely(!func)) {
 947		dev_dbg(kfd_device, "no function\n");
 948		retcode = -EINVAL;
 949		goto err_i1;
 950	}
 951
 952	if (cmd & (IOC_IN | IOC_OUT)) {
 953		if (asize <= sizeof(stack_kdata)) {
 954			kdata = stack_kdata;
 955		} else {
 956			kdata = kmalloc(asize, GFP_KERNEL);
 957			if (!kdata) {
 958				retcode = -ENOMEM;
 959				goto err_i1;
 960			}
 961		}
 962		if (asize > usize)
 963			memset(kdata + usize, 0, asize - usize);
 964	}
 965
 966	if (cmd & IOC_IN) {
 967		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
 968			retcode = -EFAULT;
 969			goto err_i1;
 970		}
 971	} else if (cmd & IOC_OUT) {
 972		memset(kdata, 0, usize);
 973	}
 974
 975	retcode = func(filep, process, kdata);
 976
 977	if (cmd & IOC_OUT)
 978		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
 979			retcode = -EFAULT;
 980
 981err_i1:
 982	if (!ioctl)
 983		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
 984			  task_pid_nr(current), cmd, nr);
 985
 986	if (kdata != stack_kdata)
 987		kfree(kdata);
 988
 989	if (retcode)
 990		dev_dbg(kfd_device, "ret = %d\n", retcode);
 991
 992	return retcode;
 993}
 994
 995static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
 996{
 997	struct kfd_process *process;
 998
 999	process = kfd_get_process(current);
1000	if (IS_ERR(process))
1001		return PTR_ERR(process);
1002
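	/*
	 * The mapping type is encoded in the top bits of the mmap offset
	 * (vm_pgoff); the matching mask is cleared before handing the vma
	 * to the doorbell or event mapping helper.
	 */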
1003	if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) ==
1004			KFD_MMAP_DOORBELL_MASK) {
1005		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK;
1006		return kfd_doorbell_mmap(process, vma);
1007	} else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) ==
1008			KFD_MMAP_EVENTS_MASK) {
1009		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
1010		return kfd_event_mmap(process, vma);
1011	}
1012
1013	return -EFAULT;
1014}