   1/*
   2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32
  33
  34#include <linux/kref.h>
  35#include <linux/random.h>
  36#include <linux/debugfs.h>
  37#include <linux/export.h>
  38#include <linux/delay.h>
  39#include <rdma/ib_umem.h>
  40#include <rdma/ib_umem_odp.h>
  41#include <rdma/ib_verbs.h>
  42#include "mlx5_ib.h"
  43
  44enum {
  45	MAX_PENDING_REG_MR = 8,
  46};
  47
  48#define MLX5_UMR_ALIGN 2048
  49#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  50static __be64 mlx5_ib_update_mtt_emergency_buffer[
  51		MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
  52	__aligned(MLX5_UMR_ALIGN);
  53static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
  54#endif
  55
  56static int clean_mr(struct mlx5_ib_mr *mr);
  57
  58static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
  59{
  60	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
  61
  62#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  63	/* Wait until all page fault handlers using the mr complete. */
  64	synchronize_srcu(&dev->mr_srcu);
  65#endif
  66
  67	return err;
  68}
  69
  70static int order2idx(struct mlx5_ib_dev *dev, int order)
  71{
  72	struct mlx5_mr_cache *cache = &dev->cache;
  73
  74	if (order < cache->ent[0].order)
  75		return 0;
  76	else
  77		return order - cache->ent[0].order;
  78}
  79
  80static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
  81{
  82	return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
  83		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
  84}
  85
  86#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  87static void update_odp_mr(struct mlx5_ib_mr *mr)
  88{
  89	if (mr->umem->odp_data) {
  90		/*
  91		 * This barrier prevents the compiler from moving the
  92		 * setting of umem->odp_data->private to point to our
   93		 * MR before reg_umr has finished, ensuring that the MR
   94		 * initialization has completed before we start
   95		 * handling invalidations.
  96		 */
  97		smp_wmb();
  98		mr->umem->odp_data->private = mr;
  99		/*
 100		 * Make sure we will see the new
 101		 * umem->odp_data->private value in the invalidation
 102		 * routines, before we can get page faults on the
 103		 * MR. Page faults can happen once we put the MR in
 104		 * the tree, below this line. Without the barrier,
  105		 * a page fault and an invalidation could occur
 106		 * before umem->odp_data->private == mr is visible to
 107		 * the invalidation handler.
 108		 */
 109		smp_wmb();
 110	}
 111}
 112#endif
 113
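/*
 * Completion handler for the asynchronous CREATE_MKEY commands issued by
 * add_keys().  On success, assign a fresh variable-key byte, add the MR to
 * its cache bucket and insert the mkey into the device's mkey radix tree.
 * On failure, free the MR and arm the fill-delay timer.
 */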
 114static void reg_mr_callback(int status, void *context)
 115{
 116	struct mlx5_ib_mr *mr = context;
 117	struct mlx5_ib_dev *dev = mr->dev;
 118	struct mlx5_mr_cache *cache = &dev->cache;
 119	int c = order2idx(dev, mr->order);
 120	struct mlx5_cache_ent *ent = &cache->ent[c];
 121	u8 key;
 122	unsigned long flags;
 123	struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
 124	int err;
 125
 126	spin_lock_irqsave(&ent->lock, flags);
 127	ent->pending--;
 128	spin_unlock_irqrestore(&ent->lock, flags);
 129	if (status) {
 130		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
 131		kfree(mr);
 132		dev->fill_delay = 1;
 133		mod_timer(&dev->delay_timer, jiffies + HZ);
 134		return;
 135	}
 136
 137	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
 138	key = dev->mdev->priv.mkey_key++;
 139	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
 140	mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
 141
 142	cache->last_add = jiffies;
 143
 144	spin_lock_irqsave(&ent->lock, flags);
 145	list_add_tail(&mr->list, &ent->head);
 146	ent->cur++;
 147	ent->size++;
 148	spin_unlock_irqrestore(&ent->lock, flags);
 149
 150	write_lock_irqsave(&table->lock, flags);
 151	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
 152				&mr->mmkey);
 153	if (err)
 154		pr_err("Error inserting to mkey tree. 0x%x\n", -err);
 155	write_unlock_irqrestore(&table->lock, flags);
 156}
 157
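/*
 * Asynchronously create up to 'num' free mkeys for cache bucket 'c', each
 * covering 1 << ent->order pages.  Stops with -EAGAIN once
 * MAX_PENDING_REG_MR commands are outstanding for this bucket.
 */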
 158static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 159{
 160	struct mlx5_mr_cache *cache = &dev->cache;
 161	struct mlx5_cache_ent *ent = &cache->ent[c];
 162	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 163	struct mlx5_ib_mr *mr;
 164	int npages = 1 << ent->order;
 165	void *mkc;
 166	u32 *in;
 167	int err = 0;
 168	int i;
 169
 170	in = kzalloc(inlen, GFP_KERNEL);
 171	if (!in)
 172		return -ENOMEM;
 173
 174	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 175	for (i = 0; i < num; i++) {
 176		if (ent->pending >= MAX_PENDING_REG_MR) {
 177			err = -EAGAIN;
 178			break;
 179		}
 180
 181		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 182		if (!mr) {
 183			err = -ENOMEM;
 184			break;
 185		}
 186		mr->order = ent->order;
 187		mr->umred = 1;
 188		mr->dev = dev;
 189
 190		MLX5_SET(mkc, mkc, free, 1);
 191		MLX5_SET(mkc, mkc, umr_en, 1);
 192		MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
 193
 194		MLX5_SET(mkc, mkc, qpn, 0xffffff);
 195		MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
 196		MLX5_SET(mkc, mkc, log_page_size, 12);
 197
 198		spin_lock_irq(&ent->lock);
 199		ent->pending++;
 200		spin_unlock_irq(&ent->lock);
 201		err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
 202					       in, inlen,
 203					       mr->out, sizeof(mr->out),
 204					       reg_mr_callback, mr);
 205		if (err) {
 206			spin_lock_irq(&ent->lock);
 207			ent->pending--;
 208			spin_unlock_irq(&ent->lock);
 209			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
 210			kfree(mr);
 211			break;
 212		}
 213	}
 214
 215	kfree(in);
 216	return err;
 217}
 218
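/*
 * Destroy up to 'num' mkeys taken from the head of cache bucket 'c'.
 */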
 219static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 220{
 221	struct mlx5_mr_cache *cache = &dev->cache;
 222	struct mlx5_cache_ent *ent = &cache->ent[c];
 223	struct mlx5_ib_mr *mr;
 224	int err;
 225	int i;
 226
 227	for (i = 0; i < num; i++) {
 228		spin_lock_irq(&ent->lock);
 229		if (list_empty(&ent->head)) {
 230			spin_unlock_irq(&ent->lock);
 231			return;
 232		}
 233		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 234		list_del(&mr->list);
 235		ent->cur--;
 236		ent->size--;
 237		spin_unlock_irq(&ent->lock);
 238		err = destroy_mkey(dev, mr);
 239		if (err)
 240			mlx5_ib_warn(dev, "failed destroy mkey\n");
 241		else
 242			kfree(mr);
 243	}
 244}
 245
 246static ssize_t size_write(struct file *filp, const char __user *buf,
 247			  size_t count, loff_t *pos)
 248{
 249	struct mlx5_cache_ent *ent = filp->private_data;
 250	struct mlx5_ib_dev *dev = ent->dev;
 251	char lbuf[20];
 252	u32 var;
 253	int err;
 254	int c;
 255
 256	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
 257		return -EFAULT;
 258
 259	c = order2idx(dev, ent->order);
 260	lbuf[sizeof(lbuf) - 1] = 0;
 261
 262	if (sscanf(lbuf, "%u", &var) != 1)
 263		return -EINVAL;
 264
 265	if (var < ent->limit)
 266		return -EINVAL;
 267
 268	if (var > ent->size) {
 269		do {
 270			err = add_keys(dev, c, var - ent->size);
 271			if (err && err != -EAGAIN)
 272				return err;
 273
 274			usleep_range(3000, 5000);
 275		} while (err);
 276	} else if (var < ent->size) {
 277		remove_keys(dev, c, ent->size - var);
 278	}
 279
 280	return count;
 281}
 282
 283static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
 284			 loff_t *pos)
 285{
 286	struct mlx5_cache_ent *ent = filp->private_data;
 287	char lbuf[20];
 288	int err;
 289
 290	if (*pos)
 291		return 0;
 292
 293	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
 294	if (err < 0)
 295		return err;
 296
 297	if (copy_to_user(buf, lbuf, err))
 298		return -EFAULT;
 299
 300	*pos += err;
 301
 302	return err;
 303}
 304
 305static const struct file_operations size_fops = {
 306	.owner	= THIS_MODULE,
 307	.open	= simple_open,
 308	.write	= size_write,
 309	.read	= size_read,
 310};
 311
 312static ssize_t limit_write(struct file *filp, const char __user *buf,
 313			   size_t count, loff_t *pos)
 314{
 315	struct mlx5_cache_ent *ent = filp->private_data;
 316	struct mlx5_ib_dev *dev = ent->dev;
 317	char lbuf[20];
 318	u32 var;
 319	int err;
 320	int c;
 321
 322	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
 323		return -EFAULT;
 324
 325	c = order2idx(dev, ent->order);
 326	lbuf[sizeof(lbuf) - 1] = 0;
 327
 328	if (sscanf(lbuf, "%u", &var) != 1)
 329		return -EINVAL;
 330
 331	if (var > ent->size)
 332		return -EINVAL;
 333
 334	ent->limit = var;
 335
 336	if (ent->cur < ent->limit) {
 337		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
 338		if (err)
 339			return err;
 340	}
 341
 342	return count;
 343}
 344
 345static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
 346			  loff_t *pos)
 347{
 348	struct mlx5_cache_ent *ent = filp->private_data;
 349	char lbuf[20];
 350	int err;
 351
 352	if (*pos)
 353		return 0;
 354
 355	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
 356	if (err < 0)
 357		return err;
 358
 359	if (copy_to_user(buf, lbuf, err))
 360		return -EFAULT;
 361
 362	*pos += err;
 363
 364	return err;
 365}
 366
 367static const struct file_operations limit_fops = {
 368	.owner	= THIS_MODULE,
 369	.open	= simple_open,
 370	.write	= limit_write,
 371	.read	= limit_read,
 372};
 373
 374static int someone_adding(struct mlx5_mr_cache *cache)
 375{
 376	int i;
 377
 378	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 379		if (cache->ent[i].cur < cache->ent[i].limit)
 380			return 1;
 381	}
 382
 383	return 0;
 384}
 385
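/*
 * Background work that keeps a cache bucket between ent->limit and
 * 2 * ent->limit entries: below the upper bound it adds one key at a time
 * (retrying later on -EAGAIN or on a command failure); above it, keys are
 * removed lazily, as explained in the garbage-collection comment below.
 */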
 386static void __cache_work_func(struct mlx5_cache_ent *ent)
 387{
 388	struct mlx5_ib_dev *dev = ent->dev;
 389	struct mlx5_mr_cache *cache = &dev->cache;
 390	int i = order2idx(dev, ent->order);
 391	int err;
 392
 393	if (cache->stopped)
 394		return;
 395
 396	ent = &dev->cache.ent[i];
 397	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
 398		err = add_keys(dev, i, 1);
 399		if (ent->cur < 2 * ent->limit) {
 400			if (err == -EAGAIN) {
 401				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
 402					    i + 2);
 403				queue_delayed_work(cache->wq, &ent->dwork,
 404						   msecs_to_jiffies(3));
 405			} else if (err) {
 406				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
 407					     i + 2, err);
 408				queue_delayed_work(cache->wq, &ent->dwork,
 409						   msecs_to_jiffies(1000));
 410			} else {
 411				queue_work(cache->wq, &ent->work);
 412			}
 413		}
 414	} else if (ent->cur > 2 * ent->limit) {
 415		/*
  416		 * The remove_keys() logic is performed as a garbage collection
  417		 * task. Such a task is intended to run when no other active
  418		 * processes are running.
  419		 *
  420		 * need_resched() returns true if there are user tasks to be
  421		 * activated in the near future.
  422		 *
  423		 * In that case, we don't execute remove_keys() and postpone
  424		 * the garbage collection work, so it can run in the next cycle
  425		 * and free CPU resources for other tasks.
 426		 */
 427		if (!need_resched() && !someone_adding(cache) &&
 428		    time_after(jiffies, cache->last_add + 300 * HZ)) {
 429			remove_keys(dev, i, 1);
 430			if (ent->cur > ent->limit)
 431				queue_work(cache->wq, &ent->work);
 432		} else {
 433			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
 434		}
 435	}
 436}
 437
 438static void delayed_cache_work_func(struct work_struct *work)
 439{
 440	struct mlx5_cache_ent *ent;
 441
 442	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
 443	__cache_work_func(ent);
 444}
 445
 446static void cache_work_func(struct work_struct *work)
 447{
 448	struct mlx5_cache_ent *ent;
 449
 450	ent = container_of(work, struct mlx5_cache_ent, work);
 451	__cache_work_func(ent);
 452}
 453
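/*
 * Take an MR from the smallest cache bucket that can hold 'order' pages,
 * falling back to larger buckets if that one is empty.  Every bucket that
 * is touched gets its work queued so it can refill; a miss is counted when
 * no MR was found at all.
 */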
 454static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
 455{
 456	struct mlx5_mr_cache *cache = &dev->cache;
 457	struct mlx5_ib_mr *mr = NULL;
 458	struct mlx5_cache_ent *ent;
 459	int c;
 460	int i;
 461
 462	c = order2idx(dev, order);
 463	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
 464		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
 465		return NULL;
 466	}
 467
 468	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
 469		ent = &cache->ent[i];
 470
 471		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
 472
 473		spin_lock_irq(&ent->lock);
 474		if (!list_empty(&ent->head)) {
 475			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
 476					      list);
 477			list_del(&mr->list);
 478			ent->cur--;
 479			spin_unlock_irq(&ent->lock);
 480			if (ent->cur < ent->limit)
 481				queue_work(cache->wq, &ent->work);
 482			break;
 483		}
 484		spin_unlock_irq(&ent->lock);
 485
 486		queue_work(cache->wq, &ent->work);
 487	}
 488
 489	if (!mr)
 490		cache->ent[c].miss++;
 491
 492	return mr;
 493}
 494
 495static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 496{
 497	struct mlx5_mr_cache *cache = &dev->cache;
 498	struct mlx5_cache_ent *ent;
 499	int shrink = 0;
 500	int c;
 501
 502	c = order2idx(dev, mr->order);
 503	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
 504		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
 505		return;
 506	}
 507	ent = &cache->ent[c];
 508	spin_lock_irq(&ent->lock);
 509	list_add_tail(&mr->list, &ent->head);
 510	ent->cur++;
 511	if (ent->cur > 2 * ent->limit)
 512		shrink = 1;
 513	spin_unlock_irq(&ent->lock);
 514
 515	if (shrink)
 516		queue_work(cache->wq, &ent->work);
 517}
 518
 519static void clean_keys(struct mlx5_ib_dev *dev, int c)
 520{
 521	struct mlx5_mr_cache *cache = &dev->cache;
 522	struct mlx5_cache_ent *ent = &cache->ent[c];
 523	struct mlx5_ib_mr *mr;
 524	int err;
 525
 526	cancel_delayed_work(&ent->dwork);
 527	while (1) {
 528		spin_lock_irq(&ent->lock);
 529		if (list_empty(&ent->head)) {
 530			spin_unlock_irq(&ent->lock);
 531			return;
 532		}
 533		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 534		list_del(&mr->list);
 535		ent->cur--;
 536		ent->size--;
 537		spin_unlock_irq(&ent->lock);
 538		err = destroy_mkey(dev, mr);
 539		if (err)
 540			mlx5_ib_warn(dev, "failed destroy mkey\n");
 541		else
 542			kfree(mr);
 543	}
 544}
 545
 546static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
 547{
 548	struct mlx5_mr_cache *cache = &dev->cache;
 549	struct mlx5_cache_ent *ent;
 550	int i;
 551
 552	if (!mlx5_debugfs_root)
 553		return 0;
 554
 555	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
 556	if (!cache->root)
 557		return -ENOMEM;
 558
 559	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 560		ent = &cache->ent[i];
 561		sprintf(ent->name, "%d", ent->order);
 562		ent->dir = debugfs_create_dir(ent->name,  cache->root);
 563		if (!ent->dir)
 564			return -ENOMEM;
 565
 566		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
 567						 &size_fops);
 568		if (!ent->fsize)
 569			return -ENOMEM;
 570
 571		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
 572						  &limit_fops);
 573		if (!ent->flimit)
 574			return -ENOMEM;
 575
 576		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
 577					       &ent->cur);
 578		if (!ent->fcur)
 579			return -ENOMEM;
 580
 581		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
 582						&ent->miss);
 583		if (!ent->fmiss)
 584			return -ENOMEM;
 585	}
 586
 587	return 0;
 588}
 589
 590static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
 591{
 592	if (!mlx5_debugfs_root)
 593		return;
 594
 595	debugfs_remove_recursive(dev->cache.root);
 596}
 597
 598static void delay_time_func(unsigned long ctx)
 599{
 600	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
 601
 602	dev->fill_delay = 0;
 603}
 604
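/*
 * Set up the MR cache: one ordered workqueue, a fill-delay timer and
 * MAX_MR_CACHE_ENTRIES buckets with orders 2 .. MAX_MR_CACHE_ENTRIES + 1.
 * Per-bucket limits come from the device profile (physical functions
 * only), and each bucket's work is queued so it starts prefilling.
 * A debugfs failure is reported but is not fatal.
 */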
 605int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 606{
 607	struct mlx5_mr_cache *cache = &dev->cache;
 608	struct mlx5_cache_ent *ent;
 609	int limit;
 610	int err;
 611	int i;
 612
 613	mutex_init(&dev->slow_path_mutex);
 614	cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
 615	if (!cache->wq) {
 616		mlx5_ib_warn(dev, "failed to create work queue\n");
 617		return -ENOMEM;
 618	}
 619
 620	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
 621	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
  625		ent = &cache->ent[i];
  626		INIT_LIST_HEAD(&ent->head);
  627		spin_lock_init(&ent->lock);
 628		ent->order = i + 2;
 629		ent->dev = dev;
 630
 631		if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
 632		    (mlx5_core_is_pf(dev->mdev)))
 633			limit = dev->mdev->profile->mr_cache[i].limit;
 634		else
 635			limit = 0;
 636
 637		INIT_WORK(&ent->work, cache_work_func);
 638		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
 639		ent->limit = limit;
 640		queue_work(cache->wq, &ent->work);
 641	}
 642
 643	err = mlx5_mr_cache_debugfs_init(dev);
 644	if (err)
 645		mlx5_ib_warn(dev, "cache debugfs failure\n");
 646
 647	return 0;
 648}
 649
 650static void wait_for_async_commands(struct mlx5_ib_dev *dev)
 651{
 652	struct mlx5_mr_cache *cache = &dev->cache;
 653	struct mlx5_cache_ent *ent;
 654	int total = 0;
 655	int i;
 656	int j;
 657
 658	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 659		ent = &cache->ent[i];
 660		for (j = 0 ; j < 1000; j++) {
 661			if (!ent->pending)
 662				break;
 663			msleep(50);
 664		}
 665	}
 666	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 667		ent = &cache->ent[i];
 668		total += ent->pending;
 669	}
 670
 671	if (total)
 672		mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total);
 673	else
 674		mlx5_ib_warn(dev, "done with all pending requests\n");
 675}
 676
 677int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
 678{
 679	int i;
 680
 681	dev->cache.stopped = 1;
 682	flush_workqueue(dev->cache.wq);
 683
 684	mlx5_mr_cache_debugfs_cleanup(dev);
 685
 686	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
 687		clean_keys(dev, i);
 688
 689	destroy_workqueue(dev->cache.wq);
 690	wait_for_async_commands(dev);
 691	del_timer_sync(&dev->delay_timer);
 692
 693	return 0;
 694}
 695
 696struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
 697{
 698	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 699	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 700	struct mlx5_core_dev *mdev = dev->mdev;
 701	struct mlx5_ib_mr *mr;
 702	void *mkc;
 703	u32 *in;
 704	int err;
 705
 706	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 707	if (!mr)
 708		return ERR_PTR(-ENOMEM);
 709
 710	in = kzalloc(inlen, GFP_KERNEL);
 711	if (!in) {
 712		err = -ENOMEM;
 713		goto err_free;
 714	}
 715
 716	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 717
 718	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
 719	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
 720	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
 721	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
 722	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
 723	MLX5_SET(mkc, mkc, lr, 1);
 724
 725	MLX5_SET(mkc, mkc, length64, 1);
 726	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
 727	MLX5_SET(mkc, mkc, qpn, 0xffffff);
 728	MLX5_SET64(mkc, mkc, start_addr, 0);
 729
 730	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
 731	if (err)
 732		goto err_in;
 733
 734	kfree(in);
 735	mr->ibmr.lkey = mr->mmkey.key;
 736	mr->ibmr.rkey = mr->mmkey.key;
 737	mr->umem = NULL;
 738
 739	return &mr->ibmr;
 740
 741err_in:
 742	kfree(in);
 743
 744err_free:
 745	kfree(mr);
 746
 747	return ERR_PTR(err);
 748}
 749
 750static int get_octo_len(u64 addr, u64 len, int page_size)
 751{
 752	u64 offset;
 753	int npages;
 754
 755	offset = addr & (page_size - 1);
 756	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
 757	return (npages + 1) / 2;
 758}
 759
 760static int use_umr(int order)
 761{
 762	return order <= MLX5_MAX_UMR_SHIFT;
 763}
 764
 765static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 766			  int npages, int page_shift, int *size,
 767			  __be64 **mr_pas, dma_addr_t *dma)
 768{
 769	__be64 *pas;
 770	struct device *ddev = dev->ib_dev.dma_device;
 771
 772	/*
 773	 * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
 774	 * To avoid copying garbage after the pas array, we allocate
 775	 * a little more.
 776	 */
 777	*size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
 778	*mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
 779	if (!(*mr_pas))
 780		return -ENOMEM;
 781
 782	pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN);
 783	mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
 784	/* Clear padding after the actual pages. */
 785	memset(pas + npages, 0, *size - npages * sizeof(u64));
 786
 787	*dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE);
 788	if (dma_mapping_error(ddev, *dma)) {
 789		kfree(*mr_pas);
 790		return -ENOMEM;
 791	}
 792
 793	return 0;
 794}
 795
 796static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr,
 797				struct ib_sge *sg, u64 dma, int n, u32 key,
 798				int page_shift)
 799{
 800	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 801	struct mlx5_umr_wr *umrwr = umr_wr(wr);
 802
 803	sg->addr = dma;
 804	sg->length = ALIGN(sizeof(u64) * n, 64);
 805	sg->lkey = dev->umrc.pd->local_dma_lkey;
 806
 807	wr->next = NULL;
 808	wr->sg_list = sg;
 809	if (n)
 810		wr->num_sge = 1;
 811	else
 812		wr->num_sge = 0;
 813
 814	wr->opcode = MLX5_IB_WR_UMR;
 815
 816	umrwr->npages = n;
 817	umrwr->page_shift = page_shift;
 818	umrwr->mkey = key;
 819}
 820
 821static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
 822			     struct ib_sge *sg, u64 dma, int n, u32 key,
 823			     int page_shift, u64 virt_addr, u64 len,
 824			     int access_flags)
 825{
 826	struct mlx5_umr_wr *umrwr = umr_wr(wr);
 827
 828	prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift);
 829
 830	wr->send_flags = 0;
 831
 832	umrwr->target.virt_addr = virt_addr;
 833	umrwr->length = len;
 834	umrwr->access_flags = access_flags;
 835	umrwr->pd = pd;
 836}
 837
 838static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
 839			       struct ib_send_wr *wr, u32 key)
 840{
 841	struct mlx5_umr_wr *umrwr = umr_wr(wr);
 842
 843	wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
 844	wr->opcode = MLX5_IB_WR_UMR;
 845	umrwr->mkey = key;
 846}
 847
 848static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
 849		       int access_flags, struct ib_umem **umem,
 850		       int *npages, int *page_shift, int *ncont,
 851		       int *order)
 852{
 853	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 854	int err;
 855
 856	*umem = ib_umem_get(pd->uobject->context, start, length,
 857			    access_flags, 0);
 858	err = PTR_ERR_OR_ZERO(*umem);
 859	if (err < 0) {
  860		mlx5_ib_err(dev, "umem get failed (%d)\n", err);
 861		return err;
 862	}
 863
 864	mlx5_ib_cont_pages(*umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
 865			   page_shift, ncont, order);
 866	if (!*npages) {
 867		mlx5_ib_warn(dev, "avoid zero region\n");
 868		ib_umem_release(*umem);
 869		return -EINVAL;
 870	}
 871
 872	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
 873		    *npages, *ncont, *order, *page_shift);
 874
 875	return 0;
 876}
 877
 878static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
 879{
 880	struct mlx5_ib_umr_context *context =
 881		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
 882
 883	context->status = wc->status;
 884	complete(&context->done);
 885}
 886
 887static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
 888{
 889	context->cqe.done = mlx5_ib_umr_done;
 890	context->status = -1;
 891	init_completion(&context->done);
 892}
 893
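/*
 * Register a user MR through the UMR QP using an mkey taken from the
 * cache: map the page list for DMA, build a UMR registration WQE, post it
 * and wait for its completion.  On failure the MR is returned to the
 * cache.
 */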
 894static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 895				  u64 virt_addr, u64 len, int npages,
 896				  int page_shift, int order, int access_flags)
 897{
 898	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 899	struct device *ddev = dev->ib_dev.dma_device;
 900	struct umr_common *umrc = &dev->umrc;
 901	struct mlx5_ib_umr_context umr_context;
 902	struct mlx5_umr_wr umrwr = {};
 903	struct ib_send_wr *bad;
 904	struct mlx5_ib_mr *mr;
 905	struct ib_sge sg;
 906	int size;
 907	__be64 *mr_pas;
 908	dma_addr_t dma;
 909	int err = 0;
 910	int i;
 911
 912	for (i = 0; i < 1; i++) {
 913		mr = alloc_cached_mr(dev, order);
 914		if (mr)
 915			break;
 916
 917		err = add_keys(dev, order2idx(dev, order), 1);
 918		if (err && err != -EAGAIN) {
 919			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
 920			break;
 921		}
 922	}
 923
 924	if (!mr)
 925		return ERR_PTR(-EAGAIN);
 926
 927	err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas,
 928			     &dma);
 929	if (err)
 930		goto free_mr;
 931
 932	mlx5_ib_init_umr_context(&umr_context);
 933
 934	umrwr.wr.wr_cqe = &umr_context.cqe;
 935	prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
 936			 page_shift, virt_addr, len, access_flags);
 937
 938	down(&umrc->sem);
 939	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
 940	if (err) {
 941		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
 942		goto unmap_dma;
 943	} else {
 944		wait_for_completion(&umr_context.done);
 945		if (umr_context.status != IB_WC_SUCCESS) {
 946			mlx5_ib_warn(dev, "reg umr failed\n");
 947			err = -EFAULT;
 948		}
 949	}
 950
 951	mr->mmkey.iova = virt_addr;
 952	mr->mmkey.size = len;
 953	mr->mmkey.pd = to_mpd(pd)->pdn;
 954
 955	mr->live = 1;
 956
 957unmap_dma:
 958	up(&umrc->sem);
 959	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
 960
 961	kfree(mr_pas);
 962
 963free_mr:
 964	if (err) {
 965		free_cached_mr(dev, mr);
 966		return ERR_PTR(err);
 967	}
 968
 969	return mr;
 970}
 971
 972#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 973int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
 974		       int zap)
 975{
 976	struct mlx5_ib_dev *dev = mr->dev;
 977	struct device *ddev = dev->ib_dev.dma_device;
 978	struct umr_common *umrc = &dev->umrc;
 979	struct mlx5_ib_umr_context umr_context;
 980	struct ib_umem *umem = mr->umem;
 981	int size;
 982	__be64 *pas;
 983	dma_addr_t dma;
 984	struct ib_send_wr *bad;
 985	struct mlx5_umr_wr wr;
 986	struct ib_sge sg;
 987	int err = 0;
 988	const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
 989	const int page_index_mask = page_index_alignment - 1;
 990	size_t pages_mapped = 0;
 991	size_t pages_to_map = 0;
 992	size_t pages_iter = 0;
 993	int use_emergency_buf = 0;
 994
 995	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
 996	 * so we need to align the offset and length accordingly */
 997	if (start_page_index & page_index_mask) {
 998		npages += start_page_index & page_index_mask;
 999		start_page_index &= ~page_index_mask;
1000	}
1001
1002	pages_to_map = ALIGN(npages, page_index_alignment);
1003
1004	if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
1005		return -EINVAL;
1006
1007	size = sizeof(u64) * pages_to_map;
1008	size = min_t(int, PAGE_SIZE, size);
1009	/* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
1010	 * code, when we are called from an invalidation. The pas buffer must
1011	 * be 2k-aligned for Connect-IB. */
1012	pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
1013	if (!pas) {
1014		mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
1015		pas = mlx5_ib_update_mtt_emergency_buffer;
1016		size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
1017		use_emergency_buf = 1;
1018		mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
1019		memset(pas, 0, size);
1020	}
1021	pages_iter = size / sizeof(u64);
1022	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
1023	if (dma_mapping_error(ddev, dma)) {
1024		mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
1025		err = -ENOMEM;
1026		goto free_pas;
1027	}
1028
1029	for (pages_mapped = 0;
1030	     pages_mapped < pages_to_map && !err;
1031	     pages_mapped += pages_iter, start_page_index += pages_iter) {
1032		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
1033
1034		npages = min_t(size_t,
1035			       pages_iter,
1036			       ib_umem_num_pages(umem) - start_page_index);
1037
1038		if (!zap) {
1039			__mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
1040					       start_page_index, npages, pas,
1041					       MLX5_IB_MTT_PRESENT);
1042			/* Clear padding after the pages brought from the
1043			 * umem. */
1044			memset(pas + npages, 0, size - npages * sizeof(u64));
1045		}
1046
1047		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
1048
1049		mlx5_ib_init_umr_context(&umr_context);
1050
1051		memset(&wr, 0, sizeof(wr));
1052		wr.wr.wr_cqe = &umr_context.cqe;
1053
1054		sg.addr = dma;
1055		sg.length = ALIGN(npages * sizeof(u64),
1056				MLX5_UMR_MTT_ALIGNMENT);
1057		sg.lkey = dev->umrc.pd->local_dma_lkey;
1058
1059		wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
1060				MLX5_IB_SEND_UMR_UPDATE_MTT;
1061		wr.wr.sg_list = &sg;
1062		wr.wr.num_sge = 1;
1063		wr.wr.opcode = MLX5_IB_WR_UMR;
1064		wr.npages = sg.length / sizeof(u64);
1065		wr.page_shift = PAGE_SHIFT;
1066		wr.mkey = mr->mmkey.key;
1067		wr.target.offset = start_page_index;
1068
1069		down(&umrc->sem);
1070		err = ib_post_send(umrc->qp, &wr.wr, &bad);
1071		if (err) {
1072			mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
1073		} else {
1074			wait_for_completion(&umr_context.done);
1075			if (umr_context.status != IB_WC_SUCCESS) {
1076				mlx5_ib_err(dev, "UMR completion failed, code %d\n",
1077					    umr_context.status);
1078				err = -EFAULT;
1079			}
1080		}
1081		up(&umrc->sem);
1082	}
1083	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
1084
1085free_pas:
1086	if (!use_emergency_buf)
1087		free_page((unsigned long)pas);
1088	else
1089		mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
1090
1091	return err;
1092}
1093#endif
1094
1095/*
 1096 * If ibmr is NULL, reg_create() allocates a new mlx5_ib_mr.
 1097 * Otherwise, the given ibmr is used.
1098 */
1099static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
1100				     u64 virt_addr, u64 length,
1101				     struct ib_umem *umem, int npages,
1102				     int page_shift, int access_flags)
1103{
1104	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1105	struct mlx5_ib_mr *mr;
1106	__be64 *pas;
1107	void *mkc;
1108	int inlen;
1109	u32 *in;
1110	int err;
1111	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
1112
1113	mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
1114	if (!mr)
1115		return ERR_PTR(-ENOMEM);
1116
1117	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
1118		sizeof(*pas) * ((npages + 1) / 2) * 2;
1119	in = mlx5_vzalloc(inlen);
1120	if (!in) {
1121		err = -ENOMEM;
1122		goto err_1;
1123	}
1124	pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
1125	mlx5_ib_populate_pas(dev, umem, page_shift, pas,
1126			     pg_cap ? MLX5_IB_MTT_PRESENT : 0);
1127
1128	/* The pg_access bit allows setting the access flags
1129	 * in the page list submitted with the command. */
1130	MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
1131
1132	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1133	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
1134	MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
1135	MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
1136	MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
1137	MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
1138	MLX5_SET(mkc, mkc, lr, 1);
1139
1140	MLX5_SET64(mkc, mkc, start_addr, virt_addr);
1141	MLX5_SET64(mkc, mkc, len, length);
1142	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1143	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
1144	MLX5_SET(mkc, mkc, translations_octword_size,
1145		 get_octo_len(virt_addr, length, 1 << page_shift));
1146	MLX5_SET(mkc, mkc, log_page_size, page_shift);
1147	MLX5_SET(mkc, mkc, qpn, 0xffffff);
1148	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
1149		 get_octo_len(virt_addr, length, 1 << page_shift));
1150
1151	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
1152	if (err) {
1153		mlx5_ib_warn(dev, "create mkey failed\n");
1154		goto err_2;
1155	}
1156	mr->umem = umem;
1157	mr->dev = dev;
1158	mr->live = 1;
1159	kvfree(in);
1160
1161	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
1162
1163	return mr;
1164
1165err_2:
1166	kvfree(in);
1167
1168err_1:
1169	if (!ibmr)
1170		kfree(mr);
1171
1172	return ERR_PTR(err);
1173}
1174
 1175static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
1176			  int npages, u64 length, int access_flags)
1177{
1178	mr->npages = npages;
1179	atomic_add(npages, &dev->mdev->priv.reg_pages);
1180	mr->ibmr.lkey = mr->mmkey.key;
1181	mr->ibmr.rkey = mr->mmkey.key;
1182	mr->ibmr.length = length;
1183	mr->access_flags = access_flags;
1184}
1185
1186struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1187				  u64 virt_addr, int access_flags,
1188				  struct ib_udata *udata)
1189{
1190	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1191	struct mlx5_ib_mr *mr = NULL;
1192	struct ib_umem *umem;
1193	int page_shift;
1194	int npages;
1195	int ncont;
1196	int order;
1197	int err;
1198
1199	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1200		    start, virt_addr, length, access_flags);
1201	err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
1202			   &page_shift, &ncont, &order);
1203
 1204	if (err < 0)
1205		return ERR_PTR(err);
1206
1207	if (use_umr(order)) {
1208		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
1209			     order, access_flags);
1210		if (PTR_ERR(mr) == -EAGAIN) {
1211			mlx5_ib_dbg(dev, "cache empty for order %d", order);
1212			mr = NULL;
1213		}
1214	} else if (access_flags & IB_ACCESS_ON_DEMAND) {
1215		err = -EINVAL;
 1216		pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
1217		goto error;
1218	}
1219
1220	if (!mr) {
1221		mutex_lock(&dev->slow_path_mutex);
1222		mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
1223				page_shift, access_flags);
1224		mutex_unlock(&dev->slow_path_mutex);
1225	}
1226
1227	if (IS_ERR(mr)) {
1228		err = PTR_ERR(mr);
1229		goto error;
1230	}
1231
1232	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
1233
1234	mr->umem = umem;
 1235	set_mr_fields(dev, mr, npages, length, access_flags);
1236
1237#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1238	update_odp_mr(mr);
1239#endif
1240
1241	return &mr->ibmr;
1242
1243error:
1244	ib_umem_release(umem);
1245	return ERR_PTR(err);
1246}
1247
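/*
 * Invalidate a cached (UMR-registered) mkey by posting a UMR unreg WQE,
 * returning the mkey to the free state.  Skipped when the device is in
 * internal error state.
 */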
1248static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1249{
1250	struct mlx5_core_dev *mdev = dev->mdev;
1251	struct umr_common *umrc = &dev->umrc;
1252	struct mlx5_ib_umr_context umr_context;
1253	struct mlx5_umr_wr umrwr = {};
1254	struct ib_send_wr *bad;
1255	int err;
1256
1257	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
1258		return 0;
1259
1260	mlx5_ib_init_umr_context(&umr_context);
1261
1262	umrwr.wr.wr_cqe = &umr_context.cqe;
1263	prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key);
1264
1265	down(&umrc->sem);
1266	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
1267	if (err) {
1268		up(&umrc->sem);
1269		mlx5_ib_dbg(dev, "err %d\n", err);
1270		goto error;
1271	} else {
1272		wait_for_completion(&umr_context.done);
1273		up(&umrc->sem);
1274	}
1275	if (umr_context.status != IB_WC_SUCCESS) {
1276		mlx5_ib_warn(dev, "unreg umr failed\n");
1277		err = -EFAULT;
1278		goto error;
1279	}
1280	return 0;
1281
1282error:
1283	return err;
1284}
1285
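/*
 * Modify an existing mkey in place with a single UMR WQE: depending on
 * 'flags', update the translation (new page list, address and length),
 * the PD and/or the access flags.
 */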
1286static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
1287		     u64 length, int npages, int page_shift, int order,
1288		     int access_flags, int flags)
1289{
1290	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1291	struct device *ddev = dev->ib_dev.dma_device;
1292	struct mlx5_ib_umr_context umr_context;
1293	struct ib_send_wr *bad;
1294	struct mlx5_umr_wr umrwr = {};
1295	struct ib_sge sg;
1296	struct umr_common *umrc = &dev->umrc;
1297	dma_addr_t dma = 0;
1298	__be64 *mr_pas = NULL;
1299	int size;
1300	int err;
1301
1302	mlx5_ib_init_umr_context(&umr_context);
1303
1304	umrwr.wr.wr_cqe = &umr_context.cqe;
1305	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
1306
1307	if (flags & IB_MR_REREG_TRANS) {
1308		err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size,
1309				     &mr_pas, &dma);
1310		if (err)
1311			return err;
1312
1313		umrwr.target.virt_addr = virt_addr;
1314		umrwr.length = length;
1315		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
1316	}
1317
1318	prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
1319			    page_shift);
1320
1321	if (flags & IB_MR_REREG_PD) {
1322		umrwr.pd = pd;
1323		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD;
1324	}
1325
1326	if (flags & IB_MR_REREG_ACCESS) {
1327		umrwr.access_flags = access_flags;
1328		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS;
1329	}
1330
1331	/* post send request to UMR QP */
1332	down(&umrc->sem);
1333	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
1334
1335	if (err) {
1336		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
1337	} else {
1338		wait_for_completion(&umr_context.done);
1339		if (umr_context.status != IB_WC_SUCCESS) {
1340			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
1341				     umr_context.status);
1342			err = -EFAULT;
1343		}
1344	}
1345
1346	up(&umrc->sem);
1347	if (flags & IB_MR_REREG_TRANS) {
1348		dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
1349		kfree(mr_pas);
1350	}
1351	return err;
1352}
1353
1354int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1355			  u64 length, u64 virt_addr, int new_access_flags,
1356			  struct ib_pd *new_pd, struct ib_udata *udata)
1357{
1358	struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
1359	struct mlx5_ib_mr *mr = to_mmr(ib_mr);
1360	struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
1361	int access_flags = flags & IB_MR_REREG_ACCESS ?
1362			    new_access_flags :
1363			    mr->access_flags;
1364	u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
1365	u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
1366	int page_shift = 0;
1367	int npages = 0;
1368	int ncont = 0;
1369	int order = 0;
1370	int err;
1371
1372	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1373		    start, virt_addr, length, access_flags);
1374
1375	if (flags != IB_MR_REREG_PD) {
1376		/*
1377		 * Replace umem. This needs to be done whether or not UMR is
1378		 * used.
1379		 */
1380		flags |= IB_MR_REREG_TRANS;
1381		ib_umem_release(mr->umem);
1382		err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
1383				  &npages, &page_shift, &ncont, &order);
1384		if (err < 0) {
1385			mr->umem = NULL;
1386			return err;
1387		}
1388	}
1389
1390	if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
1391		/*
1392		 * UMR can't be used - MKey needs to be replaced.
1393		 */
1394		if (mr->umred) {
1395			err = unreg_umr(dev, mr);
1396			if (err)
1397				mlx5_ib_warn(dev, "Failed to unregister MR\n");
1398		} else {
1399			err = destroy_mkey(dev, mr);
1400			if (err)
1401				mlx5_ib_warn(dev, "Failed to destroy MKey\n");
1402		}
1403		if (err)
1404			return err;
1405
1406		mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
1407				page_shift, access_flags);
1408
1409		if (IS_ERR(mr))
1410			return PTR_ERR(mr);
1411
1412		mr->umred = 0;
1413	} else {
1414		/*
1415		 * Send a UMR WQE
1416		 */
1417		err = rereg_umr(pd, mr, addr, len, npages, page_shift,
1418				order, access_flags, flags);
1419		if (err) {
1420			mlx5_ib_warn(dev, "Failed to rereg UMR\n");
1421			return err;
1422		}
1423	}
1424
1425	if (flags & IB_MR_REREG_PD) {
1426		ib_mr->pd = pd;
1427		mr->mmkey.pd = to_mpd(pd)->pdn;
1428	}
1429
1430	if (flags & IB_MR_REREG_ACCESS)
1431		mr->access_flags = access_flags;
1432
1433	if (flags & IB_MR_REREG_TRANS) {
1434		atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
 1435		set_mr_fields(dev, mr, npages, len, access_flags);
1436		mr->mmkey.iova = addr;
1437		mr->mmkey.size = len;
1438	}
1439#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1440	update_odp_mr(mr);
1441#endif
1442
1443	return 0;
1444}
1445
1446static int
1447mlx5_alloc_priv_descs(struct ib_device *device,
1448		      struct mlx5_ib_mr *mr,
1449		      int ndescs,
1450		      int desc_size)
1451{
1452	int size = ndescs * desc_size;
1453	int add_size;
1454	int ret;
1455
1456	add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
1457
1458	mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
1459	if (!mr->descs_alloc)
1460		return -ENOMEM;
1461
1462	mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
1463
1464	mr->desc_map = dma_map_single(device->dma_device, mr->descs,
1465				      size, DMA_TO_DEVICE);
1466	if (dma_mapping_error(device->dma_device, mr->desc_map)) {
1467		ret = -ENOMEM;
1468		goto err;
1469	}
1470
1471	return 0;
1472err:
1473	kfree(mr->descs_alloc);
1474
1475	return ret;
1476}
1477
1478static void
1479mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
1480{
1481	if (mr->descs) {
1482		struct ib_device *device = mr->ibmr.device;
1483		int size = mr->max_descs * mr->desc_size;
1484
1485		dma_unmap_single(device->dma_device, mr->desc_map,
1486				 size, DMA_TO_DEVICE);
1487		kfree(mr->descs_alloc);
1488		mr->descs = NULL;
1489	}
1490}
1491
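/*
 * Common MR teardown: destroy the signature PSVs (if any) and the private
 * descriptor buffer, then either destroy the mkey (non-cached MRs) or
 * invalidate it via UMR and return it to the cache.
 */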
1492static int clean_mr(struct mlx5_ib_mr *mr)
1493{
1494	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1495	int umred = mr->umred;
1496	int err;
1497
1498	if (mr->sig) {
1499		if (mlx5_core_destroy_psv(dev->mdev,
1500					  mr->sig->psv_memory.psv_idx))
1501			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1502				     mr->sig->psv_memory.psv_idx);
1503		if (mlx5_core_destroy_psv(dev->mdev,
1504					  mr->sig->psv_wire.psv_idx))
1505			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1506				     mr->sig->psv_wire.psv_idx);
1507		kfree(mr->sig);
1508		mr->sig = NULL;
1509	}
1510
1511	mlx5_free_priv_descs(mr);
1512
1513	if (!umred) {
1514		err = destroy_mkey(dev, mr);
1515		if (err) {
1516			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
1517				     mr->mmkey.key, err);
1518			return err;
1519		}
1520	} else {
1521		err = unreg_umr(dev, mr);
1522		if (err) {
1523			mlx5_ib_warn(dev, "failed unregister\n");
1524			return err;
1525		}
1526		free_cached_mr(dev, mr);
1527	}
1528
1529	if (!umred)
1530		kfree(mr);
1531
1532	return 0;
1533}
1534
1535int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
1536{
1537	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
1538	struct mlx5_ib_mr *mr = to_mmr(ibmr);
1539	int npages = mr->npages;
1540	struct ib_umem *umem = mr->umem;
1541
1542#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1543	if (umem && umem->odp_data) {
1544		/* Prevent new page faults from succeeding */
1545		mr->live = 0;
1546		/* Wait for all running page-fault handlers to finish. */
1547		synchronize_srcu(&dev->mr_srcu);
1548		/* Destroy all page mappings */
1549		mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
1550					 ib_umem_end(umem));
1551		/*
 1552		 * For ODP, release the umem before the MR so that
 1553		 * no invalidation remains in flight that could still
 1554		 * be looking at the *mr struct.
1555		 */
1556		ib_umem_release(umem);
1557		atomic_sub(npages, &dev->mdev->priv.reg_pages);
1558
1559		/* Avoid double-freeing the umem. */
1560		umem = NULL;
1561	}
1562#endif
1563
1564	clean_mr(mr);
1565
1566	if (umem) {
1567		ib_umem_release(umem);
1568		atomic_sub(npages, &dev->mdev->priv.reg_pages);
1569	}
1570
1571	return 0;
1572}
1573
1574struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
1575			       enum ib_mr_type mr_type,
1576			       u32 max_num_sg)
1577{
1578	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1579	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1580	int ndescs = ALIGN(max_num_sg, 4);
1581	struct mlx5_ib_mr *mr;
1582	void *mkc;
1583	u32 *in;
1584	int err;
1585
1586	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1587	if (!mr)
1588		return ERR_PTR(-ENOMEM);
1589
1590	in = kzalloc(inlen, GFP_KERNEL);
1591	if (!in) {
1592		err = -ENOMEM;
1593		goto err_free;
1594	}
1595
1596	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1597	MLX5_SET(mkc, mkc, free, 1);
1598	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1599	MLX5_SET(mkc, mkc, qpn, 0xffffff);
1600	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1601
1602	if (mr_type == IB_MR_TYPE_MEM_REG) {
1603		mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
1604		MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
1605		err = mlx5_alloc_priv_descs(pd->device, mr,
1606					    ndescs, sizeof(u64));
1607		if (err)
1608			goto err_free_in;
1609
1610		mr->desc_size = sizeof(u64);
1611		mr->max_descs = ndescs;
1612	} else if (mr_type == IB_MR_TYPE_SG_GAPS) {
1613		mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
1614
1615		err = mlx5_alloc_priv_descs(pd->device, mr,
1616					    ndescs, sizeof(struct mlx5_klm));
1617		if (err)
1618			goto err_free_in;
1619		mr->desc_size = sizeof(struct mlx5_klm);
1620		mr->max_descs = ndescs;
1621	} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
1622		u32 psv_index[2];
1623
1624		MLX5_SET(mkc, mkc, bsf_en, 1);
1625		MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
1626		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
1627		if (!mr->sig) {
1628			err = -ENOMEM;
1629			goto err_free_in;
1630		}
1631
1632		/* create mem & wire PSVs */
1633		err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
1634					   2, psv_index);
1635		if (err)
1636			goto err_free_sig;
1637
1638		mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
1639		mr->sig->psv_memory.psv_idx = psv_index[0];
1640		mr->sig->psv_wire.psv_idx = psv_index[1];
1641
1642		mr->sig->sig_status_checked = true;
1643		mr->sig->sig_err_exists = false;
1644		/* Next UMR, Arm SIGERR */
1645		++mr->sig->sigerr_count;
1646	} else {
1647		mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
1648		err = -EINVAL;
1649		goto err_free_in;
1650	}
1651
1652	MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
1653	MLX5_SET(mkc, mkc, umr_en, 1);
1654
1655	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
1656	if (err)
1657		goto err_destroy_psv;
1658
1659	mr->ibmr.lkey = mr->mmkey.key;
1660	mr->ibmr.rkey = mr->mmkey.key;
1661	mr->umem = NULL;
1662	kfree(in);
1663
1664	return &mr->ibmr;
1665
1666err_destroy_psv:
1667	if (mr->sig) {
1668		if (mlx5_core_destroy_psv(dev->mdev,
1669					  mr->sig->psv_memory.psv_idx))
1670			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1671				     mr->sig->psv_memory.psv_idx);
1672		if (mlx5_core_destroy_psv(dev->mdev,
1673					  mr->sig->psv_wire.psv_idx))
1674			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1675				     mr->sig->psv_wire.psv_idx);
1676	}
1677	mlx5_free_priv_descs(mr);
1678err_free_sig:
1679	kfree(mr->sig);
1680err_free_in:
1681	kfree(in);
1682err_free:
1683	kfree(mr);
1684	return ERR_PTR(err);
1685}
1686
1687struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
1688			       struct ib_udata *udata)
1689{
1690	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1691	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1692	struct mlx5_ib_mw *mw = NULL;
1693	u32 *in = NULL;
1694	void *mkc;
1695	int ndescs;
1696	int err;
1697	struct mlx5_ib_alloc_mw req = {};
1698	struct {
1699		__u32	comp_mask;
1700		__u32	response_length;
1701	} resp = {};
1702
1703	err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
1704	if (err)
1705		return ERR_PTR(err);
1706
1707	if (req.comp_mask || req.reserved1 || req.reserved2)
1708		return ERR_PTR(-EOPNOTSUPP);
1709
1710	if (udata->inlen > sizeof(req) &&
1711	    !ib_is_udata_cleared(udata, sizeof(req),
1712				 udata->inlen - sizeof(req)))
1713		return ERR_PTR(-EOPNOTSUPP);
1714
1715	ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
1716
1717	mw = kzalloc(sizeof(*mw), GFP_KERNEL);
1718	in = kzalloc(inlen, GFP_KERNEL);
1719	if (!mw || !in) {
1720		err = -ENOMEM;
1721		goto free;
1722	}
1723
1724	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1725
1726	MLX5_SET(mkc, mkc, free, 1);
1727	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1728	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1729	MLX5_SET(mkc, mkc, umr_en, 1);
1730	MLX5_SET(mkc, mkc, lr, 1);
1731	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_KLMS);
1732	MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
1733	MLX5_SET(mkc, mkc, qpn, 0xffffff);
1734
1735	err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
1736	if (err)
1737		goto free;
1738
1739	mw->ibmw.rkey = mw->mmkey.key;
1740
1741	resp.response_length = min(offsetof(typeof(resp), response_length) +
1742				   sizeof(resp.response_length), udata->outlen);
1743	if (resp.response_length) {
1744		err = ib_copy_to_udata(udata, &resp, resp.response_length);
1745		if (err) {
1746			mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
1747			goto free;
1748		}
1749	}
1750
1751	kfree(in);
1752	return &mw->ibmw;
1753
1754free:
1755	kfree(mw);
1756	kfree(in);
1757	return ERR_PTR(err);
1758}
1759
1760int mlx5_ib_dealloc_mw(struct ib_mw *mw)
1761{
1762	struct mlx5_ib_mw *mmw = to_mmw(mw);
1763	int err;
1764
1765	err =  mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
1766				      &mmw->mmkey);
1767	if (!err)
1768		kfree(mmw);
1769	return err;
1770}
1771
1772int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
1773			    struct ib_mr_status *mr_status)
1774{
1775	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
1776	int ret = 0;
1777
1778	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
1779		pr_err("Invalid status check mask\n");
1780		ret = -EINVAL;
1781		goto done;
1782	}
1783
1784	mr_status->fail_status = 0;
1785	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
1786		if (!mmr->sig) {
1787			ret = -EINVAL;
1788			pr_err("signature status check requested on a non-signature enabled MR\n");
1789			goto done;
1790		}
1791
1792		mmr->sig->sig_status_checked = true;
1793		if (!mmr->sig->sig_err_exists)
1794			goto done;
1795
1796		if (ibmr->lkey == mmr->sig->err_item.key)
1797			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
1798			       sizeof(mr_status->sig_err));
1799		else {
1800			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
1801			mr_status->sig_err.sig_err_offset = 0;
1802			mr_status->sig_err.key = mmr->sig->err_item.key;
1803		}
1804
1805		mmr->sig->sig_err_exists = false;
1806		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
1807	}
1808
1809done:
1810	return ret;
1811}
1812
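/*
 * Translate a scatterlist into KLM descriptors (address, byte count and
 * lkey) in the MR's descriptor buffer.  Used when the MR works in KLM
 * access mode (IB_MR_TYPE_SG_GAPS and signature MRs), where the mapped
 * buffers need not be page aligned or equally sized.
 */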
1813static int
1814mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
1815		   struct scatterlist *sgl,
1816		   unsigned short sg_nents,
1817		   unsigned int *sg_offset_p)
1818{
1819	struct scatterlist *sg = sgl;
1820	struct mlx5_klm *klms = mr->descs;
1821	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1822	u32 lkey = mr->ibmr.pd->local_dma_lkey;
1823	int i;
1824
1825	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
1826	mr->ibmr.length = 0;
1827	mr->ndescs = sg_nents;
1828
1829	for_each_sg(sgl, sg, sg_nents, i) {
 1830		if (unlikely(i >= mr->max_descs))
1831			break;
1832		klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
1833		klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
1834		klms[i].key = cpu_to_be32(lkey);
1835		mr->ibmr.length += sg_dma_len(sg);
1836
1837		sg_offset = 0;
1838	}
1839
1840	if (sg_offset_p)
1841		*sg_offset_p = sg_offset;
1842
1843	return i;
1844}
1845
1846static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
1847{
1848	struct mlx5_ib_mr *mr = to_mmr(ibmr);
1849	__be64 *descs;
1850
1851	if (unlikely(mr->ndescs == mr->max_descs))
1852		return -ENOMEM;
1853
1854	descs = mr->descs;
1855	descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
1856
1857	return 0;
1858}
1859
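/*
 * mlx5 implementation of ib_map_mr_sg(): sync the descriptor buffer for
 * CPU access, fill it either with KLMs or with page addresses (via
 * ib_sg_to_pages()/mlx5_set_page()), then hand it back to the device.
 * Returns the number of scatterlist entries that were mapped.
 */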
1860int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
1861		      unsigned int *sg_offset)
1862{
1863	struct mlx5_ib_mr *mr = to_mmr(ibmr);
1864	int n;
1865
1866	mr->ndescs = 0;
1867
1868	ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
1869				   mr->desc_size * mr->max_descs,
1870				   DMA_TO_DEVICE);
1871
1872	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
1873		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
1874	else
1875		n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
1876				mlx5_set_page);
1877
1878	ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
1879				      mr->desc_size * mr->max_descs,
1880				      DMA_TO_DEVICE);
1881
1882	return n;
1883}