/*
 *  IBM eServer eHCA Infiniband device driver for Linux on POWER
 *
 *  MR/MW functions
 *
 *  Authors: Dietmar Decker <ddecker@de.ibm.com>
 *           Christoph Raisch <raisch@de.ibm.com>
 *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
 *
 *  Copyright (c) 2005 IBM Corporation
 *
 *  All rights reserved.
 *
 *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
 *  BSD.
 *
 * OpenIB BSD License
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials
 * provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/slab.h>
#include <rdma/ib_umem.h>

#include "ehca_iverbs.h"
#include "ehca_mrmw.h"
#include "hcp_if.h"
#include "hipz_hw.h"

#define NUM_CHUNKS(length, chunk_size) \
	(((length) + (chunk_size - 1)) / (chunk_size))
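
/*
 * NUM_CHUNKS() is plain ceiling division: the number of chunk_size-sized
 * pieces needed to cover length bytes. E.g. NUM_CHUNKS(10, 4) is
 * (10 + 3) / 4 == 3.
 */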

/* max number of rpages (per hcall register_rpages) */
#define MAX_RPAGES 512

/* DMEM toleration management */
#define EHCA_SECTSHIFT        SECTION_SIZE_BITS
#define EHCA_SECTSIZE          (1UL << EHCA_SECTSHIFT)
#define EHCA_HUGEPAGESHIFT     34
#define EHCA_HUGEPAGE_SIZE     (1UL << EHCA_HUGEPAGESHIFT)
#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT)
#define EHCA_INVAL_ADDR        0xFFFFFFFFFFFFFFFFULL
#define EHCA_DIR_INDEX_SHIFT 13                   /* 8k Entries in 64k block */
#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2)
#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT)
#define EHCA_TOP_MAP_SIZE (0x10000)               /* currently fixed map size */
#define EHCA_DIR_MAP_SIZE (0x10000)
#define EHCA_ENT_MAP_SIZE (0x10000)
#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1)

static unsigned long ehca_mr_len;

/*
 * Memory map data structures
 */
struct ehca_dir_bmap {
	u64 ent[EHCA_MAP_ENTRIES];
};
struct ehca_top_bmap {
	struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES];
};
struct ehca_bmap {
	struct ehca_top_bmap *top[EHCA_MAP_ENTRIES];
};
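
/*
 * Together these form a three-level radix tree over memory sections
 * (for DMEM add/remove toleration): ehca_bmap holds up to 8192 top
 * maps, each top map up to 8192 dir maps, and each dir map 8192 u64
 * entries, one per EHCA_SECTSIZE-sized section. As ehca_calc_sectbase()
 * below shows, a section number decomposes into idx (bits 0-12),
 * dir (bits 13-25) and top (bits 26 and up); entries holding
 * EHCA_INVAL_ADDR mark sections that are not mapped.
 */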

static struct ehca_bmap *ehca_bmap;

static struct kmem_cache *mr_cache;
static struct kmem_cache *mw_cache;

enum ehca_mr_pgsize {
	EHCA_MR_PGSIZE4K  = 0x1000L,
	EHCA_MR_PGSIZE64K = 0x10000L,
	EHCA_MR_PGSIZE1M  = 0x100000L,
	EHCA_MR_PGSIZE16M = 0x1000000L
};

#define EHCA_MR_PGSHIFT4K  12
#define EHCA_MR_PGSHIFT64K 16
#define EHCA_MR_PGSHIFT1M  20
#define EHCA_MR_PGSHIFT16M 24

static u64 ehca_map_vaddr(void *caddr);

static u32 ehca_encode_hwpage_size(u32 pgsize)
{
	int log = ilog2(pgsize);
	WARN_ON(log < 12 || log > 24 || log & 3);
	return (log - 12) / 4;
}
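
/*
 * This maps the four supported hardware page sizes to the code passed
 * to hipz_h_register_rpage_mr(): 4K (log2 == 12) -> 0, 64K -> 1,
 * 1M -> 2, 16M -> 3. The WARN_ON() catches any size outside that set.
 */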

static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
{
	return rounddown_pow_of_two(shca->hca_cap_mr_pgsize);
}
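
/*
 * hca_cap_mr_pgsize is treated as a bitmask of supported MR page sizes
 * (see the tests against it below); rounddown_pow_of_two() isolates
 * its most significant set bit, i.e. the largest supported page size.
 */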

static struct ehca_mr *ehca_mr_new(void)
{
	struct ehca_mr *me;

	me = kmem_cache_zalloc(mr_cache, GFP_KERNEL);
	if (me)
		spin_lock_init(&me->mrlock);
	else
		ehca_gen_err("alloc failed");

	return me;
}

static void ehca_mr_delete(struct ehca_mr *me)
{
	kmem_cache_free(mr_cache, me);
}

static struct ehca_mw *ehca_mw_new(void)
{
	struct ehca_mw *me;

	me = kmem_cache_zalloc(mw_cache, GFP_KERNEL);
	if (me)
		spin_lock_init(&me->mwlock);
	else
		ehca_gen_err("alloc failed");

	return me;
}

static void ehca_mw_delete(struct ehca_mw *me)
{
	kmem_cache_free(mw_cache, me);
}

/*----------------------------------------------------------------------*/

struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
{
	struct ib_mr *ib_mr;
	int ret;
	struct ehca_mr *e_maxmr;
	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
	struct ehca_shca *shca =
		container_of(pd->device, struct ehca_shca, ib_device);

	if (shca->maxmr) {
		e_maxmr = ehca_mr_new();
		if (!e_maxmr) {
			ehca_err(&shca->ib_device, "out of memory");
			ib_mr = ERR_PTR(-ENOMEM);
			goto get_dma_mr_exit0;
		}

		ret = ehca_reg_maxmr(shca, e_maxmr,
				     (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)),
				     mr_access_flags, e_pd,
				     &e_maxmr->ib.ib_mr.lkey,
				     &e_maxmr->ib.ib_mr.rkey);
		if (ret) {
			ehca_mr_delete(e_maxmr);
			ib_mr = ERR_PTR(ret);
			goto get_dma_mr_exit0;
		}
		ib_mr = &e_maxmr->ib.ib_mr;
	} else {
		ehca_err(&shca->ib_device, "no internal max-MR exists!");
		ib_mr = ERR_PTR(-EINVAL);
		goto get_dma_mr_exit0;
	}

get_dma_mr_exit0:
	if (IS_ERR(ib_mr))
		ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x",
			 PTR_ERR(ib_mr), pd, mr_access_flags);
	return ib_mr;
} /* end ehca_get_dma_mr() */

/*----------------------------------------------------------------------*/

struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
			       struct ib_phys_buf *phys_buf_array,
			       int num_phys_buf,
			       int mr_access_flags,
			       u64 *iova_start)
{
	struct ib_mr *ib_mr;
	int ret;
	struct ehca_mr *e_mr;
	struct ehca_shca *shca =
		container_of(pd->device, struct ehca_shca, ib_device);
	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);

	u64 size;

	if ((num_phys_buf <= 0) || !phys_buf_array) {
		ehca_err(pd->device, "bad input values: num_phys_buf=%x "
			 "phys_buf_array=%p", num_phys_buf, phys_buf_array);
		ib_mr = ERR_PTR(-EINVAL);
		goto reg_phys_mr_exit0;
	}
	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
		/*
		 * Remote Write Access requires Local Write Access
		 * Remote Atomic Access requires Local Write Access
		 */
		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
			 mr_access_flags);
		ib_mr = ERR_PTR(-EINVAL);
		goto reg_phys_mr_exit0;
	}

	/* check physical buffer list and calculate size */
	ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
					    iova_start, &size);
	if (ret) {
		ib_mr = ERR_PTR(ret);
		goto reg_phys_mr_exit0;
	}
	if ((size == 0) ||
	    (((u64)iova_start + size) < (u64)iova_start)) {
		ehca_err(pd->device, "bad input values: size=%llx iova_start=%p",
			 size, iova_start);
		ib_mr = ERR_PTR(-EINVAL);
		goto reg_phys_mr_exit0;
	}

	e_mr = ehca_mr_new();
	if (!e_mr) {
		ehca_err(pd->device, "out of memory");
		ib_mr = ERR_PTR(-ENOMEM);
		goto reg_phys_mr_exit0;
	}

	/* register MR on HCA */
	if (ehca_mr_is_maxmr(size, iova_start)) {
		e_mr->flags |= EHCA_MR_FLAG_MAXMR;
		ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
				     e_pd, &e_mr->ib.ib_mr.lkey,
				     &e_mr->ib.ib_mr.rkey);
		if (ret) {
			ib_mr = ERR_PTR(ret);
			goto reg_phys_mr_exit1;
		}
	} else {
		struct ehca_mr_pginfo pginfo;
		u32 num_kpages;
		u32 num_hwpages;
		u64 hw_pgsize;

		num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size,
					PAGE_SIZE);
		/* for kernel space we try the largest possible pgsize */
		hw_pgsize = ehca_get_max_hwpage_size(shca);
		num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size,
					 hw_pgsize);
		memset(&pginfo, 0, sizeof(pginfo));
		pginfo.type = EHCA_MR_PGI_PHYS;
		pginfo.num_kpages = num_kpages;
		pginfo.hwpage_size = hw_pgsize;
		pginfo.num_hwpages = num_hwpages;
		pginfo.u.phy.num_phys_buf = num_phys_buf;
		pginfo.u.phy.phys_buf_array = phys_buf_array;
		pginfo.next_hwpage =
			((u64)iova_start & ~PAGE_MASK) / hw_pgsize;

		ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
				  e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
				  &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
		if (ret) {
			ib_mr = ERR_PTR(ret);
			goto reg_phys_mr_exit1;
		}
	}

	/* successful registration of all pages */
	return &e_mr->ib.ib_mr;

reg_phys_mr_exit1:
	ehca_mr_delete(e_mr);
reg_phys_mr_exit0:
	if (IS_ERR(ib_mr))
		ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
			 "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
			 PTR_ERR(ib_mr), pd, phys_buf_array,
			 num_phys_buf, mr_access_flags, iova_start);
	return ib_mr;
} /* end ehca_reg_phys_mr() */

/*----------------------------------------------------------------------*/

struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
			       u64 virt, int mr_access_flags,
			       struct ib_udata *udata)
{
	struct ib_mr *ib_mr;
	struct ehca_mr *e_mr;
	struct ehca_shca *shca =
		container_of(pd->device, struct ehca_shca, ib_device);
	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
	struct ehca_mr_pginfo pginfo;
	int ret, page_shift;
	u32 num_kpages;
	u32 num_hwpages;
	u64 hwpage_size;

	if (!pd) {
		ehca_gen_err("bad pd=%p", pd);
		return ERR_PTR(-EFAULT);
	}

	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
		/*
		 * Remote Write Access requires Local Write Access
		 * Remote Atomic Access requires Local Write Access
		 */
		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
			 mr_access_flags);
		ib_mr = ERR_PTR(-EINVAL);
		goto reg_user_mr_exit0;
	}

	if (length == 0 || virt + length < virt) {
		ehca_err(pd->device, "bad input values: length=%llx "
			 "virt_base=%llx", length, virt);
		ib_mr = ERR_PTR(-EINVAL);
		goto reg_user_mr_exit0;
	}

	e_mr = ehca_mr_new();
	if (!e_mr) {
		ehca_err(pd->device, "out of memory");
		ib_mr = ERR_PTR(-ENOMEM);
		goto reg_user_mr_exit0;
	}

	e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
				 mr_access_flags, 0);
	if (IS_ERR(e_mr->umem)) {
		ib_mr = (void *)e_mr->umem;
		goto reg_user_mr_exit1;
	}

	if (e_mr->umem->page_size != PAGE_SIZE) {
		ehca_err(pd->device, "page size not supported, "
			 "e_mr->umem->page_size=%x", e_mr->umem->page_size);
		ib_mr = ERR_PTR(-EINVAL);
		goto reg_user_mr_exit2;
	}

	/* determine number of MR pages */
	num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
	/* select proper hw_pgsize */
	page_shift = PAGE_SHIFT;
	if (e_mr->umem->hugetlb) {
		/* determine page_shift, clamp between 4K and 16M */
		page_shift = (fls64(length - 1) + 3) & ~3;
		page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
				 EHCA_MR_PGSHIFT16M);
	}
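	/*
	 * fls64(length - 1) is roughly ceil(log2(length)); "+ 3 & ~3"
	 * rounds that up to a multiple of 4, landing on one of the
	 * EHCA_MR_PGSHIFT* values. E.g. length = 6 MiB gives fls64 = 23,
	 * rounded up to 24 (16M pages); the min/max clamp keeps the
	 * result within [12, 24].
	 */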
	hwpage_size = 1UL << page_shift;

	/* now that we have the desired page size, shift until it's
	 * supported, too. 4K is always supported, so this terminates.
	 */
	while (!(hwpage_size & shca->hca_cap_mr_pgsize))
		hwpage_size >>= 4;
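	/*
	 * Dividing by 16 steps down through the supported sizes
	 * (16M -> 1M -> 64K -> 4K), matching the factor-16 spacing of
	 * enum ehca_mr_pgsize, until a bit set in hca_cap_mr_pgsize
	 * is hit.
	 */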

reg_user_mr_fallback:
	num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
	/* register MR on HCA */
	memset(&pginfo, 0, sizeof(pginfo));
	pginfo.type = EHCA_MR_PGI_USER;
	pginfo.hwpage_size = hwpage_size;
	pginfo.num_kpages = num_kpages;
	pginfo.num_hwpages = num_hwpages;
	pginfo.u.usr.region = e_mr->umem;
	pginfo.next_hwpage = e_mr->umem->offset / hwpage_size;
	pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk,
						     (&e_mr->umem->chunk_list),
						     list);

	ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
			  e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
			  &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
	if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) {
		ehca_warn(pd->device, "failed to register mr "
			  "with hwpage_size=%llx", hwpage_size);
		ehca_info(pd->device, "try to register mr with "
			  "kpage_size=%lx", PAGE_SIZE);
		/*
		 * this means kpages are not contiguous for a hw page
		 * try kernel page size as fallback solution
		 */
		hwpage_size = PAGE_SIZE;
		goto reg_user_mr_fallback;
	}
	if (ret) {
		ib_mr = ERR_PTR(ret);
		goto reg_user_mr_exit2;
	}

	/* successful registration of all pages */
	return &e_mr->ib.ib_mr;

reg_user_mr_exit2:
	ib_umem_release(e_mr->umem);
reg_user_mr_exit1:
	ehca_mr_delete(e_mr);
reg_user_mr_exit0:
	if (IS_ERR(ib_mr))
		ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p",
			 PTR_ERR(ib_mr), pd, mr_access_flags, udata);
	return ib_mr;
} /* end ehca_reg_user_mr() */

/*----------------------------------------------------------------------*/

int ehca_rereg_phys_mr(struct ib_mr *mr,
		       int mr_rereg_mask,
		       struct ib_pd *pd,
		       struct ib_phys_buf *phys_buf_array,
		       int num_phys_buf,
		       int mr_access_flags,
		       u64 *iova_start)
{
	int ret;

	struct ehca_shca *shca =
		container_of(mr->device, struct ehca_shca, ib_device);
	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
	u64 new_size;
	u64 *new_start;
	u32 new_acl;
	struct ehca_pd *new_pd;
	u32 tmp_lkey, tmp_rkey;
	unsigned long sl_flags;
	u32 num_kpages = 0;
	u32 num_hwpages = 0;
	struct ehca_mr_pginfo pginfo;

	if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
		/* TODO not supported, because PHYP rereg hCall needs pages */
		ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
			 "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
		ret = -EINVAL;
		goto rereg_phys_mr_exit0;
	}

	if (mr_rereg_mask & IB_MR_REREG_PD) {
		if (!pd) {
			ehca_err(mr->device, "rereg with bad pd, pd=%p "
				 "mr_rereg_mask=%x", pd, mr_rereg_mask);
			ret = -EINVAL;
			goto rereg_phys_mr_exit0;
		}
	}

	if ((mr_rereg_mask &
	     ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
	    (mr_rereg_mask == 0)) {
		ret = -EINVAL;
		goto rereg_phys_mr_exit0;
	}

	/* check other parameters */
	if (e_mr == shca->maxmr) {
		/* should be impossible, however reject to be sure */
		ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
			 "shca->maxmr=%p mr->lkey=%x",
			 mr, shca->maxmr, mr->lkey);
		ret = -EINVAL;
		goto rereg_phys_mr_exit0;
	}
	if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
		if (e_mr->flags & EHCA_MR_FLAG_FMR) {
			ehca_err(mr->device, "not supported for FMR, mr=%p "
				 "flags=%x", mr, e_mr->flags);
			ret = -EINVAL;
			goto rereg_phys_mr_exit0;
		}
		if (!phys_buf_array || num_phys_buf <= 0) {
			ehca_err(mr->device, "bad input values mr_rereg_mask=%x"
				 " phys_buf_array=%p num_phys_buf=%x",
				 mr_rereg_mask, phys_buf_array, num_phys_buf);
			ret = -EINVAL;
			goto rereg_phys_mr_exit0;
		}
	}
	if ((mr_rereg_mask & IB_MR_REREG_ACCESS) &&	/* change ACL */
	    (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
	      !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
	     ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
	      !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
		/*
		 * Remote Write Access requires Local Write Access
		 * Remote Atomic Access requires Local Write Access
		 */
		ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
			 "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
		ret = -EINVAL;
		goto rereg_phys_mr_exit0;
	}

	/* set requested values dependent on rereg request */
	spin_lock_irqsave(&e_mr->mrlock, sl_flags);
	new_start = e_mr->start;
	new_size = e_mr->size;
	new_acl = e_mr->acl;
	new_pd = container_of(mr->pd, struct ehca_pd, ib_pd);

	if (mr_rereg_mask & IB_MR_REREG_TRANS) {
		u64 hw_pgsize = ehca_get_max_hwpage_size(shca);

		new_start = iova_start;	/* change address */
		/* check physical buffer list and calculate size */
		ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
						    num_phys_buf, iova_start,
						    &new_size);
		if (ret)
			goto rereg_phys_mr_exit1;
		if ((new_size == 0) ||
		    (((u64)iova_start + new_size) < (u64)iova_start)) {
			ehca_err(mr->device, "bad input values: new_size=%llx "
				 "iova_start=%p", new_size, iova_start);
			ret = -EINVAL;
			goto rereg_phys_mr_exit1;
		}
		num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) +
					new_size, PAGE_SIZE);
		num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) +
					 new_size, hw_pgsize);
		memset(&pginfo, 0, sizeof(pginfo));
		pginfo.type = EHCA_MR_PGI_PHYS;
		pginfo.num_kpages = num_kpages;
		pginfo.hwpage_size = hw_pgsize;
		pginfo.num_hwpages = num_hwpages;
		pginfo.u.phy.num_phys_buf = num_phys_buf;
		pginfo.u.phy.phys_buf_array = phys_buf_array;
		pginfo.next_hwpage =
			((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
	}
	if (mr_rereg_mask & IB_MR_REREG_ACCESS)
		new_acl = mr_access_flags;
	if (mr_rereg_mask & IB_MR_REREG_PD)
		new_pd = container_of(pd, struct ehca_pd, ib_pd);

	ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
			    new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
	if (ret)
		goto rereg_phys_mr_exit1;

	/* successful reregistration */
	if (mr_rereg_mask & IB_MR_REREG_PD)
		mr->pd = pd;
	mr->lkey = tmp_lkey;
	mr->rkey = tmp_rkey;

rereg_phys_mr_exit1:
	spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
rereg_phys_mr_exit0:
	if (ret)
		ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
			 "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
			 "iova_start=%p",
			 ret, mr, mr_rereg_mask, pd, phys_buf_array,
			 num_phys_buf, mr_access_flags, iova_start);
	return ret;
} /* end ehca_rereg_phys_mr() */

/*----------------------------------------------------------------------*/

int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
{
	int ret = 0;
	u64 h_ret;
	struct ehca_shca *shca =
		container_of(mr->device, struct ehca_shca, ib_device);
	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
	unsigned long sl_flags;
	struct ehca_mr_hipzout_parms hipzout;

	if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
		ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
			 "e_mr->flags=%x", mr, e_mr, e_mr->flags);
		ret = -EINVAL;
		goto query_mr_exit0;
	}

	memset(mr_attr, 0, sizeof(struct ib_mr_attr));
	spin_lock_irqsave(&e_mr->mrlock, sl_flags);

	h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
	if (h_ret != H_SUCCESS) {
		ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p "
			 "hca_hndl=%llx mr_hndl=%llx lkey=%x",
			 h_ret, mr, shca->ipz_hca_handle.handle,
			 e_mr->ipz_mr_handle.handle, mr->lkey);
		ret = ehca2ib_return_code(h_ret);
		goto query_mr_exit1;
	}
	mr_attr->pd = mr->pd;
	mr_attr->device_virt_addr = hipzout.vaddr;
	mr_attr->size = hipzout.len;
	mr_attr->lkey = hipzout.lkey;
	mr_attr->rkey = hipzout.rkey;
	ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);

query_mr_exit1:
	spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
query_mr_exit0:
	if (ret)
		ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
			 ret, mr, mr_attr);
	return ret;
} /* end ehca_query_mr() */

/*----------------------------------------------------------------------*/

int ehca_dereg_mr(struct ib_mr *mr)
{
	int ret = 0;
	u64 h_ret;
	struct ehca_shca *shca =
		container_of(mr->device, struct ehca_shca, ib_device);
	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);

	if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
		ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
			 "e_mr->flags=%x", mr, e_mr, e_mr->flags);
		ret = -EINVAL;
		goto dereg_mr_exit0;
	} else if (e_mr == shca->maxmr) {
		/* should be impossible, however reject to be sure */
		ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p "
			 "shca->maxmr=%p mr->lkey=%x",
			 mr, shca->maxmr, mr->lkey);
		ret = -EINVAL;
		goto dereg_mr_exit0;
	}

	/* TODO: BUSY: MR still has bound window(s) */
	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
	if (h_ret != H_SUCCESS) {
		ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p "
			 "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x",
			 h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
			 e_mr->ipz_mr_handle.handle, mr->lkey);
		ret = ehca2ib_return_code(h_ret);
		goto dereg_mr_exit0;
	}

	if (e_mr->umem)
		ib_umem_release(e_mr->umem);

	/* successful deregistration */
	ehca_mr_delete(e_mr);

dereg_mr_exit0:
	if (ret)
		ehca_err(mr->device, "ret=%i mr=%p", ret, mr);
	return ret;
} /* end ehca_dereg_mr() */

/*----------------------------------------------------------------------*/

struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
{
	struct ib_mw *ib_mw;
	u64 h_ret;
	struct ehca_mw *e_mw;
	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
	struct ehca_shca *shca =
		container_of(pd->device, struct ehca_shca, ib_device);
	struct ehca_mw_hipzout_parms hipzout;

	e_mw = ehca_mw_new();
	if (!e_mw) {
		ib_mw = ERR_PTR(-ENOMEM);
		goto alloc_mw_exit0;
	}

	h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
					 e_pd->fw_pd, &hipzout);
	if (h_ret != H_SUCCESS) {
		ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli "
			 "shca=%p hca_hndl=%llx mw=%p",
			 h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
		ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
		goto alloc_mw_exit1;
	}
	/* successful MW allocation */
	e_mw->ipz_mw_handle = hipzout.handle;
	e_mw->ib_mw.rkey    = hipzout.rkey;
	return &e_mw->ib_mw;

alloc_mw_exit1:
	ehca_mw_delete(e_mw);
alloc_mw_exit0:
	if (IS_ERR(ib_mw))
		ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd);
	return ib_mw;
} /* end ehca_alloc_mw() */

/*----------------------------------------------------------------------*/

int ehca_bind_mw(struct ib_qp *qp,
		 struct ib_mw *mw,
		 struct ib_mw_bind *mw_bind)
{
	/* TODO: not supported up to now */
	ehca_gen_err("bind MW currently not supported by HCAD");

	return -EPERM;
} /* end ehca_bind_mw() */

/*----------------------------------------------------------------------*/

int ehca_dealloc_mw(struct ib_mw *mw)
{
	u64 h_ret;
	struct ehca_shca *shca =
		container_of(mw->device, struct ehca_shca, ib_device);
	struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw);

	h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
	if (h_ret != H_SUCCESS) {
		ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p "
			 "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx",
			 h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
			 e_mw->ipz_mw_handle.handle);
		return ehca2ib_return_code(h_ret);
	}
	/* successful deallocation */
	ehca_mw_delete(e_mw);
	return 0;
} /* end ehca_dealloc_mw() */

/*----------------------------------------------------------------------*/

struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
			      int mr_access_flags,
			      struct ib_fmr_attr *fmr_attr)
{
	struct ib_fmr *ib_fmr;
	struct ehca_shca *shca =
		container_of(pd->device, struct ehca_shca, ib_device);
	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
	struct ehca_mr *e_fmr;
	int ret;
	u32 tmp_lkey, tmp_rkey;
	struct ehca_mr_pginfo pginfo;
	u64 hw_pgsize;

	/* check other parameters */
	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
		/*
		 * Remote Write Access requires Local Write Access
		 * Remote Atomic Access requires Local Write Access
		 */
		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
			 mr_access_flags);
		ib_fmr = ERR_PTR(-EINVAL);
		goto alloc_fmr_exit0;
	}
	if (mr_access_flags & IB_ACCESS_MW_BIND) {
		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
			 mr_access_flags);
		ib_fmr = ERR_PTR(-EINVAL);
		goto alloc_fmr_exit0;
	}
	if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) {
		ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x "
			 "fmr_attr->max_maps=%x fmr_attr->page_shift=%x",
			 fmr_attr->max_pages, fmr_attr->max_maps,
			 fmr_attr->page_shift);
		ib_fmr = ERR_PTR(-EINVAL);
		goto alloc_fmr_exit0;
	}

	hw_pgsize = 1 << fmr_attr->page_shift;
	if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
		ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
			 fmr_attr->page_shift);
		ib_fmr = ERR_PTR(-EINVAL);
		goto alloc_fmr_exit0;
	}

	e_fmr = ehca_mr_new();
	if (!e_fmr) {
		ib_fmr = ERR_PTR(-ENOMEM);
		goto alloc_fmr_exit0;
	}
	e_fmr->flags |= EHCA_MR_FLAG_FMR;

	/* register MR on HCA */
	memset(&pginfo, 0, sizeof(pginfo));
	pginfo.hwpage_size = hw_pgsize;
	/*
	 * pginfo.num_hwpages==0, i.e. register_rpages() will not be called
	 * but deferred to map_phys_fmr()
	 */
	ret = ehca_reg_mr(shca, e_fmr, NULL,
			  fmr_attr->max_pages * (1 << fmr_attr->page_shift),
			  mr_access_flags, e_pd, &pginfo,
			  &tmp_lkey, &tmp_rkey, EHCA_REG_MR);
	if (ret) {
		ib_fmr = ERR_PTR(ret);
		goto alloc_fmr_exit1;
	}

	/* successful */
	e_fmr->hwpage_size = hw_pgsize;
	e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
	e_fmr->fmr_max_pages = fmr_attr->max_pages;
	e_fmr->fmr_max_maps = fmr_attr->max_maps;
	e_fmr->fmr_map_cnt = 0;
	return &e_fmr->ib.ib_fmr;

alloc_fmr_exit1:
	ehca_mr_delete(e_fmr);
alloc_fmr_exit0:
	return ib_fmr;
} /* end ehca_alloc_fmr() */

/*----------------------------------------------------------------------*/

int ehca_map_phys_fmr(struct ib_fmr *fmr,
		      u64 *page_list,
		      int list_len,
		      u64 iova)
{
	int ret;
	struct ehca_shca *shca =
		container_of(fmr->device, struct ehca_shca, ib_device);
	struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
	struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
	struct ehca_mr_pginfo pginfo;
	u32 tmp_lkey, tmp_rkey;

	if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
		ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
			 e_fmr, e_fmr->flags);
		ret = -EINVAL;
		goto map_phys_fmr_exit0;
	}
	ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len);
	if (ret)
		goto map_phys_fmr_exit0;
	if (iova % e_fmr->fmr_page_size) {
		/* iova must be aligned to the FMR page size */
		ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x",
			 iova, e_fmr->fmr_page_size);
		ret = -EINVAL;
		goto map_phys_fmr_exit0;
	}
	if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) {
		/* HCAD does not limit the maps, however trace this anyway */
		ehca_info(fmr->device, "map limit exceeded, fmr=%p "
			  "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x",
			  fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
	}

	memset(&pginfo, 0, sizeof(pginfo));
	pginfo.type = EHCA_MR_PGI_FMR;
	pginfo.num_kpages = list_len;
	pginfo.hwpage_size = e_fmr->hwpage_size;
	pginfo.num_hwpages =
		list_len * e_fmr->fmr_page_size / pginfo.hwpage_size;
	pginfo.u.fmr.page_list = page_list;
	pginfo.next_hwpage =
		(iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size;
	pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size;
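	/*
	 * Note: given the alignment check above, the masked iova offset
	 * is always zero here, so next_hwpage starts at 0; the
	 * expression is kept in the general form used by the other
	 * registration paths.
	 */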

	ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova,
			    list_len * e_fmr->fmr_page_size,
			    e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
	if (ret)
		goto map_phys_fmr_exit0;

	/* successful reregistration */
	e_fmr->fmr_map_cnt++;
	e_fmr->ib.ib_fmr.lkey = tmp_lkey;
	e_fmr->ib.ib_fmr.rkey = tmp_rkey;
	return 0;

map_phys_fmr_exit0:
	if (ret)
		ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x "
			 "iova=%llx", ret, fmr, page_list, list_len, iova);
	return ret;
} /* end ehca_map_phys_fmr() */

/*----------------------------------------------------------------------*/

int ehca_unmap_fmr(struct list_head *fmr_list)
{
	int ret = 0;
	struct ib_fmr *ib_fmr;
	struct ehca_shca *shca = NULL;
	struct ehca_shca *prev_shca;
	struct ehca_mr *e_fmr;
	u32 num_fmr = 0;
	u32 unmap_fmr_cnt = 0;

	/* check that all FMRs belong to the same SHCA, and check internal flag */
	list_for_each_entry(ib_fmr, fmr_list, list) {
		prev_shca = shca;
		shca = container_of(ib_fmr->device, struct ehca_shca,
				    ib_device);
		e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
		if ((shca != prev_shca) && prev_shca) {
			ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p "
				 "prev_shca=%p e_fmr=%p",
				 shca, prev_shca, e_fmr);
			ret = -EINVAL;
			goto unmap_fmr_exit0;
		}
		if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
			ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p "
				 "e_fmr->flags=%x", e_fmr, e_fmr->flags);
			ret = -EINVAL;
			goto unmap_fmr_exit0;
		}
		num_fmr++;
	}

	/* loop over all FMRs to unmap */
	list_for_each_entry(ib_fmr, fmr_list, list) {
		unmap_fmr_cnt++;
		e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
		shca = container_of(ib_fmr->device, struct ehca_shca,
				    ib_device);
		ret = ehca_unmap_one_fmr(shca, e_fmr);
		if (ret) {
			/* unmap failed, stop unmapping of rest of FMRs */
			ehca_err(&shca->ib_device, "unmap of one FMR failed, "
				 "stop rest, e_fmr=%p num_fmr=%x "
				 "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr,
				 unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey);
			goto unmap_fmr_exit0;
		}
	}

unmap_fmr_exit0:
	if (ret)
		ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
			     ret, fmr_list, num_fmr, unmap_fmr_cnt);
	return ret;
} /* end ehca_unmap_fmr() */

/*----------------------------------------------------------------------*/

int ehca_dealloc_fmr(struct ib_fmr *fmr)
{
	int ret;
	u64 h_ret;
	struct ehca_shca *shca =
		container_of(fmr->device, struct ehca_shca, ib_device);
	struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);

	if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
		ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
			 e_fmr, e_fmr->flags);
		ret = -EINVAL;
		goto free_fmr_exit0;
	}

	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
	if (h_ret != H_SUCCESS) {
		ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p "
			 "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x",
			 h_ret, e_fmr, shca->ipz_hca_handle.handle,
			 e_fmr->ipz_mr_handle.handle, fmr->lkey);
		ret = ehca2ib_return_code(h_ret);
		goto free_fmr_exit0;
	}
	/* successful deregistration */
	ehca_mr_delete(e_fmr);
	return 0;

free_fmr_exit0:
	if (ret)
		ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr);
	return ret;
} /* end ehca_dealloc_fmr() */

/*----------------------------------------------------------------------*/

static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
				   struct ehca_mr *e_mr,
				   struct ehca_mr_pginfo *pginfo);

int ehca_reg_mr(struct ehca_shca *shca,
		struct ehca_mr *e_mr,
		u64 *iova_start,
		u64 size,
		int acl,
		struct ehca_pd *e_pd,
		struct ehca_mr_pginfo *pginfo,
		u32 *lkey, /*OUT*/
		u32 *rkey, /*OUT*/
		enum ehca_reg_type reg_type)
{
	int ret;
	u64 h_ret;
	u32 hipz_acl;
	struct ehca_mr_hipzout_parms hipzout;

	ehca_mrmw_map_acl(acl, &hipz_acl);
	ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
	if (ehca_use_hp_mr == 1)
		hipz_acl |= 0x00000001;

	h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
					 (u64)iova_start, size, hipz_acl,
					 e_pd->fw_pd, &hipzout);
	if (h_ret != H_SUCCESS) {
		ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli "
			 "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle);
		ret = ehca2ib_return_code(h_ret);
		goto ehca_reg_mr_exit0;
	}

	e_mr->ipz_mr_handle = hipzout.handle;

	if (reg_type == EHCA_REG_BUSMAP_MR)
		ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo);
	else if (reg_type == EHCA_REG_MR)
		ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
	else
		ret = -EINVAL;

	if (ret)
		goto ehca_reg_mr_exit1;

	/* successful registration */
	e_mr->num_kpages = pginfo->num_kpages;
	e_mr->num_hwpages = pginfo->num_hwpages;
	e_mr->hwpage_size = pginfo->hwpage_size;
	e_mr->start = iova_start;
	e_mr->size = size;
	e_mr->acl = acl;
	*lkey = hipzout.lkey;
	*rkey = hipzout.rkey;
	return 0;

ehca_reg_mr_exit1:
	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
	if (h_ret != H_SUCCESS) {
		ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p "
			 "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x "
			 "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i",
			 h_ret, shca, e_mr, iova_start, size, acl, e_pd,
			 hipzout.lkey, pginfo, pginfo->num_kpages,
			 pginfo->num_hwpages, ret);
		ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
			 "not recoverable");
	}
ehca_reg_mr_exit0:
	if (ret)
		ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
			 "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
			 "num_kpages=%llx num_hwpages=%llx",
			 ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
			 pginfo->num_kpages, pginfo->num_hwpages);
	return ret;
} /* end ehca_reg_mr() */

/*----------------------------------------------------------------------*/

int ehca_reg_mr_rpages(struct ehca_shca *shca,
		       struct ehca_mr *e_mr,
		       struct ehca_mr_pginfo *pginfo)
{
	int ret = 0;
	u64 h_ret;
	u32 rnum;
	u64 rpage;
	u32 i;
	u64 *kpage;

	if (!pginfo->num_hwpages) /* in case of fmr */
		return 0;

	kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
	if (!kpage) {
		ehca_err(&shca->ib_device, "kpage alloc failed");
		ret = -ENOMEM;
		goto ehca_reg_mr_rpages_exit0;
	}

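	/*
	 * The hw page list is pushed down in chunks of at most
	 * MAX_RPAGES entries per hcall. E.g. num_hwpages = 1030 takes
	 * three calls with rnum = 512, 512 and 6; if num_hwpages were an
	 * exact multiple of 512, the final chunk would be a full 512
	 * ("last shot is full"). Intermediate calls must return
	 * H_PAGE_REGISTERED, only the final one H_SUCCESS.
	 */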
	/* at most MAX_RPAGES ehca MR pages per register call */
	for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) {

		if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
			rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */
			if (rnum == 0)
				rnum = MAX_RPAGES;      /* last shot is full */
		} else
			rnum = MAX_RPAGES;

		ret = ehca_set_pagebuf(pginfo, rnum, kpage);
		if (ret) {
			ehca_err(&shca->ib_device, "ehca_set_pagebuf "
				 "bad rc, ret=%i rnum=%x kpage=%p",
				 ret, rnum, kpage);
			goto ehca_reg_mr_rpages_exit1;
		}

		if (rnum > 1) {
			rpage = virt_to_abs(kpage);
			if (!rpage) {
				ehca_err(&shca->ib_device, "kpage=%p i=%x",
					 kpage, i);
				ret = -EFAULT;
				goto ehca_reg_mr_rpages_exit1;
			}
		} else
			rpage = *kpage;

		h_ret = hipz_h_register_rpage_mr(
			shca->ipz_hca_handle, e_mr,
			ehca_encode_hwpage_size(pginfo->hwpage_size),
			0, rpage, rnum);

		if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
			/*
			 * check for 'registration complete'==H_SUCCESS
			 * and for 'page registered'==H_PAGE_REGISTERED
			 */
			if (h_ret != H_SUCCESS) {
				ehca_err(&shca->ib_device, "last "
					 "hipz_reg_rpage_mr failed, h_ret=%lli "
					 "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx"
					 " lkey=%x", h_ret, e_mr, i,
					 shca->ipz_hca_handle.handle,
					 e_mr->ipz_mr_handle.handle,
					 e_mr->ib.ib_mr.lkey);
				ret = ehca2ib_return_code(h_ret);
				break;
			} else
				ret = 0;
		} else if (h_ret != H_PAGE_REGISTERED) {
			ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
				 "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx "
				 "mr_hndl=%llx", h_ret, e_mr, i,
				 e_mr->ib.ib_mr.lkey,
				 shca->ipz_hca_handle.handle,
				 e_mr->ipz_mr_handle.handle);
			ret = ehca2ib_return_code(h_ret);
			break;
		} else
			ret = 0;
	} /* end for(i) */

ehca_reg_mr_rpages_exit1:
	ehca_free_fw_ctrlblock(kpage);
ehca_reg_mr_rpages_exit0:
	if (ret)
		ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p "
			 "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr,
			 pginfo, pginfo->num_kpages, pginfo->num_hwpages);
	return ret;
} /* end ehca_reg_mr_rpages() */

/*----------------------------------------------------------------------*/

inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
				struct ehca_mr *e_mr,
				u64 *iova_start,
				u64 size,
				u32 acl,
				struct ehca_pd *e_pd,
				struct ehca_mr_pginfo *pginfo,
				u32 *lkey, /*OUT*/
				u32 *rkey) /*OUT*/
{
	int ret;
	u64 h_ret;
	u32 hipz_acl;
	u64 *kpage;
	u64 rpage;
	struct ehca_mr_pginfo pginfo_save;
	struct ehca_mr_hipzout_parms hipzout;

	ehca_mrmw_map_acl(acl, &hipz_acl);
	ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);

	kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
	if (!kpage) {
		ehca_err(&shca->ib_device, "kpage alloc failed");
		ret = -ENOMEM;
		goto ehca_rereg_mr_rereg1_exit0;
	}

	pginfo_save = *pginfo;
	ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage);
	if (ret) {
		ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
			 "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx "
			 "kpage=%p", e_mr, pginfo, pginfo->type,
			 pginfo->num_kpages, pginfo->num_hwpages, kpage);
		goto ehca_rereg_mr_rereg1_exit1;
	}
	rpage = virt_to_abs(kpage);
	if (!rpage) {
		ehca_err(&shca->ib_device, "kpage=%p", kpage);
		ret = -EFAULT;
		goto ehca_rereg_mr_rereg1_exit1;
	}
	h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
				      (u64)iova_start, size, hipz_acl,
				      e_pd->fw_pd, rpage, &hipzout);
	if (h_ret != H_SUCCESS) {
		/*
		 * reregistration unsuccessful, try it again with the 3 hCalls,
		 * e.g. this is required in case H_MR_CONDITION
		 * (MW bound or MR is shared)
		 */
		ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
			  "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr);
		*pginfo = pginfo_save;
		ret = -EAGAIN;
	} else if ((u64 *)hipzout.vaddr != iova_start) {
		ehca_err(&shca->ib_device, "PHYP changed iova_start in "
			 "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p "
			 "mr_handle=%llx lkey=%x lkey_out=%x", iova_start,
			 hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
			 e_mr->ib.ib_mr.lkey, hipzout.lkey);
		ret = -EFAULT;
	} else {
		/*
		 * successful reregistration
		 * note: start and start_out are identical for eServer HCAs
		 */
		e_mr->num_kpages = pginfo->num_kpages;
		e_mr->num_hwpages = pginfo->num_hwpages;
		e_mr->hwpage_size = pginfo->hwpage_size;
		e_mr->start = iova_start;
		e_mr->size = size;
		e_mr->acl = acl;
		*lkey = hipzout.lkey;
		*rkey = hipzout.rkey;
	}

ehca_rereg_mr_rereg1_exit1:
	ehca_free_fw_ctrlblock(kpage);
ehca_rereg_mr_rereg1_exit0:
	if (ret && (ret != -EAGAIN))
		ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
			 "pginfo=%p num_kpages=%llx num_hwpages=%llx",
			 ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
			 pginfo->num_hwpages);
	return ret;
} /* end ehca_rereg_mr_rereg1() */

/*----------------------------------------------------------------------*/

int ehca_rereg_mr(struct ehca_shca *shca,
		  struct ehca_mr *e_mr,
		  u64 *iova_start,
		  u64 size,
		  int acl,
		  struct ehca_pd *e_pd,
		  struct ehca_mr_pginfo *pginfo,
		  u32 *lkey,
		  u32 *rkey)
{
	int ret = 0;
	u64 h_ret;
	int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */
	int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */

	/* first determine reregistration hCall(s) */
	if ((pginfo->num_hwpages > MAX_RPAGES) ||
	    (e_mr->num_hwpages > MAX_RPAGES) ||
	    (pginfo->num_hwpages > e_mr->num_hwpages)) {
		ehca_dbg(&shca->ib_device, "Rereg3 case, "
			 "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x",
			 pginfo->num_hwpages, e_mr->num_hwpages);
		rereg_1_hcall = 0;
		rereg_3_hcall = 1;
	}
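
	/*
	 * Rereg1 passes the whole page list in a single control block
	 * to hipz_h_reregister_pmr(), so it can only serve up to
	 * MAX_RPAGES hw pages; anything larger (or an MR that grows)
	 * takes the Rereg3 path below: free the MR, allocate it again
	 * and re-register its pages.
	 */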

	if (e_mr->flags & EHCA_MR_FLAG_MAXMR) {	/* check for max-MR */
		rereg_1_hcall = 0;
		rereg_3_hcall = 1;
		e_mr->flags &= ~EHCA_MR_FLAG_MAXMR;
		ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p",
			 e_mr);
	}

	if (rereg_1_hcall) {
		ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size,
					   acl, e_pd, pginfo, lkey, rkey);
		if (ret) {
			if (ret == -EAGAIN)
				rereg_3_hcall = 1;
			else
				goto ehca_rereg_mr_exit0;
		}
	}

	if (rereg_3_hcall) {
		struct ehca_mr save_mr;

		/* first deregister old MR */
		h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
		if (h_ret != H_SUCCESS) {
			ehca_err(&shca->ib_device, "hipz_free_mr failed, "
				 "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx "
				 "mr->lkey=%x",
				 h_ret, e_mr, shca->ipz_hca_handle.handle,
				 e_mr->ipz_mr_handle.handle,
				 e_mr->ib.ib_mr.lkey);
			ret = ehca2ib_return_code(h_ret);
			goto ehca_rereg_mr_exit0;
		}
		/* clean ehca_mr_t, without changing struct ib_mr and lock */
		save_mr = *e_mr;
		ehca_mr_deletenew(e_mr);

		/* set some MR values */
		e_mr->flags = save_mr.flags;
		e_mr->hwpage_size = save_mr.hwpage_size;
		e_mr->fmr_page_size = save_mr.fmr_page_size;
		e_mr->fmr_max_pages = save_mr.fmr_max_pages;
		e_mr->fmr_max_maps = save_mr.fmr_max_maps;
		e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;

		ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
				  e_pd, pginfo, lkey, rkey, EHCA_REG_MR);
		if (ret) {
			u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
			memcpy(&e_mr->flags, &(save_mr.flags),
			       sizeof(struct ehca_mr) - offset);
			goto ehca_rereg_mr_exit0;
		}
	}

ehca_rereg_mr_exit0:
	if (ret)
		ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
			 "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
			 "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x "
			 "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
			 acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey,
			 rereg_1_hcall, rereg_3_hcall);
	return ret;
} /* end ehca_rereg_mr() */

/*----------------------------------------------------------------------*/

int ehca_unmap_one_fmr(struct ehca_shca *shca,
		       struct ehca_mr *e_fmr)
{
	int ret = 0;
	u64 h_ret;
	struct ehca_pd *e_pd =
		container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
	struct ehca_mr save_fmr;
	u32 tmp_lkey, tmp_rkey;
	struct ehca_mr_pginfo pginfo;
	struct ehca_mr_hipzout_parms hipzout;

	if (e_fmr->fmr_max_pages <= MAX_RPAGES) {
		/*
		 * note: after using rereg hcall with len=0,
		 * rereg hcall must be used again for registering pages
		 */
		h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
					      0, 0, e_pd->fw_pd, 0, &hipzout);
		if (h_ret == H_SUCCESS) {
			/* successful reregistration */
			e_fmr->start = NULL;
			e_fmr->size = 0;
			tmp_lkey = hipzout.lkey;
			tmp_rkey = hipzout.rkey;
			return 0;
		}
		/*
		 * should not happen, because length checked above,
		 * FMRs are not shared and no MW bound to FMRs
		 */
		ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
			 "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx "
			 "mr_hndl=%llx lkey=%x lkey_out=%x",
			 h_ret, e_fmr, shca->ipz_hca_handle.handle,
			 e_fmr->ipz_mr_handle.handle,
			 e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
		/* try free and rereg */
	}

	/* first free old FMR */
	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
	if (h_ret != H_SUCCESS) {
		ehca_err(&shca->ib_device, "hipz_free_mr failed, "
			 "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx "
			 "lkey=%x",
			 h_ret, e_fmr, shca->ipz_hca_handle.handle,
			 e_fmr->ipz_mr_handle.handle,
			 e_fmr->ib.ib_fmr.lkey);
		ret = ehca2ib_return_code(h_ret);
		goto ehca_unmap_one_fmr_exit0;
	}
	/* clean ehca_mr_t, without changing lock */
	save_fmr = *e_fmr;
	ehca_mr_deletenew(e_fmr);

	/* set some MR values */
	e_fmr->flags = save_fmr.flags;
	e_fmr->hwpage_size = save_fmr.hwpage_size;
	e_fmr->fmr_page_size = save_fmr.fmr_page_size;
	e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
	e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
	e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
	e_fmr->acl = save_fmr.acl;

	memset(&pginfo, 0, sizeof(pginfo));
	pginfo.type = EHCA_MR_PGI_FMR;
	ret = ehca_reg_mr(shca, e_fmr, NULL,
			  (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
			  e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
			  &tmp_rkey, EHCA_REG_MR);
	if (ret) {
		u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
		/* restore the saved state on failure */
		memcpy(&e_fmr->flags, &(save_fmr.flags),
		       sizeof(struct ehca_mr) - offset);
	}

ehca_unmap_one_fmr_exit0:
	if (ret)
		ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x "
			 "fmr_max_pages=%x",
			 ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
	return ret;
} /* end ehca_unmap_one_fmr() */

/*----------------------------------------------------------------------*/

int ehca_reg_smr(struct ehca_shca *shca,
		 struct ehca_mr *e_origmr,
		 struct ehca_mr *e_newmr,
		 u64 *iova_start,
		 int acl,
		 struct ehca_pd *e_pd,
		 u32 *lkey, /*OUT*/
		 u32 *rkey) /*OUT*/
{
	int ret = 0;
	u64 h_ret;
	u32 hipz_acl;
	struct ehca_mr_hipzout_parms hipzout;

	ehca_mrmw_map_acl(acl, &hipz_acl);
	ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);

	h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
				    (u64)iova_start, hipz_acl, e_pd->fw_pd,
				    &hipzout);
	if (h_ret != H_SUCCESS) {
		ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
			 "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
			 "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
			 h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
			 shca->ipz_hca_handle.handle,
			 e_origmr->ipz_mr_handle.handle,
			 e_origmr->ib.ib_mr.lkey);
		ret = ehca2ib_return_code(h_ret);
		goto ehca_reg_smr_exit0;
	}
	/* successful registration */
	e_newmr->num_kpages = e_origmr->num_kpages;
	e_newmr->num_hwpages = e_origmr->num_hwpages;
	e_newmr->hwpage_size = e_origmr->hwpage_size;
	e_newmr->start = iova_start;
	e_newmr->size = e_origmr->size;
	e_newmr->acl = acl;
	e_newmr->ipz_mr_handle = hipzout.handle;
	*lkey = hipzout.lkey;
	*rkey = hipzout.rkey;
	return 0;

ehca_reg_smr_exit0:
	if (ret)
		ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p "
			 "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
			 ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
	return ret;
} /* end ehca_reg_smr() */

/*----------------------------------------------------------------------*/
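/*
 * Rebuild a section's kernel virtual base address from its
 * (top, dir, idx) position in the bitmap: the section number is
 * top << 26 | dir << 13 | idx, which shifted left by
 * SECTION_SIZE_BITS gives the absolute address that abs_to_virt()
 * translates back to a kernel virtual address.
 */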
static inline void *ehca_calc_sectbase(int top, int dir, int idx)
{
	unsigned long ret = idx;
	ret |= dir << EHCA_DIR_INDEX_SHIFT;
	ret |= top << EHCA_TOP_INDEX_SHIFT;
	return abs_to_virt(ret << SECTION_SIZE_BITS);
}

#define ehca_bmap_valid(entry) \
	((u64)entry != (u64)EHCA_INVAL_ADDR)

static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage,
			       struct ehca_shca *shca, struct ehca_mr *mr,
			       struct ehca_mr_pginfo *pginfo)
{
	u64 h_ret = 0;
	unsigned long page = 0;
	u64 rpage = virt_to_abs(kpage);
	int page_count;

	void *sectbase = ehca_calc_sectbase(top, dir, idx);
	if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) {
		ehca_err(&shca->ib_device, "reg_mr_section will probably fail: "
					   "hwpage_size does not match the "
					   "section start address");
	}
	page_count = EHCA_SECTSIZE / pginfo->hwpage_size;

	while (page < page_count) {
		u64 rnum;
		for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count);
		     rnum++) {
			void *pg = sectbase + ((page++) * pginfo->hwpage_size);
			kpage[rnum] = virt_to_abs(pg);
		}

		h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr,
			ehca_encode_hwpage_size(pginfo->hwpage_size),
			0, rpage, rnum);

		if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) {
			ehca_err(&shca->ib_device, "register_rpage_mr failed");
			return h_ret;
		}
	}
	return h_ret;
}

static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage,
				struct ehca_shca *shca, struct ehca_mr *mr,
				struct ehca_mr_pginfo *pginfo)
{
	u64 hret = H_SUCCESS;
	int idx;

	for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) {
		if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx]))
			continue;

		hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr,
					   pginfo);
		if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
			return hret;
	}
	return hret;
}

static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca,
				    struct ehca_mr *mr,
				    struct ehca_mr_pginfo *pginfo)
{
	u64 hret = H_SUCCESS;
	int dir;

	for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
		if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
			continue;

		hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo);
		if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
			return hret;
	}
	return hret;
}

/* register internal max-MR to internal SHCA */
int ehca_reg_internal_maxmr(
	struct ehca_shca *shca,
	struct ehca_pd *e_pd,
	struct ehca_mr **e_maxmr)  /*OUT*/
{
	int ret;
	struct ehca_mr *e_mr;
	u64 *iova_start;
	u64 size_maxmr;
	struct ehca_mr_pginfo pginfo;
	struct ib_phys_buf ib_pbuf;
	u32 num_kpages;
	u32 num_hwpages;
	u64 hw_pgsize;

	if (!ehca_bmap) {
		ret = -EFAULT;
		goto ehca_reg_internal_maxmr_exit0;
	}

	e_mr = ehca_mr_new();
	if (!e_mr) {
		ehca_err(&shca->ib_device, "out of memory");
		ret = -ENOMEM;
		goto ehca_reg_internal_maxmr_exit0;
	}
	e_mr->flags |= EHCA_MR_FLAG_MAXMR;

	/* register internal max-MR on HCA */
	size_maxmr = ehca_mr_len;
	iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START));
	ib_pbuf.addr = 0;
	ib_pbuf.size = size_maxmr;
	num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
				PAGE_SIZE);
	hw_pgsize = ehca_get_max_hwpage_size(shca);
	num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr,
				 hw_pgsize);

	memset(&pginfo, 0, sizeof(pginfo));
	pginfo.type = EHCA_MR_PGI_PHYS;
	pginfo.num_kpages = num_kpages;
	pginfo.num_hwpages = num_hwpages;
	pginfo.hwpage_size = hw_pgsize;
	pginfo.u.phy.num_phys_buf = 1;
	pginfo.u.phy.phys_buf_array = &ib_pbuf;

	ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
			  &pginfo, &e_mr->ib.ib_mr.lkey,
			  &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR);
	if (ret) {
		ehca_err(&shca->ib_device, "reg of internal max MR failed, "
			 "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x "
			 "num_hwpages=%x", e_mr, iova_start, size_maxmr,
			 num_kpages, num_hwpages);
		goto ehca_reg_internal_maxmr_exit1;
	}

	/* successful registration of all pages */
	e_mr->ib.ib_mr.device = e_pd->ib_pd.device;
	e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
	e_mr->ib.ib_mr.uobject = NULL;
	atomic_inc(&(e_pd->ib_pd.usecnt));
	atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
	*e_maxmr = e_mr;
	return 0;

ehca_reg_internal_maxmr_exit1:
	ehca_mr_delete(e_mr);
ehca_reg_internal_maxmr_exit0:
	if (ret)
		ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p",
			 ret, shca, e_pd, e_maxmr);
	return ret;
} /* end ehca_reg_internal_maxmr() */

/*----------------------------------------------------------------------*/

int ehca_reg_maxmr(struct ehca_shca *shca,
		   struct ehca_mr *e_newmr,
		   u64 *iova_start,
		   int acl,
		   struct ehca_pd *e_pd,
		   u32 *lkey,
		   u32 *rkey)
{
	u64 h_ret;
	struct ehca_mr *e_origmr = shca->maxmr;
	u32 hipz_acl;
	struct ehca_mr_hipzout_parms hipzout;

	ehca_mrmw_map_acl(acl, &hipz_acl);
	ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);

	h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
				    (u64)iova_start, hipz_acl, e_pd->fw_pd,
				    &hipzout);
	if (h_ret != H_SUCCESS) {
		ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
			 "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
			 h_ret, e_origmr, shca->ipz_hca_handle.handle,
			 e_origmr->ipz_mr_handle.handle,
			 e_origmr->ib.ib_mr.lkey);
		return ehca2ib_return_code(h_ret);
	}
	/* successful registration */
	e_newmr->num_kpages = e_origmr->num_kpages;
	e_newmr->num_hwpages = e_origmr->num_hwpages;
	e_newmr->hwpage_size = e_origmr->hwpage_size;
	e_newmr->start = iova_start;
	e_newmr->size = e_origmr->size;
	e_newmr->acl = acl;
	e_newmr->ipz_mr_handle = hipzout.handle;
	*lkey = hipzout.lkey;
	*rkey = hipzout.rkey;
	return 0;
} /* end ehca_reg_maxmr() */

/*----------------------------------------------------------------------*/

int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
{
	int ret;
	struct ehca_mr *e_maxmr;
	struct ib_pd *ib_pd;

	if (!shca->maxmr) {
		ehca_err(&shca->ib_device, "bad call, shca=%p", shca);
		ret = -EINVAL;
		goto ehca_dereg_internal_maxmr_exit0;
	}

	e_maxmr = shca->maxmr;
	ib_pd = e_maxmr->ib.ib_mr.pd;
	shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */

	ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
	if (ret) {
		ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
			 "ret=%i e_maxmr=%p shca=%p lkey=%x",
			 ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
		shca->maxmr = e_maxmr;
		goto ehca_dereg_internal_maxmr_exit0;
	}

	atomic_dec(&ib_pd->usecnt);

ehca_dereg_internal_maxmr_exit0:
	if (ret)
		ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p",
			 ret, shca, shca->maxmr);
	return ret;
} /* end ehca_dereg_internal_maxmr() */

/*----------------------------------------------------------------------*/

/*
 * check the physical buffer array of an MR verb for validity and
 * calculate the MR size
 */
int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
				  int num_phys_buf,
				  u64 *iova_start,
				  u64 *size)
{
	struct ib_phys_buf *pbuf = phys_buf_array;
	u64 size_count = 0;
	u32 i;

	if (num_phys_buf == 0) {
		ehca_gen_err("bad phys buf array len, num_phys_buf=0");
		return -EINVAL;
	}
	/* check first buffer */
	if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
		ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
			     "pbuf->addr=%llx pbuf->size=%llx",
			     iova_start, pbuf->addr, pbuf->size);
		return -EINVAL;
	}
	if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
	    (num_phys_buf > 1)) {
		ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx "
			     "pbuf->size=%llx", pbuf->addr, pbuf->size);
		return -EINVAL;
	}

	for (i = 0; i < num_phys_buf; i++) {
		if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
			ehca_gen_err("bad address, i=%x pbuf->addr=%llx "
				     "pbuf->size=%llx",
				     i, pbuf->addr, pbuf->size);
			return -EINVAL;
		}
		if (((i > 0) &&	/* not 1st */
		     (i < (num_phys_buf - 1)) &&	/* not last */
		     (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
			ehca_gen_err("bad size, i=%x pbuf->size=%llx",
				     i, pbuf->size);
			return -EINVAL;
		}
		size_count += pbuf->size;
		pbuf++;
	}

	*size = size_count;
	return 0;
} /* end ehca_mr_chk_buf_and_calc_size() */

/*----------------------------------------------------------------------*/

/* check the page list of a map FMR verb for validity */
int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
			     u64 *page_list,
			     int list_len)
{
	u32 i;
	u64 *page;

	if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) {
		ehca_gen_err("bad list_len, list_len=%x "
			     "e_fmr->fmr_max_pages=%x fmr=%p",
			     list_len, e_fmr->fmr_max_pages, e_fmr);
		return -EINVAL;
	}

	/* each page must be aligned */
	page = page_list;
	for (i = 0; i < list_len; i++) {
		if (*page % e_fmr->fmr_page_size) {
			ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p "
				     "fmr_page_size=%x", i, *page, page, e_fmr,
				     e_fmr->fmr_page_size);
			return -EINVAL;
		}
		page++;
	}

	return 0;
} /* end ehca_fmr_check_page_list() */
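
/*
 * Worked example (illustrative only): with e_fmr->fmr_page_size = 0x1000,
 * a page_list entry of 0x10002000 passes the alignment check above
 * (0x10002000 % 0x1000 == 0), while 0x10002200 is rejected with -EINVAL.
 */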

/*----------------------------------------------------------------------*/

/* PAGE_SIZE >= pginfo->hwpage_size */
static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
				  u32 number,
				  u64 *kpage)
{
	int ret = 0;
	struct ib_umem_chunk *prev_chunk;
	struct ib_umem_chunk *chunk;
	u64 pgaddr;
	u32 i = 0;
	u32 j = 0;
	int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;

	/* loop over desired chunk entries */
	chunk      = pginfo->u.usr.next_chunk;
	prev_chunk = pginfo->u.usr.next_chunk;
	list_for_each_entry_continue(
		chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
		for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
			pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
				<< PAGE_SHIFT;
			*kpage = phys_to_abs(pgaddr +
					     (pginfo->next_hwpage *
					      pginfo->hwpage_size));
			if (!(*kpage)) {
				ehca_gen_err("pgaddr=%llx "
					     "chunk->page_list[i]=%llx "
					     "i=%x next_hwpage=%llx",
					     pgaddr, (u64)sg_dma_address(
						     &chunk->page_list[i]),
					     i, pginfo->next_hwpage);
				return -EFAULT;
			}
			(pginfo->hwpage_cnt)++;
			(pginfo->next_hwpage)++;
			kpage++;
			if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
				(pginfo->kpage_cnt)++;
				(pginfo->u.usr.next_nmap)++;
				pginfo->next_hwpage = 0;
				i++;
			}
			j++;
			if (j >= number)
				break;
		}
		if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
		    (j >= number)) {
			pginfo->u.usr.next_nmap = 0;
			prev_chunk = chunk;
			break;
		} else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
			pginfo->u.usr.next_nmap = 0;
			prev_chunk = chunk;
		} else if (j >= number)
			break;
		else
			prev_chunk = chunk;
	}
	pginfo->u.usr.next_chunk =
		list_prepare_entry(prev_chunk,
				   (&(pginfo->u.usr.region->chunk_list)),
				   list);
	return ret;
}
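
/*
 * Sizing note (illustrative): ehca_set_pagebuf_user1() runs when
 * PAGE_SIZE >= hwpage_size. On a 64 KB-page kernel with 4 KB hwpages,
 * hwpages_per_kpage = 0x10000 / 0x1000 = 16, so each umem page emits 16
 * consecutive hwpage addresses before next_nmap advances to the next page.
 */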

/*
 * check given pages for contiguous layout
 * last page addr is returned in prev_pgaddr for further check
 */
static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
				     int start_idx, int end_idx,
				     u64 *prev_pgaddr)
{
	int t;
	for (t = start_idx; t <= end_idx; t++) {
		u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
		if (ehca_debug_level >= 3)
			ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
				     *(u64 *)abs_to_virt(phys_to_abs(pgaddr)));
		if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
			ehca_gen_err("uncontiguous page found pgaddr=%llx "
				     "prev_pgaddr=%llx page_list_i=%x",
				     pgaddr, *prev_pgaddr, t);
			return -EINVAL;
		}
		*prev_pgaddr = pgaddr;
	}
	return 0;
}
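
/*
 * Illustrative example: for a 64 KB hwpage built from 4 KB kernel pages,
 * one ATE covers 16 kpages; ehca_check_kpages_per_ate() verifies that each
 * of them starts exactly PAGE_SIZE after its predecessor, i.e. that the
 * whole 64 KB block is physically contiguous.
 */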

/* PAGE_SIZE < pginfo->hwpage_size */
static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
				  u32 number,
				  u64 *kpage)
{
	int ret = 0;
	struct ib_umem_chunk *prev_chunk;
	struct ib_umem_chunk *chunk;
	u64 pgaddr, prev_pgaddr;
	u32 i = 0;
	u32 j = 0;
	int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
	int nr_kpages = kpages_per_hwpage;

	/* loop over desired chunk entries */
	chunk      = pginfo->u.usr.next_chunk;
	prev_chunk = pginfo->u.usr.next_chunk;
	list_for_each_entry_continue(
		chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
		for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
			if (nr_kpages == kpages_per_hwpage) {
				pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
					<< PAGE_SHIFT;
				*kpage = phys_to_abs(pgaddr);
				if (!(*kpage)) {
					ehca_gen_err("pgaddr=%llx i=%x",
						     pgaddr, i);
					ret = -EFAULT;
					return ret;
				}
				/*
				 * The first page in a hwpage must be aligned;
				 * the first MR page is exempt from this rule.
				 */
				if (pgaddr & (pginfo->hwpage_size - 1)) {
					if (pginfo->hwpage_cnt) {
						ehca_gen_err(
							"invalid alignment "
							"pgaddr=%llx i=%x "
							"mr_pgsize=%llx",
							pgaddr, i,
							pginfo->hwpage_size);
						ret = -EFAULT;
						return ret;
					}
					/* first MR page */
					pginfo->kpage_cnt =
						(pgaddr &
						 (pginfo->hwpage_size - 1)) >>
						PAGE_SHIFT;
					nr_kpages -= pginfo->kpage_cnt;
					*kpage = phys_to_abs(
						pgaddr &
						~(pginfo->hwpage_size - 1));
				}
				if (ehca_debug_level >= 3) {
					u64 val = *(u64 *)abs_to_virt(
						phys_to_abs(pgaddr));
					ehca_gen_dbg("kpage=%llx chunk_page=%llx "
						     "value=%016llx",
						     *kpage, pgaddr, val);
				}
				prev_pgaddr = pgaddr;
				i++;
				pginfo->kpage_cnt++;
				pginfo->u.usr.next_nmap++;
				nr_kpages--;
				if (!nr_kpages)
					goto next_kpage;
				continue;
			}
			if (i + nr_kpages > chunk->nmap) {
				ret = ehca_check_kpages_per_ate(
					chunk->page_list, i,
					chunk->nmap - 1, &prev_pgaddr);
				if (ret)
					return ret;
				pginfo->kpage_cnt += chunk->nmap - i;
				pginfo->u.usr.next_nmap += chunk->nmap - i;
				nr_kpages -= chunk->nmap - i;
				break;
			}

			ret = ehca_check_kpages_per_ate(chunk->page_list, i,
							i + nr_kpages - 1,
							&prev_pgaddr);
			if (ret)
				return ret;
			i += nr_kpages;
			pginfo->kpage_cnt += nr_kpages;
			pginfo->u.usr.next_nmap += nr_kpages;
next_kpage:
			nr_kpages = kpages_per_hwpage;
			(pginfo->hwpage_cnt)++;
			kpage++;
			j++;
			if (j >= number)
				break;
		}
		if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
		    (j >= number)) {
			pginfo->u.usr.next_nmap = 0;
			prev_chunk = chunk;
			break;
		} else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
			pginfo->u.usr.next_nmap = 0;
			prev_chunk = chunk;
		} else if (j >= number)
			break;
		else
			prev_chunk = chunk;
	}
	pginfo->u.usr.next_chunk =
		list_prepare_entry(prev_chunk,
				   (&(pginfo->u.usr.region->chunk_list)),
				   list);
	return ret;
}

static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
				 u32 number, u64 *kpage)
{
	int ret = 0;
	struct ib_phys_buf *pbuf;
	u64 num_hw, offs_hw;
	u32 i = 0;

	/* loop over desired phys_buf_array entries */
	while (i < number) {
		pbuf   = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf;
		num_hw  = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) +
				     pbuf->size, pginfo->hwpage_size);
		offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) /
			pginfo->hwpage_size;
		while (pginfo->next_hwpage < offs_hw + num_hw) {
			/* sanity check */
			if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
			    (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
				ehca_gen_err("kpage_cnt >= num_kpages, "
					     "kpage_cnt=%llx num_kpages=%llx "
					     "hwpage_cnt=%llx "
					     "num_hwpages=%llx i=%x",
					     pginfo->kpage_cnt,
					     pginfo->num_kpages,
					     pginfo->hwpage_cnt,
					     pginfo->num_hwpages, i);
				return -EFAULT;
			}
			*kpage = phys_to_abs(
				(pbuf->addr & ~(pginfo->hwpage_size - 1)) +
				(pginfo->next_hwpage * pginfo->hwpage_size));
			if (!(*kpage) && pbuf->addr) {
				ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx "
					     "next_hwpage=%llx", pbuf->addr,
					     pbuf->size, pginfo->next_hwpage);
				return -EFAULT;
			}
			(pginfo->hwpage_cnt)++;
			(pginfo->next_hwpage)++;
			if (PAGE_SIZE >= pginfo->hwpage_size) {
				if (pginfo->next_hwpage %
				    (PAGE_SIZE / pginfo->hwpage_size) == 0)
					(pginfo->kpage_cnt)++;
			} else
				pginfo->kpage_cnt += pginfo->hwpage_size /
					PAGE_SIZE;
			kpage++;
			i++;
			if (i >= number)
				break;
		}
		if (pginfo->next_hwpage >= offs_hw + num_hw) {
			(pginfo->u.phy.next_buf)++;
			pginfo->next_hwpage = 0;
		}
	}
	return ret;
}
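
/*
 * Worked example (illustrative): for hwpage_size = 0x1000 and a phys buf
 * with addr = 0x10800 and size = 0x2000,
 *
 *	num_hw  = NUM_CHUNKS(0x800 + 0x2000, 0x1000) = 3
 *	offs_hw = (0x10800 & ~0xfff) / 0x1000        = 0x10
 *
 * i.e. the buffer spans three hwpages starting at hwpage index 0x10.
 */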

static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
				u32 number, u64 *kpage)
{
	int ret = 0;
	u64 *fmrlist;
	u32 i;

	/* loop over desired page_list entries */
	fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
	for (i = 0; i < number; i++) {
		*kpage = phys_to_abs((*fmrlist & ~(pginfo->hwpage_size - 1)) +
				     pginfo->next_hwpage * pginfo->hwpage_size);
		if (!(*kpage)) {
			ehca_gen_err("*fmrlist=%llx fmrlist=%p "
				     "next_listelem=%llx next_hwpage=%llx",
				     *fmrlist, fmrlist,
				     pginfo->u.fmr.next_listelem,
				     pginfo->next_hwpage);
			return -EFAULT;
		}
		(pginfo->hwpage_cnt)++;
		if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) {
			if (pginfo->next_hwpage %
			    (pginfo->u.fmr.fmr_pgsize /
			     pginfo->hwpage_size) == 0) {
				(pginfo->kpage_cnt)++;
				(pginfo->u.fmr.next_listelem)++;
				fmrlist++;
				pginfo->next_hwpage = 0;
			} else
				(pginfo->next_hwpage)++;
		} else {
			unsigned int cnt_per_hwpage = pginfo->hwpage_size /
				pginfo->u.fmr.fmr_pgsize;
			unsigned int j;
			u64 prev = *kpage;
			/* check if addresses are contiguous */
			for (j = 1; j < cnt_per_hwpage; j++) {
				u64 p = phys_to_abs(fmrlist[j] &
						    ~(pginfo->hwpage_size - 1));
				if (prev + pginfo->u.fmr.fmr_pgsize != p) {
					ehca_gen_err("uncontiguous fmr pages "
						     "found prev=%llx p=%llx "
						     "idx=%x", prev, p, i + j);
					return -EINVAL;
				}
				prev = p;
			}
			pginfo->kpage_cnt += cnt_per_hwpage;
			pginfo->u.fmr.next_listelem += cnt_per_hwpage;
			fmrlist += cnt_per_hwpage;
		}
		kpage++;
	}
	return ret;
}

/* setup page buffer from page info */
int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
		     u32 number,
		     u64 *kpage)
{
	int ret;

	switch (pginfo->type) {
	case EHCA_MR_PGI_PHYS:
		ret = ehca_set_pagebuf_phys(pginfo, number, kpage);
		break;
	case EHCA_MR_PGI_USER:
		ret = PAGE_SIZE >= pginfo->hwpage_size ?
			ehca_set_pagebuf_user1(pginfo, number, kpage) :
			ehca_set_pagebuf_user2(pginfo, number, kpage);
		break;
	case EHCA_MR_PGI_FMR:
		ret = ehca_set_pagebuf_fmr(pginfo, number, kpage);
		break;
	default:
		ehca_gen_err("bad pginfo->type=%x", pginfo->type);
		ret = -EFAULT;
		break;
	}
	return ret;
} /* end ehca_set_pagebuf() */
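
/*
 * Minimal usage sketch (not part of the driver): a caller is expected to
 * drain pginfo in batches of at most MAX_RPAGES hwpages per hcall, along
 * the lines of the rpage registration loops in this file. The kpage
 * buffer is assumed to be a firmware control block holding MAX_RPAGES
 * 64-bit page addresses; the hcall itself is elided here.
 */
static int __maybe_unused example_drain_pginfo(struct ehca_mr_pginfo *pginfo,
					       u64 *kpage)
{
	int ret;
	u64 rnum;

	while (pginfo->hwpage_cnt < pginfo->num_hwpages) {
		rnum = min_t(u64, pginfo->num_hwpages - pginfo->hwpage_cnt,
			     MAX_RPAGES);
		ret = ehca_set_pagebuf(pginfo, rnum, kpage);
		if (ret)
			return ret;
		/* ... hand kpage/rnum to the register_rpages hcall here ... */
	}
	return 0;
}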

/*----------------------------------------------------------------------*/

/*
 * check whether an MR is a max-MR, i.e. whether it covers the whole of
 * memory; returns 1 if it is a max-MR, 0 otherwise
 */
int ehca_mr_is_maxmr(u64 size,
		     u64 *iova_start)
{
	/* an MR is treated as a max-MR only if it matches the following: */
	if ((size == ehca_mr_len) &&
	    (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) {
		ehca_gen_dbg("this is a max-MR");
		return 1;
	}
	return 0;
} /* end ehca_mr_is_maxmr() */

/*----------------------------------------------------------------------*/

/* map IB access control bits to hipz access control; used for both MR and MW */
void ehca_mrmw_map_acl(int ib_acl,
		       u32 *hipz_acl)
{
	*hipz_acl = 0;
	if (ib_acl & IB_ACCESS_REMOTE_READ)
		*hipz_acl |= HIPZ_ACCESSCTRL_R_READ;
	if (ib_acl & IB_ACCESS_REMOTE_WRITE)
		*hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE;
	if (ib_acl & IB_ACCESS_REMOTE_ATOMIC)
		*hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC;
	if (ib_acl & IB_ACCESS_LOCAL_WRITE)
		*hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE;
	if (ib_acl & IB_ACCESS_MW_BIND)
		*hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND;
} /* end ehca_mrmw_map_acl() */

/*----------------------------------------------------------------------*/

/* sets page size in hipz access control for MR/MW. */
void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/
{
	*hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24);
} /* end ehca_mrmw_set_pgsize_hipz_acl() */

/*----------------------------------------------------------------------*/

/*
 * reverse map hipz access control bits back to IB access control;
 * used for both MR and MW
 */
void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
			       int *ib_acl) /*OUT*/
{
	*ib_acl = 0;
	if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ)
		*ib_acl |= IB_ACCESS_REMOTE_READ;
	if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE)
		*ib_acl |= IB_ACCESS_REMOTE_WRITE;
	if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC)
		*ib_acl |= IB_ACCESS_REMOTE_ATOMIC;
	if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE)
		*ib_acl |= IB_ACCESS_LOCAL_WRITE;
	if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND)
		*ib_acl |= IB_ACCESS_MW_BIND;
} /* end ehca_mrmw_reverse_map_acl() */
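
/*
 * Illustrative round trip: over the bits handled above, the two mappings
 * are inverses, e.g.
 *
 *	int ib_acl = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;
 *	u32 hipz_acl;
 *	int back;
 *
 *	ehca_mrmw_map_acl(ib_acl, &hipz_acl);
 *	ehca_mrmw_reverse_map_acl(&hipz_acl, &back);
 *
 * leaves back == ib_acl.
 */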

/*----------------------------------------------------------------------*/

/*
 * MR destructor and constructor,
 * used by the Reregister MR verb: resets all fields of struct ehca_mr
 * to 0, except struct ib_mr and the spinlock
 */
void ehca_mr_deletenew(struct ehca_mr *mr)
{
	mr->flags = 0;
	mr->num_kpages = 0;
	mr->num_hwpages = 0;
	mr->acl = 0;
	mr->start = NULL;
	mr->fmr_page_size = 0;
	mr->fmr_max_pages = 0;
	mr->fmr_max_maps = 0;
	mr->fmr_map_cnt = 0;
	memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
	memset(&mr->galpas, 0, sizeof(mr->galpas));
} /* end ehca_mr_deletenew() */

int ehca_init_mrmw_cache(void)
{
	mr_cache = kmem_cache_create("ehca_cache_mr",
				     sizeof(struct ehca_mr), 0,
				     SLAB_HWCACHE_ALIGN,
				     NULL);
	if (!mr_cache)
		return -ENOMEM;
	mw_cache = kmem_cache_create("ehca_cache_mw",
				     sizeof(struct ehca_mw), 0,
				     SLAB_HWCACHE_ALIGN,
				     NULL);
	if (!mw_cache) {
		kmem_cache_destroy(mr_cache);
		mr_cache = NULL;
		return -ENOMEM;
	}
	return 0;
}
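
/*
 * Minimal allocation sketch (assumption: this mirrors what ehca_mr_new()
 * and ehca_mr_delete() elsewhere in the driver do with these caches):
 */
static struct ehca_mr *__maybe_unused example_mr_alloc(void)
{
	/* zeroed MR object from the slab cache created above */
	return kmem_cache_zalloc(mr_cache, GFP_KERNEL);
}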

void ehca_cleanup_mrmw_cache(void)
{
	if (mr_cache)
		kmem_cache_destroy(mr_cache);
	if (mw_cache)
		kmem_cache_destroy(mw_cache);
}

static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap,
				     int dir)
{
	if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) {
		ehca_top_bmap->dir[dir] =
			kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL);
		if (!ehca_top_bmap->dir[dir])
			return -ENOMEM;
		/* Set map block to 0xFF according to EHCA_INVAL_ADDR */
		memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE);
	}
	return 0;
}

static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir)
{
	if (!ehca_bmap_valid(ehca_bmap->top[top])) {
		ehca_bmap->top[top] =
			kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL);
		if (!ehca_bmap->top[top])
			return -ENOMEM;
		/* Set map block to 0xFF according to EHCA_INVAL_ADDR */
		memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE);
	}
	return ehca_init_top_bmap(ehca_bmap->top[top], dir);
}

static inline int ehca_calc_index(unsigned long i, unsigned long s)
{
	return (i >> s) & EHCA_INDEX_MASK;
}
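
/*
 * Worked example (illustrative): a section index decomposes into three
 * 13-bit fields, each selecting one of EHCA_MAP_ENTRIES (8192) slots:
 *
 *	idx = i & EHCA_INDEX_MASK;                        bits  0..12
 *	dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT);   bits 13..25
 *	top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT);   bits 26..38
 *
 * This is exactly the decomposition ehca_update_busmap() uses below.
 */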

void ehca_destroy_busmap(void)
{
	int top, dir;

	if (!ehca_bmap)
		return;

	for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
		if (!ehca_bmap_valid(ehca_bmap->top[top]))
			continue;
		for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
			if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
				continue;

			kfree(ehca_bmap->top[top]->dir[dir]);
		}

		kfree(ehca_bmap->top[top]);
	}

	kfree(ehca_bmap);
	ehca_bmap = NULL;
}

static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages)
{
	unsigned long i, start_section, end_section;
	int top, dir, idx;

	if (!nr_pages)
		return 0;

	if (!ehca_bmap) {
		ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL);
		if (!ehca_bmap)
			return -ENOMEM;
		/* Set map block to 0xFF according to EHCA_INVAL_ADDR */
		memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE);
	}

	start_section = phys_to_abs(pfn * PAGE_SIZE) / EHCA_SECTSIZE;
	end_section = phys_to_abs((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE;
	for (i = start_section; i < end_section; i++) {
		int ret;
		top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT);
		dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT);
		idx = i & EHCA_INDEX_MASK;

		ret = ehca_init_bmap(ehca_bmap, top, dir);
		if (ret) {
			ehca_destroy_busmap();
			return ret;
		}
		ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len;
		ehca_mr_len += EHCA_SECTSIZE;
	}
	return 0;
}
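
/*
 * Note (illustrative): each valid section is assigned the current value of
 * ehca_mr_len as its bus offset, and ehca_mr_len then grows by EHCA_SECTSIZE,
 * so the bmap linearizes possibly sparse physical sections into one dense,
 * gap-free address space for the max-MR.
 */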

static int ehca_is_hugepage(unsigned long pfn)
{
	int page_order;

	if (pfn & EHCA_HUGEPAGE_PFN_MASK)
		return 0;

	page_order = compound_order(pfn_to_page(pfn));
	if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT)
		return 0;

	return 1;
}
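
/*
 * Worked example (illustrative): EHCA_HUGEPAGESHIFT is 34, i.e. 16 GB
 * hugepages. With 4 KB base pages (PAGE_SHIFT = 12) a pfn qualifies only
 * if its low 22 bits are zero (EHCA_HUGEPAGE_PFN_MASK) and the compound
 * page order is 34 - 12 = 22.
 */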

static int ehca_create_busmap_callback(unsigned long initial_pfn,
				       unsigned long total_nr_pages, void *arg)
{
	int ret;
	unsigned long pfn, start_pfn, end_pfn, nr_pages;

	if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE)
		return ehca_update_busmap(initial_pfn, total_nr_pages);

	/* Given chunk is >= 16 GB -> check for hugepages */
	start_pfn = initial_pfn;
	end_pfn = initial_pfn + total_nr_pages;
	pfn = start_pfn;

	while (pfn < end_pfn) {
		if (ehca_is_hugepage(pfn)) {
			/* Add mem found in front of the hugepage */
			nr_pages = pfn - start_pfn;
			ret = ehca_update_busmap(start_pfn, nr_pages);
			if (ret)
				return ret;
			/* Skip the hugepage */
			pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE);
			start_pfn = pfn;
		} else
			pfn += (EHCA_SECTSIZE / PAGE_SIZE);
	}

	/* Add mem found behind the hugepage(s) */
	nr_pages = pfn - start_pfn;
	return ehca_update_busmap(start_pfn, nr_pages);
}

int ehca_create_busmap(void)
{
	int ret;

	ehca_mr_len = 0;
	ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL,
				    ehca_create_busmap_callback);
	return ret;
}

static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
				   struct ehca_mr *e_mr,
				   struct ehca_mr_pginfo *pginfo)
{
	int top;
	u64 hret = H_SUCCESS; /* stays H_SUCCESS if no top map entry is valid */
	u64 *kpage;

	kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
	if (!kpage) {
		ehca_err(&shca->ib_device, "kpage alloc failed");
		return -ENOMEM;
	}
	for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
		if (!ehca_bmap_valid(ehca_bmap->top[top]))
			continue;
		hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo);
		if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS))
			break;
	}

	ehca_free_fw_ctrlblock(kpage);

	if (hret == H_SUCCESS)
		return 0; /* Everything is fine */

	ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, "
		 "h_ret=%lli e_mr=%p top=%x lkey=%x "
		 "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top,
		 e_mr->ib.ib_mr.lkey,
		 shca->ipz_hca_handle.handle,
		 e_mr->ipz_mr_handle.handle);
	return ehca2ib_return_code(hret);
}

static u64 ehca_map_vaddr(void *caddr)
{
	int top, dir, idx;
	unsigned long abs_addr, offset;
	u64 entry;

	if (!ehca_bmap)
		return EHCA_INVAL_ADDR;

	abs_addr = virt_to_abs(caddr);
	top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT);
	if (!ehca_bmap_valid(ehca_bmap->top[top]))
		return EHCA_INVAL_ADDR;

	dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT);
	if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
		return EHCA_INVAL_ADDR;

	idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT);

	entry = ehca_bmap->top[top]->dir[dir]->ent[idx];
	if (ehca_bmap_valid(entry)) {
		offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1);
		return entry | offset;
	} else
		return EHCA_INVAL_ADDR;
}
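
/*
 * Worked example (illustrative): the translation walks abs_addr through
 * the three-level bmap,
 *
 *	top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT);
 *	dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT);
 *	idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT);
 *
 * and the result is the section's remapped base (ent[idx]) OR-ed with the
 * offset of caddr within its EHCA_SECTSIZE section.
 */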

static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
{
	return dma_addr == EHCA_INVAL_ADDR;
}

static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr,
			       size_t size, enum dma_data_direction direction)
{
	if (cpu_addr)
		return ehca_map_vaddr(cpu_addr);
	else
		return EHCA_INVAL_ADDR;
}

static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
				  enum dma_data_direction direction)
{
	/* This is only a stub; nothing to be done here */
}

static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page,
			     unsigned long offset, size_t size,
			     enum dma_data_direction direction)
{
	u64 addr;

	if (offset + size > PAGE_SIZE)
		return EHCA_INVAL_ADDR;

	addr = ehca_map_vaddr(page_address(page));
	if (!ehca_dma_mapping_error(dev, addr))
		addr += offset;

	return addr;
}

static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
				enum dma_data_direction direction)
{
	/* This is only a stub; nothing to be done here */
}

static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl,
			   int nents, enum dma_data_direction direction)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		u64 addr;
		addr = ehca_map_vaddr(sg_virt(sg));
		if (ehca_dma_mapping_error(dev, addr))
			return 0;

		sg->dma_address = addr;
		sg->dma_length = sg->length;
	}
	return nents;
}

static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
			      int nents, enum dma_data_direction direction)
{
	/* This is only a stub; nothing to be done here */
}

static u64 ehca_dma_address(struct ib_device *dev, struct scatterlist *sg)
{
	return sg->dma_address;
}

static unsigned int ehca_dma_len(struct ib_device *dev, struct scatterlist *sg)
{
	return sg->length;
}

static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr,
					 size_t size,
					 enum dma_data_direction dir)
{
	dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
}

static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr,
					    size_t size,
					    enum dma_data_direction dir)
{
	dma_sync_single_for_device(dev->dma_device, addr, size, dir);
}

static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size,
				     u64 *dma_handle, gfp_t flag)
{
	struct page *p;
	void *addr = NULL;
	u64 dma_addr;

	p = alloc_pages(flag, get_order(size));
	if (p) {
		addr = page_address(p);
		dma_addr = ehca_map_vaddr(addr);
		if (ehca_dma_mapping_error(dev, dma_addr)) {
			free_pages((unsigned long)addr, get_order(size));
			return NULL;
		}
		if (dma_handle)
			*dma_handle = dma_addr;
		return addr;
	}
	return NULL;
}

static void ehca_dma_free_coherent(struct ib_device *dev, size_t size,
				   void *cpu_addr, u64 dma_handle)
{
	if (cpu_addr && size)
		free_pages((unsigned long)cpu_addr, get_order(size));
}

struct ib_dma_mapping_ops ehca_dma_mapping_ops = {
	.mapping_error          = ehca_dma_mapping_error,
	.map_single             = ehca_dma_map_single,
	.unmap_single           = ehca_dma_unmap_single,
	.map_page               = ehca_dma_map_page,
	.unmap_page             = ehca_dma_unmap_page,
	.map_sg                 = ehca_dma_map_sg,
	.unmap_sg               = ehca_dma_unmap_sg,
	.dma_address            = ehca_dma_address,
	.dma_len                = ehca_dma_len,
	.sync_single_for_cpu    = ehca_dma_sync_single_for_cpu,
	.sync_single_for_device = ehca_dma_sync_single_for_device,
	.alloc_coherent         = ehca_dma_alloc_coherent,
	.free_coherent          = ehca_dma_free_coherent,
};
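
/*
 * Hook-up sketch (assumption, not shown in this file): the device probe
 * code is expected to install these ops on the ib_device so that the
 * core's ib_dma_* helpers dispatch to the bmap-based mapping above
 * instead of the generic DMA API, roughly:
 *
 *	shca->ib_device.dma_ops = &ehca_dma_mapping_ops;
 */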