v6.13.7
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright © 2006-2009, Intel Corporation.
   4 *
   5 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
   6 */
   7
   8#include <linux/iova.h>
   9#include <linux/kmemleak.h>
  10#include <linux/module.h>
  11#include <linux/slab.h>
  12#include <linux/smp.h>
  13#include <linux/bitops.h>
  14#include <linux/cpu.h>
  15#include <linux/workqueue.h>
  16
  17/* The anchor node sits above the top of the usable address space */
  18#define IOVA_ANCHOR	~0UL
  19
  20#define IOVA_RANGE_CACHE_MAX_SIZE 6	/* log of max cached IOVA range size (in pages) */
  21
  22static bool iova_rcache_insert(struct iova_domain *iovad,
  23			       unsigned long pfn,
  24			       unsigned long size);
  25static unsigned long iova_rcache_get(struct iova_domain *iovad,
  26				     unsigned long size,
  27				     unsigned long limit_pfn);
  28static void free_iova_rcaches(struct iova_domain *iovad);
  29static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
  30static void free_global_cached_iovas(struct iova_domain *iovad);
  31
  32static struct iova *to_iova(struct rb_node *node)
  33{
  34	return rb_entry(node, struct iova, node);
  35}
  36
  37void
  38init_iova_domain(struct iova_domain *iovad, unsigned long granule,
  39	unsigned long start_pfn)
  40{
  41	/*
  42	 * IOVA granularity will normally be equal to the smallest
  43	 * supported IOMMU page size; both *must* be capable of
  44	 * representing individual CPU pages exactly.
  45	 */
  46	BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));
  47
  48	spin_lock_init(&iovad->iova_rbtree_lock);
  49	iovad->rbroot = RB_ROOT;
  50	iovad->cached_node = &iovad->anchor.node;
  51	iovad->cached32_node = &iovad->anchor.node;
  52	iovad->granule = granule;
  53	iovad->start_pfn = start_pfn;
  54	iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
  55	iovad->max32_alloc_size = iovad->dma_32bit_pfn;
  56	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
  57	rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
  58	rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
  59}
  60EXPORT_SYMBOL_GPL(init_iova_domain);
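
/*
 * A minimal usage sketch (not from this file): how a caller might bring an
 * iova_domain up and tear it down with the API above. The SZ_4K granule
 * (from <linux/sizes.h>) and the start_pfn of 1 are illustrative
 * assumptions, not requirements.
 */
static int example_iova_domain_lifecycle(void)
{
	static struct iova_domain iovad;
	int ret;

	/* Take a reference on the shared 'struct iova' slab caches. */
	ret = iova_cache_get();
	if (ret)
		return ret;

	/* The granule must be a power of two no larger than PAGE_SIZE. */
	init_iova_domain(&iovad, SZ_4K, 1);

	/* ... allocate, use and free IOVAs here ... */

	put_iova_domain(&iovad);	/* destroys every iova left in the tree */
	iova_cache_put();		/* drop the slab cache reference */
	return 0;
}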
  61
  62static struct rb_node *
  63__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
  64{
  65	if (limit_pfn <= iovad->dma_32bit_pfn)
  66		return iovad->cached32_node;
  67
  68	return iovad->cached_node;
  69}
  70
  71static void
  72__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
  73{
  74	if (new->pfn_hi < iovad->dma_32bit_pfn)
  75		iovad->cached32_node = &new->node;
  76	else
  77		iovad->cached_node = &new->node;
  78}
  79
  80static void
  81__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
  82{
  83	struct iova *cached_iova;
  84
  85	cached_iova = to_iova(iovad->cached32_node);
  86	if (free == cached_iova ||
  87	    (free->pfn_hi < iovad->dma_32bit_pfn &&
  88	     free->pfn_lo >= cached_iova->pfn_lo))
  89		iovad->cached32_node = rb_next(&free->node);
  90
  91	if (free->pfn_lo < iovad->dma_32bit_pfn)
  92		iovad->max32_alloc_size = iovad->dma_32bit_pfn;
  93
  94	cached_iova = to_iova(iovad->cached_node);
  95	if (free->pfn_lo >= cached_iova->pfn_lo)
  96		iovad->cached_node = rb_next(&free->node);
  97}
  98
  99static struct rb_node *iova_find_limit(struct iova_domain *iovad, unsigned long limit_pfn)
 100{
 101	struct rb_node *node, *next;
 102	/*
 103	 * Ideally what we'd like to judge here is whether limit_pfn is close
 104	 * enough to the highest-allocated IOVA that starting the allocation
 105	 * walk from the anchor node will be quicker than this initial work to
 106	 * find an exact starting point (especially if that ends up being the
 107	 * anchor node anyway). This is an incredibly crude approximation which
 108	 * only really helps the most likely case, but is at least trivially easy.
 109	 */
 110	if (limit_pfn > iovad->dma_32bit_pfn)
 111		return &iovad->anchor.node;
 112
 113	node = iovad->rbroot.rb_node;
 114	while (to_iova(node)->pfn_hi < limit_pfn)
 115		node = node->rb_right;
 116
 117search_left:
 118	while (node->rb_left && to_iova(node->rb_left)->pfn_lo >= limit_pfn)
 119		node = node->rb_left;
 120
 121	if (!node->rb_left)
 122		return node;
 123
 124	next = node->rb_left;
 125	while (next->rb_right) {
 126		next = next->rb_right;
 127		if (to_iova(next)->pfn_lo >= limit_pfn) {
 128			node = next;
 129			goto search_left;
 130		}
 131	}
 132
 133	return node;
 134}
 135
 136/* Insert the iova into the domain rbtree while holding the writer lock */
 137static void
 138iova_insert_rbtree(struct rb_root *root, struct iova *iova,
 139		   struct rb_node *start)
 140{
 141	struct rb_node **new, *parent = NULL;
 142
 143	new = (start) ? &start : &(root->rb_node);
 144	/* Figure out where to put new node */
 145	while (*new) {
 146		struct iova *this = to_iova(*new);
 147
 148		parent = *new;
 149
 150		if (iova->pfn_lo < this->pfn_lo)
 151			new = &((*new)->rb_left);
 152		else if (iova->pfn_lo > this->pfn_lo)
 153			new = &((*new)->rb_right);
 154		else {
 155			WARN_ON(1); /* this should not happen */
 156			return;
 157		}
 158	}
 159	/* Add new node and rebalance tree. */
 160	rb_link_node(&iova->node, parent, new);
 161	rb_insert_color(&iova->node, root);
 162}
 163
 164static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 165		unsigned long size, unsigned long limit_pfn,
 166			struct iova *new, bool size_aligned)
 167{
 168	struct rb_node *curr, *prev;
 169	struct iova *curr_iova;
 170	unsigned long flags;
 171	unsigned long new_pfn, retry_pfn;
 172	unsigned long align_mask = ~0UL;
 173	unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;
 174
 175	if (size_aligned)
 176		align_mask <<= fls_long(size - 1);
 177
 178	/* Walk the tree backwards */
 179	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 180	if (limit_pfn <= iovad->dma_32bit_pfn &&
 181			size >= iovad->max32_alloc_size)
 182		goto iova32_full;
 183
 184	curr = __get_cached_rbnode(iovad, limit_pfn);
 185	curr_iova = to_iova(curr);
 186	retry_pfn = curr_iova->pfn_hi;
 187
 188retry:
 189	do {
 190		high_pfn = min(high_pfn, curr_iova->pfn_lo);
 191		new_pfn = (high_pfn - size) & align_mask;
 192		prev = curr;
 193		curr = rb_prev(curr);
 194		curr_iova = to_iova(curr);
 195	} while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);
 196
 197	if (high_pfn < size || new_pfn < low_pfn) {
 198		if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
 199			high_pfn = limit_pfn;
 200			low_pfn = retry_pfn + 1;
 201			curr = iova_find_limit(iovad, limit_pfn);
 202			curr_iova = to_iova(curr);
 203			goto retry;
 204		}
 205		iovad->max32_alloc_size = size;
 206		goto iova32_full;
 207	}
 208
 209	/* pfn_lo will point to size aligned address if size_aligned is set */
 210	new->pfn_lo = new_pfn;
 211	new->pfn_hi = new->pfn_lo + size - 1;
 212
 213	/* If we have 'prev', it's a valid place to start the insertion. */
 214	iova_insert_rbtree(&iovad->rbroot, new, prev);
 215	__cached_rbnode_insert_update(iovad, new);
 216
 217	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 218	return 0;
 219
 220iova32_full:
 221	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 222	return -ENOMEM;
 223}
 224
 225static struct kmem_cache *iova_cache;
 226static unsigned int iova_cache_users;
 227static DEFINE_MUTEX(iova_cache_mutex);
 228
 229static struct iova *alloc_iova_mem(void)
 230{
 231	return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN);
 232}
 233
 234static void free_iova_mem(struct iova *iova)
 235{
 236	if (iova->pfn_lo != IOVA_ANCHOR)
 237		kmem_cache_free(iova_cache, iova);
 238}
 239
 240/**
 241 * alloc_iova - allocates an iova
 242 * @iovad: - iova domain in question
 243 * @size: - size of page frames to allocate
 244 * @limit_pfn: - max limit address
 245 * @size_aligned: - set if size_aligned address range is required
 246 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
 247 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
 248 * flag is set then the allocated address iova->pfn_lo will be naturally
 249 * aligned on roundup_power_of_two(size).
 250 */
 251struct iova *
 252alloc_iova(struct iova_domain *iovad, unsigned long size,
 253	unsigned long limit_pfn,
 254	bool size_aligned)
 255{
 256	struct iova *new_iova;
 257	int ret;
 258
 259	new_iova = alloc_iova_mem();
 260	if (!new_iova)
 261		return NULL;
 262
 263	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
 264			new_iova, size_aligned);
 265
 266	if (ret) {
 267		free_iova_mem(new_iova);
 268		return NULL;
 269	}
 270
 271	return new_iova;
 272}
 273EXPORT_SYMBOL_GPL(alloc_iova);
 274
 275static struct iova *
 276private_find_iova(struct iova_domain *iovad, unsigned long pfn)
 277{
 278	struct rb_node *node = iovad->rbroot.rb_node;
 279
 280	assert_spin_locked(&iovad->iova_rbtree_lock);
 281
 282	while (node) {
 283		struct iova *iova = to_iova(node);
 284
 285		if (pfn < iova->pfn_lo)
 286			node = node->rb_left;
 287		else if (pfn > iova->pfn_hi)
 288			node = node->rb_right;
 289		else
 290			return iova;	/* pfn falls within iova's range */
 291	}
 292
 293	return NULL;
 294}
 295
 296static void remove_iova(struct iova_domain *iovad, struct iova *iova)
 297{
 298	assert_spin_locked(&iovad->iova_rbtree_lock);
 299	__cached_rbnode_delete_update(iovad, iova);
 300	rb_erase(&iova->node, &iovad->rbroot);
 301}
 302
 303/**
 304 * find_iova - finds an iova for a given pfn
 305 * @iovad: - iova domain in question.
 306 * @pfn: - page frame number
 307 * This function finds and returns an iova belonging to the
 308 * given domain which matches the given pfn.
 309 */
 310struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
 311{
 312	unsigned long flags;
 313	struct iova *iova;
 314
 315	/* Take the lock so that no other thread is manipulating the rbtree */
 316	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 317	iova = private_find_iova(iovad, pfn);
 318	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 319	return iova;
 320}
 321EXPORT_SYMBOL_GPL(find_iova);
 322
 323/**
 324 * __free_iova - frees the given iova
 325 * @iovad: iova domain in question.
 326 * @iova: iova in question.
 327 * Frees the given iova belonging to the given domain
 328 */
 329void
 330__free_iova(struct iova_domain *iovad, struct iova *iova)
 331{
 332	unsigned long flags;
 333
 334	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 335	remove_iova(iovad, iova);
 336	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 337	free_iova_mem(iova);
 338}
 339EXPORT_SYMBOL_GPL(__free_iova);
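
/*
 * A minimal usage sketch (not from this file): pairing alloc_iova() with
 * __free_iova(). The 256-page size and the 32-bit PFN limit are purely
 * illustrative; 'iovad' is assumed to be an already-initialised domain.
 */
static void example_alloc_range(struct iova_domain *iovad)
{
	struct iova *iova;

	/* Top-down, size-aligned search below the 32-bit PFN boundary. */
	iova = alloc_iova(iovad, 256, iovad->dma_32bit_pfn - 1, true);
	if (!iova)
		return;

	/* iova->pfn_lo..pfn_hi now spans 256 IOVA pages, aligned to 256. */

	__free_iova(iovad, iova);
}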
 340
 341/**
 342 * free_iova - finds and frees the iova for a given pfn
 343 * @iovad: - iova domain in question.
 344 * @pfn: - pfn that is allocated previously
 345 * This function finds an iova for a given pfn and then
 346 * frees the iova from that domain.
 347 */
 348void
 349free_iova(struct iova_domain *iovad, unsigned long pfn)
 350{
 351	unsigned long flags;
 352	struct iova *iova;
 353
 354	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 355	iova = private_find_iova(iovad, pfn);
 356	if (!iova) {
 357		spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 358		return;
 359	}
 360	remove_iova(iovad, iova);
 361	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 362	free_iova_mem(iova);
 363}
 364EXPORT_SYMBOL_GPL(free_iova);
 365
 366/**
 367 * alloc_iova_fast - allocates an iova from rcache
 368 * @iovad: - iova domain in question
 369 * @size: - size of page frames to allocate
 370 * @limit_pfn: - max limit address
 371 * @flush_rcache: - set to flush rcache on regular allocation failure
 372 * This function tries to satisfy an iova allocation from the rcache,
 373 * and falls back to regular allocation on failure. If regular allocation
 374 * fails too and the flush_rcache flag is set then the rcache will be flushed.
 375 */
 376unsigned long
 377alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
 378		unsigned long limit_pfn, bool flush_rcache)
 379{
 380	unsigned long iova_pfn;
 381	struct iova *new_iova;
 382
 383	/*
 384	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
 385	 * will come back to bite us badly, so we have to waste a bit of space
 386	 * rounding up anything cacheable to make sure that can't happen. The
 387	 * order of the unadjusted size will still match upon freeing.
 388	 */
 389	if (size < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
 390		size = roundup_pow_of_two(size);
 391
 392	iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
 393	if (iova_pfn)
 394		return iova_pfn;
 395
 396retry:
 397	new_iova = alloc_iova(iovad, size, limit_pfn, true);
 398	if (!new_iova) {
 399		unsigned int cpu;
 400
 401		if (!flush_rcache)
 402			return 0;
 403
 404		/* Try replenishing IOVAs by flushing rcache. */
 405		flush_rcache = false;
 406		for_each_online_cpu(cpu)
 407			free_cpu_cached_iovas(cpu, iovad);
 408		free_global_cached_iovas(iovad);
 409		goto retry;
 410	}
 411
 412	return new_iova->pfn_lo;
 413}
 414EXPORT_SYMBOL_GPL(alloc_iova_fast);
 415
 416/**
 417 * free_iova_fast - free iova pfn range into rcache
 418 * @iovad: - iova domain in question.
 419 * @pfn: - pfn that is allocated previously
 420 * @size: - # of pages in range
 421 * This function frees an iova range by trying to put it into the rcache,
 422 * falling back to regular iova deallocation via free_iova() if this fails.
 423 */
 424void
 425free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
 426{
 427	if (iova_rcache_insert(iovad, pfn, size))
 428		return;
 429
 430	free_iova(iovad, pfn);
 431}
 432EXPORT_SYMBOL_GPL(free_iova_fast);
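
/*
 * A minimal usage sketch (not from this file): the cached fast path. It
 * assumes iova_domain_init_rcaches() has already been called for 'iovad';
 * the PFN limit below is illustrative. Note that the fast path deals in
 * bare PFNs (0 on failure) rather than struct iova pointers.
 */
static void example_fast_path(struct iova_domain *iovad, unsigned long npages)
{
	unsigned long pfn;

	pfn = alloc_iova_fast(iovad, npages, iovad->dma_32bit_pfn - 1, true);
	if (!pfn)
		return;

	/* ... map and use the range ... */

	/* Free with the same size so it lands back in the same rcache bucket. */
	free_iova_fast(iovad, pfn, npages);
}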
 433
 434static void iova_domain_free_rcaches(struct iova_domain *iovad)
 435{
 436	cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
 437					    &iovad->cpuhp_dead);
 438	free_iova_rcaches(iovad);
 439}
 440
 441/**
 442 * put_iova_domain - destroys the iova domain
 443 * @iovad: - iova domain in question.
 444 * All the iova's in that domain are destroyed.
 445 */
 446void put_iova_domain(struct iova_domain *iovad)
 447{
 448	struct iova *iova, *tmp;
 449
 450	if (iovad->rcaches)
 451		iova_domain_free_rcaches(iovad);
 452
 453	rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
 454		free_iova_mem(iova);
 455}
 456EXPORT_SYMBOL_GPL(put_iova_domain);
 457
 458static int
 459__is_range_overlap(struct rb_node *node,
 460	unsigned long pfn_lo, unsigned long pfn_hi)
 461{
 462	struct iova *iova = to_iova(node);
 463
 464	if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
 465		return 1;
 466	return 0;
 467}
 468
 469static inline struct iova *
 470alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
 471{
 472	struct iova *iova;
 473
 474	iova = alloc_iova_mem();
 475	if (iova) {
 476		iova->pfn_lo = pfn_lo;
 477		iova->pfn_hi = pfn_hi;
 478	}
 479
 480	return iova;
 481}
 482
 483static struct iova *
 484__insert_new_range(struct iova_domain *iovad,
 485	unsigned long pfn_lo, unsigned long pfn_hi)
 486{
 487	struct iova *iova;
 488
 489	iova = alloc_and_init_iova(pfn_lo, pfn_hi);
 490	if (iova)
 491		iova_insert_rbtree(&iovad->rbroot, iova, NULL);
 492
 493	return iova;
 494}
 495
 496static void
 497__adjust_overlap_range(struct iova *iova,
 498	unsigned long *pfn_lo, unsigned long *pfn_hi)
 499{
 500	if (*pfn_lo < iova->pfn_lo)
 501		iova->pfn_lo = *pfn_lo;
 502	if (*pfn_hi > iova->pfn_hi)
 503		*pfn_lo = iova->pfn_hi + 1;
 504}
 505
 506/**
 507 * reserve_iova - reserves an iova in the given range
 508 * @iovad: - iova domain pointer
 509 * @pfn_lo: - lower page frame address
 510 * @pfn_hi:- higher pfn address
 511 * This function reserves the address range from pfn_lo to pfn_hi so
 512 * that this range is not dished out as part of alloc_iova.
 513 */
 514struct iova *
 515reserve_iova(struct iova_domain *iovad,
 516	unsigned long pfn_lo, unsigned long pfn_hi)
 517{
 518	struct rb_node *node;
 519	unsigned long flags;
 520	struct iova *iova;
 521	unsigned int overlap = 0;
 522
 523	/* Don't allow nonsensical pfns */
 524	if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
 525		return NULL;
 526
 527	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 528	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
 529		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
 530			iova = to_iova(node);
 531			__adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
 532			if ((pfn_lo >= iova->pfn_lo) &&
 533				(pfn_hi <= iova->pfn_hi))
 534				goto finish;
 535			overlap = 1;
 536
 537		} else if (overlap)
 538				break;
 539	}
 540
 541	/* We are here either because this is the first reservation
 542	 * or we need to insert the remaining non-overlapping address range
 543	 */
 544	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
 545finish:
 546
 547	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 548	return iova;
 549}
 550EXPORT_SYMBOL_GPL(reserve_iova);
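
/*
 * A minimal usage sketch (not from this file): reserving a PFN window so
 * that alloc_iova() never hands it out, e.g. for an MSI doorbell or a
 * firmware-defined hole. The helper and its arguments are illustrative.
 */
static int example_reserve_window(struct iova_domain *iovad,
				  phys_addr_t base, size_t size)
{
	unsigned long lo = base >> iova_shift(iovad);
	unsigned long hi = (base + size - 1) >> iova_shift(iovad);

	return reserve_iova(iovad, lo, hi) ? 0 : -ENOMEM;
}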
 551
 552/*
 553 * Magazine caches for IOVA ranges.  For an introduction to magazines,
 554 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
 555 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
 556 * For simplicity, we use a static magazine size and don't implement the
 557 * dynamic size tuning described in the paper.
 558 */
 559
 560/*
 561 * As kmalloc's buffer size is fixed to power of 2, 127 is chosen to
 562 * assure size of 'iova_magazine' to be 1024 bytes, so that no memory
 563 * will be wasted. Since only full magazines are inserted into the depot,
 564 * we don't need to waste PFN capacity on a separate list head either.
 565 */
 566#define IOVA_MAG_SIZE 127
 567
 568#define IOVA_DEPOT_DELAY msecs_to_jiffies(100)
 569
 570struct iova_magazine {
 571	union {
 572		unsigned long size;
 573		struct iova_magazine *next;
 574	};
 575	unsigned long pfns[IOVA_MAG_SIZE];
 576};
 577static_assert(!(sizeof(struct iova_magazine) & (sizeof(struct iova_magazine) - 1)));
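
/*
 * Worked out for a 64-bit build (an assumption, not a requirement): the
 * size/next union is 8 bytes and pfns[] adds 127 * 8 = 1016 bytes, so
 * sizeof(struct iova_magazine) comes to 1024 bytes, the power of two that
 * the static_assert above verifies.
 */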
 578
 579struct iova_cpu_rcache {
 580	spinlock_t lock;
 581	struct iova_magazine *loaded;
 582	struct iova_magazine *prev;
 583};
 584
 585struct iova_rcache {
 586	spinlock_t lock;
 587	unsigned int depot_size;
 588	struct iova_magazine *depot;
 589	struct iova_cpu_rcache __percpu *cpu_rcaches;
 590	struct iova_domain *iovad;
 591	struct delayed_work work;
 592};
 593
 594static struct kmem_cache *iova_magazine_cache;
 595
 596unsigned long iova_rcache_range(void)
 597{
 598	return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1);
 599}
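
/*
 * For example, with 4 KiB pages this works out to 4096 << 5 = 128 KiB:
 * only ranges of up to 32 pages (order 5) are ever held in the rcaches.
 */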
 600
 601static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
 602{
 603	struct iova_magazine *mag;
 604
 605	mag = kmem_cache_alloc(iova_magazine_cache, flags);
 606	if (mag)
 607		mag->size = 0;
 608
 609	return mag;
 610}
 611
 612static void iova_magazine_free(struct iova_magazine *mag)
 613{
 614	kmem_cache_free(iova_magazine_cache, mag);
 615}
 616
 617static void
 618iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
 619{
 620	unsigned long flags;
 621	int i;
 622
 623	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 624
 625	for (i = 0 ; i < mag->size; ++i) {
 626		struct iova *iova = private_find_iova(iovad, mag->pfns[i]);
 627
 628		if (WARN_ON(!iova))
 629			continue;
 630
 631		remove_iova(iovad, iova);
 632		free_iova_mem(iova);
 633	}
 634
 635	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 636
 637	mag->size = 0;
 638}
 639
 640static bool iova_magazine_full(struct iova_magazine *mag)
 641{
 642	return mag->size == IOVA_MAG_SIZE;
 643}
 644
 645static bool iova_magazine_empty(struct iova_magazine *mag)
 646{
 647	return mag->size == 0;
 648}
 649
 650static unsigned long iova_magazine_pop(struct iova_magazine *mag,
 651				       unsigned long limit_pfn)
 652{
 653	int i;
 654	unsigned long pfn;
 655
 656	/* Only fall back to the rbtree if we have no suitable pfns at all */
 657	for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
 658		if (i == 0)
 659			return 0;
 660
 661	/* Swap it to pop it */
 662	pfn = mag->pfns[i];
 663	mag->pfns[i] = mag->pfns[--mag->size];
 664
 665	return pfn;
 666}
 667
 668static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
 669{
 670	mag->pfns[mag->size++] = pfn;
 671}
 672
 673static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache)
 674{
 675	struct iova_magazine *mag = rcache->depot;
 676
 677	/*
 678	 * As the mag->next pointer is moved to rcache->depot and reset via
 679	 * the mag->size assignment, mark it as a transient false positive.
 680	 */
 681	kmemleak_transient_leak(mag->next);
 682	rcache->depot = mag->next;
 683	mag->size = IOVA_MAG_SIZE;
 684	rcache->depot_size--;
 685	return mag;
 686}
 687
 688static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazine *mag)
 689{
 690	mag->next = rcache->depot;
 691	rcache->depot = mag;
 692	rcache->depot_size++;
 693}
 694
 695static void iova_depot_work_func(struct work_struct *work)
 696{
 697	struct iova_rcache *rcache = container_of(work, typeof(*rcache), work.work);
 698	struct iova_magazine *mag = NULL;
 699	unsigned long flags;
 700
 701	spin_lock_irqsave(&rcache->lock, flags);
 702	if (rcache->depot_size > num_online_cpus())
 703		mag = iova_depot_pop(rcache);
 704	spin_unlock_irqrestore(&rcache->lock, flags);
 705
 706	if (mag) {
 707		iova_magazine_free_pfns(mag, rcache->iovad);
 708		iova_magazine_free(mag);
 709		schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY);
 710	}
 711}
 712
 713int iova_domain_init_rcaches(struct iova_domain *iovad)
 714{
 715	unsigned int cpu;
 716	int i, ret;
 717
 718	iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
 719				 sizeof(struct iova_rcache),
 720				 GFP_KERNEL);
 721	if (!iovad->rcaches)
 722		return -ENOMEM;
 723
 724	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
 725		struct iova_cpu_rcache *cpu_rcache;
 726		struct iova_rcache *rcache;
 727
 728		rcache = &iovad->rcaches[i];
 729		spin_lock_init(&rcache->lock);
 730		rcache->iovad = iovad;
 731		INIT_DELAYED_WORK(&rcache->work, iova_depot_work_func);
 732		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
 733						     cache_line_size());
 734		if (!rcache->cpu_rcaches) {
 735			ret = -ENOMEM;
 736			goto out_err;
 737		}
 738		for_each_possible_cpu(cpu) {
 739			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
 740
 741			spin_lock_init(&cpu_rcache->lock);
 742			cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
 743			cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
 744			if (!cpu_rcache->loaded || !cpu_rcache->prev) {
 745				ret = -ENOMEM;
 746				goto out_err;
 747			}
 748		}
 749	}
 750
 751	ret = cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
 752					       &iovad->cpuhp_dead);
 753	if (ret)
 754		goto out_err;
 755	return 0;
 756
 757out_err:
 758	free_iova_rcaches(iovad);
 759	return ret;
 760}
 761EXPORT_SYMBOL_GPL(iova_domain_init_rcaches);
 762
 763/*
 764 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 765 * return true on success.  Can fail if rcache is full and we can't free
 766 * space, and free_iova() (our only caller) will then return the IOVA
 767 * range to the rbtree instead.
 768 */
 769static bool __iova_rcache_insert(struct iova_domain *iovad,
 770				 struct iova_rcache *rcache,
 771				 unsigned long iova_pfn)
 772{
 773	struct iova_cpu_rcache *cpu_rcache;
 774	bool can_insert = false;
 775	unsigned long flags;
 776
 777	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
 778	spin_lock_irqsave(&cpu_rcache->lock, flags);
 779
 780	if (!iova_magazine_full(cpu_rcache->loaded)) {
 781		can_insert = true;
 782	} else if (!iova_magazine_full(cpu_rcache->prev)) {
 783		swap(cpu_rcache->prev, cpu_rcache->loaded);
 784		can_insert = true;
 785	} else {
 786		struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);
 787
 788		if (new_mag) {
 789			spin_lock(&rcache->lock);
 790			iova_depot_push(rcache, cpu_rcache->loaded);
 791			spin_unlock(&rcache->lock);
 792			schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY);
 793
 794			cpu_rcache->loaded = new_mag;
 795			can_insert = true;
 796		}
 797	}
 798
 799	if (can_insert)
 800		iova_magazine_push(cpu_rcache->loaded, iova_pfn);
 801
 802	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
 803
 804	return can_insert;
 805}
 806
 807static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
 808			       unsigned long size)
 809{
 810	unsigned int log_size = order_base_2(size);
 811
 812	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
 813		return false;
 814
 815	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
 816}
 817
 818/*
 819 * Caller wants to allocate a new IOVA range from 'rcache'.  If we can
 820 * satisfy the request, return a matching non-NULL range and remove
 821 * it from the 'rcache'.
 822 */
 823static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
 824				       unsigned long limit_pfn)
 825{
 826	struct iova_cpu_rcache *cpu_rcache;
 827	unsigned long iova_pfn = 0;
 828	bool has_pfn = false;
 829	unsigned long flags;
 830
 831	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
 832	spin_lock_irqsave(&cpu_rcache->lock, flags);
 833
 834	if (!iova_magazine_empty(cpu_rcache->loaded)) {
 835		has_pfn = true;
 836	} else if (!iova_magazine_empty(cpu_rcache->prev)) {
 837		swap(cpu_rcache->prev, cpu_rcache->loaded);
 838		has_pfn = true;
 839	} else {
 840		spin_lock(&rcache->lock);
 841		if (rcache->depot) {
 842			iova_magazine_free(cpu_rcache->loaded);
 843			cpu_rcache->loaded = iova_depot_pop(rcache);
 844			has_pfn = true;
 845		}
 846		spin_unlock(&rcache->lock);
 847	}
 848
 849	if (has_pfn)
 850		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
 851
 852	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
 853
 854	return iova_pfn;
 855}
 856
 857/*
 858 * Try to satisfy IOVA allocation range from rcache.  Fail if requested
 859 * size is too big or the DMA limit we are given isn't satisfied by the
 860 * top element in the magazine.
 861 */
 862static unsigned long iova_rcache_get(struct iova_domain *iovad,
 863				     unsigned long size,
 864				     unsigned long limit_pfn)
 865{
 866	unsigned int log_size = order_base_2(size);
 867
 868	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
 869		return 0;
 870
 871	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
 872}
 873
 874/*
 875 * free rcache data structures.
 876 */
 877static void free_iova_rcaches(struct iova_domain *iovad)
 878{
 879	struct iova_rcache *rcache;
 880	struct iova_cpu_rcache *cpu_rcache;
 881	unsigned int cpu;
 882
 883	for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
 884		rcache = &iovad->rcaches[i];
 885		if (!rcache->cpu_rcaches)
 886			break;
 887		for_each_possible_cpu(cpu) {
 888			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
 889			iova_magazine_free(cpu_rcache->loaded);
 890			iova_magazine_free(cpu_rcache->prev);
 891		}
 892		free_percpu(rcache->cpu_rcaches);
 893		cancel_delayed_work_sync(&rcache->work);
 894		while (rcache->depot)
 895			iova_magazine_free(iova_depot_pop(rcache));
 896	}
 897
 898	kfree(iovad->rcaches);
 899	iovad->rcaches = NULL;
 900}
 901
 902/*
 903 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
 904 */
 905static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
 906{
 907	struct iova_cpu_rcache *cpu_rcache;
 908	struct iova_rcache *rcache;
 909	unsigned long flags;
 910	int i;
 911
 912	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
 913		rcache = &iovad->rcaches[i];
 914		cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
 915		spin_lock_irqsave(&cpu_rcache->lock, flags);
 916		iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
 917		iova_magazine_free_pfns(cpu_rcache->prev, iovad);
 918		spin_unlock_irqrestore(&cpu_rcache->lock, flags);
 919	}
 920}
 921
 922/*
 923 * free all the IOVA ranges of global cache
 924 */
 925static void free_global_cached_iovas(struct iova_domain *iovad)
 926{
 927	struct iova_rcache *rcache;
 928	unsigned long flags;
 929
 930	for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
 931		rcache = &iovad->rcaches[i];
 932		spin_lock_irqsave(&rcache->lock, flags);
 933		while (rcache->depot) {
 934			struct iova_magazine *mag = iova_depot_pop(rcache);
 935
 936			iova_magazine_free_pfns(mag, iovad);
 937			iova_magazine_free(mag);
 938		}
 939		spin_unlock_irqrestore(&rcache->lock, flags);
 940	}
 941}
 942
 943static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node)
 944{
 945	struct iova_domain *iovad;
 946
 947	iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead);
 948
 949	free_cpu_cached_iovas(cpu, iovad);
 950	return 0;
 951}
 952
 953int iova_cache_get(void)
 954{
 955	int err = -ENOMEM;
 956
 957	mutex_lock(&iova_cache_mutex);
 958	if (!iova_cache_users) {
 959		iova_cache = kmem_cache_create("iommu_iova", sizeof(struct iova), 0,
 960					       SLAB_HWCACHE_ALIGN, NULL);
 961		if (!iova_cache)
 962			goto out_err;
 963
 964		iova_magazine_cache = kmem_cache_create("iommu_iova_magazine",
 965							sizeof(struct iova_magazine),
 966							0, SLAB_HWCACHE_ALIGN, NULL);
 967		if (!iova_magazine_cache)
 968			goto out_err;
 969
 970		err = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead",
 971					      NULL, iova_cpuhp_dead);
 972		if (err) {
 973			pr_err("IOVA: Couldn't register cpuhp handler: %pe\n", ERR_PTR(err));
 974			goto out_err;
 975		}
 976	}
 977
 978	iova_cache_users++;
 979	mutex_unlock(&iova_cache_mutex);
 980
 981	return 0;
 982
 983out_err:
 984	kmem_cache_destroy(iova_cache);
 985	kmem_cache_destroy(iova_magazine_cache);
 986	mutex_unlock(&iova_cache_mutex);
 987	return err;
 988}
 989EXPORT_SYMBOL_GPL(iova_cache_get);
 990
 991void iova_cache_put(void)
 992{
 993	mutex_lock(&iova_cache_mutex);
 994	if (WARN_ON(!iova_cache_users)) {
 995		mutex_unlock(&iova_cache_mutex);
 996		return;
 997	}
 998	iova_cache_users--;
 999	if (!iova_cache_users) {
1000		cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
1001		kmem_cache_destroy(iova_cache);
1002		kmem_cache_destroy(iova_magazine_cache);
1003	}
1004	mutex_unlock(&iova_cache_mutex);
1005}
1006EXPORT_SYMBOL_GPL(iova_cache_put);
1007
1008MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
1009MODULE_DESCRIPTION("IOMMU I/O Virtual Address management");
1010MODULE_LICENSE("GPL");
v6.2
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * Copyright © 2006-2009, Intel Corporation.
  4 *
  5 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  6 */
  7
  8#include <linux/iova.h>
  9#include <linux/module.h>
 10#include <linux/slab.h>
 11#include <linux/smp.h>
 12#include <linux/bitops.h>
 13#include <linux/cpu.h>
 14
 15/* The anchor node sits above the top of the usable address space */
 16#define IOVA_ANCHOR	~0UL
 17
 18#define IOVA_RANGE_CACHE_MAX_SIZE 6	/* log of max cached IOVA range size (in pages) */
 19
 20static bool iova_rcache_insert(struct iova_domain *iovad,
 21			       unsigned long pfn,
 22			       unsigned long size);
 23static unsigned long iova_rcache_get(struct iova_domain *iovad,
 24				     unsigned long size,
 25				     unsigned long limit_pfn);
 26static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
 27static void free_iova_rcaches(struct iova_domain *iovad);
 28
 29unsigned long iova_rcache_range(void)
 30{
 31	return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1);
 32}
 33
 34static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node)
 35{
 36	struct iova_domain *iovad;
 37
 38	iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead);
 39
 40	free_cpu_cached_iovas(cpu, iovad);
 41	return 0;
 42}
 43
 44static void free_global_cached_iovas(struct iova_domain *iovad);
 45
 46static struct iova *to_iova(struct rb_node *node)
 47{
 48	return rb_entry(node, struct iova, node);
 49}
 50
 51void
 52init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 53	unsigned long start_pfn)
 54{
 55	/*
 56	 * IOVA granularity will normally be equal to the smallest
 57	 * supported IOMMU page size; both *must* be capable of
 58	 * representing individual CPU pages exactly.
 59	 */
 60	BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));
 61
 62	spin_lock_init(&iovad->iova_rbtree_lock);
 63	iovad->rbroot = RB_ROOT;
 64	iovad->cached_node = &iovad->anchor.node;
 65	iovad->cached32_node = &iovad->anchor.node;
 66	iovad->granule = granule;
 67	iovad->start_pfn = start_pfn;
 68	iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
 69	iovad->max32_alloc_size = iovad->dma_32bit_pfn;
 70	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
 71	rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
 72	rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
 73}
 74EXPORT_SYMBOL_GPL(init_iova_domain);
 75
 76static struct rb_node *
 77__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
 78{
 79	if (limit_pfn <= iovad->dma_32bit_pfn)
 80		return iovad->cached32_node;
 81
 82	return iovad->cached_node;
 83}
 84
 85static void
 86__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
 87{
 88	if (new->pfn_hi < iovad->dma_32bit_pfn)
 89		iovad->cached32_node = &new->node;
 90	else
 91		iovad->cached_node = &new->node;
 92}
 93
 94static void
 95__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
 96{
 97	struct iova *cached_iova;
 98
 99	cached_iova = to_iova(iovad->cached32_node);
100	if (free == cached_iova ||
101	    (free->pfn_hi < iovad->dma_32bit_pfn &&
102	     free->pfn_lo >= cached_iova->pfn_lo))
103		iovad->cached32_node = rb_next(&free->node);
104
105	if (free->pfn_lo < iovad->dma_32bit_pfn)
106		iovad->max32_alloc_size = iovad->dma_32bit_pfn;
107
108	cached_iova = to_iova(iovad->cached_node);
109	if (free->pfn_lo >= cached_iova->pfn_lo)
110		iovad->cached_node = rb_next(&free->node);
111}
112
113static struct rb_node *iova_find_limit(struct iova_domain *iovad, unsigned long limit_pfn)
114{
115	struct rb_node *node, *next;
116	/*
117	 * Ideally what we'd like to judge here is whether limit_pfn is close
118	 * enough to the highest-allocated IOVA that starting the allocation
119	 * walk from the anchor node will be quicker than this initial work to
120	 * find an exact starting point (especially if that ends up being the
121	 * anchor node anyway). This is an incredibly crude approximation which
122	 * only really helps the most likely case, but is at least trivially easy.
123	 */
124	if (limit_pfn > iovad->dma_32bit_pfn)
125		return &iovad->anchor.node;
126
127	node = iovad->rbroot.rb_node;
128	while (to_iova(node)->pfn_hi < limit_pfn)
129		node = node->rb_right;
130
131search_left:
132	while (node->rb_left && to_iova(node->rb_left)->pfn_lo >= limit_pfn)
133		node = node->rb_left;
134
135	if (!node->rb_left)
136		return node;
137
138	next = node->rb_left;
139	while (next->rb_right) {
140		next = next->rb_right;
141		if (to_iova(next)->pfn_lo >= limit_pfn) {
142			node = next;
143			goto search_left;
144		}
145	}
146
147	return node;
148}
149
150/* Insert the iova into the domain rbtree while holding the writer lock */
151static void
152iova_insert_rbtree(struct rb_root *root, struct iova *iova,
153		   struct rb_node *start)
154{
155	struct rb_node **new, *parent = NULL;
156
157	new = (start) ? &start : &(root->rb_node);
158	/* Figure out where to put new node */
159	while (*new) {
160		struct iova *this = to_iova(*new);
161
162		parent = *new;
163
164		if (iova->pfn_lo < this->pfn_lo)
165			new = &((*new)->rb_left);
166		else if (iova->pfn_lo > this->pfn_lo)
167			new = &((*new)->rb_right);
168		else {
169			WARN_ON(1); /* this should not happen */
170			return;
171		}
172	}
173	/* Add new node and rebalance tree. */
174	rb_link_node(&iova->node, parent, new);
175	rb_insert_color(&iova->node, root);
176}
177
178static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
179		unsigned long size, unsigned long limit_pfn,
180			struct iova *new, bool size_aligned)
181{
182	struct rb_node *curr, *prev;
183	struct iova *curr_iova;
184	unsigned long flags;
185	unsigned long new_pfn, retry_pfn;
186	unsigned long align_mask = ~0UL;
187	unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;
188
189	if (size_aligned)
190		align_mask <<= fls_long(size - 1);
191
192	/* Walk the tree backwards */
193	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
194	if (limit_pfn <= iovad->dma_32bit_pfn &&
195			size >= iovad->max32_alloc_size)
196		goto iova32_full;
197
198	curr = __get_cached_rbnode(iovad, limit_pfn);
199	curr_iova = to_iova(curr);
200	retry_pfn = curr_iova->pfn_hi;
201
202retry:
203	do {
204		high_pfn = min(high_pfn, curr_iova->pfn_lo);
205		new_pfn = (high_pfn - size) & align_mask;
206		prev = curr;
207		curr = rb_prev(curr);
208		curr_iova = to_iova(curr);
209	} while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);
210
211	if (high_pfn < size || new_pfn < low_pfn) {
212		if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
213			high_pfn = limit_pfn;
214			low_pfn = retry_pfn + 1;
215			curr = iova_find_limit(iovad, limit_pfn);
216			curr_iova = to_iova(curr);
217			goto retry;
218		}
219		iovad->max32_alloc_size = size;
220		goto iova32_full;
221	}
222
223	/* pfn_lo will point to size aligned address if size_aligned is set */
224	new->pfn_lo = new_pfn;
225	new->pfn_hi = new->pfn_lo + size - 1;
226
227	/* If we have 'prev', it's a valid place to start the insertion. */
228	iova_insert_rbtree(&iovad->rbroot, new, prev);
229	__cached_rbnode_insert_update(iovad, new);
230
231	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
232	return 0;
233
234iova32_full:
235	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
236	return -ENOMEM;
237}
238
239static struct kmem_cache *iova_cache;
240static unsigned int iova_cache_users;
241static DEFINE_MUTEX(iova_cache_mutex);
242
243static struct iova *alloc_iova_mem(void)
244{
245	return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN);
246}
247
248static void free_iova_mem(struct iova *iova)
249{
250	if (iova->pfn_lo != IOVA_ANCHOR)
251		kmem_cache_free(iova_cache, iova);
252}
253
254int iova_cache_get(void)
255{
256	mutex_lock(&iova_cache_mutex);
257	if (!iova_cache_users) {
258		int ret;
259
260		ret = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead", NULL,
261					iova_cpuhp_dead);
262		if (ret) {
263			mutex_unlock(&iova_cache_mutex);
264			pr_err("Couldn't register cpuhp handler\n");
265			return ret;
266		}
267
268		iova_cache = kmem_cache_create(
269			"iommu_iova", sizeof(struct iova), 0,
270			SLAB_HWCACHE_ALIGN, NULL);
271		if (!iova_cache) {
272			cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
273			mutex_unlock(&iova_cache_mutex);
274			pr_err("Couldn't create iova cache\n");
275			return -ENOMEM;
276		}
277	}
278
279	iova_cache_users++;
280	mutex_unlock(&iova_cache_mutex);
281
282	return 0;
283}
284EXPORT_SYMBOL_GPL(iova_cache_get);
285
286void iova_cache_put(void)
287{
288	mutex_lock(&iova_cache_mutex);
289	if (WARN_ON(!iova_cache_users)) {
290		mutex_unlock(&iova_cache_mutex);
291		return;
292	}
293	iova_cache_users--;
294	if (!iova_cache_users) {
295		cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
296		kmem_cache_destroy(iova_cache);
297	}
298	mutex_unlock(&iova_cache_mutex);
299}
300EXPORT_SYMBOL_GPL(iova_cache_put);
301
302/**
303 * alloc_iova - allocates an iova
304 * @iovad: - iova domain in question
305 * @size: - size of page frames to allocate
306 * @limit_pfn: - max limit address
307 * @size_aligned: - set if size_aligned address range is required
308 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
309 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
310 * flag is set then the allocated address iova->pfn_lo will be naturally
311 * aligned on roundup_power_of_two(size).
312 */
313struct iova *
314alloc_iova(struct iova_domain *iovad, unsigned long size,
315	unsigned long limit_pfn,
316	bool size_aligned)
317{
318	struct iova *new_iova;
319	int ret;
320
321	new_iova = alloc_iova_mem();
322	if (!new_iova)
323		return NULL;
324
325	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
326			new_iova, size_aligned);
327
328	if (ret) {
329		free_iova_mem(new_iova);
330		return NULL;
331	}
332
333	return new_iova;
334}
335EXPORT_SYMBOL_GPL(alloc_iova);
336
337static struct iova *
338private_find_iova(struct iova_domain *iovad, unsigned long pfn)
339{
340	struct rb_node *node = iovad->rbroot.rb_node;
341
342	assert_spin_locked(&iovad->iova_rbtree_lock);
343
344	while (node) {
345		struct iova *iova = to_iova(node);
346
347		if (pfn < iova->pfn_lo)
348			node = node->rb_left;
349		else if (pfn > iova->pfn_hi)
350			node = node->rb_right;
351		else
352			return iova;	/* pfn falls within iova's range */
353	}
354
355	return NULL;
356}
357
358static void remove_iova(struct iova_domain *iovad, struct iova *iova)
359{
360	assert_spin_locked(&iovad->iova_rbtree_lock);
361	__cached_rbnode_delete_update(iovad, iova);
362	rb_erase(&iova->node, &iovad->rbroot);
363}
364
365/**
366 * find_iova - finds an iova for a given pfn
367 * @iovad: - iova domain in question.
368 * @pfn: - page frame number
369 * This function finds and returns an iova belonging to the
370 * given domain which matches the given pfn.
371 */
372struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
373{
374	unsigned long flags;
375	struct iova *iova;
376
377	/* Take the lock so that no other thread is manipulating the rbtree */
378	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
379	iova = private_find_iova(iovad, pfn);
380	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
381	return iova;
382}
383EXPORT_SYMBOL_GPL(find_iova);
384
385/**
386 * __free_iova - frees the given iova
387 * @iovad: iova domain in question.
388 * @iova: iova in question.
389 * Frees the given iova belonging to the given domain
390 */
391void
392__free_iova(struct iova_domain *iovad, struct iova *iova)
393{
394	unsigned long flags;
395
396	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
397	remove_iova(iovad, iova);
398	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
399	free_iova_mem(iova);
400}
401EXPORT_SYMBOL_GPL(__free_iova);
402
403/**
404 * free_iova - finds and frees the iova for a given pfn
405 * @iovad: - iova domain in question.
406 * @pfn: - pfn that is allocated previously
407 * This function finds an iova for a given pfn and then
408 * frees the iova from that domain.
409 */
410void
411free_iova(struct iova_domain *iovad, unsigned long pfn)
412{
413	unsigned long flags;
414	struct iova *iova;
415
416	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
417	iova = private_find_iova(iovad, pfn);
418	if (!iova) {
419		spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
420		return;
421	}
422	remove_iova(iovad, iova);
423	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
424	free_iova_mem(iova);
425}
426EXPORT_SYMBOL_GPL(free_iova);
427
428/**
429 * alloc_iova_fast - allocates an iova from rcache
430 * @iovad: - iova domain in question
431 * @size: - size of page frames to allocate
432 * @limit_pfn: - max limit address
433 * @flush_rcache: - set to flush rcache on regular allocation failure
434 * This function tries to satisfy an iova allocation from the rcache,
435 * and falls back to regular allocation on failure. If regular allocation
436 * fails too and the flush_rcache flag is set then the rcache will be flushed.
437 */
438unsigned long
439alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
440		unsigned long limit_pfn, bool flush_rcache)
441{
442	unsigned long iova_pfn;
443	struct iova *new_iova;
444
445	/*
446	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
447	 * will come back to bite us badly, so we have to waste a bit of space
448	 * rounding up anything cacheable to make sure that can't happen. The
449	 * order of the unadjusted size will still match upon freeing.
450	 */
451	if (size < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
452		size = roundup_pow_of_two(size);
453
454	iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
455	if (iova_pfn)
456		return iova_pfn;
457
458retry:
459	new_iova = alloc_iova(iovad, size, limit_pfn, true);
460	if (!new_iova) {
461		unsigned int cpu;
462
463		if (!flush_rcache)
464			return 0;
465
466		/* Try replenishing IOVAs by flushing rcache. */
467		flush_rcache = false;
468		for_each_online_cpu(cpu)
469			free_cpu_cached_iovas(cpu, iovad);
470		free_global_cached_iovas(iovad);
471		goto retry;
472	}
473
474	return new_iova->pfn_lo;
475}
476EXPORT_SYMBOL_GPL(alloc_iova_fast);
477
478/**
479 * free_iova_fast - free iova pfn range into rcache
480 * @iovad: - iova domain in question.
481 * @pfn: - pfn that is allocated previously
482 * @size: - # of pages in range
483 * This function frees an iova range by trying to put it into the rcache,
484 * falling back to regular iova deallocation via free_iova() if this fails.
485 */
486void
487free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
488{
489	if (iova_rcache_insert(iovad, pfn, size))
490		return;
491
492	free_iova(iovad, pfn);
493}
494EXPORT_SYMBOL_GPL(free_iova_fast);
495
496static void iova_domain_free_rcaches(struct iova_domain *iovad)
497{
498	cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
499					    &iovad->cpuhp_dead);
500	free_iova_rcaches(iovad);
501}
502
503/**
504 * put_iova_domain - destroys the iova domain
505 * @iovad: - iova domain in question.
506 * All the iova's in that domain are destroyed.
507 */
508void put_iova_domain(struct iova_domain *iovad)
509{
510	struct iova *iova, *tmp;
511
512	if (iovad->rcaches)
513		iova_domain_free_rcaches(iovad);
514
515	rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
516		free_iova_mem(iova);
517}
518EXPORT_SYMBOL_GPL(put_iova_domain);
519
520static int
521__is_range_overlap(struct rb_node *node,
522	unsigned long pfn_lo, unsigned long pfn_hi)
523{
524	struct iova *iova = to_iova(node);
525
526	if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
527		return 1;
528	return 0;
529}
530
531static inline struct iova *
532alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
533{
534	struct iova *iova;
535
536	iova = alloc_iova_mem();
537	if (iova) {
538		iova->pfn_lo = pfn_lo;
539		iova->pfn_hi = pfn_hi;
540	}
541
542	return iova;
543}
544
545static struct iova *
546__insert_new_range(struct iova_domain *iovad,
547	unsigned long pfn_lo, unsigned long pfn_hi)
548{
549	struct iova *iova;
550
551	iova = alloc_and_init_iova(pfn_lo, pfn_hi);
552	if (iova)
553		iova_insert_rbtree(&iovad->rbroot, iova, NULL);
554
555	return iova;
556}
557
558static void
559__adjust_overlap_range(struct iova *iova,
560	unsigned long *pfn_lo, unsigned long *pfn_hi)
561{
562	if (*pfn_lo < iova->pfn_lo)
563		iova->pfn_lo = *pfn_lo;
564	if (*pfn_hi > iova->pfn_hi)
565		*pfn_lo = iova->pfn_hi + 1;
566}
567
568/**
569 * reserve_iova - reserves an iova in the given range
570 * @iovad: - iova domain pointer
571 * @pfn_lo: - lower page frame address
572 * @pfn_hi:- higher pfn address
573 * This function reserves the address range from pfn_lo to pfn_hi so
574 * that this range is not dished out as part of alloc_iova.
575 */
576struct iova *
577reserve_iova(struct iova_domain *iovad,
578	unsigned long pfn_lo, unsigned long pfn_hi)
579{
580	struct rb_node *node;
581	unsigned long flags;
582	struct iova *iova;
583	unsigned int overlap = 0;
584
585	/* Don't allow nonsensical pfns */
586	if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
587		return NULL;
588
589	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
590	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
591		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
592			iova = to_iova(node);
593			__adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
594			if ((pfn_lo >= iova->pfn_lo) &&
595				(pfn_hi <= iova->pfn_hi))
596				goto finish;
597			overlap = 1;
598
599		} else if (overlap)
600				break;
601	}
602
603	/* We are here either because this is the first reservation
604	 * or we need to insert the remaining non-overlapping address range
605	 */
606	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
607finish:
608
609	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
610	return iova;
611}
612EXPORT_SYMBOL_GPL(reserve_iova);
613
614/*
615 * Magazine caches for IOVA ranges.  For an introduction to magazines,
616 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
617 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
618 * For simplicity, we use a static magazine size and don't implement the
619 * dynamic size tuning described in the paper.
620 */
621
622/*
623 * As kmalloc's buffer size is fixed to power of 2, 127 is chosen to
624 * assure size of 'iova_magazine' to be 1024 bytes, so that no memory
625 * will be wasted.
626 */
627#define IOVA_MAG_SIZE 127
628#define MAX_GLOBAL_MAGS 32	/* magazines per bin */
629
630struct iova_magazine {
631	unsigned long size;
632	unsigned long pfns[IOVA_MAG_SIZE];
633};
634
635struct iova_cpu_rcache {
636	spinlock_t lock;
637	struct iova_magazine *loaded;
638	struct iova_magazine *prev;
639};
640
641struct iova_rcache {
642	spinlock_t lock;
643	unsigned long depot_size;
644	struct iova_magazine *depot[MAX_GLOBAL_MAGS];
645	struct iova_cpu_rcache __percpu *cpu_rcaches;
646};
647
648static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
649{
650	return kzalloc(sizeof(struct iova_magazine), flags);
651}
652
653static void iova_magazine_free(struct iova_magazine *mag)
654{
655	kfree(mag);
656}
657
658static void
659iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
660{
661	unsigned long flags;
662	int i;
663
664	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
665
666	for (i = 0 ; i < mag->size; ++i) {
667		struct iova *iova = private_find_iova(iovad, mag->pfns[i]);
668
669		if (WARN_ON(!iova))
670			continue;
671
672		remove_iova(iovad, iova);
673		free_iova_mem(iova);
674	}
675
676	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
677
678	mag->size = 0;
679}
680
681static bool iova_magazine_full(struct iova_magazine *mag)
682{
683	return mag->size == IOVA_MAG_SIZE;
684}
685
686static bool iova_magazine_empty(struct iova_magazine *mag)
687{
688	return mag->size == 0;
689}
690
691static unsigned long iova_magazine_pop(struct iova_magazine *mag,
692				       unsigned long limit_pfn)
693{
694	int i;
695	unsigned long pfn;
696
697	/* Only fall back to the rbtree if we have no suitable pfns at all */
698	for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
699		if (i == 0)
700			return 0;
701
702	/* Swap it to pop it */
703	pfn = mag->pfns[i];
704	mag->pfns[i] = mag->pfns[--mag->size];
705
706	return pfn;
707}
708
709static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
710{
711	mag->pfns[mag->size++] = pfn;
712}
713
714int iova_domain_init_rcaches(struct iova_domain *iovad)
715{
716	unsigned int cpu;
717	int i, ret;
718
719	iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
720				 sizeof(struct iova_rcache),
721				 GFP_KERNEL);
722	if (!iovad->rcaches)
723		return -ENOMEM;
724
725	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
726		struct iova_cpu_rcache *cpu_rcache;
727		struct iova_rcache *rcache;
728
729		rcache = &iovad->rcaches[i];
730		spin_lock_init(&rcache->lock);
731		rcache->depot_size = 0;
732		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
733						     cache_line_size());
734		if (!rcache->cpu_rcaches) {
735			ret = -ENOMEM;
736			goto out_err;
737		}
738		for_each_possible_cpu(cpu) {
739			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
740
741			spin_lock_init(&cpu_rcache->lock);
742			cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
743			cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
744			if (!cpu_rcache->loaded || !cpu_rcache->prev) {
745				ret = -ENOMEM;
746				goto out_err;
747			}
748		}
749	}
750
751	ret = cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
752					       &iovad->cpuhp_dead);
753	if (ret)
754		goto out_err;
755	return 0;
756
757out_err:
758	free_iova_rcaches(iovad);
759	return ret;
760}
761EXPORT_SYMBOL_GPL(iova_domain_init_rcaches);
762
763/*
764 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
765 * return true on success.  Can fail if rcache is full and we can't free
766 * space, and free_iova() (our only caller) will then return the IOVA
767 * range to the rbtree instead.
768 */
769static bool __iova_rcache_insert(struct iova_domain *iovad,
770				 struct iova_rcache *rcache,
771				 unsigned long iova_pfn)
772{
773	struct iova_magazine *mag_to_free = NULL;
774	struct iova_cpu_rcache *cpu_rcache;
775	bool can_insert = false;
776	unsigned long flags;
777
778	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
779	spin_lock_irqsave(&cpu_rcache->lock, flags);
780
781	if (!iova_magazine_full(cpu_rcache->loaded)) {
782		can_insert = true;
783	} else if (!iova_magazine_full(cpu_rcache->prev)) {
784		swap(cpu_rcache->prev, cpu_rcache->loaded);
785		can_insert = true;
786	} else {
787		struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);
788
789		if (new_mag) {
790			spin_lock(&rcache->lock);
791			if (rcache->depot_size < MAX_GLOBAL_MAGS) {
792				rcache->depot[rcache->depot_size++] =
793						cpu_rcache->loaded;
794			} else {
795				mag_to_free = cpu_rcache->loaded;
796			}
797			spin_unlock(&rcache->lock);
798
799			cpu_rcache->loaded = new_mag;
800			can_insert = true;
801		}
802	}
803
804	if (can_insert)
805		iova_magazine_push(cpu_rcache->loaded, iova_pfn);
806
807	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
808
809	if (mag_to_free) {
810		iova_magazine_free_pfns(mag_to_free, iovad);
811		iova_magazine_free(mag_to_free);
812	}
813
814	return can_insert;
815}
816
817static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
818			       unsigned long size)
819{
820	unsigned int log_size = order_base_2(size);
821
822	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
823		return false;
824
825	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
826}
827
828/*
829 * Caller wants to allocate a new IOVA range from 'rcache'.  If we can
830 * satisfy the request, return a matching non-NULL range and remove
831 * it from the 'rcache'.
832 */
833static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
834				       unsigned long limit_pfn)
835{
836	struct iova_cpu_rcache *cpu_rcache;
837	unsigned long iova_pfn = 0;
838	bool has_pfn = false;
839	unsigned long flags;
840
841	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
842	spin_lock_irqsave(&cpu_rcache->lock, flags);
843
844	if (!iova_magazine_empty(cpu_rcache->loaded)) {
845		has_pfn = true;
846	} else if (!iova_magazine_empty(cpu_rcache->prev)) {
847		swap(cpu_rcache->prev, cpu_rcache->loaded);
848		has_pfn = true;
849	} else {
850		spin_lock(&rcache->lock);
851		if (rcache->depot_size > 0) {
852			iova_magazine_free(cpu_rcache->loaded);
853			cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
854			has_pfn = true;
855		}
856		spin_unlock(&rcache->lock);
857	}
858
859	if (has_pfn)
860		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
861
862	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
863
864	return iova_pfn;
865}
866
867/*
868 * Try to satisfy IOVA allocation range from rcache.  Fail if requested
869 * size is too big or the DMA limit we are given isn't satisfied by the
870 * top element in the magazine.
871 */
872static unsigned long iova_rcache_get(struct iova_domain *iovad,
873				     unsigned long size,
874				     unsigned long limit_pfn)
875{
876	unsigned int log_size = order_base_2(size);
877
878	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
879		return 0;
880
881	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
882}
883
884/*
885 * free rcache data structures.
886 */
887static void free_iova_rcaches(struct iova_domain *iovad)
888{
889	struct iova_rcache *rcache;
890	struct iova_cpu_rcache *cpu_rcache;
891	unsigned int cpu;
892	int i, j;
893
894	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
895		rcache = &iovad->rcaches[i];
896		if (!rcache->cpu_rcaches)
897			break;
898		for_each_possible_cpu(cpu) {
899			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
900			iova_magazine_free(cpu_rcache->loaded);
901			iova_magazine_free(cpu_rcache->prev);
902		}
903		free_percpu(rcache->cpu_rcaches);
904		for (j = 0; j < rcache->depot_size; ++j)
905			iova_magazine_free(rcache->depot[j]);
906	}
907
908	kfree(iovad->rcaches);
909	iovad->rcaches = NULL;
910}
911
912/*
913 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
914 */
915static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
916{
917	struct iova_cpu_rcache *cpu_rcache;
918	struct iova_rcache *rcache;
919	unsigned long flags;
920	int i;
921
922	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
923		rcache = &iovad->rcaches[i];
924		cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
925		spin_lock_irqsave(&cpu_rcache->lock, flags);
926		iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
927		iova_magazine_free_pfns(cpu_rcache->prev, iovad);
928		spin_unlock_irqrestore(&cpu_rcache->lock, flags);
929	}
930}
931
932/*
933 * free all the IOVA ranges of global cache
934 */
935static void free_global_cached_iovas(struct iova_domain *iovad)
936{
937	struct iova_rcache *rcache;
938	unsigned long flags;
939	int i, j;
940
941	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
942		rcache = &iovad->rcaches[i];
943		spin_lock_irqsave(&rcache->lock, flags);
944		for (j = 0; j < rcache->depot_size; ++j) {
945			iova_magazine_free_pfns(rcache->depot[j], iovad);
946			iova_magazine_free(rcache->depot[j]);
947		}
948		rcache->depot_size = 0;
949		spin_unlock_irqrestore(&rcache->lock, flags);
950	}
951}
952MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
953MODULE_LICENSE("GPL");