   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * Maple Tree implementation
   4 * Copyright (c) 2018-2022 Oracle Corporation
   5 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
   6 *	    Matthew Wilcox <willy@infradead.org>
   7 */
   8
   9/*
  10 * DOC: Interesting implementation details of the Maple Tree
  11 *
  12 * Each node type has a number of slots for entries and a number of slots for
  13 * pivots.  In the case of dense nodes, the pivots are implied by the position
  14 * and are simply the slot index + the minimum of the node.
  15 *
  16 * In regular B-Tree terms, pivots are called keys.  The term pivot is used to
  17 * indicate that the tree is specifying ranges.  Pivots may appear in the
  18 * subtree with an entry attached to the value, whereas keys are unique to a
  19 * specific position of a B-tree.  Pivot values are inclusive of the slot with
  20 * the same index.
  21 *
  22 *
  23 * The following illustrates the layout of a range64 node's slots and pivots.
  24 *
  25 *
  26 *  Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 |
  27 *           ┬   ┬   ┬   ┬     ┬    ┬    ┬    ┬    ┬
  28 *           │   │   │   │     │    │    │    │    └─ Implied maximum
  29 *           │   │   │   │     │    │    │    └─ Pivot 14
  30 *           │   │   │   │     │    │    └─ Pivot 13
  31 *           │   │   │   │     │    └─ Pivot 12
  32 *           │   │   │   │     └─ Pivot 11
  33 *           │   │   │   └─ Pivot 2
  34 *           │   │   └─ Pivot 1
  35 *           │   └─ Pivot 0
  36 *           └─  Implied minimum
  37 *
  38 * Slot contents:
  39 *  Internal (non-leaf) nodes contain pointers to other nodes.
  40 *  Leaf nodes contain entries.
  41 *
  42 * The location of interest is often referred to as an offset.  All offsets have
  43 * a slot, but the last offset has an implied pivot from the node above (or
  44 * ULONG_MAX for the root node).
  45 *
  46 * Ranges complicate certain write activities.  When modifying any of
  47 * the B-tree variants, it is known that one entry will either be added or
  48 * deleted.  When modifying the Maple Tree, one store operation may overwrite
  49 * the entire data set, or one half of the tree, or the middle half of the tree.
  50 *
  51 */
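    /*
     * Illustrative sketch (mt, ptr_a and ptr_b are arbitrary examples, not
     * taken from this file): after the two stores below, a leaf node could
     * lay out its slots and pivots as
     *
     *	mtree_store_range(&mt, 0, 9, ptr_a, GFP_KERNEL);
     *	mtree_store_range(&mt, 10, 19, ptr_b, GFP_KERNEL);
     *
     *	Slots:  | ptr_a | ptr_b | NULL | ...
     *	Pivots: |   9   |  19   | (implied maximum)
     *
     * so slot i covers the range (pivot[i - 1] + 1) to pivot[i], with the
     * node minimum implied before slot 0 and the node maximum implied after
     * the last stored pivot.
     */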
  52
  53
  54#include <linux/maple_tree.h>
  55#include <linux/xarray.h>
  56#include <linux/types.h>
  57#include <linux/export.h>
  58#include <linux/slab.h>
  59#include <linux/limits.h>
  60#include <asm/barrier.h>
  61
  62#define CREATE_TRACE_POINTS
  63#include <trace/events/maple_tree.h>
  64
  65#define MA_ROOT_PARENT 1
  66
  67/*
  68 * Maple state flags
  69 * * MA_STATE_BULK		- Bulk insert mode
  70 * * MA_STATE_REBALANCE		- Indicate a rebalance during bulk insert
  71 * * MA_STATE_PREALLOC		- Preallocated nodes, WARN_ON allocation
  72 */
  73#define MA_STATE_BULK		1
  74#define MA_STATE_REBALANCE	2
  75#define MA_STATE_PREALLOC	4
  76
  77#define ma_parent_ptr(x) ((struct maple_pnode *)(x))
  78#define ma_mnode_ptr(x) ((struct maple_node *)(x))
  79#define ma_enode_ptr(x) ((struct maple_enode *)(x))
  80static struct kmem_cache *maple_node_cache;
  81
  82#ifdef CONFIG_DEBUG_MAPLE_TREE
  83static const unsigned long mt_max[] = {
  84	[maple_dense]		= MAPLE_NODE_SLOTS,
  85	[maple_leaf_64]		= ULONG_MAX,
  86	[maple_range_64]	= ULONG_MAX,
  87	[maple_arange_64]	= ULONG_MAX,
  88};
  89#define mt_node_max(x) mt_max[mte_node_type(x)]
  90#endif
  91
  92static const unsigned char mt_slots[] = {
  93	[maple_dense]		= MAPLE_NODE_SLOTS,
  94	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS,
  95	[maple_range_64]	= MAPLE_RANGE64_SLOTS,
  96	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS,
  97};
  98#define mt_slot_count(x) mt_slots[mte_node_type(x)]
  99
 100static const unsigned char mt_pivots[] = {
 101	[maple_dense]		= 0,
 102	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS - 1,
 103	[maple_range_64]	= MAPLE_RANGE64_SLOTS - 1,
 104	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS - 1,
 105};
 106#define mt_pivot_count(x) mt_pivots[mte_node_type(x)]
 107
 108static const unsigned char mt_min_slots[] = {
 109	[maple_dense]		= MAPLE_NODE_SLOTS / 2,
 110	[maple_leaf_64]		= (MAPLE_RANGE64_SLOTS / 2) - 2,
 111	[maple_range_64]	= (MAPLE_RANGE64_SLOTS / 2) - 2,
 112	[maple_arange_64]	= (MAPLE_ARANGE64_SLOTS / 2) - 1,
 113};
 114#define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)]
 115
 116#define MAPLE_BIG_NODE_SLOTS	(MAPLE_RANGE64_SLOTS * 2 + 2)
 117#define MAPLE_BIG_NODE_GAPS	(MAPLE_ARANGE64_SLOTS * 2 + 1)
 118
 119struct maple_big_node {
 120	struct maple_pnode *parent;
 121	unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1];
 122	union {
 123		struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS];
 124		struct {
 125			unsigned long padding[MAPLE_BIG_NODE_GAPS];
 126			unsigned long gap[MAPLE_BIG_NODE_GAPS];
 127		};
 128	};
 129	unsigned char b_end;
 130	enum maple_type type;
 131};
 132
 133/*
 134 * The maple_subtree_state is used to build a tree to replace a segment of an
 135 * existing tree in a more atomic way.  Any walkers of the older tree will hit a
 136 * dead node and restart on updates.
 137 */
 138struct maple_subtree_state {
 139	struct ma_state *orig_l;	/* Original left side of subtree */
 140	struct ma_state *orig_r;	/* Original right side of subtree */
 141	struct ma_state *l;		/* New left side of subtree */
 142	struct ma_state *m;		/* New middle of subtree (rare) */
 143	struct ma_state *r;		/* New right side of subtree */
 144	struct ma_topiary *free;	/* nodes to be freed */
 145	struct ma_topiary *destroy;	/* Nodes to be destroyed (walked and freed) */
 146	struct maple_big_node *bn;
 147};
 148
 149/* Functions */
 150static inline struct maple_node *mt_alloc_one(gfp_t gfp)
 151{
 152	return kmem_cache_alloc(maple_node_cache, gfp | __GFP_ZERO);
 153}
 154
 155static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
 156{
 157	return kmem_cache_alloc_bulk(maple_node_cache, gfp | __GFP_ZERO, size,
 158				     nodes);
 159}
 160
 161static inline void mt_free_bulk(size_t size, void __rcu **nodes)
 162{
 163	kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
 164}
 165
 166static void mt_free_rcu(struct rcu_head *head)
 167{
 168	struct maple_node *node = container_of(head, struct maple_node, rcu);
 169
 170	kmem_cache_free(maple_node_cache, node);
 171}
 172
 173/*
 174 * ma_free_rcu() - Use rcu callback to free a maple node
 175 * @node: The node to free
 176 *
 177 * The maple tree uses the parent pointer to indicate this node is no longer in
 178 * use and will be freed.
 179 */
 180static void ma_free_rcu(struct maple_node *node)
 181{
 182	node->parent = ma_parent_ptr(node);
 183	call_rcu(&node->rcu, mt_free_rcu);
 184}
 185
 186
 187static void mas_set_height(struct ma_state *mas)
 188{
 189	unsigned int new_flags = mas->tree->ma_flags;
 190
 191	new_flags &= ~MT_FLAGS_HEIGHT_MASK;
 192	BUG_ON(mas->depth > MAPLE_HEIGHT_MAX);
 193	new_flags |= mas->depth << MT_FLAGS_HEIGHT_OFFSET;
 194	mas->tree->ma_flags = new_flags;
 195}
 196
 197static unsigned int mas_mt_height(struct ma_state *mas)
 198{
 199	return mt_height(mas->tree);
 200}
 201
 202static inline enum maple_type mte_node_type(const struct maple_enode *entry)
 203{
 204	return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) &
 205		MAPLE_NODE_TYPE_MASK;
 206}
 207
 208static inline bool ma_is_dense(const enum maple_type type)
 209{
 210	return type < maple_leaf_64;
 211}
 212
 213static inline bool ma_is_leaf(const enum maple_type type)
 214{
 215	return type < maple_range_64;
 216}
 217
 218static inline bool mte_is_leaf(const struct maple_enode *entry)
 219{
 220	return ma_is_leaf(mte_node_type(entry));
 221}
 222
 223/*
 224 * We also reserve values with the bottom two bits set to '10' which are
 225 * below 4096
 226 */
 227static inline bool mt_is_reserved(const void *entry)
 228{
 229	return ((unsigned long)entry < MAPLE_RESERVED_RANGE) &&
 230		xa_is_internal(entry);
 231}
 232
 233static inline void mas_set_err(struct ma_state *mas, long err)
 234{
 235	mas->node = MA_ERROR(err);
 236}
 237
 238static inline bool mas_is_ptr(struct ma_state *mas)
 239{
 240	return mas->node == MAS_ROOT;
 241}
 242
 243static inline bool mas_is_start(struct ma_state *mas)
 244{
 245	return mas->node == MAS_START;
 246}
 247
 248bool mas_is_err(struct ma_state *mas)
 249{
 250	return xa_is_err(mas->node);
 251}
 252
 253static inline bool mas_searchable(struct ma_state *mas)
 254{
 255	if (mas_is_none(mas))
 256		return false;
 257
 258	if (mas_is_ptr(mas))
 259		return false;
 260
 261	return true;
 262}
 263
 264static inline struct maple_node *mte_to_node(const struct maple_enode *entry)
 265{
 266	return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK);
 267}
 268
 269/*
 270 * mte_to_mat() - Convert a maple encoded node to a maple topiary node.
 271 * @entry: The maple encoded node
 272 *
 273 * Return: a maple topiary pointer
 274 */
 275static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry)
 276{
 277	return (struct maple_topiary *)
 278		((unsigned long)entry & ~MAPLE_NODE_MASK);
 279}
 280
 281/*
 282 * mas_mn() - Get the maple state node.
 283 * @mas: The maple state
 284 *
 285 * Return: the maple node (not encoded - bare pointer).
 286 */
 287static inline struct maple_node *mas_mn(const struct ma_state *mas)
 288{
 289	return mte_to_node(mas->node);
 290}
 291
 292/*
 293 * mte_set_node_dead() - Set a maple encoded node as dead.
 294 * @mn: The maple encoded node.
 295 */
 296static inline void mte_set_node_dead(struct maple_enode *mn)
 297{
 298	mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn));
 299	smp_wmb(); /* Needed for RCU */
 300}
 301
 302/* Bit 1 indicates the root is a node */
 303#define MAPLE_ROOT_NODE			0x02
 304/* maple_type stored bit 3-6 */
 305#define MAPLE_ENODE_TYPE_SHIFT		0x03
 306/* Bit 2 means a NULL somewhere below */
 307#define MAPLE_ENODE_NULL		0x04
 308
 309static inline struct maple_enode *mt_mk_node(const struct maple_node *node,
 310					     enum maple_type type)
 311{
 312	return (void *)((unsigned long)node |
 313			(type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL);
 314}
 315
 316static inline void *mte_mk_root(const struct maple_enode *node)
 317{
 318	return (void *)((unsigned long)node | MAPLE_ROOT_NODE);
 319}
 320
 321static inline void *mte_safe_root(const struct maple_enode *node)
 322{
 323	return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE);
 324}
 325
 326static inline void *mte_set_full(const struct maple_enode *node)
 327{
 328	return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL);
 329}
 330
 331static inline void *mte_clear_full(const struct maple_enode *node)
 332{
 333	return (void *)((unsigned long)node | MAPLE_ENODE_NULL);
 334}
 335
 336static inline bool mte_has_null(const struct maple_enode *node)
 337{
 338	return (unsigned long)node & MAPLE_ENODE_NULL;
 339}
 340
 341static inline bool ma_is_root(struct maple_node *node)
 342{
 343	return ((unsigned long)node->parent & MA_ROOT_PARENT);
 344}
 345
 346static inline bool mte_is_root(const struct maple_enode *node)
 347{
 348	return ma_is_root(mte_to_node(node));
 349}
 350
 351static inline bool mas_is_root_limits(const struct ma_state *mas)
 352{
 353	return !mas->min && mas->max == ULONG_MAX;
 354}
 355
 356static inline bool mt_is_alloc(struct maple_tree *mt)
 357{
 358	return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE);
 359}
 360
 361/*
 362 * The Parent Pointer
 363 * Excluding root, the parent pointer is 256B aligned like all other tree nodes.
 364 * When storing 32 or 64 bit values, the offset can fit into 5 bits.  The 16
 365 * bit values need an extra bit to store the offset.  This extra bit comes from
 366 * a reuse of the last bit in the node type.  This is possible by using bit 1 to
 367 * indicate if bit 2 is part of the type or the slot.
 368 *
 369 * Note types:
 370 *  0b??1 = Root
 371 *  0b?00 = 16 bit nodes
 372 *  0b010 = 32 bit nodes
 373 *  0b110 = 64 bit nodes
 374 *
 375 * Slot size and alignment
 376 *  0b??1 : Root
 377 *  0b?00 : 16 bit values, type in 0-1, slot in 2-7
 378 *  0b010 : 32 bit values, type in 0-2, slot in 3-7
 379 *  0b110 : 64 bit values, type in 0-2, slot in 3-7
 380 */
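    /*
     * Illustrative sketch of the encoding above: for a range64/arange64
     * parent with the child at slot 5, the low byte of node->parent would be
     *
     *	(5 << MAPLE_PARENT_SLOT_SHIFT) | MAPLE_PARENT_RANGE64 == 0b00101110
     *
     * Bit 0 is clear (not root), bits 1-2 carry the type, and the slot is
     * recovered as (parent & MAPLE_PARENT_SLOT_MASK) >> MAPLE_PARENT_SLOT_SHIFT.
     */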
 381
 382#define MAPLE_PARENT_ROOT		0x01
 383
 384#define MAPLE_PARENT_SLOT_SHIFT		0x03
 385#define MAPLE_PARENT_SLOT_MASK		0xF8
 386
 387#define MAPLE_PARENT_16B_SLOT_SHIFT	0x02
 388#define MAPLE_PARENT_16B_SLOT_MASK	0xFC
 389
 390#define MAPLE_PARENT_RANGE64		0x06
 391#define MAPLE_PARENT_RANGE32		0x04
 392#define MAPLE_PARENT_NOT_RANGE16	0x02
 393
 394/*
 395 * mte_parent_shift() - Get the parent shift for the slot storage.
 396 * @parent: The parent pointer cast as an unsigned long
 397 * Return: The shift into that pointer to the start of the slot
 398 */
 399static inline unsigned long mte_parent_shift(unsigned long parent)
 400{
 401	/* Note bit 1 == 0 means 16B */
 402	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
 403		return MAPLE_PARENT_SLOT_SHIFT;
 404
 405	return MAPLE_PARENT_16B_SLOT_SHIFT;
 406}
 407
 408/*
 409 * mte_parent_slot_mask() - Get the slot mask for the parent.
 410 * @parent: The parent pointer cast as an unsigned long.
 411 * Return: The slot mask for that parent.
 412 */
 413static inline unsigned long mte_parent_slot_mask(unsigned long parent)
 414{
 415	/* Note bit 1 == 0 means 16B */
 416	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
 417		return MAPLE_PARENT_SLOT_MASK;
 418
 419	return MAPLE_PARENT_16B_SLOT_MASK;
 420}
 421
 422/*
 423 * mte_parent_enum() - Return the maple_type of the parent from the stored
 424 * parent type.
 425 * @p_enode: The parent pointer, cast as a maple_enode
 426 * @mt: The maple tree, used to check for an allocation (arange_64) tree
 427 * Return: The node->parent maple_type
 428 */
 429static inline
 430enum maple_type mte_parent_enum(struct maple_enode *p_enode,
 431				struct maple_tree *mt)
 432{
 433	unsigned long p_type;
 434
 435	p_type = (unsigned long)p_enode;
 436	if (p_type & MAPLE_PARENT_ROOT)
 437		return 0; /* Validated in the caller. */
 438
 439	p_type &= MAPLE_NODE_MASK;
 440	p_type = p_type & ~(MAPLE_PARENT_ROOT | mte_parent_slot_mask(p_type));
 441
 442	switch (p_type) {
 443	case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
 444		if (mt_is_alloc(mt))
 445			return maple_arange_64;
 446		return maple_range_64;
 447	}
 448
 449	return 0;
 450}
 451
 452static inline
 453enum maple_type mas_parent_enum(struct ma_state *mas, struct maple_enode *enode)
 454{
 455	return mte_parent_enum(ma_enode_ptr(mte_to_node(enode)->parent), mas->tree);
 456}
 457
 458/*
 459 * mte_set_parent() - Set the parent node and encode the slot
 460 * @enode: The encoded maple node.
 461 * @parent: The encoded maple node that is the parent of @enode.
 462 * @slot: The slot that @enode resides in @parent.
 463 *
 464 * Slot number is encoded in the enode->parent bits 3-7 or 2-7, depending on the
 465 * parent type.
 466 */
 467static inline
 468void mte_set_parent(struct maple_enode *enode, const struct maple_enode *parent,
 469		    unsigned char slot)
 470{
 471	unsigned long val = (unsigned long) parent;
 472	unsigned long shift;
 473	unsigned long type;
 474	enum maple_type p_type = mte_node_type(parent);
 475
 476	BUG_ON(p_type == maple_dense);
 477	BUG_ON(p_type == maple_leaf_64);
 478
 479	switch (p_type) {
 480	case maple_range_64:
 481	case maple_arange_64:
 482		shift = MAPLE_PARENT_SLOT_SHIFT;
 483		type = MAPLE_PARENT_RANGE64;
 484		break;
 485	default:
 486	case maple_dense:
 487	case maple_leaf_64:
 488		shift = type = 0;
 489		break;
 490	}
 491
 492	val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */
 493	val |= (slot << shift) | type;
 494	mte_to_node(enode)->parent = ma_parent_ptr(val);
 495}
 496
 497/*
 498 * mte_parent_slot() - get the parent slot of @enode.
 499 * @enode: The encoded maple node.
 500 *
 501 * Return: The slot in the parent node where @enode resides.
 502 */
 503static inline unsigned int mte_parent_slot(const struct maple_enode *enode)
 504{
 505	unsigned long val = (unsigned long) mte_to_node(enode)->parent;
 506
 507	/* Root. */
 508	if (val & 1)
 509		return 0;
 510
 511	/*
 512	 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost
 513	 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT
 514	 */
 515	return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val);
 516}
 517
 518/*
 519 * mte_parent() - Get the parent of @node.
 520 * @node: The encoded maple node.
 521 *
 522 * Return: The parent maple node.
 523 */
 524static inline struct maple_node *mte_parent(const struct maple_enode *enode)
 525{
 526	return (void *)((unsigned long)
 527			(mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK);
 528}
 529
 530/*
 531 * ma_dead_node() - check if the @node is dead.
 532 * @node: The maple node
 533 *
 534 * Return: true if dead, false otherwise.
 535 */
 536static inline bool ma_dead_node(const struct maple_node *node)
 537{
 538	struct maple_node *parent = (void *)((unsigned long)
 539					     node->parent & ~MAPLE_NODE_MASK);
 540
 541	return (parent == node);
 542}
 543/*
 544 * mte_dead_node() - check if the @enode is dead.
 545 * @enode: The encoded maple node
 546 *
 547 * Return: true if dead, false otherwise.
 548 */
 549static inline bool mte_dead_node(const struct maple_enode *enode)
 550{
 551	struct maple_node *parent, *node;
 552
 553	node = mte_to_node(enode);
 554	parent = mte_parent(enode);
 555	return (parent == node);
 556}
 557
 558/*
 559 * mas_allocated() - Get the number of nodes allocated in a maple state.
 560 * @mas: The maple state
 561 *
 562 * The ma_state alloc member is overloaded to hold a pointer to the first
 563 * allocated node or to the number of requested nodes to allocate.  If bit 0 is
 564 * set, then the alloc contains the number of requested nodes.  If there is an
 565 * allocated node, then the total allocated nodes is in that node.
 566 *
 567 * Return: The total number of nodes allocated
 568 */
 569static inline unsigned long mas_allocated(const struct ma_state *mas)
 570{
 571	if (!mas->alloc || ((unsigned long)mas->alloc & 0x1))
 572		return 0;
 573
 574	return mas->alloc->total;
 575}
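    /*
     * Sketch of the mas->alloc encoding described above (illustrative):
     *
     *	mas->alloc == NULL			: nothing allocated or requested
     *	mas->alloc == (void *)((3 << 1) | 1)	: no nodes, 3 nodes requested
     *	mas->alloc == node, node->total == 5	: 5 nodes allocated, node is the
     *						  head of the allocation list
     */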
 576
 577/*
 578 * mas_set_alloc_req() - Set the requested number of allocations.
 579 * @mas: the maple state
 580 * @count: the number of allocations.
 581 *
 582 * The requested number of allocations is either in the first allocated node,
 583 * located in @mas->alloc->request_count, or directly in @mas->alloc if there is
 584 * no allocated node.  Set the request either in the node or do the necessary
 585 * encoding to store in @mas->alloc directly.
 586 */
 587static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count)
 588{
 589	if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) {
 590		if (!count)
 591			mas->alloc = NULL;
 592		else
 593			mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U);
 594		return;
 595	}
 596
 597	mas->alloc->request_count = count;
 598}
 599
 600/*
 601 * mas_alloc_req() - get the requested number of allocations.
 602 * @mas: The maple state
 603 *
 604 * The alloc count is either stored directly in @mas, or in
 605 * @mas->alloc->request_count if there is at least one node allocated.  Decode
 606 * the request count if it's stored directly in @mas->alloc.
 607 *
 608 * Return: The allocation request count.
 609 */
 610static inline unsigned int mas_alloc_req(const struct ma_state *mas)
 611{
 612	if ((unsigned long)mas->alloc & 0x1)
 613		return (unsigned long)(mas->alloc) >> 1;
 614	else if (mas->alloc)
 615		return mas->alloc->request_count;
 616	return 0;
 617}
 618
 619/*
 620 * ma_pivots() - Get a pointer to the maple node pivots.
 621 * @node - the maple node
 622 * @type - the node type
 623 *
 624 * Return: A pointer to the maple node pivots
 625 */
 626static inline unsigned long *ma_pivots(struct maple_node *node,
 627					   enum maple_type type)
 628{
 629	switch (type) {
 630	case maple_arange_64:
 631		return node->ma64.pivot;
 632	case maple_range_64:
 633	case maple_leaf_64:
 634		return node->mr64.pivot;
 635	case maple_dense:
 636		return NULL;
 637	}
 638	return NULL;
 639}
 640
 641/*
 642 * ma_gaps() - Get a pointer to the maple node gaps.
 643 * @node - the maple node
 644 * @type - the node type
 645 *
 646 * Return: A pointer to the maple node gaps
 647 */
 648static inline unsigned long *ma_gaps(struct maple_node *node,
 649				     enum maple_type type)
 650{
 651	switch (type) {
 652	case maple_arange_64:
 653		return node->ma64.gap;
 654	case maple_range_64:
 655	case maple_leaf_64:
 656	case maple_dense:
 657		return NULL;
 658	}
 659	return NULL;
 660}
 661
 662/*
 663 * mte_pivot() - Get the pivot at @piv of the maple encoded node.
 664 * @mn: The maple encoded node.
 665 * @piv: The pivot.
 666 *
 667 * Return: the pivot at @piv of @mn.
 668 */
 669static inline unsigned long mte_pivot(const struct maple_enode *mn,
 670				 unsigned char piv)
 671{
 672	struct maple_node *node = mte_to_node(mn);
 673	enum maple_type type = mte_node_type(mn);
 674
 675	if (piv >= mt_pivots[type]) {
 676		WARN_ON(1);
 677		return 0;
 678	}
 679	switch (type) {
 680	case maple_arange_64:
 681		return node->ma64.pivot[piv];
 682	case maple_range_64:
 683	case maple_leaf_64:
 684		return node->mr64.pivot[piv];
 685	case maple_dense:
 686		return 0;
 687	}
 688	return 0;
 689}
 690
 691/*
 692 * mas_safe_pivot() - get the pivot at @piv or mas->max.
 693 * @mas: The maple state
 694 * @pivots: The pointer to the maple node pivots
 695 * @piv: The pivot to fetch
 696 * @type: The maple node type
 697 *
 698 * Return: The pivot at @piv within the limit of the @pivots array, @mas->max
 699 * otherwise.
 700 */
 701static inline unsigned long
 702mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots,
 703	       unsigned char piv, enum maple_type type)
 704{
 705	if (piv >= mt_pivots[type])
 706		return mas->max;
 707
 708	return pivots[piv];
 709}
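    /*
     * Example (illustrative): with the 16-slot maple_range_64 layout shown at
     * the top of this file, mt_pivots[] holds 15 entries, so the last slot has
     * no stored pivot and mas_safe_pivot(mas, pivots, 15, maple_range_64)
     * returns mas->max.
     */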
 710
 711/*
 712 * mas_safe_min() - Return the minimum for a given offset.
 713 * @mas: The maple state
 714 * @pivots: The pointer to the maple node pivots
 715 * @offset: The offset into the pivot array
 716 *
 717 * Return: The minimum range value that is contained in @offset.
 718 */
 719static inline unsigned long
 720mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset)
 721{
 722	if (likely(offset))
 723		return pivots[offset - 1] + 1;
 724
 725	return mas->min;
 726}
 727
 728/*
 729 * mas_logical_pivot() - Get the logical pivot of a given offset.
 730 * @mas: The maple state
 731 * @pivots: The pointer to the maple node pivots
 732 * @offset: The offset into the pivot array
 733 * @type: The maple node type
 734 *
 735 * When there is no value at a pivot (beyond the end of the data), then the
 736 * pivot is actually @mas->max.
 737 *
 738 * Return: the logical pivot of a given @offset.
 739 */
 740static inline unsigned long
 741mas_logical_pivot(struct ma_state *mas, unsigned long *pivots,
 742		  unsigned char offset, enum maple_type type)
 743{
 744	unsigned long lpiv = mas_safe_pivot(mas, pivots, offset, type);
 745
 746	if (likely(lpiv))
 747		return lpiv;
 748
 749	if (likely(offset))
 750		return mas->max;
 751
 752	return lpiv;
 753}
 754
 755/*
 756 * mte_set_pivot() - Set a pivot to a value in an encoded maple node.
 757 * @mn: The encoded maple node
 758 * @piv: The pivot offset
 759 * @val: The value of the pivot
 760 */
 761static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv,
 762				unsigned long val)
 763{
 764	struct maple_node *node = mte_to_node(mn);
 765	enum maple_type type = mte_node_type(mn);
 766
 767	BUG_ON(piv >= mt_pivots[type]);
 768	switch (type) {
 769	default:
 770	case maple_range_64:
 771	case maple_leaf_64:
 772		node->mr64.pivot[piv] = val;
 773		break;
 774	case maple_arange_64:
 775		node->ma64.pivot[piv] = val;
 776		break;
 777	case maple_dense:
 778		break;
 779	}
 780
 781}
 782
 783/*
 784 * ma_slots() - Get a pointer to the maple node slots.
 785 * @mn: The maple node
 786 * @mt: The maple node type
 787 *
 788 * Return: A pointer to the maple node slots
 789 */
 790static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt)
 791{
 792	switch (mt) {
 793	default:
 794	case maple_arange_64:
 795		return mn->ma64.slot;
 796	case maple_range_64:
 797	case maple_leaf_64:
 798		return mn->mr64.slot;
 799	case maple_dense:
 800		return mn->slot;
 801	}
 802}
 803
 804static inline bool mt_locked(const struct maple_tree *mt)
 805{
 806	return mt_external_lock(mt) ? mt_lock_is_held(mt) :
 807		lockdep_is_held(&mt->ma_lock);
 808}
 809
 810static inline void *mt_slot(const struct maple_tree *mt,
 811		void __rcu **slots, unsigned char offset)
 812{
 813	return rcu_dereference_check(slots[offset], mt_locked(mt));
 814}
 815
 816/*
 817 * mas_slot_locked() - Get the slot value when holding the maple tree lock.
 818 * @mas: The maple state
 819 * @slots: The pointer to the slots
 820 * @offset: The offset into the slots array to fetch
 821 *
 822 * Return: The entry stored in @slots at the @offset.
 823 */
 824static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots,
 825				       unsigned char offset)
 826{
 827	return rcu_dereference_protected(slots[offset], mt_locked(mas->tree));
 828}
 829
 830/*
 831 * mas_slot() - Get the slot value when not holding the maple tree lock.
 832 * @mas: The maple state
 833 * @slots: The pointer to the slots
 834 * @offset: The offset into the slots array to fetch
 835 *
 836 * Return: The entry stored in @slots at the @offset
 837 */
 838static inline void *mas_slot(struct ma_state *mas, void __rcu **slots,
 839			     unsigned char offset)
 840{
 841	return mt_slot(mas->tree, slots, offset);
 842}
 843
 844/*
 845 * mas_root() - Get the maple tree root.
 846 * @mas: The maple state.
 847 *
 848 * Return: The pointer to the root of the tree
 849 */
 850static inline void *mas_root(struct ma_state *mas)
 851{
 852	return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree));
 853}
 854
 855static inline void *mt_root_locked(struct maple_tree *mt)
 856{
 857	return rcu_dereference_protected(mt->ma_root, mt_locked(mt));
 858}
 859
 860/*
 861 * mas_root_locked() - Get the maple tree root when holding the maple tree lock.
 862 * @mas: The maple state.
 863 *
 864 * Return: The pointer to the root of the tree
 865 */
 866static inline void *mas_root_locked(struct ma_state *mas)
 867{
 868	return mt_root_locked(mas->tree);
 869}
 870
 871static inline struct maple_metadata *ma_meta(struct maple_node *mn,
 872					     enum maple_type mt)
 873{
 874	switch (mt) {
 875	case maple_arange_64:
 876		return &mn->ma64.meta;
 877	default:
 878		return &mn->mr64.meta;
 879	}
 880}
 881
 882/*
 883 * ma_set_meta() - Set the metadata information of a node.
 884 * @mn: The maple node
 885 * @mt: The maple node type
 886 * @offset: The offset of the highest sub-gap in this node.
 887 * @end: The end of the data in this node.
 888 */
 889static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
 890			       unsigned char offset, unsigned char end)
 891{
 892	struct maple_metadata *meta = ma_meta(mn, mt);
 893
 894	meta->gap = offset;
 895	meta->end = end;
 896}
 897
 898/*
 899 * ma_meta_end() - Get the data end of a node from the metadata
 900 * @mn: The maple node
 901 * @mt: The maple node type
 902 */
 903static inline unsigned char ma_meta_end(struct maple_node *mn,
 904					enum maple_type mt)
 905{
 906	struct maple_metadata *meta = ma_meta(mn, mt);
 907
 908	return meta->end;
 909}
 910
 911/*
 912 * ma_meta_gap() - Get the largest gap location of a node from the metadata
 913 * @mn: The maple node
 914 * @mt: The maple node type
 915 */
 916static inline unsigned char ma_meta_gap(struct maple_node *mn,
 917					enum maple_type mt)
 918{
 919	BUG_ON(mt != maple_arange_64);
 920
 921	return mn->ma64.meta.gap;
 922}
 923
 924/*
 925 * ma_set_meta_gap() - Set the largest gap location in a node's metadata
 926 * @mn: The maple node
 927 * @mt: The maple node type
 928 * @offset: The location of the largest gap.
 929 */
 930static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt,
 931				   unsigned char offset)
 932{
 933
 934	struct maple_metadata *meta = ma_meta(mn, mt);
 935
 936	meta->gap = offset;
 937}
 938
 939/*
 940 * mat_add() - Add a @dead_enode to the ma_topiary linked list of dead nodes.
 941 * @mat - the ma_topiary, a linked list of dead nodes.
 942 * @dead_enode - the node to be marked as dead and added to the tail of the list
 943 *
 944 * Add the @dead_enode to the linked list in @mat.
 945 */
 946static inline void mat_add(struct ma_topiary *mat,
 947			   struct maple_enode *dead_enode)
 948{
 949	mte_set_node_dead(dead_enode);
 950	mte_to_mat(dead_enode)->next = NULL;
 951	if (!mat->tail) {
 952		mat->tail = mat->head = dead_enode;
 953		return;
 954	}
 955
 956	mte_to_mat(mat->tail)->next = dead_enode;
 957	mat->tail = dead_enode;
 958}
 959
 960static void mte_destroy_walk(struct maple_enode *, struct maple_tree *);
 961static inline void mas_free(struct ma_state *mas, struct maple_enode *used);
 962
 963/*
 964 * mas_mat_free() - Free all nodes in a dead list.
 965 * @mas - the maple state
 966 * @mat - the ma_topiary linked list of dead nodes to free.
 967 *
 968 * Free walk a dead list.
 969 */
 970static void mas_mat_free(struct ma_state *mas, struct ma_topiary *mat)
 971{
 972	struct maple_enode *next;
 973
 974	while (mat->head) {
 975		next = mte_to_mat(mat->head)->next;
 976		mas_free(mas, mat->head);
 977		mat->head = next;
 978	}
 979}
 980
 981/*
 982 * mas_mat_destroy() - Free all nodes and subtrees in a dead list.
 983 * @mas - the maple state
 984 * @mat - the ma_topiary linked list of dead nodes to free.
 985 *
 986 * Destroy walk a dead list.
 987 */
 988static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat)
 989{
 990	struct maple_enode *next;
 991
 992	while (mat->head) {
 993		next = mte_to_mat(mat->head)->next;
 994		mte_destroy_walk(mat->head, mat->mtree);
 995		mat->head = next;
 996	}
 997}
 998/*
 999 * mas_descend() - Descend into the slot stored in the ma_state.
1000 * @mas - the maple state.
1001 *
1002 * Note: Not RCU safe, only use in write side or debug code.
1003 */
1004static inline void mas_descend(struct ma_state *mas)
1005{
1006	enum maple_type type;
1007	unsigned long *pivots;
1008	struct maple_node *node;
1009	void __rcu **slots;
1010
1011	node = mas_mn(mas);
1012	type = mte_node_type(mas->node);
1013	pivots = ma_pivots(node, type);
1014	slots = ma_slots(node, type);
1015
1016	if (mas->offset)
1017		mas->min = pivots[mas->offset - 1] + 1;
1018	mas->max = mas_safe_pivot(mas, pivots, mas->offset, type);
1019	mas->node = mas_slot(mas, slots, mas->offset);
1020}
1021
1022/*
1023 * mte_set_gap() - Set a maple node gap.
1024 * @mn: The encoded maple node
1025 * @gap: The offset of the gap to set
1026 * @val: The gap value
1027 */
1028static inline void mte_set_gap(const struct maple_enode *mn,
1029				 unsigned char gap, unsigned long val)
1030{
1031	switch (mte_node_type(mn)) {
1032	default:
1033		break;
1034	case maple_arange_64:
1035		mte_to_node(mn)->ma64.gap[gap] = val;
1036		break;
1037	}
1038}
1039
1040/*
1041 * mas_ascend() - Walk up a level of the tree.
1042 * @mas: The maple state
1043 *
1044 * Sets the @mas->max and @mas->min to the correct values when walking up.  This
1045 * may cause several levels of walking up to find the correct min and max.
1046 * May find a dead node which will cause a premature return.
1047 * Return: 1 on dead node, 0 otherwise
1048 */
1049static int mas_ascend(struct ma_state *mas)
1050{
1051	struct maple_enode *p_enode; /* parent enode. */
1052	struct maple_enode *a_enode; /* ancestor enode. */
1053	struct maple_node *a_node; /* ancestor node. */
1054	struct maple_node *p_node; /* parent node. */
1055	unsigned char a_slot;
1056	enum maple_type a_type;
1057	unsigned long min, max;
1058	unsigned long *pivots;
1059	unsigned char offset;
1060	bool set_max = false, set_min = false;
1061
1062	a_node = mas_mn(mas);
1063	if (ma_is_root(a_node)) {
1064		mas->offset = 0;
1065		return 0;
1066	}
1067
1068	p_node = mte_parent(mas->node);
1069	if (unlikely(a_node == p_node))
1070		return 1;
1071	a_type = mas_parent_enum(mas, mas->node);
1072	offset = mte_parent_slot(mas->node);
1073	a_enode = mt_mk_node(p_node, a_type);
1074
1075	/* Check to make sure all parent information is still accurate */
1076	if (p_node != mte_parent(mas->node))
1077		return 1;
1078
1079	mas->node = a_enode;
1080	mas->offset = offset;
1081
1082	if (mte_is_root(a_enode)) {
1083		mas->max = ULONG_MAX;
1084		mas->min = 0;
1085		return 0;
1086	}
1087
1088	min = 0;
1089	max = ULONG_MAX;
1090	do {
1091		p_enode = a_enode;
1092		a_type = mas_parent_enum(mas, p_enode);
1093		a_node = mte_parent(p_enode);
1094		a_slot = mte_parent_slot(p_enode);
1095		pivots = ma_pivots(a_node, a_type);
1096		a_enode = mt_mk_node(a_node, a_type);
1097
1098		if (!set_min && a_slot) {
1099			set_min = true;
1100			min = pivots[a_slot - 1] + 1;
1101		}
1102
1103		if (!set_max && a_slot < mt_pivots[a_type]) {
1104			set_max = true;
1105			max = pivots[a_slot];
1106		}
1107
1108		if (unlikely(ma_dead_node(a_node)))
1109			return 1;
1110
1111		if (unlikely(ma_is_root(a_node)))
1112			break;
1113
1114	} while (!set_min || !set_max);
1115
1116	mas->max = max;
1117	mas->min = min;
1118	return 0;
1119}
1120
1121/*
1122 * mas_pop_node() - Get a previously allocated maple node from the maple state.
1123 * @mas: The maple state
1124 *
1125 * Return: A pointer to a maple node.
1126 */
1127static inline struct maple_node *mas_pop_node(struct ma_state *mas)
1128{
1129	struct maple_alloc *ret, *node = mas->alloc;
1130	unsigned long total = mas_allocated(mas);
1131
1132	/* nothing or a request pending. */
1133	if (unlikely(!total))
1134		return NULL;
1135
1136	if (total == 1) {
1137		/* single allocation in this ma_state */
1138		mas->alloc = NULL;
1139		ret = node;
1140		goto single_node;
1141	}
1142
1143	if (!node->node_count) {
1144		/* Single allocation in this node. */
1145		mas->alloc = node->slot[0];
1146		node->slot[0] = NULL;
1147		mas->alloc->total = node->total - 1;
1148		ret = node;
1149		goto new_head;
1150	}
1151
1152	node->total--;
1153	ret = node->slot[node->node_count];
1154	node->slot[node->node_count--] = NULL;
1155
1156single_node:
1157new_head:
1158	ret->total = 0;
1159	ret->node_count = 0;
1160	if (ret->request_count) {
1161		mas_set_alloc_req(mas, ret->request_count + 1);
1162		ret->request_count = 0;
1163	}
1164	return (struct maple_node *)ret;
1165}
1166
1167/*
1168 * mas_push_node() - Push a node back on the maple state allocation.
1169 * @mas: The maple state
1170 * @used: The used maple node
1171 *
1172 * Stores the maple node back into @mas->alloc for reuse.  Updates allocated and
1173 * requested node count as necessary.
1174 */
1175static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
1176{
1177	struct maple_alloc *reuse = (struct maple_alloc *)used;
1178	struct maple_alloc *head = mas->alloc;
1179	unsigned long count;
1180	unsigned int requested = mas_alloc_req(mas);
1181
1182	memset(reuse, 0, sizeof(*reuse));
1183	count = mas_allocated(mas);
1184
1185	if (count && (head->node_count < MAPLE_ALLOC_SLOTS - 1)) {
1186		if (head->slot[0])
1187			head->node_count++;
1188		head->slot[head->node_count] = reuse;
1189		head->total++;
1190		goto done;
1191	}
1192
1193	reuse->total = 1;
1194	if ((head) && !((unsigned long)head & 0x1)) {
1195		head->request_count = 0;
1196		reuse->slot[0] = head;
1197		reuse->total += head->total;
1198	}
1199
1200	mas->alloc = reuse;
1201done:
1202	if (requested > 1)
1203		mas_set_alloc_req(mas, requested - 1);
1204}
1205
1206/*
1207 * mas_alloc_nodes() - Allocate nodes into a maple state
1208 * @mas: The maple state
1209 * @gfp: The GFP Flags
1210 */
1211static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
1212{
1213	struct maple_alloc *node;
1214	unsigned long allocated = mas_allocated(mas);
1215	unsigned long success = allocated;
1216	unsigned int requested = mas_alloc_req(mas);
1217	unsigned int count;
1218	void **slots = NULL;
1219	unsigned int max_req = 0;
1220
1221	if (!requested)
1222		return;
1223
1224	mas_set_alloc_req(mas, 0);
1225	if (mas->mas_flags & MA_STATE_PREALLOC) {
1226		if (allocated)
1227			return;
1228		WARN_ON(!allocated);
1229	}
1230
1231	if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS - 1) {
1232		node = (struct maple_alloc *)mt_alloc_one(gfp);
1233		if (!node)
1234			goto nomem_one;
1235
1236		if (allocated)
1237			node->slot[0] = mas->alloc;
1238
1239		success++;
1240		mas->alloc = node;
1241		requested--;
1242	}
1243
1244	node = mas->alloc;
1245	while (requested) {
1246		max_req = MAPLE_ALLOC_SLOTS;
1247		if (node->slot[0]) {
1248			unsigned int offset = node->node_count + 1;
1249
1250			slots = (void **)&node->slot[offset];
1251			max_req -= offset;
1252		} else {
1253			slots = (void **)&node->slot;
1254		}
1255
1256		max_req = min(requested, max_req);
1257		count = mt_alloc_bulk(gfp, max_req, slots);
1258		if (!count)
1259			goto nomem_bulk;
1260
1261		node->node_count += count;
1262		/* zero indexed. */
1263		if (slots == (void **)&node->slot)
1264			node->node_count--;
1265
1266		success += count;
1267		node = node->slot[0];
1268		requested -= count;
1269	}
1270	mas->alloc->total = success;
1271	return;
1272
1273nomem_bulk:
1274	/* Clean up potential freed allocations on bulk failure */
1275	memset(slots, 0, max_req * sizeof(unsigned long));
1276nomem_one:
1277	mas_set_alloc_req(mas, requested);
1278	if (mas->alloc && !(((unsigned long)mas->alloc & 0x1)))
1279		mas->alloc->total = success;
1280	mas_set_err(mas, -ENOMEM);
1281	return;
1282
1283}
1284
1285/*
1286 * mas_free() - Free an encoded maple node
1287 * @mas: The maple state
1288 * @used: The encoded maple node to free.
1289 *
1290 * Uses rcu free if necessary, pushes @used back on the maple state allocations
1291 * otherwise.
1292 */
1293static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
1294{
1295	struct maple_node *tmp = mte_to_node(used);
1296
1297	if (mt_in_rcu(mas->tree))
1298		ma_free_rcu(tmp);
1299	else
1300		mas_push_node(mas, tmp);
1301}
1302
1303/*
 1304 * mas_node_count_gfp() - Check if enough nodes are allocated and request more
 1305 * if there are not enough nodes.
1306 * @mas: The maple state
1307 * @count: The number of nodes needed
1308 * @gfp: the gfp flags
1309 */
1310static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp)
1311{
1312	unsigned long allocated = mas_allocated(mas);
1313
1314	if (allocated < count) {
1315		mas_set_alloc_req(mas, count - allocated);
1316		mas_alloc_nodes(mas, gfp);
1317	}
1318}
1319
1320/*
 1321 * mas_node_count() - Check if enough nodes are allocated and request more
 1322 * if there are not enough nodes.
1323 * @mas: The maple state
1324 * @count: The number of nodes needed
1325 *
1326 * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags.
1327 */
1328static void mas_node_count(struct ma_state *mas, int count)
1329{
1330	return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN);
1331}
1332
1333/*
1334 * mas_start() - Sets up maple state for operations.
1335 * @mas: The maple state.
1336 *
1337 * If mas->node == MAS_START, then set the min, max, depth, and offset to
1338 * defaults.
1339 *
1340 * Return:
1341 * - If mas->node is an error or not MAS_START, return NULL.
1342 * - If it's an empty tree:     NULL & mas->node == MAS_NONE
1343 * - If it's a single entry:    The entry & mas->node == MAS_ROOT
1344 * - If it's a tree:            NULL & mas->node == safe root node.
1345 */
1346static inline struct maple_enode *mas_start(struct ma_state *mas)
1347{
1348	if (likely(mas_is_start(mas))) {
1349		struct maple_enode *root;
1350
1351		mas->node = MAS_NONE;
1352		mas->min = 0;
1353		mas->max = ULONG_MAX;
1354		mas->depth = 0;
1355		mas->offset = 0;
1356
1357		root = mas_root(mas);
1358		/* Tree with nodes */
1359		if (likely(xa_is_node(root))) {
1360			mas->depth = 1;
1361			mas->node = mte_safe_root(root);
1362			return NULL;
1363		}
1364
1365		/* empty tree */
1366		if (unlikely(!root)) {
1367			mas->offset = MAPLE_NODE_SLOTS;
1368			return NULL;
1369		}
1370
1371		/* Single entry tree */
1372		mas->node = MAS_ROOT;
1373		mas->offset = MAPLE_NODE_SLOTS;
1374
1375		/* Single entry tree. */
1376		if (mas->index > 0)
1377			return NULL;
1378
1379		return root;
1380	}
1381
1382	return NULL;
1383}
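    /*
     * Usage sketch (illustrative), assuming a state initialised with
     * MA_STATE(mas, &tree, 0, 0):
     *
     *	entry = mas_start(&mas);
     *	- empty tree:		entry == NULL, mas.node == MAS_NONE
     *	- single entry at 0:	entry is that entry, mas.node == MAS_ROOT
     *	- tree with nodes:	entry == NULL, mas.node == the safe root
     */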
1384
1385/*
1386 * ma_data_end() - Find the end of the data in a node.
1387 * @node: The maple node
1388 * @type: The maple node type
1389 * @pivots: The array of pivots in the node
1390 * @max: The maximum value in the node
1391 *
1392 * Uses metadata to find the end of the data when possible.
1393 * Return: The zero indexed last slot with data (may be null).
1394 */
1395static inline unsigned char ma_data_end(struct maple_node *node,
1396					enum maple_type type,
1397					unsigned long *pivots,
1398					unsigned long max)
1399{
1400	unsigned char offset;
1401
1402	if (type == maple_arange_64)
1403		return ma_meta_end(node, type);
1404
1405	offset = mt_pivots[type] - 1;
1406	if (likely(!pivots[offset]))
1407		return ma_meta_end(node, type);
1408
1409	if (likely(pivots[offset] == max))
1410		return offset;
1411
1412	return mt_pivots[type];
1413}
1414
1415/*
1416 * mas_data_end() - Find the end of the data (slot).
1417 * @mas: the maple state
1418 *
1419 * This method is optimized to check the metadata of a node if the node type
1420 * supports data end metadata.
1421 *
1422 * Return: The zero indexed last slot with data (may be null).
1423 */
1424static inline unsigned char mas_data_end(struct ma_state *mas)
1425{
1426	enum maple_type type;
1427	struct maple_node *node;
1428	unsigned char offset;
1429	unsigned long *pivots;
1430
1431	type = mte_node_type(mas->node);
1432	node = mas_mn(mas);
1433	if (type == maple_arange_64)
1434		return ma_meta_end(node, type);
1435
1436	pivots = ma_pivots(node, type);
1437	offset = mt_pivots[type] - 1;
1438	if (likely(!pivots[offset]))
1439		return ma_meta_end(node, type);
1440
1441	if (likely(pivots[offset] == mas->max))
1442		return offset;
1443
1444	return mt_pivots[type];
1445}
1446
1447/*
1448 * mas_leaf_max_gap() - Returns the largest gap in a leaf node
1449 * @mas - the maple state
1450 *
1451 * Return: The maximum gap in the leaf.
1452 */
1453static unsigned long mas_leaf_max_gap(struct ma_state *mas)
1454{
1455	enum maple_type mt;
1456	unsigned long pstart, gap, max_gap;
1457	struct maple_node *mn;
1458	unsigned long *pivots;
1459	void __rcu **slots;
1460	unsigned char i;
1461	unsigned char max_piv;
1462
1463	mt = mte_node_type(mas->node);
1464	mn = mas_mn(mas);
1465	slots = ma_slots(mn, mt);
1466	max_gap = 0;
1467	if (unlikely(ma_is_dense(mt))) {
1468		gap = 0;
1469		for (i = 0; i < mt_slots[mt]; i++) {
1470			if (slots[i]) {
1471				if (gap > max_gap)
1472					max_gap = gap;
1473				gap = 0;
1474			} else {
1475				gap++;
1476			}
1477		}
1478		if (gap > max_gap)
1479			max_gap = gap;
1480		return max_gap;
1481	}
1482
1483	/*
 1484	 * Checking the first implied pivot optimizes the loop below, and slot 1
 1485	 * may be skipped if there is a gap in slot 0.
1486	 */
1487	pivots = ma_pivots(mn, mt);
1488	if (likely(!slots[0])) {
1489		max_gap = pivots[0] - mas->min + 1;
1490		i = 2;
1491	} else {
1492		i = 1;
1493	}
1494
1495	/* reduce max_piv as the special case is checked before the loop */
1496	max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1;
1497	/*
1498	 * Check end implied pivot which can only be a gap on the right most
1499	 * node.
1500	 */
1501	if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) {
1502		gap = ULONG_MAX - pivots[max_piv];
1503		if (gap > max_gap)
1504			max_gap = gap;
1505	}
1506
1507	for (; i <= max_piv; i++) {
1508		/* data == no gap. */
1509		if (likely(slots[i]))
1510			continue;
1511
1512		pstart = pivots[i - 1];
1513		gap = pivots[i] - pstart;
1514		if (gap > max_gap)
1515			max_gap = gap;
1516
1517		/* There cannot be two gaps in a row. */
1518		i++;
1519	}
1520	return max_gap;
1521}
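    /*
     * Worked example (illustrative): in a leaf with mas->min == 0,
     * slots == { A, NULL, B, ... } and pivots == { 9, 19, 29, ... }, the NULL
     * in slot 1 covers 10-19, so the loop above records a gap of
     * pivots[1] - pivots[0] == 10.
     */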
1522
1523/*
1524 * ma_max_gap() - Get the maximum gap in a maple node (non-leaf)
1525 * @node: The maple node
1526 * @gaps: The pointer to the gaps
1527 * @mt: The maple node type
 1528 * @off: Pointer to store the offset location of the gap.
1529 *
1530 * Uses the metadata data end to scan backwards across set gaps.
1531 *
1532 * Return: The maximum gap value
1533 */
1534static inline unsigned long
1535ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt,
1536	    unsigned char *off)
1537{
1538	unsigned char offset, i;
1539	unsigned long max_gap = 0;
1540
1541	i = offset = ma_meta_end(node, mt);
1542	do {
1543		if (gaps[i] > max_gap) {
1544			max_gap = gaps[i];
1545			offset = i;
1546		}
1547	} while (i--);
1548
1549	*off = offset;
1550	return max_gap;
1551}
1552
1553/*
1554 * mas_max_gap() - find the largest gap in a non-leaf node and set the slot.
1555 * @mas: The maple state.
1556 *
1557 * If the metadata gap is set to MAPLE_ARANGE64_META_MAX, there is no gap.
1558 *
1559 * Return: The gap value.
1560 */
1561static inline unsigned long mas_max_gap(struct ma_state *mas)
1562{
1563	unsigned long *gaps;
1564	unsigned char offset;
1565	enum maple_type mt;
1566	struct maple_node *node;
1567
1568	mt = mte_node_type(mas->node);
1569	if (ma_is_leaf(mt))
1570		return mas_leaf_max_gap(mas);
1571
1572	node = mas_mn(mas);
1573	offset = ma_meta_gap(node, mt);
1574	if (offset == MAPLE_ARANGE64_META_MAX)
1575		return 0;
1576
1577	gaps = ma_gaps(node, mt);
1578	return gaps[offset];
1579}
1580
1581/*
1582 * mas_parent_gap() - Set the parent gap and any gaps above, as needed
1583 * @mas: The maple state
1584 * @offset: The gap offset in the parent to set
1585 * @new: The new gap value.
1586 *
1587 * Set the parent gap then continue to set the gap upwards, using the metadata
1588 * of the parent to see if it is necessary to check the node above.
1589 */
1590static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset,
1591		unsigned long new)
1592{
1593	unsigned long meta_gap = 0;
1594	struct maple_node *pnode;
1595	struct maple_enode *penode;
1596	unsigned long *pgaps;
1597	unsigned char meta_offset;
1598	enum maple_type pmt;
1599
1600	pnode = mte_parent(mas->node);
1601	pmt = mas_parent_enum(mas, mas->node);
1602	penode = mt_mk_node(pnode, pmt);
1603	pgaps = ma_gaps(pnode, pmt);
1604
1605ascend:
1606	meta_offset = ma_meta_gap(pnode, pmt);
1607	if (meta_offset == MAPLE_ARANGE64_META_MAX)
1608		meta_gap = 0;
1609	else
1610		meta_gap = pgaps[meta_offset];
1611
1612	pgaps[offset] = new;
1613
1614	if (meta_gap == new)
1615		return;
1616
1617	if (offset != meta_offset) {
1618		if (meta_gap > new)
1619			return;
1620
1621		ma_set_meta_gap(pnode, pmt, offset);
1622	} else if (new < meta_gap) {
1623		meta_offset = 15;
1624		new = ma_max_gap(pnode, pgaps, pmt, &meta_offset);
1625		ma_set_meta_gap(pnode, pmt, meta_offset);
1626	}
1627
1628	if (ma_is_root(pnode))
1629		return;
1630
1631	/* Go to the parent node. */
1632	pnode = mte_parent(penode);
1633	pmt = mas_parent_enum(mas, penode);
1634	pgaps = ma_gaps(pnode, pmt);
1635	offset = mte_parent_slot(penode);
1636	penode = mt_mk_node(pnode, pmt);
1637	goto ascend;
1638}
1639
1640/*
1641 * mas_update_gap() - Update a nodes gaps and propagate up if necessary.
1642 * @mas - the maple state.
1643 */
1644static inline void mas_update_gap(struct ma_state *mas)
1645{
1646	unsigned char pslot;
1647	unsigned long p_gap;
1648	unsigned long max_gap;
1649
1650	if (!mt_is_alloc(mas->tree))
1651		return;
1652
1653	if (mte_is_root(mas->node))
1654		return;
1655
1656	max_gap = mas_max_gap(mas);
1657
1658	pslot = mte_parent_slot(mas->node);
1659	p_gap = ma_gaps(mte_parent(mas->node),
1660			mas_parent_enum(mas, mas->node))[pslot];
1661
1662	if (p_gap != max_gap)
1663		mas_parent_gap(mas, pslot, max_gap);
1664}
1665
1666/*
1667 * mas_adopt_children() - Set the parent pointer of all nodes in @parent to
1668 * @parent with the slot encoded.
1669 * @mas - the maple state (for the tree)
1670 * @parent - the maple encoded node containing the children.
1671 */
1672static inline void mas_adopt_children(struct ma_state *mas,
1673		struct maple_enode *parent)
1674{
1675	enum maple_type type = mte_node_type(parent);
1676	struct maple_node *node = mas_mn(mas);
1677	void __rcu **slots = ma_slots(node, type);
1678	unsigned long *pivots = ma_pivots(node, type);
1679	struct maple_enode *child;
1680	unsigned char offset;
1681
1682	offset = ma_data_end(node, type, pivots, mas->max);
1683	do {
1684		child = mas_slot_locked(mas, slots, offset);
1685		mte_set_parent(child, parent, offset);
1686	} while (offset--);
1687}
1688
1689/*
1690 * mas_replace() - Replace a maple node in the tree with mas->node.  Uses the
1691 * parent encoding to locate the maple node in the tree.
1692 * @mas - the ma_state to use for operations.
1693 * @advanced - boolean to adopt the child nodes and free the old node (false) or
1694 * leave the node (true) and handle the adoption and free elsewhere.
1695 */
1696static inline void mas_replace(struct ma_state *mas, bool advanced)
1697	__must_hold(mas->tree->lock)
1698{
1699	struct maple_node *mn = mas_mn(mas);
1700	struct maple_enode *old_enode;
1701	unsigned char offset = 0;
1702	void __rcu **slots = NULL;
1703
1704	if (ma_is_root(mn)) {
1705		old_enode = mas_root_locked(mas);
1706	} else {
1707		offset = mte_parent_slot(mas->node);
1708		slots = ma_slots(mte_parent(mas->node),
1709				 mas_parent_enum(mas, mas->node));
1710		old_enode = mas_slot_locked(mas, slots, offset);
1711	}
1712
1713	if (!advanced && !mte_is_leaf(mas->node))
1714		mas_adopt_children(mas, mas->node);
1715
1716	if (mte_is_root(mas->node)) {
1717		mn->parent = ma_parent_ptr(
1718			      ((unsigned long)mas->tree | MA_ROOT_PARENT));
1719		rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
1720		mas_set_height(mas);
1721	} else {
1722		rcu_assign_pointer(slots[offset], mas->node);
1723	}
1724
1725	if (!advanced)
1726		mas_free(mas, old_enode);
1727}
1728
1729/*
1730 * mas_new_child() - Find the new child of a node.
1731 * @mas: the maple state
1732 * @child: the maple state to store the child.
1733 */
1734static inline bool mas_new_child(struct ma_state *mas, struct ma_state *child)
1735	__must_hold(mas->tree->lock)
1736{
1737	enum maple_type mt;
1738	unsigned char offset;
1739	unsigned char end;
1740	unsigned long *pivots;
1741	struct maple_enode *entry;
1742	struct maple_node *node;
1743	void __rcu **slots;
1744
1745	mt = mte_node_type(mas->node);
1746	node = mas_mn(mas);
1747	slots = ma_slots(node, mt);
1748	pivots = ma_pivots(node, mt);
1749	end = ma_data_end(node, mt, pivots, mas->max);
1750	for (offset = mas->offset; offset <= end; offset++) {
1751		entry = mas_slot_locked(mas, slots, offset);
1752		if (mte_parent(entry) == node) {
1753			*child = *mas;
1754			mas->offset = offset + 1;
1755			child->offset = offset;
1756			mas_descend(child);
1757			child->offset = 0;
1758			return true;
1759		}
1760	}
1761	return false;
1762}
1763
1764/*
1765 * mab_shift_right() - Shift the data in mab right. Note, does not clean out the
1766 * old data or set b_node->b_end.
1767 * @b_node: the maple_big_node
1768 * @shift: the shift count
1769 */
1770static inline void mab_shift_right(struct maple_big_node *b_node,
1771				 unsigned char shift)
1772{
1773	unsigned long size = b_node->b_end * sizeof(unsigned long);
1774
1775	memmove(b_node->pivot + shift, b_node->pivot, size);
1776	memmove(b_node->slot + shift, b_node->slot, size);
1777	if (b_node->type == maple_arange_64)
1778		memmove(b_node->gap + shift, b_node->gap, size);
1779}
1780
1781/*
1782 * mab_middle_node() - Check if a middle node is needed (unlikely)
1783 * @b_node: the maple_big_node that contains the data.
1785 * @split: the potential split location
1786 * @slot_count: the size that can be stored in a single node being considered.
1787 *
1788 * Return: true if a middle node is required.
1789 */
1790static inline bool mab_middle_node(struct maple_big_node *b_node, int split,
1791				   unsigned char slot_count)
1792{
1793	unsigned char size = b_node->b_end;
1794
1795	if (size >= 2 * slot_count)
1796		return true;
1797
1798	if (!b_node->slot[split] && (size >= 2 * slot_count - 1))
1799		return true;
1800
1801	return false;
1802}
1803
1804/*
1805 * mab_no_null_split() - ensure the split doesn't fall on a NULL
1806 * @b_node: the maple_big_node with the data
1807 * @split: the suggested split location
1808 * @slot_count: the number of slots in the node being considered.
1809 *
1810 * Return: the split location.
1811 */
1812static inline int mab_no_null_split(struct maple_big_node *b_node,
1813				    unsigned char split, unsigned char slot_count)
1814{
1815	if (!b_node->slot[split]) {
1816		/*
1817		 * If the split is less than the max slot && the right side will
1818		 * still be sufficient, then increment the split on NULL.
1819		 */
1820		if ((split < slot_count - 1) &&
1821		    (b_node->b_end - split) > (mt_min_slots[b_node->type]))
1822			split++;
1823		else
1824			split--;
1825	}
1826	return split;
1827}
1828
1829/*
1830 * mab_calc_split() - Calculate the split location and if there needs to be two
1831 * splits.
1832 * @bn: The maple_big_node with the data
1833 * @mid_split: The second split, if required.  0 otherwise.
1834 *
1835 * Return: The first split location.  The middle split is set in @mid_split.
1836 */
1837static inline int mab_calc_split(struct ma_state *mas,
1838	 struct maple_big_node *bn, unsigned char *mid_split, unsigned long min)
1839{
1840	unsigned char b_end = bn->b_end;
1841	int split = b_end / 2; /* Assume equal split. */
1842	unsigned char slot_min, slot_count = mt_slots[bn->type];
1843
1844	/*
1845	 * To support gap tracking, all NULL entries are kept together and a node cannot
1846	 * end on a NULL entry, with the exception of the left-most leaf.  The
1847	 * limitation means that the split of a node must be checked for this condition
1848	 * and be able to put more data in one direction or the other.
1849	 */
1850	if (unlikely((mas->mas_flags & MA_STATE_BULK))) {
1851		*mid_split = 0;
1852		split = b_end - mt_min_slots[bn->type];
1853
1854		if (!ma_is_leaf(bn->type))
1855			return split;
1856
1857		mas->mas_flags |= MA_STATE_REBALANCE;
1858		if (!bn->slot[split])
1859			split--;
1860		return split;
1861	}
1862
1863	/*
1864	 * Although extremely rare, it is possible to enter what is known as the 3-way
1865	 * split scenario.  The 3-way split comes about by means of a store of a range
1866	 * that overwrites the end and beginning of two full nodes.  The result is a set
1867	 * of entries that cannot be stored in 2 nodes.  Sometimes, these two nodes can
1868	 * also be located in different parent nodes which are also full.  This can
1869	 * carry upwards all the way to the root in the worst case.
1870	 */
1871	if (unlikely(mab_middle_node(bn, split, slot_count))) {
1872		split = b_end / 3;
1873		*mid_split = split * 2;
1874	} else {
1875		slot_min = mt_min_slots[bn->type];
1876
1877		*mid_split = 0;
1878		/*
1879		 * Avoid having a range less than the slot count unless it
1880		 * causes one node to be deficient.
 1881		 * NOTE: mt_min_slots is 1 based, b_end and split are zero based.
1882		 */
1883		while (((bn->pivot[split] - min) < slot_count - 1) &&
1884		       (split < slot_count - 1) && (b_end - split > slot_min))
1885			split++;
1886	}
1887
1888	/* Avoid ending a node on a NULL entry */
1889	split = mab_no_null_split(bn, split, slot_count);
1890	if (!(*mid_split))
1891		return split;
1892
1893	*mid_split = mab_no_null_split(bn, *mid_split, slot_count);
1894
1895	return split;
1896}
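    /*
     * Worked example of the 3-way split (illustrative, assuming
     * MAPLE_RANGE64_SLOTS == 16): a store overwriting the end of one full
     * leaf and the start of the next can leave b_end == 32 entries in the
     * big node, so mab_middle_node() is true and the data is divided at
     * split == 10 and *mid_split == 20 (before any NULL-split adjustment),
     * filling three new nodes.
     */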
1897
1898/*
1899 * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node
1900 * and set @b_node->b_end to the next free slot.
1901 * @mas: The maple state
1902 * @mas_start: The starting slot to copy
1903 * @mas_end: The end slot to copy (inclusively)
1904 * @b_node: The maple_big_node to place the data
1905 * @mab_start: The starting location in maple_big_node to store the data.
1906 */
1907static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start,
1908			unsigned char mas_end, struct maple_big_node *b_node,
1909			unsigned char mab_start)
1910{
1911	enum maple_type mt;
1912	struct maple_node *node;
1913	void __rcu **slots;
1914	unsigned long *pivots, *gaps;
1915	int i = mas_start, j = mab_start;
1916	unsigned char piv_end;
1917
1918	node = mas_mn(mas);
1919	mt = mte_node_type(mas->node);
1920	pivots = ma_pivots(node, mt);
1921	if (!i) {
1922		b_node->pivot[j] = pivots[i++];
1923		if (unlikely(i > mas_end))
1924			goto complete;
1925		j++;
1926	}
1927
1928	piv_end = min(mas_end, mt_pivots[mt]);
1929	for (; i < piv_end; i++, j++) {
1930		b_node->pivot[j] = pivots[i];
1931		if (unlikely(!b_node->pivot[j]))
1932			break;
1933
1934		if (unlikely(mas->max == b_node->pivot[j]))
1935			goto complete;
1936	}
1937
1938	if (likely(i <= mas_end))
1939		b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt);
1940
1941complete:
1942	b_node->b_end = ++j;
1943	j -= mab_start;
1944	slots = ma_slots(node, mt);
1945	memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j);
1946	if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) {
1947		gaps = ma_gaps(node, mt);
1948		memcpy(b_node->gap + mab_start, gaps + mas_start,
1949		       sizeof(unsigned long) * j);
1950	}
1951}
1952
1953/*
1954 * mas_leaf_set_meta() - Set the metadata of a leaf if possible.
1955 * @mas: The maple state
1956 * @node: The maple node
1957 * @pivots: pointer to the maple node pivots
1958 * @mt: The maple type
1959 * @end: The assumed end
1960 *
1961 * Note, end may be incremented within this function but not modified at the
1962 * source.  This is fine since the metadata is the last thing to be stored in a
1963 * node during a write.
1964 */
1965static inline void mas_leaf_set_meta(struct ma_state *mas,
1966		struct maple_node *node, unsigned long *pivots,
1967		enum maple_type mt, unsigned char end)
1968{
1969	/* There is no room for metadata already */
1970	if (mt_pivots[mt] <= end)
1971		return;
1972
1973	if (pivots[end] && pivots[end] < mas->max)
1974		end++;
1975
1976	if (end < mt_slots[mt] - 1)
1977		ma_set_meta(node, mt, 0, end);
1978}
1979
1980/*
1981 * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node.
1982 * @b_node: the maple_big_node that has the data
1983 * @mab_start: the start location in @b_node.
1984 * @mab_end: The end location in @b_node (inclusively)
1985 * @mas: The maple state with the maple encoded node.
1986 */
1987static inline void mab_mas_cp(struct maple_big_node *b_node,
1988			      unsigned char mab_start, unsigned char mab_end,
1989			      struct ma_state *mas, bool new_max)
1990{
1991	int i, j = 0;
1992	enum maple_type mt = mte_node_type(mas->node);
1993	struct maple_node *node = mte_to_node(mas->node);
1994	void __rcu **slots = ma_slots(node, mt);
1995	unsigned long *pivots = ma_pivots(node, mt);
1996	unsigned long *gaps = NULL;
1997	unsigned char end;
1998
1999	if (mab_end - mab_start > mt_pivots[mt])
2000		mab_end--;
2001
2002	if (!pivots[mt_pivots[mt] - 1])
2003		slots[mt_pivots[mt]] = NULL;
2004
2005	i = mab_start;
2006	do {
2007		pivots[j++] = b_node->pivot[i++];
2008	} while (i <= mab_end && likely(b_node->pivot[i]));
2009
2010	memcpy(slots, b_node->slot + mab_start,
2011	       sizeof(void *) * (i - mab_start));
2012
2013	if (new_max)
2014		mas->max = b_node->pivot[i - 1];
2015
2016	end = j - 1;
2017	if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) {
2018		unsigned long max_gap = 0;
2019		unsigned char offset = 15;
2020
2021		gaps = ma_gaps(node, mt);
2022		do {
2023			gaps[--j] = b_node->gap[--i];
2024			if (gaps[j] > max_gap) {
2025				offset = j;
2026				max_gap = gaps[j];
2027			}
2028		} while (j);
2029
2030		ma_set_meta(node, mt, offset, end);
2031	} else {
2032		mas_leaf_set_meta(mas, node, pivots, mt, end);
2033	}
2034}
2035
2036/*
2037 * mas_descend_adopt() - Descend through a sub-tree and adopt children.
2038 * @mas: the maple state with the maple encoded node of the sub-tree.
2039 *
2040 * Descend through a sub-tree and adopt any children that do not have the
2041 * correct parent set.  Follow the children whose parent is already correct;
2042 * these are the new nodes, and they must be walked to find further
2043 * incorrectly set parents at the lower levels.
2044 */
2045static inline void mas_descend_adopt(struct ma_state *mas)
2046{
2047	struct ma_state list[3], next[3];
2048	int i, n;
2049
2050	/*
2051	 * At each level there may be up to 3 correct parent pointers, which indicate
2052	 * the new nodes that need to be walked to find any new nodes at a lower level.
2053	 */
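	/*
	 * A spanning store creates at most a left, a (rare) middle, and a
	 * right new node at each level (see mas_spanning_rebalance()), so
	 * three states are enough to track every new parent on the way down.
	 */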
2054
2055	for (i = 0; i < 3; i++) {
2056		list[i] = *mas;
2057		list[i].offset = 0;
2058		next[i].offset = 0;
2059	}
2060	next[0] = *mas;
2061
2062	while (!mte_is_leaf(list[0].node)) {
2063		n = 0;
2064		for (i = 0; i < 3; i++) {
2065			if (mas_is_none(&list[i]))
2066				continue;
2067
2068			if (i && list[i-1].node == list[i].node)
2069				continue;
2070
2071			while ((n < 3) && (mas_new_child(&list[i], &next[n])))
2072				n++;
2073
2074			mas_adopt_children(&list[i], list[i].node);
2075		}
2076
2077		while (n < 3)
2078			next[n++].node = MAS_NONE;
2079
2080		/* descend by setting the list to the children */
2081		for (i = 0; i < 3; i++)
2082			list[i] = next[i];
2083	}
2084}
2085
2086/*
2087 * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert.
2088 * @mas: The maple state
2089 * @end: The maple node end
2090 * @mt: The maple node type
2091 */
2092static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end,
2093				      enum maple_type mt)
2094{
2095	if (!(mas->mas_flags & MA_STATE_BULK))
2096		return;
2097
2098	if (mte_is_root(mas->node))
2099		return;
2100
2101	if (end > mt_min_slots[mt]) {
2102		mas->mas_flags &= ~MA_STATE_REBALANCE;
2103		return;
2104	}
2105}
2106
2107/*
2108 * mas_store_b_node() - Store an @entry into the b_node while also copying the
2109 * data from a maple encoded node.
2110 * @wr_mas: the maple write state
2111 * @b_node: the maple_big_node to fill with data
2112 * @offset_end: the offset to end copying
2113 *
2114 * The actual end of the data stored in @b_node is set in @b_node->b_end.
2115 */
2116static inline void mas_store_b_node(struct ma_wr_state *wr_mas,
2117		struct maple_big_node *b_node, unsigned char offset_end)
2118{
2119	unsigned char slot;
2120	unsigned char b_end;
2121	/* piv may underflow here; piv + 1 wraps back to 0 before it is used. */
2122	unsigned long piv;
2123	struct ma_state *mas = wr_mas->mas;
2124
2125	b_node->type = wr_mas->type;
2126	b_end = 0;
2127	slot = mas->offset;
2128	if (slot) {
2129		/* Copy start data up to insert. */
2130		mas_mab_cp(mas, 0, slot - 1, b_node, 0);
2131		b_end = b_node->b_end;
2132		piv = b_node->pivot[b_end - 1];
2133	} else
2134		piv = mas->min - 1;
2135
2136	if (piv + 1 < mas->index) {
2137		/* Handle range starting after old range */
2138		b_node->slot[b_end] = wr_mas->content;
2139		if (!wr_mas->content)
2140			b_node->gap[b_end] = mas->index - 1 - piv;
2141		b_node->pivot[b_end++] = mas->index - 1;
2142	}
2143
2144	/* Store the new entry. */
2145	mas->offset = b_end;
2146	b_node->slot[b_end] = wr_mas->entry;
2147	b_node->pivot[b_end] = mas->last;
2148
2149	/* Appended. */
2150	if (mas->last >= mas->max)
2151		goto b_end;
2152
2153	/* Handle new range ending before old range ends */
2154	piv = mas_logical_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type);
2155	if (piv > mas->last) {
2156		if (piv == ULONG_MAX)
2157			mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type);
2158
2159		if (offset_end != slot)
2160			wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
2161							  offset_end);
2162
2163		b_node->slot[++b_end] = wr_mas->content;
2164		if (!wr_mas->content)
2165			b_node->gap[b_end] = piv - mas->last + 1;
2166		b_node->pivot[b_end] = piv;
2167	}
2168
2169	slot = offset_end + 1;
2170	if (slot > wr_mas->node_end)
2171		goto b_end;
2172
2173	/* Copy end data to the end of the node. */
2174	mas_mab_cp(mas, slot, wr_mas->node_end + 1, b_node, ++b_end);
2175	b_node->b_end--;
2176	return;
2177
2178b_end:
2179	b_node->b_end = b_end;
2180}
2181
2182/*
2183 * mas_prev_sibling() - Find the previous node with the same parent.
2184 * @mas: the maple state
2185 *
2186 * Return: True if there is a previous sibling, false otherwise.
2187 */
2188static inline bool mas_prev_sibling(struct ma_state *mas)
2189{
2190	unsigned int p_slot = mte_parent_slot(mas->node);
2191
2192	if (mte_is_root(mas->node))
2193		return false;
2194
2195	if (!p_slot)
2196		return false;
2197
2198	mas_ascend(mas);
2199	mas->offset = p_slot - 1;
2200	mas_descend(mas);
2201	return true;
2202}
2203
2204/*
2205 * mas_next_sibling() - Find the next node with the same parent.
2206 * @mas: the maple state
2207 *
2208 * Return: true if there is a next sibling, false otherwise.
2209 */
2210static inline bool mas_next_sibling(struct ma_state *mas)
2211{
2212	MA_STATE(parent, mas->tree, mas->index, mas->last);
2213
2214	if (mte_is_root(mas->node))
2215		return false;
2216
2217	parent = *mas;
2218	mas_ascend(&parent);
2219	parent.offset = mte_parent_slot(mas->node) + 1;
2220	if (parent.offset > mas_data_end(&parent))
2221		return false;
2222
2223	*mas = parent;
2224	mas_descend(mas);
2225	return true;
2226}
2227
2228/*
2229 * mte_node_or_none() - Return the encoded node or MAS_NONE.
2230 * @enode: The encoded maple node.
2231 *
2232 * Shorthand to avoid setting %NULLs in the tree or maple_subtree_state.
2233 *
2234 * Return: @enode or MAS_NONE
2235 */
2236static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode)
2237{
2238	if (enode)
2239		return enode;
2240
2241	return ma_enode_ptr(MAS_NONE);
2242}
2243
2244/*
2245 * mas_wr_node_walk() - Find the correct offset for the index in the @mas.
2246 * @wr_mas: The maple write state
2247 *
2248 * Uses mas_slot_locked() and does not need to worry about dead nodes.
2249 */
2250static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas)
2251{
2252	struct ma_state *mas = wr_mas->mas;
2253	unsigned char count;
2254	unsigned char offset;
2255	unsigned long index, min, max;
2256
2257	if (unlikely(ma_is_dense(wr_mas->type))) {
2258		wr_mas->r_max = wr_mas->r_min = mas->index;
2259		mas->offset = mas->index = mas->min;
2260		return;
2261	}
2262
2263	wr_mas->node = mas_mn(wr_mas->mas);
2264	wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type);
2265	count = wr_mas->node_end = ma_data_end(wr_mas->node, wr_mas->type,
2266					       wr_mas->pivots, mas->max);
2267	offset = mas->offset;
2268	min = mas_safe_min(mas, wr_mas->pivots, offset);
2269	if (unlikely(offset == count))
2270		goto max;
2271
2272	max = wr_mas->pivots[offset];
2273	index = mas->index;
2274	if (unlikely(index <= max))
2275		goto done;
2276
2277	if (unlikely(!max && offset))
2278		goto max;
2279
2280	min = max + 1;
2281	while (++offset < count) {
2282		max = wr_mas->pivots[offset];
2283		if (index <= max)
2284			goto done;
2285		else if (unlikely(!max))
2286			break;
2287
2288		min = max + 1;
2289	}
2290
2291max:
2292	max = mas->max;
2293done:
2294	wr_mas->r_max = max;
2295	wr_mas->r_min = min;
2296	wr_mas->offset_end = mas->offset = offset;
2297}
2298
2299/*
2300 * mas_topiary_range() - Add a range of slots to the topiary.
2301 * @mas: The maple state
2302 * @destroy: The topiary to add the slots (usually destroy)
2303 * @start: The starting slot inclusively
2304 * @end: The end slot inclusively
2305 */
2306static inline void mas_topiary_range(struct ma_state *mas,
2307	struct ma_topiary *destroy, unsigned char start, unsigned char end)
2308{
2309	void __rcu **slots;
2310	unsigned char offset;
2311
2312	MT_BUG_ON(mas->tree, mte_is_leaf(mas->node));
2313	slots = ma_slots(mas_mn(mas), mte_node_type(mas->node));
2314	for (offset = start; offset <= end; offset++) {
2315		struct maple_enode *enode = mas_slot_locked(mas, slots, offset);
2316
2317		if (mte_dead_node(enode))
2318			continue;
2319
2320		mat_add(destroy, enode);
2321	}
2322}
2323
2324/*
2325 * mast_topiary() - Add the portions of the tree to the removal list; either to
2326 * be freed or discarded (destroy walk).
2327 * @mast: The maple_subtree_state.
2328 */
2329static inline void mast_topiary(struct maple_subtree_state *mast)
2330{
2331	MA_WR_STATE(wr_mas, mast->orig_l, NULL);
2332	unsigned char r_start, r_end;
2333	unsigned char l_start, l_end;
2334	void __rcu **l_slots, **r_slots;
2335
2336	wr_mas.type = mte_node_type(mast->orig_l->node);
2337	mast->orig_l->index = mast->orig_l->last;
2338	mas_wr_node_walk(&wr_mas);
2339	l_start = mast->orig_l->offset + 1;
2340	l_end = mas_data_end(mast->orig_l);
2341	r_start = 0;
2342	r_end = mast->orig_r->offset;
2343
2344	if (r_end)
2345		r_end--;
2346
2347	l_slots = ma_slots(mas_mn(mast->orig_l),
2348			   mte_node_type(mast->orig_l->node));
2349
2350	r_slots = ma_slots(mas_mn(mast->orig_r),
2351			   mte_node_type(mast->orig_r->node));
2352
2353	if ((l_start < l_end) &&
2354	    mte_dead_node(mas_slot_locked(mast->orig_l, l_slots, l_start))) {
2355		l_start++;
2356	}
2357
2358	if (mte_dead_node(mas_slot_locked(mast->orig_r, r_slots, r_end))) {
2359		if (r_end)
2360			r_end--;
2361	}
2362
2363	if ((l_start > r_end) && (mast->orig_l->node == mast->orig_r->node))
2364		return;
2365
2366	/* At the node where left and right sides meet, add the parts between */
2367	if (mast->orig_l->node == mast->orig_r->node) {
2368		return mas_topiary_range(mast->orig_l, mast->destroy,
2369					     l_start, r_end);
2370	}
2371
2372	/* mast->orig_r is different and consumed. */
2373	if (mte_is_leaf(mast->orig_r->node))
2374		return;
2375
2376	if (mte_dead_node(mas_slot_locked(mast->orig_l, l_slots, l_end)))
2377		l_end--;
2378
2379
2380	if (l_start <= l_end)
2381		mas_topiary_range(mast->orig_l, mast->destroy, l_start, l_end);
2382
2383	if (mte_dead_node(mas_slot_locked(mast->orig_r, r_slots, r_start)))
2384		r_start++;
2385
2386	if (r_start <= r_end)
2387		mas_topiary_range(mast->orig_r, mast->destroy, 0, r_end);
2388}
2389
2390/*
2391 * mast_rebalance_next() - Rebalance against the next node
2392 * @mast: The maple subtree state
2394 */
2395static inline void mast_rebalance_next(struct maple_subtree_state *mast)
2396{
2397	unsigned char b_end = mast->bn->b_end;
2398
2399	mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node),
2400		   mast->bn, b_end);
2401	mast->orig_r->last = mast->orig_r->max;
2402}
2403
2404/*
2405 * mast_rebalance_prev() - Rebalance against the previous node
2406 * @mast: The maple subtree state
2408 */
2409static inline void mast_rebalance_prev(struct maple_subtree_state *mast)
2410{
2411	unsigned char end = mas_data_end(mast->orig_l) + 1;
2412	unsigned char b_end = mast->bn->b_end;
2413
2414	mab_shift_right(mast->bn, end);
2415	mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0);
2416	mast->l->min = mast->orig_l->min;
2417	mast->orig_l->index = mast->orig_l->min;
2418	mast->bn->b_end = end + b_end;
2419	mast->l->offset += end;
2420}
2421
2422/*
2423 * mast_spanning_rebalance() - Rebalance nodes against the nearest neighbour,
2424 * favouring the node to the right.  Check the nodes to the right, then to the
2425 * left, at each level upwards until the root is reached.  Free and destroy as
2426 * needed.  Data is copied into @mast->bn.
2427 * @mast: The maple_subtree_state.
2428 */
2429static inline
2430bool mast_spanning_rebalance(struct maple_subtree_state *mast)
2431{
2432	struct ma_state r_tmp = *mast->orig_r;
2433	struct ma_state l_tmp = *mast->orig_l;
2434	struct maple_enode *ancestor = NULL;
2435	unsigned char start, end;
2436	unsigned char depth = 0;
2437
2440	do {
2441		mas_ascend(mast->orig_r);
2442		mas_ascend(mast->orig_l);
2443		depth++;
2444		if (!ancestor &&
2445		    (mast->orig_r->node == mast->orig_l->node)) {
2446			ancestor = mast->orig_r->node;
2447			end = mast->orig_r->offset - 1;
2448			start = mast->orig_l->offset + 1;
2449		}
2450
2451		if (mast->orig_r->offset < mas_data_end(mast->orig_r)) {
2452			if (!ancestor) {
2453				ancestor = mast->orig_r->node;
2454				start = 0;
2455			}
2456
2457			mast->orig_r->offset++;
2458			do {
2459				mas_descend(mast->orig_r);
2460				mast->orig_r->offset = 0;
2461				depth--;
2462			} while (depth);
2463
2464			mast_rebalance_next(mast);
2465			do {
2466				unsigned char l_off = 0;
2467				struct maple_enode *child = r_tmp.node;
2468
2469				mas_ascend(&r_tmp);
2470				if (ancestor == r_tmp.node)
2471					l_off = start;
2472
2473				if (r_tmp.offset)
2474					r_tmp.offset--;
2475
2476				if (l_off < r_tmp.offset)
2477					mas_topiary_range(&r_tmp, mast->destroy,
2478							  l_off, r_tmp.offset);
2479
2480				if (l_tmp.node != child)
2481					mat_add(mast->free, child);
2482
2483			} while (r_tmp.node != ancestor);
2484
2485			*mast->orig_l = l_tmp;
2486			return true;
2487
2488		} else if (mast->orig_l->offset != 0) {
2489			if (!ancestor) {
2490				ancestor = mast->orig_l->node;
2491				end = mas_data_end(mast->orig_l);
2492			}
2493
2494			mast->orig_l->offset--;
2495			do {
2496				mas_descend(mast->orig_l);
2497				mast->orig_l->offset =
2498					mas_data_end(mast->orig_l);
2499				depth--;
2500			} while (depth);
2501
2502			mast_rebalance_prev(mast);
2503			do {
2504				unsigned char r_off;
2505				struct maple_enode *child = l_tmp.node;
2506
2507				mas_ascend(&l_tmp);
2508				if (ancestor == l_tmp.node)
2509					r_off = end;
2510				else
2511					r_off = mas_data_end(&l_tmp);
2512
2513				if (l_tmp.offset < r_off)
2514					l_tmp.offset++;
2515
2516				if (l_tmp.offset < r_off)
2517					mas_topiary_range(&l_tmp, mast->destroy,
2518							  l_tmp.offset, r_off);
2519
2520				if (r_tmp.node != child)
2521					mat_add(mast->free, child);
2522
2523			} while (l_tmp.node != ancestor);
2524
2525			*mast->orig_r = r_tmp;
2526			return true;
2527		}
2528	} while (!mte_is_root(mast->orig_r->node));
2529
2530	*mast->orig_r = r_tmp;
2531	*mast->orig_l = l_tmp;
2532	return false;
2533}
2534
2535/*
2536 * mast_ascend_free() - Add current original maple state nodes to the free list
2537 * and ascend.
2538 * @mast: the maple subtree state.
2539 *
2540 * Ascend the original left and right sides and add the previous nodes to the
2541 * free list.  Set the slots to point to the correct location in the new nodes.
2542 */
2543static inline void
2544mast_ascend_free(struct maple_subtree_state *mast)
2545{
2546	MA_WR_STATE(wr_mas, mast->orig_r,  NULL);
2547	struct maple_enode *left = mast->orig_l->node;
2548	struct maple_enode *right = mast->orig_r->node;
2549
2550	mas_ascend(mast->orig_l);
2551	mas_ascend(mast->orig_r);
2552	mat_add(mast->free, left);
2553
2554	if (left != right)
2555		mat_add(mast->free, right);
2556
2557	mast->orig_r->offset = 0;
2558	mast->orig_r->index = mast->r->max;
2559	/* last should be larger than or equal to index */
2560	if (mast->orig_r->last < mast->orig_r->index)
2561		mast->orig_r->last = mast->orig_r->index;
2562	/*
2563	 * The node may not contain the value so set slot to ensure all
2564	 * of the nodes contents are freed or destroyed.
2565	 */
2566	wr_mas.type = mte_node_type(mast->orig_r->node);
2567	mas_wr_node_walk(&wr_mas);
2568	/* Set up the left side of things */
2569	mast->orig_l->offset = 0;
2570	mast->orig_l->index = mast->l->min;
2571	wr_mas.mas = mast->orig_l;
2572	wr_mas.type = mte_node_type(mast->orig_l->node);
2573	mas_wr_node_walk(&wr_mas);
2574
2575	mast->bn->type = wr_mas.type;
2576}
2577
2578/*
2579 * mas_new_ma_node() - Create and return a new maple node.  Helper function.
2580 * @mas: the maple state with the allocations.
2581 * @b_node: the maple_big_node with the type encoding.
2582 *
2583 * Use the node type from the maple_big_node to allocate a new node from the
2584 * ma_state.  This function exists mainly for code readability.
2585 *
2586 * Return: A new maple encoded node
2587 */
2588static inline struct maple_enode
2589*mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node)
2590{
2591	return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type);
2592}
2593
2594/*
2595 * mas_mab_to_node() - Set up right and middle nodes
2596 *
2597 * @mas: the maple state that contains the allocations.
2598 * @b_node: the node which contains the data.
2599 * @left: The pointer which will have the left node
2600 * @right: The pointer which may have the right node
2601 * @middle: the pointer which may have the middle node (rare)
2602 * @mid_split: the split location for the middle node
 * @min: The minimum of the range covered by @b_node
2603 *
2604 * Return: the split of left.
2605 */
2606static inline unsigned char mas_mab_to_node(struct ma_state *mas,
2607	struct maple_big_node *b_node, struct maple_enode **left,
2608	struct maple_enode **right, struct maple_enode **middle,
2609	unsigned char *mid_split, unsigned long min)
2610{
2611	unsigned char split = 0;
2612	unsigned char slot_count = mt_slots[b_node->type];
2613
2614	*left = mas_new_ma_node(mas, b_node);
2615	*right = NULL;
2616	*middle = NULL;
2617	*mid_split = 0;
2618
2619	if (b_node->b_end < slot_count) {
2620		split = b_node->b_end;
2621	} else {
2622		split = mab_calc_split(mas, b_node, mid_split, min);
2623		*right = mas_new_ma_node(mas, b_node);
2624	}
2625
2626	if (*mid_split)
2627		*middle = mas_new_ma_node(mas, b_node);
2628
2629	return split;
2630
2631}
2632
2633/*
2634 * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end
2635 * pointer.
2636 * @b_node: the big node to add the entry
2637 * @mas: the maple state to get the pivot (mas->max)
2638 * @entry: the entry to add, if NULL nothing happens.
2639 */
2640static inline void mab_set_b_end(struct maple_big_node *b_node,
2641				 struct ma_state *mas,
2642				 void *entry)
2643{
2644	if (!entry)
2645		return;
2646
2647	b_node->slot[b_node->b_end] = entry;
2648	if (mt_is_alloc(mas->tree))
2649		b_node->gap[b_node->b_end] = mas_max_gap(mas);
2650	b_node->pivot[b_node->b_end++] = mas->max;
2651}
2652
2653/*
2654 * mas_set_split_parent() - combine_then_separate helper function.  Sets the parent
2655 * of @mas->node to either @left or @right, depending on @slot and @split
2656 *
2657 * @mas: the maple state with the node that needs a parent
2658 * @left: possible parent 1
2659 * @right: possible parent 2
2660 * @slot: the slot the mas->node was placed
2661 * @split: the split location between @left and @right
2662 */
2663static inline void mas_set_split_parent(struct ma_state *mas,
2664					struct maple_enode *left,
2665					struct maple_enode *right,
2666					unsigned char *slot, unsigned char split)
2667{
2668	if (mas_is_none(mas))
2669		return;
2670
2671	if ((*slot) <= split)
2672		mte_set_parent(mas->node, left, *slot);
2673	else if (right)
2674		mte_set_parent(mas->node, right, (*slot) - split - 1);
2675
2676	(*slot)++;
2677}
2678
2679/*
2680 * mte_mid_split_check() - Check if the next node passes the mid-split
2681 * @l: Pointer to the left encoded maple node.
2682 * @r: Pointer to the right encoded maple node.
2683 * @right: The encoded maple node to the right of the middle split.
2684 * @slot: The offset
2685 * @split: Pointer to the split location.
2686 * @mid_split: The middle split.
2687 */
2688static inline void mte_mid_split_check(struct maple_enode **l,
2689				       struct maple_enode **r,
2690				       struct maple_enode *right,
2691				       unsigned char slot,
2692				       unsigned char *split,
2693				       unsigned char mid_split)
2694{
2695	if (*r == right)
2696		return;
2697
2698	if (slot < mid_split)
2699		return;
2700
2701	*l = *r;
2702	*r = right;
2703	*split = mid_split;
2704}
2705
2706/*
2707 * mast_set_split_parents() - Helper function to set three nodes parents.  Slot
2708 * is taken from @mast->l.
2709 * @mast: the maple subtree state
2710 * @left: the left node
 * @middle: the middle node (may be NULL)
2711 * @right: the right node
2712 * @split: the split location.
 * @mid_split: the middle split location.
2713 */
2714static inline void mast_set_split_parents(struct maple_subtree_state *mast,
2715					  struct maple_enode *left,
2716					  struct maple_enode *middle,
2717					  struct maple_enode *right,
2718					  unsigned char split,
2719					  unsigned char mid_split)
2720{
2721	unsigned char slot;
2722	struct maple_enode *l = left;
2723	struct maple_enode *r = right;
2724
2725	if (mas_is_none(mast->l))
2726		return;
2727
2728	if (middle)
2729		r = middle;
2730
2731	slot = mast->l->offset;
2732
2733	mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2734	mas_set_split_parent(mast->l, l, r, &slot, split);
2735
2736	mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2737	mas_set_split_parent(mast->m, l, r, &slot, split);
2738
2739	mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2740	mas_set_split_parent(mast->r, l, r, &slot, split);
2741}
2742
2743/*
2744 * mas_wmb_replace() - Write memory barrier and replace
2745 * @mas: The maple state
2746 * @free: the maple topiary list of nodes to free
2747 * @destroy: The maple topiary list of nodes to destroy (walk and free)
2748 *
2749 * Updates gap as necessary.
2750 */
2751static inline void mas_wmb_replace(struct ma_state *mas,
2752				   struct ma_topiary *free,
2753				   struct ma_topiary *destroy)
2754{
2755	/* All nodes must see old data as dead prior to replacing that data */
2756	smp_wmb(); /* Needed for RCU */
2757
2758	/* Insert the new data in the tree */
2759	mas_replace(mas, true);
2760
2761	if (!mte_is_leaf(mas->node))
2762		mas_descend_adopt(mas);
2763
2764	mas_mat_free(mas, free);
2765
2766	if (destroy)
2767		mas_mat_destroy(mas, destroy);
2768
2769	if (mte_is_leaf(mas->node))
2770		return;
2771
2772	mas_update_gap(mas);
2773}
2774
2775/*
2776 * mast_new_root() - Set a new tree root during subtree creation
2777 * @mast: The maple subtree state
2778 * @mas: The maple state
2779 */
2780static inline void mast_new_root(struct maple_subtree_state *mast,
2781				 struct ma_state *mas)
2782{
2783	mas_mn(mast->l)->parent =
2784		ma_parent_ptr(((unsigned long)mas->tree | MA_ROOT_PARENT));
2785	if (!mte_dead_node(mast->orig_l->node) &&
2786	    !mte_is_root(mast->orig_l->node)) {
2787		do {
2788			mast_ascend_free(mast);
2789			mast_topiary(mast);
2790		} while (!mte_is_root(mast->orig_l->node));
2791	}
2792	if ((mast->orig_l->node != mas->node) &&
2793		   (mast->l->depth > mas_mt_height(mas))) {
2794		mat_add(mast->free, mas->node);
2795	}
2796}
2797
2798/*
2799 * mast_cp_to_nodes() - Copy data out to nodes.
2800 * @mast: The maple subtree state
2801 * @left: The left encoded maple node
2802 * @middle: The middle encoded maple node
2803 * @right: The right encoded maple node
2804 * @split: The location to split between left and (middle ? middle : right)
2805 * @mid_split: The location to split between middle and right.
2806 */
2807static inline void mast_cp_to_nodes(struct maple_subtree_state *mast,
2808	struct maple_enode *left, struct maple_enode *middle,
2809	struct maple_enode *right, unsigned char split, unsigned char mid_split)
2810{
2811	bool new_lmax = true;
2812
2813	mast->l->node = mte_node_or_none(left);
2814	mast->m->node = mte_node_or_none(middle);
2815	mast->r->node = mte_node_or_none(right);
2816
2817	mast->l->min = mast->orig_l->min;
2818	if (split == mast->bn->b_end) {
2819		mast->l->max = mast->orig_r->max;
2820		new_lmax = false;
2821	}
2822
2823	mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax);
2824
2825	if (middle) {
2826		mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true);
2827		mast->m->min = mast->bn->pivot[split] + 1;
2828		split = mid_split;
2829	}
2830
2831	mast->r->max = mast->orig_r->max;
2832	if (right) {
2833		mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false);
2834		mast->r->min = mast->bn->pivot[split] + 1;
2835	}
2836}
2837
2838/*
2839 * mast_combine_cp_left() - Copy in the original left side of the tree into the
2840 * combined data set in the maple subtree state big node.
2841 * @mast: The maple subtree state
2842 */
2843static inline void mast_combine_cp_left(struct maple_subtree_state *mast)
2844{
2845	unsigned char l_slot = mast->orig_l->offset;
2846
2847	if (!l_slot)
2848		return;
2849
2850	mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0);
2851}
2852
2853/*
2854 * mast_combine_cp_right() - Copy in the original right side of the tree into the
2855 * combined data set in the maple subtree state big node.
2856 * @mast: The maple subtree state
2857 */
2858static inline void mast_combine_cp_right(struct maple_subtree_state *mast)
2859{
2860	if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max)
2861		return;
2862
2863	mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1,
2864		   mt_slot_count(mast->orig_r->node), mast->bn,
2865		   mast->bn->b_end);
2866	mast->orig_r->last = mast->orig_r->max;
2867}
2868
2869/*
2870 * mast_sufficient() - Check if the maple subtree state has enough data in the big
2871 * node to create at least one sufficient node
2872 * @mast: the maple subtree state
2873 */
2874static inline bool mast_sufficient(struct maple_subtree_state *mast)
2875{
2876	if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node))
2877		return true;
2878
2879	return false;
2880}
2881
2882/*
2883 * mast_overflow() - Check if there is too much data in the subtree state for a
2884 * single node.
2885 * @mast: The maple subtree state
2886 */
2887static inline bool mast_overflow(struct maple_subtree_state *mast)
2888{
2889	if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node))
2890		return true;
2891
2892	return false;
2893}
2894
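/*
 * mtree_range_walk() - Walk the tree to the leaf containing @mas->index and
 * set the range of the entry found.
 * @mas: The maple state
 *
 * On success, mas->index and mas->last are set to the range of the found
 * entry and mas->node points to the leaf.  On a dead node the maple state is
 * reset.
 *
 * Return: The entry containing @mas->index (which may be %NULL), or %NULL if
 * a dead node was encountered.
 */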
2895static inline void *mtree_range_walk(struct ma_state *mas)
2896{
2897	unsigned long *pivots;
2898	unsigned char offset;
2899	struct maple_node *node;
2900	struct maple_enode *next, *last;
2901	enum maple_type type;
2902	void __rcu **slots;
2903	unsigned char end;
2904	unsigned long max, min;
2905	unsigned long prev_max, prev_min;
2906
2907	next = mas->node;
2908	min = mas->min;
2909	max = mas->max;
2910	do {
2911		offset = 0;
2912		last = next;
2913		node = mte_to_node(next);
2914		type = mte_node_type(next);
2915		pivots = ma_pivots(node, type);
2916		end = ma_data_end(node, type, pivots, max);
2917		if (unlikely(ma_dead_node(node)))
2918			goto dead_node;
2919
2920		if (pivots[offset] >= mas->index) {
2921			prev_max = max;
2922			prev_min = min;
2923			max = pivots[offset];
2924			goto next;
2925		}
2926
2927		do {
2928			offset++;
2929		} while ((offset < end) && (pivots[offset] < mas->index));
2930
2931		prev_min = min;
2932		min = pivots[offset - 1] + 1;
2933		prev_max = max;
2934		if (likely(offset < end && pivots[offset]))
2935			max = pivots[offset];
2936
2937next:
2938		slots = ma_slots(node, type);
2939		next = mt_slot(mas->tree, slots, offset);
2940		if (unlikely(ma_dead_node(node)))
2941			goto dead_node;
2942	} while (!ma_is_leaf(type));
2943
2944	mas->offset = offset;
2945	mas->index = min;
2946	mas->last = max;
2947	mas->min = prev_min;
2948	mas->max = prev_max;
2949	mas->node = last;
2950	return (void *) next;
2951
2952dead_node:
2953	mas_reset(mas);
2954	return NULL;
2955}
2956
2957/*
2958 * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers.
2959 * @mas: The starting maple state
2960 * @mast: The maple_subtree_state, keeps track of 4 maple states.
2961 * @count: The estimated count of iterations needed.
2962 *
2963 * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root
2964 * is hit.  First @b_node is split into two entries which are inserted into the
2965 * next iteration of the loop.  @b_node is returned populated with the final
2966 * iteration. @mas is used to obtain allocations.  orig_l_mas keeps track of the
2967 * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last
2968 * to account for what has been copied into the new sub-tree.  The update of
2969 * orig_l_mas->last is used in mas_consume to find the slots that will need to
2970 * be either freed or destroyed.  orig_l_mas->depth keeps track of the height of
2971 * the new sub-tree in case the sub-tree becomes the full tree.
2972 *
2973 * Return: the number of elements in b_node during the last loop.
2974 */
2975static int mas_spanning_rebalance(struct ma_state *mas,
2976		struct maple_subtree_state *mast, unsigned char count)
2977{
2978	unsigned char split, mid_split;
2979	unsigned char slot = 0;
2980	struct maple_enode *left = NULL, *middle = NULL, *right = NULL;
2981
2982	MA_STATE(l_mas, mas->tree, mas->index, mas->index);
2983	MA_STATE(r_mas, mas->tree, mas->index, mas->last);
2984	MA_STATE(m_mas, mas->tree, mas->index, mas->index);
2985	MA_TOPIARY(free, mas->tree);
2986	MA_TOPIARY(destroy, mas->tree);
2987
2988	/*
2989	 * The tree needs to be rebalanced and leaves need to be kept at the same level.
2990	 * Rebalancing is done by use of the ``struct maple_topiary``.
2991	 */
2992	mast->l = &l_mas;
2993	mast->m = &m_mas;
2994	mast->r = &r_mas;
2995	mast->free = &free;
2996	mast->destroy = &destroy;
2997	l_mas.node = r_mas.node = m_mas.node = MAS_NONE;
2998
2999	/* Check if this is not root and has sufficient data.  */
3000	if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) &&
3001	    unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type]))
3002		mast_spanning_rebalance(mast);
3003
3004	mast->orig_l->depth = 0;
3005
3006	/*
3007	 * Each level of the tree is examined and balanced, pushing data to the left or
3008	 * right, or rebalancing against left or right nodes is employed to avoid
3009	 * rippling up the tree to limit the amount of churn.  Once a new sub-section of
3010	 * the tree is created, there may be a mix of new and old nodes.  The old nodes
3011	 * will have the incorrect parent pointers and currently be in two trees: the
3012	 * original tree and the partially new tree.  To remedy the parent pointers in
3013	 * the old tree, the new data is swapped into the active tree and a walk down
3014	 * the tree is performed and the parent pointers are updated.
3015	 * See mas_descend_adopt() for more information.
3016	 */
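	/*
	 * Rough sketch of one iteration of the loop below: split mast->bn into
	 * left/middle/right nodes, set the children's parent pointers, copy the
	 * data out to the new nodes, then ascend and refill mast->bn with the
	 * untouched portions of the old parents plus the new children for the
	 * next level up.
	 */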
3017	while (count--) {
3018		mast->bn->b_end--;
3019		mast->bn->type = mte_node_type(mast->orig_l->node);
3020		split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle,
3021					&mid_split, mast->orig_l->min);
3022		mast_set_split_parents(mast, left, middle, right, split,
3023				       mid_split);
3024		mast_cp_to_nodes(mast, left, middle, right, split, mid_split);
3025
3026		/*
3027		 * Copy data from next level in the tree to mast->bn from next
3028		 * iteration
3029		 */
3030		memset(mast->bn, 0, sizeof(struct maple_big_node));
3031		mast->bn->type = mte_node_type(left);
3032		mast->orig_l->depth++;
3033
3034		/* Root already stored in l->node. */
3035		if (mas_is_root_limits(mast->l))
3036			goto new_root;
3037
3038		mast_ascend_free(mast);
3039		mast_combine_cp_left(mast);
3040		l_mas.offset = mast->bn->b_end;
3041		mab_set_b_end(mast->bn, &l_mas, left);
3042		mab_set_b_end(mast->bn, &m_mas, middle);
3043		mab_set_b_end(mast->bn, &r_mas, right);
3044
3045		/* Copy anything necessary out of the right node. */
3046		mast_combine_cp_right(mast);
3047		mast_topiary(mast);
3048		mast->orig_l->last = mast->orig_l->max;
3049
3050		if (mast_sufficient(mast))
3051			continue;
3052
3053		if (mast_overflow(mast))
3054			continue;
3055
3056		/* May be a new root stored in mast->bn */
3057		if (mas_is_root_limits(mast->orig_l))
3058			break;
3059
3060		mast_spanning_rebalance(mast);
3061
3062		/* rebalancing from other nodes may require another loop. */
3063		if (!count)
3064			count++;
3065	}
3066
3067	l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)),
3068				mte_node_type(mast->orig_l->node));
3069	mast->orig_l->depth++;
3070	mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true);
3071	mte_set_parent(left, l_mas.node, slot);
3072	if (middle)
3073		mte_set_parent(middle, l_mas.node, ++slot);
3074
3075	if (right)
3076		mte_set_parent(right, l_mas.node, ++slot);
3077
3078	if (mas_is_root_limits(mast->l)) {
3079new_root:
3080		mast_new_root(mast, mas);
3081	} else {
3082		mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent;
3083	}
3084
3085	if (!mte_dead_node(mast->orig_l->node))
3086		mat_add(&free, mast->orig_l->node);
3087
3088	mas->depth = mast->orig_l->depth;
3089	*mast->orig_l = l_mas;
3090	mte_set_node_dead(mas->node);
3091
3092	/* Set up mas for insertion. */
3093	mast->orig_l->depth = mas->depth;
3094	mast->orig_l->alloc = mas->alloc;
3095	*mas = *mast->orig_l;
3096	mas_wmb_replace(mas, &free, &destroy);
3097	mtree_range_walk(mas);
3098	return mast->bn->b_end;
3099}
3100
3101/*
3102 * mas_rebalance() - Rebalance a given node.
3103 * @mas: The maple state
3104 * @b_node: The big maple node.
3105 *
3106 * Rebalance two nodes into a single node or two new nodes that are sufficient.
3107 * Continue upwards until tree is sufficient.
3108 *
3109 * Return: the number of elements in b_node during the last loop.
3110 */
3111static inline int mas_rebalance(struct ma_state *mas,
3112				struct maple_big_node *b_node)
3113{
3114	char empty_count = mas_mt_height(mas);
3115	struct maple_subtree_state mast;
3116	unsigned char shift, b_end = ++b_node->b_end;
3117
3118	MA_STATE(l_mas, mas->tree, mas->index, mas->last);
3119	MA_STATE(r_mas, mas->tree, mas->index, mas->last);
3120
3121	trace_ma_op(__func__, mas);
3122
3123	/*
3124	 * Rebalancing occurs if a node is insufficient.  Data is rebalanced
3125	 * against the node to the right if it exists, otherwise the node to the
3126	 * left of this node is rebalanced against this node.  If rebalancing
3127	 * causes just one node to be produced instead of two, then the parent
3128	 * is also examined and rebalanced if it is insufficient.  Every level
3129	 * tries to combine the data in the same way.  If one node contains the
3130	 * entire range of the tree, then that node is used as a new root node.
3131	 */
3132	mas_node_count(mas, 1 + empty_count * 3);
3133	if (mas_is_err(mas))
3134		return 0;
3135
3136	mast.orig_l = &l_mas;
3137	mast.orig_r = &r_mas;
3138	mast.bn = b_node;
3139	mast.bn->type = mte_node_type(mas->node);
3140
3141	l_mas = r_mas = *mas;
3142
3143	if (mas_next_sibling(&r_mas)) {
3144		mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end);
3145		r_mas.last = r_mas.index = r_mas.max;
3146	} else {
3147		mas_prev_sibling(&l_mas);
3148		shift = mas_data_end(&l_mas) + 1;
3149		mab_shift_right(b_node, shift);
3150		mas->offset += shift;
3151		mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0);
3152		b_node->b_end = shift + b_end;
3153		l_mas.index = l_mas.last = l_mas.min;
3154	}
3155
3156	return mas_spanning_rebalance(mas, &mast, empty_count);
3157}
3158
3159/*
3160 * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple
3161 * state.
3162 * @mas: The maple state
3163 * @end: The end of the left-most node.
3164 *
3165 * During a mass-insert event (such as forking), it may be necessary to
3166 * rebalance the left-most node when it is not sufficient.
3167 */
3168static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end)
3169{
3170	enum maple_type mt = mte_node_type(mas->node);
3171	struct maple_node reuse, *newnode, *parent, *new_left, *left, *node;
3172	struct maple_enode *eparent;
3173	unsigned char offset, tmp, split = mt_slots[mt] / 2;
3174	void __rcu **l_slots, **slots;
3175	unsigned long *l_pivs, *pivs, gap;
3176	bool in_rcu = mt_in_rcu(mas->tree);
3177
3178	MA_STATE(l_mas, mas->tree, mas->index, mas->last);
3179
3180	l_mas = *mas;
3181	mas_prev_sibling(&l_mas);
3182
3183	/* set up node. */
3184	if (in_rcu) {
3185		/* Allocate for both left and right as well as parent. */
3186		mas_node_count(mas, 3);
3187		if (mas_is_err(mas))
3188			return;
3189
3190		newnode = mas_pop_node(mas);
3191	} else {
3192		newnode = &reuse;
3193	}
3194
3195	node = mas_mn(mas);
3196	newnode->parent = node->parent;
3197	slots = ma_slots(newnode, mt);
3198	pivs = ma_pivots(newnode, mt);
3199	left = mas_mn(&l_mas);
3200	l_slots = ma_slots(left, mt);
3201	l_pivs = ma_pivots(left, mt);
3202	if (!l_slots[split])
3203		split++;
3204	tmp = mas_data_end(&l_mas) - split;
3205
3206	memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp);
3207	memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp);
3208	pivs[tmp] = l_mas.max;
3209	memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end);
3210	memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end);
3211
3212	l_mas.max = l_pivs[split];
3213	mas->min = l_mas.max + 1;
3214	eparent = mt_mk_node(mte_parent(l_mas.node),
3215			     mas_parent_enum(&l_mas, l_mas.node));
3216	tmp += end;
3217	if (!in_rcu) {
3218		unsigned char max_p = mt_pivots[mt];
3219		unsigned char max_s = mt_slots[mt];
3220
3221		if (tmp < max_p)
3222			memset(pivs + tmp, 0,
3223			       sizeof(unsigned long *) * (max_p - tmp));
3224
3225		if (tmp < mt_slots[mt])
3226			memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp));
3227
3228		memcpy(node, newnode, sizeof(struct maple_node));
3229		ma_set_meta(node, mt, 0, tmp - 1);
3230		mte_set_pivot(eparent, mte_parent_slot(l_mas.node),
3231			      l_pivs[split]);
3232
3233		/* Remove data from l_pivs. */
3234		tmp = split + 1;
3235		memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp));
3236		memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp));
3237		ma_set_meta(left, mt, 0, split);
3238
3239		goto done;
3240	}
3241
3242	/* RCU requires replacing both l_mas, mas, and parent. */
3243	mas->node = mt_mk_node(newnode, mt);
3244	ma_set_meta(newnode, mt, 0, tmp);
3245
3246	new_left = mas_pop_node(mas);
3247	new_left->parent = left->parent;
3248	mt = mte_node_type(l_mas.node);
3249	slots = ma_slots(new_left, mt);
3250	pivs = ma_pivots(new_left, mt);
3251	memcpy(slots, l_slots, sizeof(void *) * split);
3252	memcpy(pivs, l_pivs, sizeof(unsigned long) * split);
3253	ma_set_meta(new_left, mt, 0, split);
3254	l_mas.node = mt_mk_node(new_left, mt);
3255
3256	/* replace parent. */
3257	offset = mte_parent_slot(mas->node);
3258	mt = mas_parent_enum(&l_mas, l_mas.node);
3259	parent = mas_pop_node(mas);
3260	slots = ma_slots(parent, mt);
3261	pivs = ma_pivots(parent, mt);
3262	memcpy(parent, mte_to_node(eparent), sizeof(struct maple_node));
3263	rcu_assign_pointer(slots[offset], mas->node);
3264	rcu_assign_pointer(slots[offset - 1], l_mas.node);
3265	pivs[offset - 1] = l_mas.max;
3266	eparent = mt_mk_node(parent, mt);
3267done:
3268	gap = mas_leaf_max_gap(mas);
3269	mte_set_gap(eparent, mte_parent_slot(mas->node), gap);
3270	gap = mas_leaf_max_gap(&l_mas);
3271	mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap);
3272	mas_ascend(mas);
3273
3274	if (in_rcu)
3275		mas_replace(mas, false);
3276
3277	mas_update_gap(mas);
3278}
3279
3280/*
3281 * mas_split_final_node() - Split the final node in a subtree operation.
3282 * @mast: the maple subtree state
3283 * @mas: The maple state
3284 * @height: The height of the tree in case it's a new root.
3285 */
3286static inline bool mas_split_final_node(struct maple_subtree_state *mast,
3287					struct ma_state *mas, int height)
3288{
3289	struct maple_enode *ancestor;
3290
3291	if (mte_is_root(mas->node)) {
3292		if (mt_is_alloc(mas->tree))
3293			mast->bn->type = maple_arange_64;
3294		else
3295			mast->bn->type = maple_range_64;
3296		mas->depth = height;
3297	}
3298	/*
3299	 * Only a single node is used here, could be root.
3300	 * The Big_node data should just fit in a single node.
3301	 */
3302	ancestor = mas_new_ma_node(mas, mast->bn);
3303	mte_set_parent(mast->l->node, ancestor, mast->l->offset);
3304	mte_set_parent(mast->r->node, ancestor, mast->r->offset);
3305	mte_to_node(ancestor)->parent = mas_mn(mas)->parent;
3306
3307	mast->l->node = ancestor;
3308	mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true);
3309	mas->offset = mast->bn->b_end - 1;
3310	return true;
3311}
3312
3313/*
3314 * mast_fill_bnode() - Copy data into the big node in the subtree state
3315 * @mast: The maple subtree state
3316 * @mas: the maple state
3317 * @skip: The number of entries to skip when inserting the new nodes.
3318 */
3319static inline void mast_fill_bnode(struct maple_subtree_state *mast,
3320					 struct ma_state *mas,
3321					 unsigned char skip)
3322{
3323	bool cp = true;
3324	struct maple_enode *old = mas->node;
3325	unsigned char split;
3326
3327	memset(mast->bn->gap, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->gap));
3328	memset(mast->bn->slot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->slot));
3329	memset(mast->bn->pivot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->pivot));
3330	mast->bn->b_end = 0;
3331
3332	if (mte_is_root(mas->node)) {
3333		cp = false;
3334	} else {
3335		mas_ascend(mas);
3336		mat_add(mast->free, old);
3337		mas->offset = mte_parent_slot(mas->node);
3338	}
3339
3340	if (cp && mast->l->offset)
3341		mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0);
3342
3343	split = mast->bn->b_end;
3344	mab_set_b_end(mast->bn, mast->l, mast->l->node);
3345	mast->r->offset = mast->bn->b_end;
3346	mab_set_b_end(mast->bn, mast->r, mast->r->node);
3347	if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max)
3348		cp = false;
3349
3350	if (cp)
3351		mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1,
3352			   mast->bn, mast->bn->b_end);
3353
3354	mast->bn->b_end--;
3355	mast->bn->type = mte_node_type(mas->node);
3356}
3357
3358/*
3359 * mast_split_data() - Split the data in the subtree state big node into regular
3360 * nodes.
3361 * @mast: The maple subtree state
3362 * @mas: The maple state
3363 * @split: The location to split the big node
3364 */
3365static inline void mast_split_data(struct maple_subtree_state *mast,
3366	   struct ma_state *mas, unsigned char split)
3367{
3368	unsigned char p_slot;
3369
3370	mab_mas_cp(mast->bn, 0, split, mast->l, true);
3371	mte_set_pivot(mast->r->node, 0, mast->r->max);
3372	mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false);
3373	mast->l->offset = mte_parent_slot(mas->node);
3374	mast->l->max = mast->bn->pivot[split];
3375	mast->r->min = mast->l->max + 1;
3376	if (mte_is_leaf(mas->node))
3377		return;
3378
3379	p_slot = mast->orig_l->offset;
3380	mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node,
3381			     &p_slot, split);
3382	mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node,
3383			     &p_slot, split);
3384}
3385
3386/*
3387 * mas_push_data() - Push data to the left or right sibling node, if there is
3388 * room, instead of splitting.
3389 * @mas: The maple state
3390 * @height: The current height of the maple state
3391 * @mast: The maple subtree state
3392 * @left: Push left or not.
3393 *
3394 * Keeping the height of the tree low means faster lookups.
3395 *
3396 * Return: True if pushed, false otherwise.
3397 */
3398static inline bool mas_push_data(struct ma_state *mas, int height,
3399				 struct maple_subtree_state *mast, bool left)
3400{
3401	unsigned char slot_total = mast->bn->b_end;
3402	unsigned char end, space, split;
3403
3404	MA_STATE(tmp_mas, mas->tree, mas->index, mas->last);
3405	tmp_mas = *mas;
3406	tmp_mas.depth = mast->l->depth;
3407
3408	if (left && !mas_prev_sibling(&tmp_mas))
3409		return false;
3410	else if (!left && !mas_next_sibling(&tmp_mas))
3411		return false;
3412
3413	end = mas_data_end(&tmp_mas);
3414	slot_total += end;
3415	space = 2 * mt_slot_count(mas->node) - 2;
3416	/* -2 instead of -1 to ensure there isn't a triple split */
3417	if (ma_is_leaf(mast->bn->type))
3418		space--;
3419
3420	if (mas->max == ULONG_MAX)
3421		space--;
3422
3423	if (slot_total >= space)
3424		return false;
3425
3426	/* Get the data; Fill mast->bn */
3427	mast->bn->b_end++;
3428	if (left) {
3429		mab_shift_right(mast->bn, end + 1);
3430		mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0);
3431		mast->bn->b_end = slot_total + 1;
3432	} else {
3433		mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end);
3434	}
3435
3436	/* Configure mast for splitting of mast->bn */
3437	split = mt_slots[mast->bn->type] - 2;
3438	if (left) {
3439		/*  Switch mas to prev node  */
3440		mat_add(mast->free, mas->node);
3441		*mas = tmp_mas;
3442		/* Start using mast->l for the left side. */
3443		tmp_mas.node = mast->l->node;
3444		*mast->l = tmp_mas;
3445	} else {
3446		mat_add(mast->free, tmp_mas.node);
3447		tmp_mas.node = mast->r->node;
3448		*mast->r = tmp_mas;
3449		split = slot_total - split;
3450	}
3451	split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]);
3452	/* Update parent slot for split calculation. */
3453	if (left)
3454		mast->orig_l->offset += end + 1;
3455
3456	mast_split_data(mast, mas, split);
3457	mast_fill_bnode(mast, mas, 2);
3458	mas_split_final_node(mast, mas, height + 1);
3459	return true;
3460}
3461
3462/*
3463 * mas_split() - Split data that is too big for one node into two.
3464 * @mas: The maple state
3465 * @b_node: The maple big node
3466 * Return: 1 on success, 0 on failure.
3467 */
3468static int mas_split(struct ma_state *mas, struct maple_big_node *b_node)
3469{
3470
3471	struct maple_subtree_state mast;
3472	int height = 0;
3473	unsigned char mid_split, split = 0;
3474
3475	/*
3476	 * Splitting is handled differently from any other B-tree; the Maple
3477	 * Tree splits upwards.  Splitting up means that the split operation
3478	 * occurs when the walk of the tree hits the leaves and not on the way
3479	 * down.  The reason for splitting up is that it is impossible to know
3480	 * how much space will be needed until the leaf is (or leaves are)
3481	 * reached.  Since overwriting data is allowed and a range could
3482	 * overwrite more than one range or result in changing one entry into 3
3483	 * entries, it is impossible to know if a split is required until the
3484	 * data is examined.
3485	 *
3486	 * Splitting is a balancing act between keeping allocations to a minimum
3487	 * and avoiding a 'jitter' event where a tree is expanded to make room
3488	 * for an entry followed by a contraction when the entry is removed.  To
3489	 * accomplish the balance, there are empty slots remaining in both left
3490	 * and right nodes after a split.
3491	 */
3492	MA_STATE(l_mas, mas->tree, mas->index, mas->last);
3493	MA_STATE(r_mas, mas->tree, mas->index, mas->last);
3494	MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last);
3495	MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last);
3496	MA_TOPIARY(mat, mas->tree);
3497
3498	trace_ma_op(__func__, mas);
3499	mas->depth = mas_mt_height(mas);
3500	/* Allocation failures will happen early. */
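	/*
	 * 1 + depth * 2: up to a new left and a new right node at each level,
	 * plus one for the final ancestor (possibly a new root).  This is an
	 * informal accounting; see the loop below.
	 */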
3501	mas_node_count(mas, 1 + mas->depth * 2);
3502	if (mas_is_err(mas))
3503		return 0;
3504
3505	mast.l = &l_mas;
3506	mast.r = &r_mas;
3507	mast.orig_l = &prev_l_mas;
3508	mast.orig_r = &prev_r_mas;
3509	mast.free = &mat;
3510	mast.bn = b_node;
3511
3512	while (height++ <= mas->depth) {
3513		if (mt_slots[b_node->type] > b_node->b_end) {
3514			mas_split_final_node(&mast, mas, height);
3515			break;
3516		}
3517
3518		l_mas = r_mas = *mas;
3519		l_mas.node = mas_new_ma_node(mas, b_node);
3520		r_mas.node = mas_new_ma_node(mas, b_node);
3521		/*
3522		 * Another way that 'jitter' is avoided is to terminate a split up early if the
3523		 * left or right node has space to spare.  This is referred to as "pushing left"
3524		 * or "pushing right" and is similar to the B* tree, except the nodes left or
3525		 * right can rarely be reused due to RCU, but the ripple upwards is halted which
3526		 * is a significant savings.
3527		 */
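		/*
		 * For instance, if the overflowing leaf and its left sibling
		 * together still fit comfortably in two nodes (the space check
		 * in mas_push_data()), the entries are redistributed between
		 * the two siblings and the upward ripple stops here.
		 */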
3528		/* Try to push left. */
3529		if (mas_push_data(mas, height, &mast, true))
3530			break;
3531
3532		/* Try to push right. */
3533		if (mas_push_data(mas, height, &mast, false))
3534			break;
3535
3536		split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min);
3537		mast_split_data(&mast, mas, split);
3538		/*
3539		 * mast.r->max is usually already correct, but mab_mas_cp() in
3540		 * the call above can overwrite it, so restore it here.
3541		 */
3542		mast.r->max = mas->max;
3543		mast_fill_bnode(&mast, mas, 1);
3544		prev_l_mas = *mast.l;
3545		prev_r_mas = *mast.r;
3546	}
3547
3548	/* Set the original node as dead */
3549	mat_add(mast.free, mas->node);
3550	mas->node = l_mas.node;
3551	mas_wmb_replace(mas, mast.free, NULL);
3552	mtree_range_walk(mas);
3553	return 1;
3554}
3555
3556/*
3557 * mas_reuse_node() - Reuse the node to store the data.
3558 * @wr_mas: The maple write state
3559 * @bn: The maple big node
3560 * @end: The end of the data.
3561 *
3562 * Will always return false in RCU mode.
3563 *
3564 * Return: True if node was reused, false otherwise.
3565 */
3566static inline bool mas_reuse_node(struct ma_wr_state *wr_mas,
3567			  struct maple_big_node *bn, unsigned char end)
3568{
3569	/* Need to be rcu safe. */
3570	if (mt_in_rcu(wr_mas->mas->tree))
3571		return false;
3572
3573	if (end > bn->b_end) {
3574		int clear = mt_slots[wr_mas->type] - bn->b_end;
3575
3576		memset(wr_mas->slots + bn->b_end, 0, sizeof(void *) * clear--);
3577		memset(wr_mas->pivots + bn->b_end, 0, sizeof(void *) * clear);
3578	}
3579	mab_mas_cp(bn, 0, bn->b_end, wr_mas->mas, false);
3580	return true;
3581}
3582
3583/*
3584 * mas_commit_b_node() - Commit the big node into the tree.
3585 * @wr_mas: The maple write state
3586 * @b_node: The maple big node
3587 * @end: The end of the data.
3588 */
3589static inline int mas_commit_b_node(struct ma_wr_state *wr_mas,
3590			    struct maple_big_node *b_node, unsigned char end)
3591{
3592	struct maple_node *node;
3593	unsigned char b_end = b_node->b_end;
3594	enum maple_type b_type = b_node->type;
3595
3596	if ((b_end < mt_min_slots[b_type]) &&
3597	    (!mte_is_root(wr_mas->mas->node)) &&
3598	    (mas_mt_height(wr_mas->mas) > 1))
3599		return mas_rebalance(wr_mas->mas, b_node);
3600
3601	if (b_end >= mt_slots[b_type])
3602		return mas_split(wr_mas->mas, b_node);
3603
3604	if (mas_reuse_node(wr_mas, b_node, end))
3605		goto reuse_node;
3606
3607	mas_node_count(wr_mas->mas, 1);
3608	if (mas_is_err(wr_mas->mas))
3609		return 0;
3610
3611	node = mas_pop_node(wr_mas->mas);
3612	node->parent = mas_mn(wr_mas->mas)->parent;
3613	wr_mas->mas->node = mt_mk_node(node, b_type);
3614	mab_mas_cp(b_node, 0, b_end, wr_mas->mas, false);
3615	mas_replace(wr_mas->mas, false);
3616reuse_node:
3617	mas_update_gap(wr_mas->mas);
3618	return 1;
3619}
3620
3621/*
3622 * mas_root_expand() - Expand a root to a node
3623 * @mas: The maple state
3624 * @entry: The entry to store into the tree
3625 */
3626static inline int mas_root_expand(struct ma_state *mas, void *entry)
3627{
3628	void *contents = mas_root_locked(mas);
3629	enum maple_type type = maple_leaf_64;
3630	struct maple_node *node;
3631	void __rcu **slots;
3632	unsigned long *pivots;
3633	int slot = 0;
3634
3635	mas_node_count(mas, 1);
3636	if (unlikely(mas_is_err(mas)))
3637		return 0;
3638
3639	node = mas_pop_node(mas);
3640	pivots = ma_pivots(node, type);
3641	slots = ma_slots(node, type);
3642	node->parent = ma_parent_ptr(
3643		      ((unsigned long)mas->tree | MA_ROOT_PARENT));
3644	mas->node = mt_mk_node(node, type);
3645
3646	if (mas->index) {
3647		if (contents) {
3648			rcu_assign_pointer(slots[slot], contents);
3649			if (likely(mas->index > 1))
3650				slot++;
3651		}
3652		pivots[slot++] = mas->index - 1;
3653	}
3654
3655	rcu_assign_pointer(slots[slot], entry);
3656	mas->offset = slot;
3657	pivots[slot] = mas->last;
3658	if (mas->last != ULONG_MAX)
3659		slot++;
3660	mas->depth = 1;
3661	mas_set_height(mas);
3662
3663	/* swap the new root into the tree */
3664	rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
3665	ma_set_meta(node, maple_leaf_64, 0, slot);
3666	return slot;
3667}
3668
3669static inline void mas_store_root(struct ma_state *mas, void *entry)
3670{
3671	if (likely((mas->last != 0) || (mas->index != 0)))
3672		mas_root_expand(mas, entry);
3673	else if (((unsigned long) (entry) & 3) == 2)
3674		mas_root_expand(mas, entry);
3675	else {
3676		rcu_assign_pointer(mas->tree->ma_root, entry);
3677		mas->node = MAS_START;
3678	}
3679}
3680
3681/*
3682 * mas_is_span_wr() - Check if the write needs to be treated as a write that
3683 * spans the node.
3684 * @wr_mas: The maple write state (holds the maple state, pivot, node type
 * and the entry being written)
3688 *
3689 * Spanning writes are writes that start in one node and end in another, or
3690 * writes of a %NULL that will cause the node to end with a %NULL.
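 *
 * For example (illustrative ranges): in a leaf covering 10-100, a store with
 * mas->last == 150 ends in another node and spans; a store of %NULL ending
 * exactly at 100 also spans, since the node would then end with a %NULL.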
3691 *
3692 * Return: True if this is a spanning write, false otherwise.
3693 */
3694static bool mas_is_span_wr(struct ma_wr_state *wr_mas)
3695{
3696	unsigned long max;
3697	unsigned long last = wr_mas->mas->last;
3698	unsigned long piv = wr_mas->r_max;
3699	enum maple_type type = wr_mas->type;
3700	void *entry = wr_mas->entry;
3701
3702	/* Contained in this pivot */
3703	if (piv > last)
3704		return false;
3705
3706	max = wr_mas->mas->max;
3707	if (unlikely(ma_is_leaf(type))) {
3708		/* Fits in the node, but may span slots. */
3709		if (last < max)
3710			return false;
3711
3712		/* Writes to the end of the node but not null. */
3713		if ((last == max) && entry)
3714			return false;
3715
3716		/*
3717		 * Writing ULONG_MAX is not a spanning write regardless of the
3718		 * value being written as long as the range fits in the node.
3719		 */
3720		if ((last == ULONG_MAX) && (last == max))
3721			return false;
3722	} else if (piv == last) {
3723		if (entry)
3724			return false;
3725
3726		/* Detect spanning store wr walk */
3727		if (last == ULONG_MAX)
3728			return false;
3729	}
3730
3731	trace_ma_write(__func__, wr_mas->mas, piv, entry);
3732
3733	return true;
3734}
3735
3736static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas)
3737{
3738	wr_mas->type = mte_node_type(wr_mas->mas->node);
3739	mas_wr_node_walk(wr_mas);
3740	wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type);
3741}
3742
3743static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas)
3744{
3745	wr_mas->mas->max = wr_mas->r_max;
3746	wr_mas->mas->min = wr_mas->r_min;
3747	wr_mas->mas->node = wr_mas->content;
3748	wr_mas->mas->offset = 0;
3749	wr_mas->mas->depth++;
3750}
3751/*
3752 * mas_wr_walk() - Walk the tree for a write.
3753 * @wr_mas: The maple write state
3754 *
3755 * Uses mas_slot_locked() and does not need to worry about dead nodes.
3756 *
3757 * Return: True if it's contained in a node, false on spanning write.
3758 */
3759static bool mas_wr_walk(struct ma_wr_state *wr_mas)
3760{
3761	struct ma_state *mas = wr_mas->mas;
3762
3763	while (true) {
3764		mas_wr_walk_descend(wr_mas);
3765		if (unlikely(mas_is_span_wr(wr_mas)))
3766			return false;
3767
3768		wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
3769						  mas->offset);
3770		if (ma_is_leaf(wr_mas->type))
3771			return true;
3772
3773		mas_wr_walk_traverse(wr_mas);
3774	}
3775
3776	return true;
3777}
3778
3779static bool mas_wr_walk_index(struct ma_wr_state *wr_mas)
3780{
3781	struct ma_state *mas = wr_mas->mas;
3782
3783	while (true) {
3784		mas_wr_walk_descend(wr_mas);
3785		wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
3786						  mas->offset);
3787		if (ma_is_leaf(wr_mas->type))
3788			return true;
3789		mas_wr_walk_traverse(wr_mas);
3791	}
3792	return true;
3793}
3794/*
3795 * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs.
3796 * @l_wr_mas: The left maple write state
3797 * @r_wr_mas: The right maple write state
3798 */
3799static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas,
3800					    struct ma_wr_state *r_wr_mas)
3801{
3802	struct ma_state *r_mas = r_wr_mas->mas;
3803	struct ma_state *l_mas = l_wr_mas->mas;
3804	unsigned char l_slot;
3805
3806	l_slot = l_mas->offset;
3807	if (!l_wr_mas->content)
3808		l_mas->index = l_wr_mas->r_min;
3809
3810	if ((l_mas->index == l_wr_mas->r_min) &&
3811		 (l_slot &&
3812		  !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) {
3813		if (l_slot > 1)
3814			l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1;
3815		else
3816			l_mas->index = l_mas->min;
3817
3818		l_mas->offset = l_slot - 1;
3819	}
3820
3821	if (!r_wr_mas->content) {
3822		if (r_mas->last < r_wr_mas->r_max)
3823			r_mas->last = r_wr_mas->r_max;
3824		r_mas->offset++;
3825	} else if ((r_mas->last == r_wr_mas->r_max) &&
3826	    (r_mas->last < r_mas->max) &&
3827	    !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) {
3828		r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots,
3829					     r_wr_mas->type, r_mas->offset + 1);
3830		r_mas->offset++;
3831	}
3832}
3833
3834static inline void *mas_state_walk(struct ma_state *mas)
3835{
3836	void *entry;
3837
3838	entry = mas_start(mas);
3839	if (mas_is_none(mas))
3840		return NULL;
3841
3842	if (mas_is_ptr(mas))
3843		return entry;
3844
3845	return mtree_range_walk(mas);
3846}
3847
3848/*
 3849 * mtree_lookup_walk() - Internal quick lookup that does not keep the maple
 3850 * state up to date.
3851 *
3852 * @mas: The maple state.
3853 *
 3854 * Note: Leaves @mas in an undesirable state; do not rely on it afterwards.
3855 * Return: The entry for @mas->index or %NULL on dead node.
3856 */
3857static inline void *mtree_lookup_walk(struct ma_state *mas)
3858{
3859	unsigned long *pivots;
3860	unsigned char offset;
3861	struct maple_node *node;
3862	struct maple_enode *next;
3863	enum maple_type type;
3864	void __rcu **slots;
3865	unsigned char end;
3866	unsigned long max;
3867
3868	next = mas->node;
3869	max = ULONG_MAX;
3870	do {
3871		offset = 0;
3872		node = mte_to_node(next);
3873		type = mte_node_type(next);
3874		pivots = ma_pivots(node, type);
3875		end = ma_data_end(node, type, pivots, max);
3876		if (unlikely(ma_dead_node(node)))
3877			goto dead_node;
3878
3879		if (pivots[offset] >= mas->index)
3880			goto next;
3881
3882		do {
3883			offset++;
3884		} while ((offset < end) && (pivots[offset] < mas->index));
3885
3886		if (likely(offset > end))
3887			max = pivots[offset];
3888
3889next:
3890		slots = ma_slots(node, type);
3891		next = mt_slot(mas->tree, slots, offset);
3892		if (unlikely(ma_dead_node(node)))
3893			goto dead_node;
3894	} while (!ma_is_leaf(type));
3895
3896	return (void *) next;
3897
3898dead_node:
3899	mas_reset(mas);
3900	return NULL;
3901}
3902
3903/*
3904 * mas_new_root() - Create a new root node that only contains the entry passed
3905 * in.
3906 * @mas: The maple state
3907 * @entry: The entry to store.
3908 *
 3909 * Only valid when mas->index == 0 and mas->last == ULONG_MAX.
 3910 *
 3911 * Return: 0 on error, 1 on success.
3912 */
3913static inline int mas_new_root(struct ma_state *mas, void *entry)
3914{
3915	struct maple_enode *root = mas_root_locked(mas);
3916	enum maple_type type = maple_leaf_64;
3917	struct maple_node *node;
3918	void __rcu **slots;
3919	unsigned long *pivots;
3920
3921	if (!entry && !mas->index && mas->last == ULONG_MAX) {
3922		mas->depth = 0;
3923		mas_set_height(mas);
3924		rcu_assign_pointer(mas->tree->ma_root, entry);
3925		mas->node = MAS_START;
3926		goto done;
3927	}
3928
3929	mas_node_count(mas, 1);
3930	if (mas_is_err(mas))
3931		return 0;
3932
3933	node = mas_pop_node(mas);
3934	pivots = ma_pivots(node, type);
3935	slots = ma_slots(node, type);
3936	node->parent = ma_parent_ptr(
3937		      ((unsigned long)mas->tree | MA_ROOT_PARENT));
3938	mas->node = mt_mk_node(node, type);
3939	rcu_assign_pointer(slots[0], entry);
3940	pivots[0] = mas->last;
3941	mas->depth = 1;
3942	mas_set_height(mas);
3943	rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
3944
3945done:
3946	if (xa_is_node(root))
3947		mte_destroy_walk(root, mas->tree);
3948
3949	return 1;
3950}
3951/*
3952 * mas_wr_spanning_store() - Create a subtree with the store operation completed
3953 * and new nodes where necessary, then place the sub-tree in the actual tree.
3954 * Note that mas is expected to point to the node which caused the store to
3955 * span.
3956 * @wr_mas: The maple write state
3957 *
3958 * Return: 0 on error, positive on success.
3959 */
3960static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas)
3961{
3962	struct maple_subtree_state mast;
3963	struct maple_big_node b_node;
3964	struct ma_state *mas;
3965	unsigned char height;
3966
3967	/* Left and Right side of spanning store */
3968	MA_STATE(l_mas, NULL, 0, 0);
3969	MA_STATE(r_mas, NULL, 0, 0);
3970
3971	MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry);
3972	MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry);
3973
3974	/*
3975	 * A store operation that spans multiple nodes is called a spanning
3976	 * store and is handled early in the store call stack by the function
3977	 * mas_is_span_wr().  When a spanning store is identified, the maple
3978	 * state is duplicated.  The first maple state walks the left tree path
3979	 * to ``index``, the duplicate walks the right tree path to ``last``.
3980	 * The data in the two nodes are combined into a single node, two nodes,
3981	 * or possibly three nodes (see the 3-way split above).  A ``NULL``
3982	 * written to the last entry of a node is considered a spanning store as
3983	 * a rebalance is required for the operation to complete and an overflow
3984	 * of data may happen.
3985	 */
3986	mas = wr_mas->mas;
3987	trace_ma_op(__func__, mas);
3988
3989	if (unlikely(!mas->index && mas->last == ULONG_MAX))
3990		return mas_new_root(mas, wr_mas->entry);
3991	/*
3992	 * Node rebalancing may occur due to this store, so there may be three new
3993	 * entries per level plus a new root.
3994	 */
3995	height = mas_mt_height(mas);
3996	mas_node_count(mas, 1 + height * 3);
3997	if (mas_is_err(mas))
3998		return 0;
3999
4000	/*
4001	 * Set up right side.  Need to get to the next offset after the spanning
4002	 * store to ensure it's not NULL and to combine both the next node and
4003	 * the node with the start together.
4004	 */
4005	r_mas = *mas;
4006	/* Avoid overflow, walk to next slot in the tree. */
4007	if (r_mas.last + 1)
4008		r_mas.last++;
4009
4010	r_mas.index = r_mas.last;
4011	mas_wr_walk_index(&r_wr_mas);
4012	r_mas.last = r_mas.index = mas->last;
4013
4014	/* Set up left side. */
4015	l_mas = *mas;
4016	mas_wr_walk_index(&l_wr_mas);
4017
4018	if (!wr_mas->entry) {
4019		mas_extend_spanning_null(&l_wr_mas, &r_wr_mas);
4020		mas->offset = l_mas.offset;
4021		mas->index = l_mas.index;
4022		mas->last = l_mas.last = r_mas.last;
4023	}
4024
4025	/* expanding NULLs may make this cover the entire range */
4026	if (!l_mas.index && r_mas.last == ULONG_MAX) {
4027		mas_set_range(mas, 0, ULONG_MAX);
4028		return mas_new_root(mas, wr_mas->entry);
4029	}
4030
4031	memset(&b_node, 0, sizeof(struct maple_big_node));
4032	/* Copy l_mas and store the value in b_node. */
4033	mas_store_b_node(&l_wr_mas, &b_node, l_wr_mas.node_end);
4034	/* Copy r_mas into b_node. */
4035	if (r_mas.offset <= r_wr_mas.node_end)
4036		mas_mab_cp(&r_mas, r_mas.offset, r_wr_mas.node_end,
4037			   &b_node, b_node.b_end + 1);
4038	else
4039		b_node.b_end++;
4040
4041	/* Stop spanning searches by searching for just index. */
4042	l_mas.index = l_mas.last = mas->index;
4043
4044	mast.bn = &b_node;
4045	mast.orig_l = &l_mas;
4046	mast.orig_r = &r_mas;
4047	/* Combine l_mas and r_mas and split them up evenly again. */
4048	return mas_spanning_rebalance(mas, &mast, height + 1);
4049}
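
/*
 * Illustrative example (editorial sketch, not part of the original source).
 * From the external API, a spanning store is simply a store whose range
 * crosses a leaf boundary; example_mt and the entry values are hypothetical.
 *
 *	DEFINE_MTREE(example_mt);
 *	unsigned long i;
 *
 *	Populate enough single-index entries to create several leaf nodes:
 *		for (i = 0; i < 1000; i++)
 *			mtree_store(&example_mt, i, xa_mk_value(i), GFP_KERNEL);
 *
 *	One store over [100, 900] starts in one leaf and ends in another, so it
 *	is resolved by mas_wr_spanning_store() rebuilding the affected subtree:
 *		mtree_store_range(&example_mt, 100, 900, xa_mk_value(0), GFP_KERNEL);
 */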
4050
4051/*
4052 * mas_wr_node_store() - Attempt to store the value in a node
4053 * @wr_mas: The maple write state
4054 *
4055 * Attempts to reuse the node, but may allocate.
4056 *
4057 * Return: True if stored, false otherwise
4058 */
4059static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas)
4060{
4061	struct ma_state *mas = wr_mas->mas;
4062	void __rcu **dst_slots;
4063	unsigned long *dst_pivots;
4064	unsigned char dst_offset;
4065	unsigned char new_end = wr_mas->node_end;
4066	unsigned char offset;
4067	unsigned char node_slots = mt_slots[wr_mas->type];
4068	struct maple_node reuse, *newnode;
4069	unsigned char copy_size, max_piv = mt_pivots[wr_mas->type];
4070	bool in_rcu = mt_in_rcu(mas->tree);
4071
4072	offset = mas->offset;
4073	if (mas->last == wr_mas->r_max) {
4074		/* runs right to the end of the node */
4075		if (mas->last == mas->max)
4076			new_end = offset;
4077		/* don't copy this offset */
4078		wr_mas->offset_end++;
4079	} else if (mas->last < wr_mas->r_max) {
4080		/* new range ends in this range */
4081		if (unlikely(wr_mas->r_max == ULONG_MAX))
4082			mas_bulk_rebalance(mas, wr_mas->node_end, wr_mas->type);
4083
4084		new_end++;
4085	} else {
4086		if (wr_mas->end_piv == mas->last)
4087			wr_mas->offset_end++;
4088
4089		new_end -= wr_mas->offset_end - offset - 1;
4090	}
4091
4092	/* new range starts within a range */
4093	if (wr_mas->r_min < mas->index)
4094		new_end++;
4095
4096	/* Not enough room */
4097	if (new_end >= node_slots)
4098		return false;
4099
4100	/* Not enough data. */
4101	if (!mte_is_root(mas->node) && (new_end <= mt_min_slots[wr_mas->type]) &&
4102	    !(mas->mas_flags & MA_STATE_BULK))
4103		return false;
4104
4105	/* set up node. */
4106	if (in_rcu) {
4107		mas_node_count(mas, 1);
4108		if (mas_is_err(mas))
4109			return false;
4110
4111		newnode = mas_pop_node(mas);
4112	} else {
4113		memset(&reuse, 0, sizeof(struct maple_node));
4114		newnode = &reuse;
4115	}
4116
4117	newnode->parent = mas_mn(mas)->parent;
4118	dst_pivots = ma_pivots(newnode, wr_mas->type);
4119	dst_slots = ma_slots(newnode, wr_mas->type);
4120	/* Copy from start to insert point */
4121	memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * (offset + 1));
4122	memcpy(dst_slots, wr_mas->slots, sizeof(void *) * (offset + 1));
4123	dst_offset = offset;
4124
4125	/* Handle insert of new range starting after old range */
4126	if (wr_mas->r_min < mas->index) {
4127		mas->offset++;
4128		rcu_assign_pointer(dst_slots[dst_offset], wr_mas->content);
4129		dst_pivots[dst_offset++] = mas->index - 1;
4130	}
4131
4132	/* Store the new entry and range end. */
4133	if (dst_offset < max_piv)
4134		dst_pivots[dst_offset] = mas->last;
4135	mas->offset = dst_offset;
4136	rcu_assign_pointer(dst_slots[dst_offset], wr_mas->entry);
4137
4138	/*
4139	 * this range wrote to the end of the node or it overwrote the rest of
4140	 * the data
4141	 */
4142	if (wr_mas->offset_end > wr_mas->node_end || mas->last >= mas->max) {
4143		new_end = dst_offset;
4144		goto done;
4145	}
4146
4147	dst_offset++;
4148	/* Copy to the end of node if necessary. */
4149	copy_size = wr_mas->node_end - wr_mas->offset_end + 1;
4150	memcpy(dst_slots + dst_offset, wr_mas->slots + wr_mas->offset_end,
4151	       sizeof(void *) * copy_size);
4152	if (dst_offset < max_piv) {
4153		if (copy_size > max_piv - dst_offset)
4154			copy_size = max_piv - dst_offset;
4155
4156		memcpy(dst_pivots + dst_offset,
4157		       wr_mas->pivots + wr_mas->offset_end,
4158		       sizeof(unsigned long) * copy_size);
4159	}
4160
4161	if ((wr_mas->node_end == node_slots - 1) && (new_end < node_slots - 1))
4162		dst_pivots[new_end] = mas->max;
4163
4164done:
4165	mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end);
4166	if (in_rcu) {
4167		mas->node = mt_mk_node(newnode, wr_mas->type);
4168		mas_replace(mas, false);
4169	} else {
4170		memcpy(wr_mas->node, newnode, sizeof(struct maple_node));
4171	}
4172	trace_ma_write(__func__, mas, 0, wr_mas->entry);
4173	mas_update_gap(mas);
4174	return true;
4175}
4176
4177/*
 4178 * mas_wr_slot_store() - Attempt to store a value in a slot.
4179 * @wr_mas: the maple write state
4180 *
4181 * Return: True if stored, false otherwise
4182 */
4183static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas)
4184{
4185	struct ma_state *mas = wr_mas->mas;
4186	unsigned long lmax; /* Logical max. */
4187	unsigned char offset = mas->offset;
4188
4189	if ((wr_mas->r_max > mas->last) && ((wr_mas->r_min != mas->index) ||
4190				  (offset != wr_mas->node_end)))
4191		return false;
4192
4193	if (offset == wr_mas->node_end - 1)
4194		lmax = mas->max;
4195	else
4196		lmax = wr_mas->pivots[offset + 1];
4197
4198	/* going to overwrite too many slots. */
4199	if (lmax < mas->last)
4200		return false;
4201
4202	if (wr_mas->r_min == mas->index) {
4203		/* overwriting two or more ranges with one. */
4204		if (lmax == mas->last)
4205			return false;
4206
4207		/* Overwriting all of offset and a portion of offset + 1. */
4208		rcu_assign_pointer(wr_mas->slots[offset], wr_mas->entry);
4209		wr_mas->pivots[offset] = mas->last;
4210		goto done;
4211	}
4212
4213	/* Doesn't end on the next range end. */
4214	if (lmax != mas->last)
4215		return false;
4216
4217	/* Overwriting a portion of offset and all of offset + 1 */
4218	if ((offset + 1 < mt_pivots[wr_mas->type]) &&
4219	    (wr_mas->entry || wr_mas->pivots[offset + 1]))
4220		wr_mas->pivots[offset + 1] = mas->last;
4221
4222	rcu_assign_pointer(wr_mas->slots[offset + 1], wr_mas->entry);
4223	wr_mas->pivots[offset] = mas->index - 1;
4224	mas->offset++; /* Keep mas accurate. */
4225
4226done:
4227	trace_ma_write(__func__, mas, 0, wr_mas->entry);
4228	mas_update_gap(mas);
4229	return true;
4230}
4231
4232static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas)
4233{
4234	while ((wr_mas->mas->last > wr_mas->end_piv) &&
4235	       (wr_mas->offset_end < wr_mas->node_end))
4236		wr_mas->end_piv = wr_mas->pivots[++wr_mas->offset_end];
4237
4238	if (wr_mas->mas->last > wr_mas->end_piv)
4239		wr_mas->end_piv = wr_mas->mas->max;
4240}
4241
4242static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas)
4243{
4244	struct ma_state *mas = wr_mas->mas;
4245
4246	if (mas->last < wr_mas->end_piv && !wr_mas->slots[wr_mas->offset_end])
4247		mas->last = wr_mas->end_piv;
4248
4249	/* Check next slot(s) if we are overwriting the end */
4250	if ((mas->last == wr_mas->end_piv) &&
4251	    (wr_mas->node_end != wr_mas->offset_end) &&
4252	    !wr_mas->slots[wr_mas->offset_end + 1]) {
4253		wr_mas->offset_end++;
4254		if (wr_mas->offset_end == wr_mas->node_end)
4255			mas->last = mas->max;
4256		else
4257			mas->last = wr_mas->pivots[wr_mas->offset_end];
4258		wr_mas->end_piv = mas->last;
4259	}
4260
4261	if (!wr_mas->content) {
4262		/* If this one is null, the next and prev are not */
4263		mas->index = wr_mas->r_min;
4264	} else {
4265		/* Check prev slot if we are overwriting the start */
4266		if (mas->index == wr_mas->r_min && mas->offset &&
4267		    !wr_mas->slots[mas->offset - 1]) {
4268			mas->offset--;
4269			wr_mas->r_min = mas->index =
4270				mas_safe_min(mas, wr_mas->pivots, mas->offset);
4271			wr_mas->r_max = wr_mas->pivots[mas->offset];
4272		}
4273	}
4274}
4275
4276static inline bool mas_wr_append(struct ma_wr_state *wr_mas)
4277{
4278	unsigned char end = wr_mas->node_end;
4279	unsigned char new_end = end + 1;
4280	struct ma_state *mas = wr_mas->mas;
4281	unsigned char node_pivots = mt_pivots[wr_mas->type];
4282
4283	if ((mas->index != wr_mas->r_min) && (mas->last == wr_mas->r_max)) {
4284		if (new_end < node_pivots)
4285			wr_mas->pivots[new_end] = wr_mas->pivots[end];
4286
4287		if (new_end < node_pivots)
4288			ma_set_meta(wr_mas->node, maple_leaf_64, 0, new_end);
4289
4290		rcu_assign_pointer(wr_mas->slots[new_end], wr_mas->entry);
4291		mas->offset = new_end;
4292		wr_mas->pivots[end] = mas->index - 1;
4293
4294		return true;
4295	}
4296
4297	if ((mas->index == wr_mas->r_min) && (mas->last < wr_mas->r_max)) {
4298		if (new_end < node_pivots)
4299			wr_mas->pivots[new_end] = wr_mas->pivots[end];
4300
4301		rcu_assign_pointer(wr_mas->slots[new_end], wr_mas->content);
4302		if (new_end < node_pivots)
4303			ma_set_meta(wr_mas->node, maple_leaf_64, 0, new_end);
4304
4305		wr_mas->pivots[end] = mas->last;
4306		rcu_assign_pointer(wr_mas->slots[end], wr_mas->entry);
4307		return true;
4308	}
4309
4310	return false;
4311}
4312
4313/*
4314 * mas_wr_bnode() - Slow path for a modification.
4315 * @wr_mas: The write maple state
4316 *
 4317 * This is where split and rebalance end up.
4318 */
4319static void mas_wr_bnode(struct ma_wr_state *wr_mas)
4320{
4321	struct maple_big_node b_node;
4322
4323	trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry);
4324	memset(&b_node, 0, sizeof(struct maple_big_node));
4325	mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end);
4326	mas_commit_b_node(wr_mas, &b_node, wr_mas->node_end);
4327}
4328
4329static inline void mas_wr_modify(struct ma_wr_state *wr_mas)
4330{
4331	unsigned char node_slots;
4332	unsigned char node_size;
4333	struct ma_state *mas = wr_mas->mas;
4334
4335	/* Direct replacement */
4336	if (wr_mas->r_min == mas->index && wr_mas->r_max == mas->last) {
4337		rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry);
4338		if (!!wr_mas->entry ^ !!wr_mas->content)
4339			mas_update_gap(mas);
4340		return;
4341	}
4342
4343	/* Attempt to append */
4344	node_slots = mt_slots[wr_mas->type];
4345	node_size = wr_mas->node_end - wr_mas->offset_end + mas->offset + 2;
4346	if (mas->max == ULONG_MAX)
4347		node_size++;
4348
4349	/* slot and node store will not fit, go to the slow path */
4350	if (unlikely(node_size >= node_slots))
4351		goto slow_path;
4352
4353	if (wr_mas->entry && (wr_mas->node_end < node_slots - 1) &&
4354	    (mas->offset == wr_mas->node_end) && mas_wr_append(wr_mas)) {
4355		if (!wr_mas->content || !wr_mas->entry)
4356			mas_update_gap(mas);
4357		return;
4358	}
4359
4360	if ((wr_mas->offset_end - mas->offset <= 1) && mas_wr_slot_store(wr_mas))
4361		return;
4362	else if (mas_wr_node_store(wr_mas))
4363		return;
4364
4365	if (mas_is_err(mas))
4366		return;
4367
4368slow_path:
4369	mas_wr_bnode(wr_mas);
4370}
4371
4372/*
4373 * mas_wr_store_entry() - Internal call to store a value
 4374 * @wr_mas: The maple write state
 4375 *
 4376 * Return: The contents that were stored at the index.
4378 */
4379static inline void *mas_wr_store_entry(struct ma_wr_state *wr_mas)
4380{
4381	struct ma_state *mas = wr_mas->mas;
4382
4383	wr_mas->content = mas_start(mas);
4384	if (mas_is_none(mas) || mas_is_ptr(mas)) {
4385		mas_store_root(mas, wr_mas->entry);
4386		return wr_mas->content;
4387	}
4388
4389	if (unlikely(!mas_wr_walk(wr_mas))) {
4390		mas_wr_spanning_store(wr_mas);
4391		return wr_mas->content;
4392	}
4393
4394	/* At this point, we are at the leaf node that needs to be altered. */
4395	wr_mas->end_piv = wr_mas->r_max;
4396	mas_wr_end_piv(wr_mas);
4397
4398	if (!wr_mas->entry)
4399		mas_wr_extend_null(wr_mas);
4400
4401	/* New root for a single pointer */
4402	if (unlikely(!mas->index && mas->last == ULONG_MAX)) {
4403		mas_new_root(mas, wr_mas->entry);
4404		return wr_mas->content;
4405	}
4406
4407	mas_wr_modify(wr_mas);
4408	return wr_mas->content;
4409}
4410
4411/**
4412 * mas_insert() - Internal call to insert a value
4413 * @mas: The maple state
4414 * @entry: The entry to store
4415 *
 4416 * Return: %NULL on success, otherwise the existing contents at the requested
 4417 * index.  The maple state needs to be checked for error conditions.
4418 */
4419static inline void *mas_insert(struct ma_state *mas, void *entry)
4420{
4421	MA_WR_STATE(wr_mas, mas, entry);
4422
4423	/*
4424	 * Inserting a new range inserts either 0, 1, or 2 pivots within the
4425	 * tree.  If the insert fits exactly into an existing gap with a value
4426	 * of NULL, then the slot only needs to be written with the new value.
4427	 * If the range being inserted is adjacent to another range, then only a
4428	 * single pivot needs to be inserted (as well as writing the entry).  If
4429	 * the new range is within a gap but does not touch any other ranges,
4430	 * then two pivots need to be inserted: the start - 1, and the end.  As
4431	 * usual, the entry must be written.  Most operations require a new node
4432	 * to be allocated and replace an existing node to ensure RCU safety,
4433	 * when in RCU mode.  The exception to requiring a newly allocated node
4434	 * is when inserting at the end of a node (appending).  When done
4435	 * carefully, appending can reuse the node in place.
4436	 */
4437	wr_mas.content = mas_start(mas);
4438	if (wr_mas.content)
4439		goto exists;
4440
4441	if (mas_is_none(mas) || mas_is_ptr(mas)) {
4442		mas_store_root(mas, entry);
4443		return NULL;
4444	}
4445
4446	/* spanning writes always overwrite something */
4447	if (!mas_wr_walk(&wr_mas))
4448		goto exists;
4449
4450	/* At this point, we are at the leaf node that needs to be altered. */
4451	wr_mas.offset_end = mas->offset;
4452	wr_mas.end_piv = wr_mas.r_max;
4453
4454	if (wr_mas.content || (mas->last > wr_mas.r_max))
4455		goto exists;
4456
4457	if (!entry)
4458		return NULL;
4459
4460	mas_wr_modify(&wr_mas);
4461	return wr_mas.content;
4462
4463exists:
4464	mas_set_err(mas, -EEXIST);
4465	return wr_mas.content;
4466
4467}
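
/*
 * Illustrative example (editorial sketch, not part of the original source).
 * Unlike a store, an insert refuses to overwrite anything in the range;
 * example_mt, ptr_a and ptr_b are hypothetical.
 *
 *	DEFINE_MTREE(example_mt);
 *
 *	mtree_insert_range(&example_mt, 10, 20, ptr_a, GFP_KERNEL);	returns 0
 *	mtree_insert_range(&example_mt, 15, 30, ptr_b, GFP_KERNEL);	returns -EEXIST
 *	mtree_store_range(&example_mt, 15, 30, ptr_b, GFP_KERNEL);	returns 0, overwrites
 */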
4468
4469/*
 4470 * mas_prev_node() - Find the previous non-null entry at the same level in the
 4471 * tree.  The prev value will be mas->node[mas->offset] or MAS_NONE.
 4472 * @mas: The maple state
 4473 * @min: The lower limit to search
 4474 *
4476 * Return: 1 if the node is dead, 0 otherwise.
4477 */
4478static inline int mas_prev_node(struct ma_state *mas, unsigned long min)
4479{
4480	enum maple_type mt;
4481	int offset, level;
4482	void __rcu **slots;
4483	struct maple_node *node;
4484	struct maple_enode *enode;
4485	unsigned long *pivots;
4486
4487	if (mas_is_none(mas))
4488		return 0;
4489
4490	level = 0;
4491	do {
4492		node = mas_mn(mas);
4493		if (ma_is_root(node))
4494			goto no_entry;
4495
4496		/* Walk up. */
4497		if (unlikely(mas_ascend(mas)))
4498			return 1;
4499		offset = mas->offset;
4500		level++;
4501	} while (!offset);
4502
4503	offset--;
4504	mt = mte_node_type(mas->node);
4505	node = mas_mn(mas);
4506	slots = ma_slots(node, mt);
4507	pivots = ma_pivots(node, mt);
4508	mas->max = pivots[offset];
4509	if (offset)
4510		mas->min = pivots[offset - 1] + 1;
4511	if (unlikely(ma_dead_node(node)))
4512		return 1;
4513
4514	if (mas->max < min)
4515		goto no_entry_min;
4516
4517	while (level > 1) {
4518		level--;
4519		enode = mas_slot(mas, slots, offset);
4520		if (unlikely(ma_dead_node(node)))
4521			return 1;
4522
4523		mas->node = enode;
4524		mt = mte_node_type(mas->node);
4525		node = mas_mn(mas);
4526		slots = ma_slots(node, mt);
4527		pivots = ma_pivots(node, mt);
4528		offset = ma_data_end(node, mt, pivots, mas->max);
4529		if (offset)
4530			mas->min = pivots[offset - 1] + 1;
4531
4532		if (offset < mt_pivots[mt])
4533			mas->max = pivots[offset];
4534
4535		if (mas->max < min)
4536			goto no_entry;
4537	}
4538
4539	mas->node = mas_slot(mas, slots, offset);
4540	if (unlikely(ma_dead_node(node)))
4541		return 1;
4542
4543	mas->offset = mas_data_end(mas);
4544	if (unlikely(mte_dead_node(mas->node)))
4545		return 1;
4546
4547	return 0;
4548
4549no_entry_min:
4550	mas->offset = offset;
4551	if (offset)
4552		mas->min = pivots[offset - 1] + 1;
4553no_entry:
4554	if (unlikely(ma_dead_node(node)))
4555		return 1;
4556
4557	mas->node = MAS_NONE;
4558	return 0;
4559}
4560
4561/*
4562 * mas_next_node() - Get the next node at the same level in the tree.
 4563 * @mas: The maple state
 4564 * @node: The struct maple_node of the current @mas->node
 4565 * @max: The maximum pivot value to check.
4565 *
4566 * The next value will be mas->node[mas->offset] or MAS_NONE.
4567 * Return: 1 on dead node, 0 otherwise.
4568 */
4569static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
4570				unsigned long max)
4571{
4572	unsigned long min, pivot;
4573	unsigned long *pivots;
4574	struct maple_enode *enode;
4575	int level = 0;
4576	unsigned char offset;
4577	enum maple_type mt;
4578	void __rcu **slots;
4579
4580	if (mas->max >= max)
4581		goto no_entry;
4582
4583	level = 0;
4584	do {
4585		if (ma_is_root(node))
4586			goto no_entry;
4587
4588		min = mas->max + 1;
4589		if (min > max)
4590			goto no_entry;
4591
4592		if (unlikely(mas_ascend(mas)))
4593			return 1;
4594
4595		offset = mas->offset;
4596		level++;
4597		node = mas_mn(mas);
4598		mt = mte_node_type(mas->node);
4599		pivots = ma_pivots(node, mt);
4600	} while (unlikely(offset == ma_data_end(node, mt, pivots, mas->max)));
4601
4602	slots = ma_slots(node, mt);
4603	pivot = mas_safe_pivot(mas, pivots, ++offset, mt);
4604	while (unlikely(level > 1)) {
4605		/* Descend, if necessary */
4606		enode = mas_slot(mas, slots, offset);
4607		if (unlikely(ma_dead_node(node)))
4608			return 1;
4609
4610		mas->node = enode;
4611		level--;
4612		node = mas_mn(mas);
4613		mt = mte_node_type(mas->node);
4614		slots = ma_slots(node, mt);
4615		pivots = ma_pivots(node, mt);
4616		offset = 0;
4617		pivot = pivots[0];
4618	}
4619
4620	enode = mas_slot(mas, slots, offset);
4621	if (unlikely(ma_dead_node(node)))
4622		return 1;
4623
4624	mas->node = enode;
4625	mas->min = min;
4626	mas->max = pivot;
4627	return 0;
4628
4629no_entry:
4630	if (unlikely(ma_dead_node(node)))
4631		return 1;
4632
4633	mas->node = MAS_NONE;
4634	return 0;
4635}
4636
4637/*
4638 * mas_next_nentry() - Get the next node entry
 4639 * @mas: The maple state
 4640 * @node: The struct maple_node of the current @mas->node
 4641 * @max: The maximum value to check
 4642 * @type: The maple node type
4642 *
4643 * Sets @mas->offset to the offset of the next node entry, @mas->last to the
4644 * pivot of the entry.
4645 *
4646 * Return: The next entry, %NULL otherwise
4647 */
4648static inline void *mas_next_nentry(struct ma_state *mas,
4649	    struct maple_node *node, unsigned long max, enum maple_type type)
4650{
4651	unsigned char count;
4652	unsigned long pivot;
4653	unsigned long *pivots;
4654	void __rcu **slots;
4655	void *entry;
4656
4657	if (mas->last == mas->max) {
4658		mas->index = mas->max;
4659		return NULL;
4660	}
4661
4662	pivots = ma_pivots(node, type);
4663	slots = ma_slots(node, type);
4664	mas->index = mas_safe_min(mas, pivots, mas->offset);
4665	if (ma_dead_node(node))
4666		return NULL;
4667
4668	if (mas->index > max)
4669		return NULL;
4670
4671	count = ma_data_end(node, type, pivots, mas->max);
4672	if (mas->offset > count)
4673		return NULL;
4674
4675	while (mas->offset < count) {
4676		pivot = pivots[mas->offset];
4677		entry = mas_slot(mas, slots, mas->offset);
4678		if (ma_dead_node(node))
4679			return NULL;
4680
4681		if (entry)
4682			goto found;
4683
4684		if (pivot >= max)
4685			return NULL;
4686
4687		mas->index = pivot + 1;
4688		mas->offset++;
4689	}
4690
4691	if (mas->index > mas->max) {
4692		mas->index = mas->last;
4693		return NULL;
4694	}
4695
4696	pivot = mas_safe_pivot(mas, pivots, mas->offset, type);
4697	entry = mas_slot(mas, slots, mas->offset);
4698	if (ma_dead_node(node))
4699		return NULL;
4700
4701	if (!pivot)
4702		return NULL;
4703
4704	if (!entry)
4705		return NULL;
4706
4707found:
4708	mas->last = pivot;
4709	return entry;
4710}
4711
4712static inline void mas_rewalk(struct ma_state *mas, unsigned long index)
4713{
4715retry:
4716	mas_set(mas, index);
4717	mas_state_walk(mas);
4718	if (mas_is_start(mas))
4719		goto retry;
4723}
4724
4725/*
4726 * mas_next_entry() - Internal function to get the next entry.
4727 * @mas: The maple state
4728 * @limit: The maximum range start.
4729 *
 4730 * Sets @mas->node to the node holding the next entry and @mas->index to
 4731 * the beginning of that entry's range.  Does not check beyond @limit.
4732 * Sets @mas->index and @mas->last to the limit if it is hit.
4733 * Restarts on dead nodes.
4734 *
4735 * Return: the next entry or %NULL.
4736 */
4737static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit)
4738{
4739	void *entry = NULL;
4740	struct maple_enode *prev_node;
4741	struct maple_node *node;
4742	unsigned char offset;
4743	unsigned long last;
4744	enum maple_type mt;
4745
4746	last = mas->last;
4747retry:
4748	offset = mas->offset;
4749	prev_node = mas->node;
4750	node = mas_mn(mas);
4751	mt = mte_node_type(mas->node);
4752	mas->offset++;
4753	if (unlikely(mas->offset >= mt_slots[mt])) {
4754		mas->offset = mt_slots[mt] - 1;
4755		goto next_node;
4756	}
4757
4758	while (!mas_is_none(mas)) {
4759		entry = mas_next_nentry(mas, node, limit, mt);
4760		if (unlikely(ma_dead_node(node))) {
4761			mas_rewalk(mas, last);
4762			goto retry;
4763		}
4764
4765		if (likely(entry))
4766			return entry;
4767
4768		if (unlikely((mas->index > limit)))
4769			break;
4770
4771next_node:
4772		prev_node = mas->node;
4773		offset = mas->offset;
4774		if (unlikely(mas_next_node(mas, node, limit))) {
4775			mas_rewalk(mas, last);
4776			goto retry;
4777		}
4778		mas->offset = 0;
4779		node = mas_mn(mas);
4780		mt = mte_node_type(mas->node);
4781	}
4782
4783	mas->index = mas->last = limit;
4784	mas->offset = offset;
4785	mas->node = prev_node;
4786	return NULL;
4787}
4788
4789/*
4790 * mas_prev_nentry() - Get the previous node entry.
4791 * @mas: The maple state.
 4792 * @limit: The lower limit to check for a value.
 4793 * @index: The original index; used to re-walk the tree if the node is dead.
4793 *
4794 * Return: the entry, %NULL otherwise.
4795 */
4796static inline void *mas_prev_nentry(struct ma_state *mas, unsigned long limit,
4797				    unsigned long index)
4798{
4799	unsigned long pivot, min;
4800	unsigned char offset;
4801	struct maple_node *mn;
4802	enum maple_type mt;
4803	unsigned long *pivots;
4804	void __rcu **slots;
4805	void *entry;
4806
4807retry:
4808	if (!mas->offset)
4809		return NULL;
4810
4811	mn = mas_mn(mas);
4812	mt = mte_node_type(mas->node);
4813	offset = mas->offset - 1;
4814	if (offset >= mt_slots[mt])
4815		offset = mt_slots[mt] - 1;
4816
4817	slots = ma_slots(mn, mt);
4818	pivots = ma_pivots(mn, mt);
4819	if (offset == mt_pivots[mt])
4820		pivot = mas->max;
4821	else
4822		pivot = pivots[offset];
4823
4824	if (unlikely(ma_dead_node(mn))) {
4825		mas_rewalk(mas, index);
4826		goto retry;
4827	}
4828
4829	while (offset && ((!mas_slot(mas, slots, offset) && pivot >= limit) ||
4830	       !pivot))
4831		pivot = pivots[--offset];
4832
4833	min = mas_safe_min(mas, pivots, offset);
4834	entry = mas_slot(mas, slots, offset);
4835	if (unlikely(ma_dead_node(mn))) {
4836		mas_rewalk(mas, index);
4837		goto retry;
4838	}
4839
4840	if (likely(entry)) {
4841		mas->offset = offset;
4842		mas->last = pivot;
4843		mas->index = min;
4844	}
4845	return entry;
4846}
4847
4848static inline void *mas_prev_entry(struct ma_state *mas, unsigned long min)
4849{
4850	void *entry;
4851
4852retry:
4853	while (likely(!mas_is_none(mas))) {
4854		entry = mas_prev_nentry(mas, min, mas->index);
4855		if (unlikely(mas->last < min))
4856			goto not_found;
4857
4858		if (likely(entry))
4859			return entry;
4860
4861		if (unlikely(mas_prev_node(mas, min))) {
4862			mas_rewalk(mas, mas->index);
4863			goto retry;
4864		}
4865
4866		mas->offset++;
4867	}
4868
4869	mas->offset--;
4870not_found:
4871	mas->index = mas->last = min;
4872	return NULL;
4873}
4874
4875/*
4876 * mas_rev_awalk() - Internal function.  Reverse allocation walk.  Find the
4877 * highest gap address of a given size in a given node and descend.
4878 * @mas: The maple state
4879 * @size: The needed size.
4880 *
4881 * Return: True if found in a leaf, false otherwise.
4882 *
4883 */
4884static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
4885{
4886	enum maple_type type = mte_node_type(mas->node);
4887	struct maple_node *node = mas_mn(mas);
4888	unsigned long *pivots, *gaps;
4889	void __rcu **slots;
4890	unsigned long gap = 0;
4891	unsigned long max, min;
4892	unsigned char offset;
4893
4894	if (unlikely(mas_is_err(mas)))
4895		return true;
4896
4897	if (ma_is_dense(type)) {
4898		/* dense nodes. */
4899		mas->offset = (unsigned char)(mas->index - mas->min);
4900		return true;
4901	}
4902
4903	pivots = ma_pivots(node, type);
4904	slots = ma_slots(node, type);
4905	gaps = ma_gaps(node, type);
4906	offset = mas->offset;
4907	min = mas_safe_min(mas, pivots, offset);
4908	/* Skip out of bounds. */
4909	while (mas->last < min)
4910		min = mas_safe_min(mas, pivots, --offset);
4911
4912	max = mas_safe_pivot(mas, pivots, offset, type);
4913	while (mas->index <= max) {
4914		gap = 0;
4915		if (gaps)
4916			gap = gaps[offset];
4917		else if (!mas_slot(mas, slots, offset))
4918			gap = max - min + 1;
4919
4920		if (gap) {
4921			if ((size <= gap) && (size <= mas->last - min + 1))
4922				break;
4923
4924			if (!gaps) {
4925				/* Skip the next slot, it cannot be a gap. */
4926				if (offset < 2)
4927					goto ascend;
4928
4929				offset -= 2;
4930				max = pivots[offset];
4931				min = mas_safe_min(mas, pivots, offset);
4932				continue;
4933			}
4934		}
4935
4936		if (!offset)
4937			goto ascend;
4938
4939		offset--;
4940		max = min - 1;
4941		min = mas_safe_min(mas, pivots, offset);
4942	}
4943
4944	if (unlikely((mas->index > max) || (size - 1 > max - mas->index)))
4945		goto no_space;
4946
4947	if (unlikely(ma_is_leaf(type))) {
4948		mas->offset = offset;
4949		mas->min = min;
4950		mas->max = min + gap - 1;
4951		return true;
4952	}
4953
4954	/* descend, only happens under lock. */
4955	mas->node = mas_slot(mas, slots, offset);
4956	mas->min = min;
4957	mas->max = max;
4958	mas->offset = mas_data_end(mas);
4959	return false;
4960
4961ascend:
4962	if (!mte_is_root(mas->node))
4963		return false;
4964
4965no_space:
4966	mas_set_err(mas, -EBUSY);
4967	return false;
4968}
4969
4970static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
4971{
4972	enum maple_type type = mte_node_type(mas->node);
4973	unsigned long pivot, min, gap = 0;
4974	unsigned char offset;
4975	unsigned long *gaps;
4976	unsigned long *pivots = ma_pivots(mas_mn(mas), type);
4977	void __rcu **slots = ma_slots(mas_mn(mas), type);
4978	bool found = false;
4979
4980	if (ma_is_dense(type)) {
4981		mas->offset = (unsigned char)(mas->index - mas->min);
4982		return true;
4983	}
4984
4985	gaps = ma_gaps(mte_to_node(mas->node), type);
4986	offset = mas->offset;
4987	min = mas_safe_min(mas, pivots, offset);
4988	for (; offset < mt_slots[type]; offset++) {
4989		pivot = mas_safe_pivot(mas, pivots, offset, type);
4990		if (offset && !pivot)
4991			break;
4992
4993		/* Not within lower bounds */
4994		if (mas->index > pivot)
4995			goto next_slot;
4996
4997		if (gaps)
4998			gap = gaps[offset];
4999		else if (!mas_slot(mas, slots, offset))
5000			gap = min(pivot, mas->last) - max(mas->index, min) + 1;
5001		else
5002			goto next_slot;
5003
5004		if (gap >= size) {
5005			if (ma_is_leaf(type)) {
5006				found = true;
5007				goto done;
5008			}
5009			if (mas->index <= pivot) {
5010				mas->node = mas_slot(mas, slots, offset);
5011				mas->min = min;
5012				mas->max = pivot;
5013				offset = 0;
5014				break;
5015			}
5016		}
5017next_slot:
5018		min = pivot + 1;
5019		if (mas->last <= pivot) {
5020			mas_set_err(mas, -EBUSY);
5021			return true;
5022		}
5023	}
5024
5025	if (mte_is_root(mas->node))
5026		found = true;
5027done:
5028	mas->offset = offset;
5029	return found;
5030}
5031
5032/**
5033 * mas_walk() - Search for @mas->index in the tree.
5034 * @mas: The maple state.
5035 *
5036 * mas->index and mas->last will be set to the range if there is a value.  If
5037 * mas->node is MAS_NONE, reset to MAS_START.
5038 *
5039 * Return: the entry at the location or %NULL.
5040 */
5041void *mas_walk(struct ma_state *mas)
5042{
5043	void *entry;
5044
5045retry:
5046	entry = mas_state_walk(mas);
5047	if (mas_is_start(mas))
5048		goto retry;
5049
5050	if (mas_is_ptr(mas)) {
5051		if (!mas->index) {
5052			mas->last = 0;
5053		} else {
5054			mas->index = 1;
5055			mas->last = ULONG_MAX;
5056		}
5057		return entry;
5058	}
5059
5060	if (mas_is_none(mas)) {
5061		mas->index = 0;
5062		mas->last = ULONG_MAX;
5063	}
5064
5065	return entry;
5066}
5067EXPORT_SYMBOL_GPL(mas_walk);
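
/*
 * Illustrative example (editorial sketch, not part of the original source).
 * A read-side lookup with a maple state; example_mt is hypothetical.
 *
 *	MA_STATE(mas, &example_mt, 42, 42);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	entry = mas_walk(&mas);
 *	On a hit, mas.index and mas.last now describe the range containing 42.
 *	rcu_read_unlock();
 */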
5068
5069static inline bool mas_rewind_node(struct ma_state *mas)
5070{
5071	unsigned char slot;
5072
5073	do {
5074		if (mte_is_root(mas->node)) {
5075			slot = mas->offset;
5076			if (!slot)
5077				return false;
5078		} else {
5079			mas_ascend(mas);
5080			slot = mas->offset;
5081		}
5082	} while (!slot);
5083
5084	mas->offset = --slot;
5085	return true;
5086}
5087
5088/*
5089 * mas_skip_node() - Internal function.  Skip over a node.
5090 * @mas: The maple state.
5091 *
5092 * Return: true if there is another node, false otherwise.
5093 */
5094static inline bool mas_skip_node(struct ma_state *mas)
5095{
5096	unsigned char slot, slot_count;
5097	unsigned long *pivots;
5098	enum maple_type mt;
5099
5100	mt = mte_node_type(mas->node);
5101	slot_count = mt_slots[mt] - 1;
5102	do {
5103		if (mte_is_root(mas->node)) {
5104			slot = mas->offset;
5105			if (slot > slot_count) {
5106				mas_set_err(mas, -EBUSY);
5107				return false;
5108			}
5109		} else {
5110			mas_ascend(mas);
5111			slot = mas->offset;
5112			mt = mte_node_type(mas->node);
5113			slot_count = mt_slots[mt] - 1;
5114		}
5115	} while (slot > slot_count);
5116
5117	mas->offset = ++slot;
5118	pivots = ma_pivots(mas_mn(mas), mt);
5119	if (slot > 0)
5120		mas->min = pivots[slot - 1] + 1;
5121
5122	if (slot <= slot_count)
5123		mas->max = pivots[slot];
5124
5125	return true;
5126}
5127
5128/*
5129 * mas_awalk() - Allocation walk.  Search from low address to high, for a gap of
5130 * @size
5131 * @mas: The maple state
5132 * @size: The size of the gap required
5133 *
5134 * Search between @mas->index and @mas->last for a gap of @size.
5135 */
5136static inline void mas_awalk(struct ma_state *mas, unsigned long size)
5137{
5138	struct maple_enode *last = NULL;
5139
5140	/*
5141	 * There are 4 options:
5142	 * go to child (descend)
5143	 * go back to parent (ascend)
5144	 * no gap found. (return, slot == MAPLE_NODE_SLOTS)
5145	 * found the gap. (return, slot != MAPLE_NODE_SLOTS)
5146	 */
5147	while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) {
5148		if (last == mas->node)
5149			mas_skip_node(mas);
5150		else
5151			last = mas->node;
5152	}
5153}
5154
5155/*
5156 * mas_fill_gap() - Fill a located gap with @entry.
5157 * @mas: The maple state
5158 * @entry: The value to store
5159 * @slot: The offset into the node to store the @entry
5160 * @size: The size of the entry
 5161 * @index: Pointer to store the start location of the filled gap
5162 */
5163static inline void mas_fill_gap(struct ma_state *mas, void *entry,
5164		unsigned char slot, unsigned long size, unsigned long *index)
5165{
5166	MA_WR_STATE(wr_mas, mas, entry);
5167	unsigned char pslot = mte_parent_slot(mas->node);
5168	struct maple_enode *mn = mas->node;
5169	unsigned long *pivots;
5170	enum maple_type ptype;
5171	/*
5172	 * mas->index is the start address for the search
5173	 *  which may no longer be needed.
5174	 * mas->last is the end address for the search
5175	 */
5176
5177	*index = mas->index;
5178	mas->last = mas->index + size - 1;
5179
5180	/*
5181	 * It is possible that using mas->max and mas->min to correctly
5182	 * calculate the index and last will cause an issue in the gap
5183	 * calculation, so fix the ma_state here
5184	 */
5185	mas_ascend(mas);
5186	ptype = mte_node_type(mas->node);
5187	pivots = ma_pivots(mas_mn(mas), ptype);
5188	mas->max = mas_safe_pivot(mas, pivots, pslot, ptype);
5189	mas->min = mas_safe_min(mas, pivots, pslot);
5190	mas->node = mn;
5191	mas->offset = slot;
5192	mas_wr_store_entry(&wr_mas);
5193}
5194
5195/*
 5196 * mas_sparse_area() - Internal function.  Set the result to the upper or lower
 5197 * limit when searching for a gap in an empty tree.
5198 * @mas: The maple state
5199 * @min: the minimum range
5200 * @max: The maximum range
5201 * @size: The size of the gap
5202 * @fwd: Searching forward or back
5203 */
5204static inline void mas_sparse_area(struct ma_state *mas, unsigned long min,
5205				unsigned long max, unsigned long size, bool fwd)
5206{
5207	unsigned long start = 0;
5208
5209	if (!unlikely(mas_is_none(mas)))
5210		start++;
5211	/* mas_is_ptr */
5212
5213	if (start < min)
5214		start = min;
5215
5216	if (fwd) {
5217		mas->index = start;
5218		mas->last = start + size - 1;
5219		return;
5220	}
5221
5222	mas->index = max;
5223}
5224
5225/*
5226 * mas_empty_area() - Get the lowest address within the range that is
5227 * sufficient for the size requested.
5228 * @mas: The maple state
5229 * @min: The lowest value of the range
5230 * @max: The highest value of the range
 5231 * @size: The size needed
 5232 *
 5233 * Return: 0 on success, otherwise -EBUSY if no suitable gap is found.
 5234 */
5233int mas_empty_area(struct ma_state *mas, unsigned long min,
5234		unsigned long max, unsigned long size)
5235{
5236	unsigned char offset;
5237	unsigned long *pivots;
5238	enum maple_type mt;
5239
5240	if (mas_is_start(mas))
5241		mas_start(mas);
5242	else if (mas->offset >= 2)
5243		mas->offset -= 2;
5244	else if (!mas_skip_node(mas))
5245		return -EBUSY;
5246
5247	/* Empty set */
5248	if (mas_is_none(mas) || mas_is_ptr(mas)) {
5249		mas_sparse_area(mas, min, max, size, true);
5250		return 0;
5251	}
5252
5253	/* The start of the window can only be within these values */
5254	mas->index = min;
5255	mas->last = max;
5256	mas_awalk(mas, size);
5257
5258	if (unlikely(mas_is_err(mas)))
5259		return xa_err(mas->node);
5260
5261	offset = mas->offset;
5262	if (unlikely(offset == MAPLE_NODE_SLOTS))
5263		return -EBUSY;
5264
5265	mt = mte_node_type(mas->node);
5266	pivots = ma_pivots(mas_mn(mas), mt);
5267	if (offset)
5268		mas->min = pivots[offset - 1] + 1;
5269
5270	if (offset < mt_pivots[mt])
5271		mas->max = pivots[offset];
5272
5273	if (mas->index < mas->min)
5274		mas->index = mas->min;
5275
5276	mas->last = mas->index + size - 1;
5277	return 0;
5278}
5279EXPORT_SYMBOL_GPL(mas_empty_area);
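
/*
 * Illustrative example (editorial sketch, not part of the original source).
 * Find the lowest 16-slot gap in [0, 1000] and claim it; example_mt is a
 * hypothetical tree created with MT_FLAGS_ALLOC_RANGE and ptr is the entry
 * to place there.
 *
 *	MA_STATE(mas, &example_mt, 0, 0);
 *
 *	mas_lock(&mas);
 *	if (!mas_empty_area(&mas, 0, 1000, 16)) {
 *		On success mas.index .. mas.last is the found gap:
 *		mas_store_gfp(&mas, ptr, GFP_KERNEL);
 *	}
 *	mas_unlock(&mas);
 */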
5280
5281/*
5282 * mas_empty_area_rev() - Get the highest address within the range that is
5283 * sufficient for the size requested.
5284 * @mas: The maple state
5285 * @min: The lowest value of the range
5286 * @max: The highest value of the range
 5287 * @size: The size needed
 5288 *
 5289 * Return: 0 on success, otherwise -EBUSY if no suitable gap is found.
 5290 */
5289int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
5290		unsigned long max, unsigned long size)
5291{
5292	struct maple_enode *last = mas->node;
5293
5294	if (mas_is_start(mas)) {
5295		mas_start(mas);
5296		mas->offset = mas_data_end(mas);
5297	} else if (mas->offset >= 2) {
5298		mas->offset -= 2;
5299	} else if (!mas_rewind_node(mas)) {
5300		return -EBUSY;
5301	}
5302
5303	/* Empty set. */
5304	if (mas_is_none(mas) || mas_is_ptr(mas)) {
5305		mas_sparse_area(mas, min, max, size, false);
5306		return 0;
5307	}
5308
5309	/* The start of the window can only be within these values. */
5310	mas->index = min;
5311	mas->last = max;
5312
5313	while (!mas_rev_awalk(mas, size)) {
5314		if (last == mas->node) {
5315			if (!mas_rewind_node(mas))
5316				return -EBUSY;
5317		} else {
5318			last = mas->node;
5319		}
5320	}
5321
5322	if (mas_is_err(mas))
5323		return xa_err(mas->node);
5324
5325	if (unlikely(mas->offset == MAPLE_NODE_SLOTS))
5326		return -EBUSY;
5327
5328	/*
5329	 * mas_rev_awalk() has set mas->min and mas->max to the gap values.  If
5330	 * the maximum is outside the window we are searching, then use the last
5331	 * location in the search.
5332	 * mas->max and mas->min is the range of the gap.
5333	 * mas->index and mas->last are currently set to the search range.
5334	 */
5335
5336	/* Trim the upper limit to the max. */
5337	if (mas->max <= mas->last)
5338		mas->last = mas->max;
5339
5340	mas->index = mas->last - size + 1;
5341	return 0;
5342}
5343EXPORT_SYMBOL_GPL(mas_empty_area_rev);
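
/*
 * Illustrative example (editorial sketch, not part of the original source).
 * The reverse search finds the highest fitting gap instead: on success
 * mas.last is the end of the gap and mas.index is mas.last - size + 1.
 * example_mt is hypothetical.
 *
 *	MA_STATE(mas, &example_mt, 0, 0);
 *
 *	mas_lock(&mas);
 *	mas_empty_area_rev(&mas, 0, 1000, 16);
 *	mas_unlock(&mas);
 */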
5344
5345static inline int mas_alloc(struct ma_state *mas, void *entry,
5346		unsigned long size, unsigned long *index)
5347{
5348	unsigned long min;
5349
5350	mas_start(mas);
5351	if (mas_is_none(mas) || mas_is_ptr(mas)) {
5352		mas_root_expand(mas, entry);
5353		if (mas_is_err(mas))
5354			return xa_err(mas->node);
5355
5356		if (!mas->index)
5357			return mte_pivot(mas->node, 0);
5358		return mte_pivot(mas->node, 1);
5359	}
5360
5361	/* Must be walking a tree. */
5362	mas_awalk(mas, size);
5363	if (mas_is_err(mas))
5364		return xa_err(mas->node);
5365
5366	if (mas->offset == MAPLE_NODE_SLOTS)
5367		goto no_gap;
5368
5369	/*
5370	 * At this point, mas->node points to the right node and we have an
5371	 * offset that has a sufficient gap.
5372	 */
5373	min = mas->min;
5374	if (mas->offset)
5375		min = mte_pivot(mas->node, mas->offset - 1) + 1;
5376
5377	if (mas->index < min)
5378		mas->index = min;
5379
5380	mas_fill_gap(mas, entry, mas->offset, size, index);
5381	return 0;
5382
5383no_gap:
5384	return -EBUSY;
5385}
5386
5387static inline int mas_rev_alloc(struct ma_state *mas, unsigned long min,
5388				unsigned long max, void *entry,
5389				unsigned long size, unsigned long *index)
5390{
5391	int ret = 0;
5392
5393	ret = mas_empty_area_rev(mas, min, max, size);
5394	if (ret)
5395		return ret;
5396
5397	if (mas_is_err(mas))
5398		return xa_err(mas->node);
5399
5400	if (mas->offset == MAPLE_NODE_SLOTS)
5401		goto no_gap;
5402
5403	mas_fill_gap(mas, entry, mas->offset, size, index);
5404	return 0;
5405
5406no_gap:
5407	return -EBUSY;
5408}
5409
5410/*
5411 * mas_dead_leaves() - Mark all leaves of a node as dead.
5412 * @mas: The maple state
5413 * @slots: Pointer to the slot array
5414 *
5415 * Must hold the write lock.
5416 *
5417 * Return: The number of leaves marked as dead.
5418 */
5419static inline
5420unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots)
5421{
5422	struct maple_node *node;
5423	enum maple_type type;
5424	void *entry;
5425	int offset;
5426
5427	for (offset = 0; offset < mt_slot_count(mas->node); offset++) {
5428		entry = mas_slot_locked(mas, slots, offset);
5429		type = mte_node_type(entry);
5430		node = mte_to_node(entry);
5431		/* Use both node and type to catch LE & BE metadata */
5432		if (!node || !type)
5433			break;
5434
5435		mte_set_node_dead(entry);
5436		smp_wmb(); /* Needed for RCU */
5437		node->type = type;
5438		rcu_assign_pointer(slots[offset], node);
5439	}
5440
5441	return offset;
5442}
5443
5444static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offset)
5445{
5446	struct maple_node *node, *next;
5447	void __rcu **slots = NULL;
5448
5449	next = mas_mn(mas);
5450	do {
5451		mas->node = ma_enode_ptr(next);
5452		node = mas_mn(mas);
5453		slots = ma_slots(node, node->type);
5454		next = mas_slot_locked(mas, slots, offset);
5455		offset = 0;
5456	} while (!ma_is_leaf(next->type));
5457
5458	return slots;
5459}
5460
5461static void mt_free_walk(struct rcu_head *head)
5462{
5463	void __rcu **slots;
5464	struct maple_node *node, *start;
5465	struct maple_tree mt;
5466	unsigned char offset;
5467	enum maple_type type;
5468	MA_STATE(mas, &mt, 0, 0);
5469
5470	node = container_of(head, struct maple_node, rcu);
5471
5472	if (ma_is_leaf(node->type))
5473		goto free_leaf;
5474
5475	mt_init_flags(&mt, node->ma_flags);
5476	mas_lock(&mas);
5477	start = node;
5478	mas.node = mt_mk_node(node, node->type);
5479	slots = mas_dead_walk(&mas, 0);
5480	node = mas_mn(&mas);
5481	do {
5482		mt_free_bulk(node->slot_len, slots);
5483		offset = node->parent_slot + 1;
5484		mas.node = node->piv_parent;
5485		if (mas_mn(&mas) == node)
5486			goto start_slots_free;
5487
5488		type = mte_node_type(mas.node);
5489		slots = ma_slots(mte_to_node(mas.node), type);
5490		if ((offset < mt_slots[type]) && (slots[offset]))
5491			slots = mas_dead_walk(&mas, offset);
5492
5493		node = mas_mn(&mas);
5494	} while ((node != start) || (node->slot_len < offset));
5495
5496	slots = ma_slots(node, node->type);
5497	mt_free_bulk(node->slot_len, slots);
5498
5499start_slots_free:
5500	mas_unlock(&mas);
5501free_leaf:
5502	mt_free_rcu(&node->rcu);
5503}
5504
5505static inline void __rcu **mas_destroy_descend(struct ma_state *mas,
5506			struct maple_enode *prev, unsigned char offset)
5507{
5508	struct maple_node *node;
5509	struct maple_enode *next = mas->node;
5510	void __rcu **slots = NULL;
5511
5512	do {
5513		mas->node = next;
5514		node = mas_mn(mas);
5515		slots = ma_slots(node, mte_node_type(mas->node));
5516		next = mas_slot_locked(mas, slots, 0);
5517		if ((mte_dead_node(next)))
5518			next = mas_slot_locked(mas, slots, 1);
5519
5520		mte_set_node_dead(mas->node);
5521		node->type = mte_node_type(mas->node);
5522		node->piv_parent = prev;
5523		node->parent_slot = offset;
5524		offset = 0;
5525		prev = mas->node;
5526	} while (!mte_is_leaf(next));
5527
5528	return slots;
5529}
5530
5531static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_flags,
5532			    bool free)
5533{
5534	void __rcu **slots;
5535	struct maple_node *node = mte_to_node(enode);
5536	struct maple_enode *start;
5537	struct maple_tree mt;
5538
5539	MA_STATE(mas, &mt, 0, 0);
5540
5541	if (mte_is_leaf(enode))
5542		goto free_leaf;
5543
5544	mt_init_flags(&mt, ma_flags);
5545	mas_lock(&mas);
5546
5547	mas.node = start = enode;
5548	slots = mas_destroy_descend(&mas, start, 0);
5549	node = mas_mn(&mas);
5550	do {
5551		enum maple_type type;
5552		unsigned char offset;
5553		struct maple_enode *parent, *tmp;
5554
5555		node->slot_len = mas_dead_leaves(&mas, slots);
5556		if (free)
5557			mt_free_bulk(node->slot_len, slots);
5558		offset = node->parent_slot + 1;
5559		mas.node = node->piv_parent;
5560		if (mas_mn(&mas) == node)
5561			goto start_slots_free;
5562
5563		type = mte_node_type(mas.node);
5564		slots = ma_slots(mte_to_node(mas.node), type);
5565		if (offset >= mt_slots[type])
5566			goto next;
5567
5568		tmp = mas_slot_locked(&mas, slots, offset);
5569		if (mte_node_type(tmp) && mte_to_node(tmp)) {
5570			parent = mas.node;
5571			mas.node = tmp;
5572			slots = mas_destroy_descend(&mas, parent, offset);
5573		}
5574next:
5575		node = mas_mn(&mas);
5576	} while (start != mas.node);
5577
5578	node = mas_mn(&mas);
5579	node->slot_len = mas_dead_leaves(&mas, slots);
5580	if (free)
5581		mt_free_bulk(node->slot_len, slots);
5582
5583start_slots_free:
5584	mas_unlock(&mas);
5585
5586free_leaf:
5587	if (free)
5588		mt_free_rcu(&node->rcu);
5589}
5590
5591/*
5592 * mte_destroy_walk() - Free a tree or sub-tree.
 5593 * @enode: The encoded maple node (maple_enode) to start at
 5594 * @mt: The maple tree that contains @enode - needed for the node types.
5595 *
5596 * Must hold the write lock.
5597 */
5598static inline void mte_destroy_walk(struct maple_enode *enode,
5599				    struct maple_tree *mt)
5600{
5601	struct maple_node *node = mte_to_node(enode);
5602
5603	if (mt_in_rcu(mt)) {
5604		mt_destroy_walk(enode, mt->ma_flags, false);
5605		call_rcu(&node->rcu, mt_free_walk);
5606	} else {
5607		mt_destroy_walk(enode, mt->ma_flags, true);
5608	}
5609}
5610
5611static void mas_wr_store_setup(struct ma_wr_state *wr_mas)
5612{
5613	if (!mas_is_start(wr_mas->mas)) {
5614		if (mas_is_none(wr_mas->mas)) {
5615			mas_reset(wr_mas->mas);
5616		} else {
5617			wr_mas->r_max = wr_mas->mas->max;
5618			wr_mas->type = mte_node_type(wr_mas->mas->node);
5619			if (mas_is_span_wr(wr_mas))
5620				mas_reset(wr_mas->mas);
5621		}
5622	}
5623
5624}
5625
5626/* Interface */
5627
5628/**
5629 * mas_store() - Store an @entry.
5630 * @mas: The maple state.
5631 * @entry: The entry to store.
5632 *
 5633 * The @mas->index and @mas->last are used to set the range for the @entry.
5634 * Note: The @mas should have pre-allocated entries to ensure there is memory to
5635 * store the entry.  Please see mas_expected_entries()/mas_destroy() for more details.
5636 *
5637 * Return: the first entry between mas->index and mas->last or %NULL.
5638 */
5639void *mas_store(struct ma_state *mas, void *entry)
5640{
5641	MA_WR_STATE(wr_mas, mas, entry);
5642
5643	trace_ma_write(__func__, mas, 0, entry);
5644#ifdef CONFIG_DEBUG_MAPLE_TREE
5645	if (mas->index > mas->last)
5646		pr_err("Error %lu > %lu %p\n", mas->index, mas->last, entry);
5647	MT_BUG_ON(mas->tree, mas->index > mas->last);
5648	if (mas->index > mas->last) {
5649		mas_set_err(mas, -EINVAL);
5650		return NULL;
5651	}
5652
5653#endif
5654
5655	/*
5656	 * Storing is the same operation as insert with the added caveat that it
5657	 * can overwrite entries.  Although this seems simple enough, one may
5658	 * want to examine what happens if a single store operation was to
5659	 * overwrite multiple entries within a self-balancing B-Tree.
5660	 */
5661	mas_wr_store_setup(&wr_mas);
5662	mas_wr_store_entry(&wr_mas);
5663	return wr_mas.content;
5664}
5665EXPORT_SYMBOL_GPL(mas_store);
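
/*
 * Illustrative example (editorial sketch, not part of the original source).
 * MA_STATE() seeds the range and mas_store() writes it; callers that may need
 * node allocations normally preallocate first (see mas_preallocate()) or use
 * mas_store_gfp().  example_mt and ptr are hypothetical.
 *
 *	MA_STATE(mas, &example_mt, 10, 20);
 *	void *old;
 *
 *	mas_lock(&mas);
 *	old = mas_store(&mas, ptr);
 *	old is the first prior entry in [10, 20], or NULL.
 *	mas_unlock(&mas);
 */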
5666
5667/**
5668 * mas_store_gfp() - Store a value into the tree.
5669 * @mas: The maple state
5670 * @entry: The entry to store
5671 * @gfp: The GFP_FLAGS to use for allocations if necessary.
5672 *
5673 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
5674 * be allocated.
5675 */
5676int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
5677{
5678	MA_WR_STATE(wr_mas, mas, entry);
5679
5680	mas_wr_store_setup(&wr_mas);
5681	trace_ma_write(__func__, mas, 0, entry);
5682retry:
5683	mas_wr_store_entry(&wr_mas);
5684	if (unlikely(mas_nomem(mas, gfp)))
5685		goto retry;
5686
5687	if (unlikely(mas_is_err(mas)))
5688		return xa_err(mas->node);
5689
5690	return 0;
5691}
5692EXPORT_SYMBOL_GPL(mas_store_gfp);
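
/*
 * Illustrative example (editorial sketch, not part of the original source).
 * Storing NULL over a range erases it; mas_store_gfp() retries the store,
 * allocating with @gfp (dropping the internal lock to sleep if needed).
 * example_mt is hypothetical.
 *
 *	MA_STATE(mas, &example_mt, 100, 199);
 *	int err;
 *
 *	mas_lock(&mas);
 *	err = mas_store_gfp(&mas, NULL, GFP_KERNEL);
 *	mas_unlock(&mas);
 */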
5693
5694/**
5695 * mas_store_prealloc() - Store a value into the tree using memory
5696 * preallocated in the maple state.
5697 * @mas: The maple state
5698 * @entry: The entry to store.
5699 */
5700void mas_store_prealloc(struct ma_state *mas, void *entry)
5701{
5702	MA_WR_STATE(wr_mas, mas, entry);
5703
5704	mas_wr_store_setup(&wr_mas);
5705	trace_ma_write(__func__, mas, 0, entry);
5706	mas_wr_store_entry(&wr_mas);
5707	BUG_ON(mas_is_err(mas));
5708	mas_destroy(mas);
5709}
5710EXPORT_SYMBOL_GPL(mas_store_prealloc);
5711
5712/**
5713 * mas_preallocate() - Preallocate enough nodes for a store operation
5714 * @mas: The maple state
5715 * @entry: The entry that will be stored
5716 * @gfp: The GFP_FLAGS to use for allocations.
5717 *
5718 * Return: 0 on success, -ENOMEM if memory could not be allocated.
5719 */
5720int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
5721{
5722	int ret;
5723
5724	mas_node_count_gfp(mas, 1 + mas_mt_height(mas) * 3, gfp);
5725	mas->mas_flags |= MA_STATE_PREALLOC;
5726	if (likely(!mas_is_err(mas)))
5727		return 0;
5728
5729	mas_set_alloc_req(mas, 0);
5730	ret = xa_err(mas->node);
5731	mas_reset(mas);
5732	mas_destroy(mas);
5733	mas_reset(mas);
5734	return ret;
5735}
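
/*
 * Illustrative example (editorial sketch, not part of the original source).
 * The preallocation pattern keeps the sleeping allocation outside the tree
 * lock while the store itself cannot fail; it assumes the caller already
 * excludes concurrent writers.  example_mt and ptr are hypothetical.
 *
 *	MA_STATE(mas, &example_mt, 10, 20);
 *
 *	if (mas_preallocate(&mas, ptr, GFP_KERNEL))
 *		return -ENOMEM;
 *	mas_lock(&mas);
 *	mas_store_prealloc(&mas, ptr);	uses the nodes, then calls mas_destroy()
 *	mas_unlock(&mas);
 */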
5736
5737/*
5738 * mas_destroy() - destroy a maple state.
5739 * @mas: The maple state
5740 *
5741 * Upon completion, check the left-most node and rebalance against the node to
5742 * the right if necessary.  Frees any allocated nodes associated with this maple
5743 * state.
5744 */
5745void mas_destroy(struct ma_state *mas)
5746{
5747	struct maple_alloc *node;
5748
5749	/*
5750	 * When using mas_for_each() to insert an expected number of elements,
5751	 * it is possible that the number inserted is less than the expected
5752	 * number.  To fix an invalid final node, a check is performed here to
5753	 * rebalance the previous node with the final node.
5754	 */
5755	if (mas->mas_flags & MA_STATE_REBALANCE) {
5756		unsigned char end;
5757
5758		if (mas_is_start(mas))
5759			mas_start(mas);
5760
5761		mtree_range_walk(mas);
5762		end = mas_data_end(mas) + 1;
5763		if (end < mt_min_slot_count(mas->node) - 1)
5764			mas_destroy_rebalance(mas, end);
5765
5766		mas->mas_flags &= ~MA_STATE_REBALANCE;
5767	}
5768	mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC);
5769
5770	while (mas->alloc && !((unsigned long)mas->alloc & 0x1)) {
5771		node = mas->alloc;
5772		mas->alloc = node->slot[0];
5773		if (node->node_count > 0)
5774			mt_free_bulk(node->node_count,
5775				     (void __rcu **)&node->slot[1]);
5776		kmem_cache_free(maple_node_cache, node);
5777	}
5778	mas->alloc = NULL;
5779}
5780EXPORT_SYMBOL_GPL(mas_destroy);
5781
5782/*
5783 * mas_expected_entries() - Set the expected number of entries that will be inserted.
5784 * @mas: The maple state
5785 * @nr_entries: The number of expected entries.
5786 *
5787 * This will attempt to pre-allocate enough nodes to store the expected number
5788 * of entries.  The allocations will occur using the bulk allocator interface
5789 * for speed.  Please call mas_destroy() on the @mas after inserting the entries
5790 * to ensure any unused nodes are freed.
5791 *
5792 * Return: 0 on success, -ENOMEM if memory could not be allocated.
5793 */
5794int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries)
5795{
5796	int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2;
5797	struct maple_enode *enode = mas->node;
5798	int nr_nodes;
5799	int ret;
5800
5801	/*
5802	 * Sometimes it is necessary to duplicate a tree to a new tree, such as
5803	 * forking a process and duplicating the VMAs from one tree to a new
5804	 * tree.  When such a situation arises, it is known that the new tree is
5805	 * not going to be used until the entire tree is populated.  For
5806	 * performance reasons, it is best to use a bulk load with RCU disabled.
5807	 * This allows for optimistic splitting that favours the left and reuse
5808	 * of nodes during the operation.
5809	 */
5810
5811	/* Optimize splitting for bulk insert in-order */
5812	mas->mas_flags |= MA_STATE_BULK;
5813
5814	/*
5815	 * Avoid overflow, assume a gap between each entry and a trailing null.
5816	 * If this is wrong, it just means allocation can happen during
5817	 * insertion of entries.
5818	 */
5819	nr_nodes = max(nr_entries, nr_entries * 2 + 1);
5820	if (!mt_is_alloc(mas->tree))
5821		nonleaf_cap = MAPLE_RANGE64_SLOTS - 2;
5822
5823	/* Leaves; reduce slots to keep space for expansion */
5824	nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2);
5825	/* Internal nodes */
5826	nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap);
5827	/* Add working room for split (2 nodes) + new parents */
5828	mas_node_count(mas, nr_nodes + 3);
5829
5830	/* Detect if allocations run out */
5831	mas->mas_flags |= MA_STATE_PREALLOC;
5832
5833	if (!mas_is_err(mas))
5834		return 0;
5835
5836	ret = xa_err(mas->node);
5837	mas->node = enode;
5838	mas_destroy(mas);
5839	return ret;
5840
5841}
5842EXPORT_SYMBOL_GPL(mas_expected_entries);
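
/*
 * Example (editorial sketch, not part of the original source): bulk-loading a
 * fresh tree in ascending order, as in the VMA duplication case described
 * above.  The source array, entry count and field names are hypothetical.
 *
 *	MA_STATE(mas, &new_tree, 0, 0);
 *	unsigned long i;
 *	int err;
 *
 *	mtree_lock(&new_tree);
 *	err = mas_expected_entries(&mas, nr);
 *	for (i = 0; !err && i < nr; i++) {
 *		mas_set_range(&mas, src[i].start, src[i].last);
 *		err = mas_store_gfp(&mas, src[i].entry, GFP_KERNEL);
 *	}
 *	mas_destroy(&mas);   always pair with mas_expected_entries()
 *	mtree_unlock(&new_tree);
 */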
5843
5844/**
5845 * mas_next() - Get the next entry.
5846 * @mas: The maple state
5847 * @max: The maximum index to check.
5848 *
5849 * Returns the next entry after @mas->index.
5850 * Must hold rcu_read_lock or the write lock.
5851 * Can return the zero entry.
5852 *
5853 * Return: The next entry or %NULL
5854 */
5855void *mas_next(struct ma_state *mas, unsigned long max)
5856{
5857	if (mas_is_none(mas) || mas_is_paused(mas))
5858		mas->node = MAS_START;
5859
5860	if (mas_is_start(mas))
5861		mas_walk(mas); /* Retries on dead nodes handled by mas_walk */
5862
5863	if (mas_is_ptr(mas)) {
5864		if (!mas->index) {
5865			mas->index = 1;
5866			mas->last = ULONG_MAX;
5867		}
5868		return NULL;
5869	}
5870
5871	if (mas->last == ULONG_MAX)
5872		return NULL;
5873
5874	/* Retries on dead nodes handled by mas_next_entry */
5875	return mas_next_entry(mas, max);
5876}
5877EXPORT_SYMBOL_GPL(mas_next);
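
/*
 * Example (editorial sketch, not part of the original source): walking every
 * entry strictly after a starting index under RCU.  The tree, start index and
 * handle_entry() helper are hypothetical.
 *
 *	MA_STATE(mas, &example_tree, start, start);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	while ((entry = mas_next(&mas, ULONG_MAX)) != NULL)
 *		handle_entry(entry, mas.index, mas.last);
 *	rcu_read_unlock();
 */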
5878
5879/**
5880 * mt_next() - get the next value in the maple tree
5881 * @mt: The maple tree
5882 * @index: The start index
5883 * @max: The maximum index to check
5884 *
5885 * Return: The entry at @index or higher, or %NULL if nothing is found.
5886 */
5887void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max)
5888{
5889	void *entry = NULL;
5890	MA_STATE(mas, mt, index, index);
5891
5892	rcu_read_lock();
5893	entry = mas_next(&mas, max);
5894	rcu_read_unlock();
5895	return entry;
5896}
5897EXPORT_SYMBOL_GPL(mt_next);
5898
5899/**
5900 * mas_prev() - Get the previous entry
5901 * @mas: The maple state
5902 * @min: The minimum value to check.
5903 *
5904 * Must hold rcu_read_lock or the write lock.
5905 * Will reset mas to MAS_START if the node is MAS_NONE.  Will stop on
5906 * unsearchable nodes.
5907 *
5908 * Return: the previous value or %NULL.
5909 */
5910void *mas_prev(struct ma_state *mas, unsigned long min)
5911{
5912	if (!mas->index) {
5913		/* Nothing comes before 0 */
5914		mas->last = 0;
5915		return NULL;
5916	}
5917
5918	if (unlikely(mas_is_ptr(mas)))
5919		return NULL;
5920
5921	if (mas_is_none(mas) || mas_is_paused(mas))
5922		mas->node = MAS_START;
5923
5924	if (mas_is_start(mas)) {
5925		mas_walk(mas);
5926		if (!mas->index)
5927			return NULL;
5928	}
5929
5930	if (mas_is_ptr(mas)) {
5931		if (!mas->index) {
5932			mas->last = 0;
5933			return NULL;
5934		}
5935
5936		mas->index = mas->last = 0;
5937		return mas_root_locked(mas);
5938	}
5939	return mas_prev_entry(mas, min);
5940}
5941EXPORT_SYMBOL_GPL(mas_prev);
5942
5943/**
5944 * mt_prev() - get the previous value in the maple tree
5945 * @mt: The maple tree
5946 * @index: The start index
5947 * @min: The minimum index to check
5948 *
5949 * Return: The entry at @index or lower, or %NULL if nothing is found.
5950 */
5951void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min)
5952{
5953	void *entry = NULL;
5954	MA_STATE(mas, mt, index, index);
5955
5956	rcu_read_lock();
5957	entry = mas_prev(&mas, min);
5958	rcu_read_unlock();
5959	return entry;
5960}
5961EXPORT_SYMBOL_GPL(mt_prev);
5962
5963/**
5964 * mas_pause() - Pause a mas_find/mas_for_each to drop the lock.
5965 * @mas: The maple state to pause
5966 *
5967 * Some users need to pause a walk and drop the lock they're holding in
5968 * order to yield to a higher priority thread or carry out an operation
5969 * on an entry.  Those users should call this function before they drop
5970 * the lock.  It resets the @mas to be suitable for the next iteration
5971 * of the loop after the user has reacquired the lock.  If most entries
5972 * found during a walk require you to call mas_pause(), the mt_for_each()
5973 * iterator may be more appropriate.
5974 *
5975 */
5976void mas_pause(struct ma_state *mas)
5977{
5978	mas->node = MAS_PAUSE;
5979}
5980EXPORT_SYMBOL_GPL(mas_pause);
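
/*
 * Example (editorial sketch, not part of the original source): pausing an
 * iteration to drop the lock, as described above.  The tree and process()
 * helper are hypothetical.
 *
 *	MA_STATE(mas, &example_tree, 0, 0);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	mas_for_each(&mas, entry, ULONG_MAX) {
 *		process(entry);
 *		if (need_resched()) {
 *			mas_pause(&mas);
 *			rcu_read_unlock();
 *			cond_resched();
 *			rcu_read_lock();
 *		}
 *	}
 *	rcu_read_unlock();
 */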
5981
5982/**
5983 * mas_find() - On the first call, find the entry at or after mas->index up to
5984 * %max.  Otherwise, find the entry after mas->index.
5985 * @mas: The maple state
5986 * @max: The maximum value to check.
5987 *
5988 * Must hold rcu_read_lock or the write lock.
5989 * If an entry exists, last and index are updated accordingly.
5990 * May set @mas->node to MAS_NONE.
5991 *
5992 * Return: The entry or %NULL.
5993 */
5994void *mas_find(struct ma_state *mas, unsigned long max)
5995{
5996	if (unlikely(mas_is_paused(mas))) {
5997		if (unlikely(mas->last == ULONG_MAX)) {
5998			mas->node = MAS_NONE;
5999			return NULL;
6000		}
6001		mas->node = MAS_START;
6002		mas->index = ++mas->last;
6003	}
6004
6005	if (unlikely(mas_is_start(mas))) {
6006		/* First run or continue */
6007		void *entry;
6008
6009		if (mas->index > max)
6010			return NULL;
6011
6012		entry = mas_walk(mas);
6013		if (entry)
6014			return entry;
6015	}
6016
6017	if (unlikely(!mas_searchable(mas)))
6018		return NULL;
6019
6020	/* Retries on dead nodes handled by mas_next_entry */
6021	return mas_next_entry(mas, max);
6022}
6023EXPORT_SYMBOL_GPL(mas_find);
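
/*
 * Example (editorial sketch, not part of the original source): mas_find() is
 * the basis of the mas_for_each() iterator; an open-coded equivalent over a
 * hypothetical [first, last] range looks like this.
 *
 *	MA_STATE(mas, &example_tree, first, first);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	while ((entry = mas_find(&mas, last)) != NULL)
 *		pr_debug("[%lu, %lu] -> %p\n", mas.index, mas.last, entry);
 *	rcu_read_unlock();
 */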
6024
6025/**
6026 * mas_find_rev() - On the first call, find the first non-null entry at or
6027 * below mas->index down to %min.  Otherwise, find the first non-null entry
6028 * below mas->index down to %min.
6029 * @mas: The maple state
6030 * @min: The minimum value to check.
6031 *
6032 * Must hold rcu_read_lock or the write lock.
6033 * If an entry exists, last and index are updated accordingly.
6034 * May set @mas->node to MAS_NONE.
6035 *
6036 * Return: The entry or %NULL.
6037 */
6038void *mas_find_rev(struct ma_state *mas, unsigned long min)
6039{
6040	if (unlikely(mas_is_paused(mas))) {
6041		if (unlikely(mas->last == ULONG_MAX)) {
6042			mas->node = MAS_NONE;
6043			return NULL;
6044		}
6045		mas->node = MAS_START;
6046		mas->last = --mas->index;
6047	}
6048
6049	if (unlikely(mas_is_start(mas))) {
6050		/* First run or continue */
6051		void *entry;
6052
6053		if (mas->index < min)
6054			return NULL;
6055
6056		entry = mas_walk(mas);
6057		if (entry)
6058			return entry;
6059	}
6060
6061	if (unlikely(!mas_searchable(mas)))
6062		return NULL;
6063
6064	if (mas->index < min)
6065		return NULL;
6066
6067	/* Retries on dead nodes handled by mas_prev_entry */
6068	return mas_prev_entry(mas, min);
6069}
6070EXPORT_SYMBOL_GPL(mas_find_rev);
6071
6072/**
6073 * mas_erase() - Find the range in which index resides and erase the entire
6074 * range.
6075 * @mas: The maple state
6076 *
6077 * Must hold the write lock.
6078 * Searches for @mas->index, sets @mas->index and @mas->last to the range and
6079 * erases that range.
6080 *
6081 * Return: the entry that was erased or %NULL; @mas->index and @mas->last are updated.
6082 */
6083void *mas_erase(struct ma_state *mas)
6084{
6085	void *entry;
6086	MA_WR_STATE(wr_mas, mas, NULL);
6087
6088	if (mas_is_none(mas) || mas_is_paused(mas))
6089		mas->node = MAS_START;
6090
6091	/* Retry unnecessary when holding the write lock. */
6092	entry = mas_state_walk(mas);
6093	if (!entry)
6094		return NULL;
6095
6096write_retry:
6097	/* Must reset to ensure spanning writes of last slot are detected */
6098	mas_reset(mas);
6099	mas_wr_store_setup(&wr_mas);
6100	mas_wr_store_entry(&wr_mas);
6101	if (mas_nomem(mas, GFP_KERNEL))
6102		goto write_retry;
6103
6104	return entry;
6105}
6106EXPORT_SYMBOL_GPL(mas_erase);
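
/*
 * Example (editorial sketch, not part of the original source): erasing
 * whatever range covers a hypothetical index with the advanced API.
 *
 *	MA_STATE(mas, &example_tree, index, index);
 *	void *old;
 *
 *	mtree_lock(&example_tree);
 *	old = mas_erase(&mas);   old is the erased entry, or NULL
 *	mtree_unlock(&example_tree);
 */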
6107
6108/**
6109 * mas_nomem() - Check if there was an error allocating and do the allocation
6110 * if necessary.  If there are allocations, then free them.
6111 * @mas: The maple state
6112 * @gfp: The GFP_FLAGS to use for allocations
6113 * Return: true on allocation, false otherwise.
6114 */
6115bool mas_nomem(struct ma_state *mas, gfp_t gfp)
6116	__must_hold(mas->tree->lock)
6117{
6118	if (likely(mas->node != MA_ERROR(-ENOMEM))) {
6119		mas_destroy(mas);
6120		return false;
6121	}
6122
6123	if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) {
6124		mtree_unlock(mas->tree);
6125		mas_alloc_nodes(mas, gfp);
6126		mtree_lock(mas->tree);
6127	} else {
6128		mas_alloc_nodes(mas, gfp);
6129	}
6130
6131	if (!mas_allocated(mas))
6132		return false;
6133
6134	mas->node = MAS_START;
6135	return true;
6136}
6137
6138void __init maple_tree_init(void)
6139{
6140	maple_node_cache = kmem_cache_create("maple_node",
6141			sizeof(struct maple_node), sizeof(struct maple_node),
6142			SLAB_PANIC, NULL);
6143}
6144
6145/**
6146 * mtree_load() - Load a value stored in a maple tree
6147 * @mt: The maple tree
6148 * @index: The index to load
6149 *
6150 * Return: the entry or %NULL
6151 */
6152void *mtree_load(struct maple_tree *mt, unsigned long index)
6153{
6154	MA_STATE(mas, mt, index, index);
6155	void *entry;
6156
6157	trace_ma_read(__func__, &mas);
6158	rcu_read_lock();
6159retry:
6160	entry = mas_start(&mas);
6161	if (unlikely(mas_is_none(&mas)))
6162		goto unlock;
6163
6164	if (unlikely(mas_is_ptr(&mas))) {
6165		if (index)
6166			entry = NULL;
6167
6168		goto unlock;
6169	}
6170
6171	entry = mtree_lookup_walk(&mas);
6172	if (!entry && unlikely(mas_is_start(&mas)))
6173		goto retry;
6174unlock:
6175	rcu_read_unlock();
6176	if (xa_is_zero(entry))
6177		return NULL;
6178
6179	return entry;
6180}
6181EXPORT_SYMBOL(mtree_load);
6182
6183/**
6184 * mtree_store_range() - Store an entry at a given range.
6185 * @mt: The maple tree
6186 * @index: The start of the range
6187 * @last: The end of the range
6188 * @entry: The entry to store
6189 * @gfp: The GFP_FLAGS to use for allocations
6190 *
6191 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
6192 * be allocated.
6193 */
6194int mtree_store_range(struct maple_tree *mt, unsigned long index,
6195		unsigned long last, void *entry, gfp_t gfp)
6196{
6197	MA_STATE(mas, mt, index, last);
6198	MA_WR_STATE(wr_mas, &mas, entry);
6199
6200	trace_ma_write(__func__, &mas, 0, entry);
6201	if (WARN_ON_ONCE(xa_is_advanced(entry)))
6202		return -EINVAL;
6203
6204	if (index > last)
6205		return -EINVAL;
6206
6207	mtree_lock(mt);
6208retry:
6209	mas_wr_store_entry(&wr_mas);
6210	if (mas_nomem(&mas, gfp))
6211		goto retry;
6212
6213	mtree_unlock(mt);
6214	if (mas_is_err(&mas))
6215		return xa_err(mas.node);
6216
6217	return 0;
6218}
6219EXPORT_SYMBOL(mtree_store_range);
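
/*
 * Example (editorial sketch, not part of the original source): the basic API
 * handles locking internally, so a range can be stored and read back without
 * a maple state.  The tree and value are hypothetical.
 *
 *	DEFINE_MTREE(example_tree);
 *	int err;
 *
 *	err = mtree_store_range(&example_tree, 16, 31, xa_mk_value(3), GFP_KERNEL);
 *	if (!err)
 *		WARN_ON(mtree_load(&example_tree, 20) != xa_mk_value(3));
 */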
6220
6221/**
6222 * mtree_store() - Store an entry at a given index.
6223 * @mt: The maple tree
6224 * @index: The index to store the value
6225 * @entry: The entry to store
6226 * @gfp: The GFP_FLAGS to use for allocations
6227 *
6228 * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
6229 * be allocated.
6230 */
6231int mtree_store(struct maple_tree *mt, unsigned long index, void *entry,
6232		 gfp_t gfp)
6233{
6234	return mtree_store_range(mt, index, index, entry, gfp);
6235}
6236EXPORT_SYMBOL(mtree_store);
6237
6238/**
6239 * mtree_insert_range() - Insert an entry at a given range if there is no value.
6240 * @mt: The maple tree
6241 * @first: The start of the range
6242 * @last: The end of the range
6243 * @entry: The entry to store
6244 * @gfp: The GFP_FLAGS to use for allocations.
6245 *
6246 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid
6247 * request, -ENOMEM if memory could not be allocated.
6248 */
6249int mtree_insert_range(struct maple_tree *mt, unsigned long first,
6250		unsigned long last, void *entry, gfp_t gfp)
6251{
6252	MA_STATE(ms, mt, first, last);
6253
6254	if (WARN_ON_ONCE(xa_is_advanced(entry)))
6255		return -EINVAL;
6256
6257	if (first > last)
6258		return -EINVAL;
6259
6260	mtree_lock(mt);
6261retry:
6262	mas_insert(&ms, entry);
6263	if (mas_nomem(&ms, gfp))
6264		goto retry;
6265
6266	mtree_unlock(mt);
6267	if (mas_is_err(&ms))
6268		return xa_err(ms.node);
6269
6270	return 0;
6271}
6272EXPORT_SYMBOL(mtree_insert_range);
6273
6274/**
6275 * mtree_insert() - Insert an entry at a given index if there is no value.
6276 * @mt: The maple tree
6277 * @index: The index to store the value
6278 * @entry: The entry to store
6279 * @gfp: The GFP_FLAGS to use for allocations.
6280 *
6281 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid
6282 * request, -ENOMEM if memory could not be allocated.
6283 */
6284int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry,
6285		 gfp_t gfp)
6286{
6287	return mtree_insert_range(mt, index, index, entry, gfp);
6288}
6289EXPORT_SYMBOL(mtree_insert);
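
/*
 * Example (editorial sketch, not part of the original source): unlike
 * mtree_store(), mtree_insert() refuses to overwrite an occupied index.
 * The tree and values are hypothetical.
 *
 *	err = mtree_insert(&example_tree, 5, xa_mk_value(1), GFP_KERNEL);   0
 *	err = mtree_insert(&example_tree, 5, xa_mk_value(2), GFP_KERNEL);   -EEXIST
 */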
6290
6291int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
6292		void *entry, unsigned long size, unsigned long min,
6293		unsigned long max, gfp_t gfp)
6294{
6295	int ret = 0;
6296
6297	MA_STATE(mas, mt, min, max - size);
6298	if (!mt_is_alloc(mt))
6299		return -EINVAL;
6300
6301	if (WARN_ON_ONCE(mt_is_reserved(entry)))
6302		return -EINVAL;
6303
6304	if (min > max)
6305		return -EINVAL;
6306
6307	if (max < size)
6308		return -EINVAL;
6309
6310	if (!size)
6311		return -EINVAL;
6312
6313	mtree_lock(mt);
6314retry:
6315	mas.offset = 0;
6316	mas.index = min;
6317	mas.last = max - size;
6318	ret = mas_alloc(&mas, entry, size, startp);
6319	if (mas_nomem(&mas, gfp))
6320		goto retry;
6321
6322	mtree_unlock(mt);
6323	return ret;
6324}
6325EXPORT_SYMBOL(mtree_alloc_range);
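
/*
 * Example (editorial sketch, not part of the original source): allocating an
 * empty range from an allocation tree.  The tree, entry and bounds are
 * hypothetical; the tree must be created with MT_FLAGS_ALLOC_RANGE.
 *
 *	struct maple_tree example_tree;
 *	unsigned long start;
 *	int err;
 *
 *	mt_init_flags(&example_tree, MT_FLAGS_ALLOC_RANGE);
 *	err = mtree_alloc_range(&example_tree, &start, xa_mk_value(1), 16, 0,
 *				1023, GFP_KERNEL);
 *	on success, start is the first index of the newly stored 16-slot range
 */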
6326
6327int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
6328		void *entry, unsigned long size, unsigned long min,
6329		unsigned long max, gfp_t gfp)
6330{
6331	int ret = 0;
6332
6333	MA_STATE(mas, mt, min, max - size);
6334	if (!mt_is_alloc(mt))
6335		return -EINVAL;
6336
6337	if (WARN_ON_ONCE(mt_is_reserved(entry)))
6338		return -EINVAL;
6339
6340	if (min >= max)
6341		return -EINVAL;
6342
6343	if (max < size - 1)
6344		return -EINVAL;
6345
6346	if (!size)
6347		return -EINVAL;
6348
6349	mtree_lock(mt);
6350retry:
6351	ret = mas_rev_alloc(&mas, min, max, entry, size, startp);
6352	if (mas_nomem(&mas, gfp))
6353		goto retry;
6354
6355	mtree_unlock(mt);
6356	return ret;
6357}
6358EXPORT_SYMBOL(mtree_alloc_rrange);
6359
6360/**
6361 * mtree_erase() - Find an index and erase the entire range.
6362 * @mt: The maple tree
6363 * @index: The index to erase
6364 *
6365 * Erasing is the same as a walk to an entry then a store of a NULL to that
6366 * ENTIRE range.  In fact, it is implemented as such using the advanced API.
6367 *
6368 * Return: The entry stored at the @index or %NULL
6369 */
6370void *mtree_erase(struct maple_tree *mt, unsigned long index)
6371{
6372	void *entry = NULL;
6373
6374	MA_STATE(mas, mt, index, index);
6375	trace_ma_op(__func__, &mas);
6376
6377	mtree_lock(mt);
6378	entry = mas_erase(&mas);
6379	mtree_unlock(mt);
6380
6381	return entry;
6382}
6383EXPORT_SYMBOL(mtree_erase);
6384
6385/**
6386 * __mt_destroy() - Walk and free all nodes of a locked maple tree.
6387 * @mt: The maple tree
6388 *
6389 * Note: Does not handle locking.
6390 */
6391void __mt_destroy(struct maple_tree *mt)
6392{
6393	void *root = mt_root_locked(mt);
6394
6395	rcu_assign_pointer(mt->ma_root, NULL);
6396	if (xa_is_node(root))
6397		mte_destroy_walk(root, mt);
6398
6399	mt->ma_flags = 0;
6400}
6401EXPORT_SYMBOL_GPL(__mt_destroy);
6402
6403/**
6404 * mtree_destroy() - Destroy a maple tree
6405 * @mt: The maple tree
6406 *
6407 * Frees all resources used by the tree.  Handles locking.
6408 */
6409void mtree_destroy(struct maple_tree *mt)
6410{
6411	mtree_lock(mt);
6412	__mt_destroy(mt);
6413	mtree_unlock(mt);
6414}
6415EXPORT_SYMBOL(mtree_destroy);
6416
6417/**
6418 * mt_find() - Search from the start up until an entry is found.
6419 * @mt: The maple tree
6420 * @index: Pointer which contains the start location of the search
6421 * @max: The maximum value to check
6422 *
6423 * Handles locking.  @index will be incremented to one beyond the range.
6424 *
6425 * Return: The entry at or after the @index or %NULL
6426 */
6427void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max)
6428{
6429	MA_STATE(mas, mt, *index, *index);
6430	void *entry;
6431#ifdef CONFIG_DEBUG_MAPLE_TREE
6432	unsigned long copy = *index;
6433#endif
6434
6435	trace_ma_read(__func__, &mas);
6436
6437	if ((*index) > max)
6438		return NULL;
6439
6440	rcu_read_lock();
6441retry:
6442	entry = mas_state_walk(&mas);
6443	if (mas_is_start(&mas))
6444		goto retry;
6445
6446	if (unlikely(xa_is_zero(entry)))
6447		entry = NULL;
6448
6449	if (entry)
6450		goto unlock;
6451
6452	while (mas_searchable(&mas) && (mas.index < max)) {
6453		entry = mas_next_entry(&mas, max);
6454		if (likely(entry && !xa_is_zero(entry)))
6455			break;
6456	}
6457
6458	if (unlikely(xa_is_zero(entry)))
6459		entry = NULL;
6460unlock:
6461	rcu_read_unlock();
6462	if (likely(entry)) {
6463		*index = mas.last + 1;
6464#ifdef CONFIG_DEBUG_MAPLE_TREE
6465		if ((*index) && (*index) <= copy)
6466			pr_err("index not increased! %lx <= %lx\n",
6467			       *index, copy);
6468		MT_BUG_ON(mt, (*index) && ((*index) <= copy));
6469#endif
6470	}
6471
6472	return entry;
6473}
6474EXPORT_SYMBOL(mt_find);
6475
6476/**
6477 * mt_find_after() - Search from @index until an entry is found.
6478 * @mt: The maple tree
6479 * @index: Pointer which contains the start location of the search
6480 * @max: The maximum value to check
6481 *
6482 * Handles locking, detects wrapping on index == 0
6483 *
6484 * Return: The entry at or after the @index or %NULL
6485 */
6486void *mt_find_after(struct maple_tree *mt, unsigned long *index,
6487		    unsigned long max)
6488{
6489	if (!(*index))
6490		return NULL;
6491
6492	return mt_find(mt, index, max);
6493}
6494EXPORT_SYMBOL(mt_find_after);
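
/*
 * Example (editorial sketch, not part of the original source): the pattern
 * behind the mt_for_each() iterator, built from mt_find() and
 * mt_find_after().  The tree is hypothetical.
 *
 *	unsigned long index = 0;
 *	void *entry;
 *
 *	for (entry = mt_find(&example_tree, &index, ULONG_MAX); entry;
 *	     entry = mt_find_after(&example_tree, &index, ULONG_MAX))
 *		pr_debug("entry %p ends at %lu\n", entry, index - 1);
 */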
6495
6496#ifdef CONFIG_DEBUG_MAPLE_TREE
6497atomic_t maple_tree_tests_run;
6498EXPORT_SYMBOL_GPL(maple_tree_tests_run);
6499atomic_t maple_tree_tests_passed;
6500EXPORT_SYMBOL_GPL(maple_tree_tests_passed);
6501
6502#ifndef __KERNEL__
6503extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int);
6504void mt_set_non_kernel(unsigned int val)
6505{
6506	kmem_cache_set_non_kernel(maple_node_cache, val);
6507}
6508
6509extern unsigned long kmem_cache_get_alloc(struct kmem_cache *);
6510unsigned long mt_get_alloc_size(void)
6511{
6512	return kmem_cache_get_alloc(maple_node_cache);
6513}
6514
6515extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *);
6516void mt_zero_nr_tallocated(void)
6517{
6518	kmem_cache_zero_nr_tallocated(maple_node_cache);
6519}
6520
6521extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *);
6522unsigned int mt_nr_tallocated(void)
6523{
6524	return kmem_cache_nr_tallocated(maple_node_cache);
6525}
6526
6527extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *);
6528unsigned int mt_nr_allocated(void)
6529{
6530	return kmem_cache_nr_allocated(maple_node_cache);
6531}
6532
6533/*
6534 * mas_dead_node() - Check if the maple state is pointing to a dead node.
6535 * @mas: The maple state
6536 * @index: The index to restore in @mas.
6537 *
6538 * Used in test code.
6539 * Return: 1 if @mas has been reset to MAS_START, 0 otherwise.
6540 */
6541static inline int mas_dead_node(struct ma_state *mas, unsigned long index)
6542{
6543	if (unlikely(!mas_searchable(mas) || mas_is_start(mas)))
6544		return 0;
6545
6546	if (likely(!mte_dead_node(mas->node)))
6547		return 0;
6548
6549	mas_rewalk(mas, index);
6550	return 1;
6551}
6552
6553void mt_cache_shrink(void)
6554{
6555}
6556#else
6557/*
6558 * mt_cache_shrink() - For testing, don't use this.
6559 *
6560 * Certain testcases can trigger an OOM when combined with other memory
6561 * debugging configuration options.  This function is used to reduce the
6562 * possibility of an out of memory event due to kmem_cache objects remaining
6563 * around for longer than usual.
6564 */
6565void mt_cache_shrink(void)
6566{
6567	kmem_cache_shrink(maple_node_cache);
6568
6569}
6570EXPORT_SYMBOL_GPL(mt_cache_shrink);
6571
6572#endif /* not defined __KERNEL__ */
6573/*
6574 * mas_get_slot() - Get the entry in the maple state node stored at @offset.
6575 * @mas: The maple state
6576 * @offset: The offset into the slot array to fetch.
6577 *
6578 * Return: The entry stored at @offset.
6579 */
6580static inline struct maple_enode *mas_get_slot(struct ma_state *mas,
6581		unsigned char offset)
6582{
6583	return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)),
6584			offset);
6585}
6586
6587
6588/*
6589 * mas_first_entry() - Go to the first leaf and find the first entry.
6590 * @mas: the maple state.
6591 * @mn: the node backing @mas->node.
6592 * @limit: the maximum index to check.
6593 * @mt: the node type of @mas->node.
6594 *
6595 * Sets mas->offset to the offset of the entry and mas->index to the start of
6596 * its range.  Return: The first entry, or %NULL with @mas->node set to MAS_NONE.
6597 */
6598static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn,
6599		unsigned long limit, enum maple_type mt)
6600
6601{
6602	unsigned long max;
6603	unsigned long *pivots;
6604	void __rcu **slots;
6605	void *entry = NULL;
6606
6607	mas->index = mas->min;
6608	if (mas->index > limit)
6609		goto none;
6610
6611	max = mas->max;
6612	mas->offset = 0;
6613	while (likely(!ma_is_leaf(mt))) {
6614		MT_BUG_ON(mas->tree, mte_dead_node(mas->node));
6615		slots = ma_slots(mn, mt);
6616		pivots = ma_pivots(mn, mt);
6617		max = pivots[0];
6618		entry = mas_slot(mas, slots, 0);
6619		if (unlikely(ma_dead_node(mn)))
6620			return NULL;
6621		mas->node = entry;
6622		mn = mas_mn(mas);
6623		mt = mte_node_type(mas->node);
6624	}
6625	MT_BUG_ON(mas->tree, mte_dead_node(mas->node));
6626
6627	mas->max = max;
6628	slots = ma_slots(mn, mt);
6629	entry = mas_slot(mas, slots, 0);
6630	if (unlikely(ma_dead_node(mn)))
6631		return NULL;
6632
6633	/* Slot 0 or 1 must be set */
6634	if (mas->index > limit)
6635		goto none;
6636
6637	if (likely(entry))
6638		return entry;
6639
6640	pivots = ma_pivots(mn, mt);
6641	mas->index = pivots[0] + 1;
6642	mas->offset = 1;
6643	entry = mas_slot(mas, slots, 1);
6644	if (unlikely(ma_dead_node(mn)))
6645		return NULL;
6646
6647	if (mas->index > limit)
6648		goto none;
6649
6650	if (likely(entry))
6651		return entry;
6652
6653none:
6654	if (likely(!ma_dead_node(mn)))
6655		mas->node = MAS_NONE;
6656	return NULL;
6657}
6658
6659/* Depth first search, post-order */
6660static void mas_dfs_postorder(struct ma_state *mas, unsigned long max)
6661{
6662
6663	struct maple_enode *p = MAS_NONE, *mn = mas->node;
6664	unsigned long p_min, p_max;
6665
6666	mas_next_node(mas, mas_mn(mas), max);
6667	if (!mas_is_none(mas))
6668		return;
6669
6670	if (mte_is_root(mn))
6671		return;
6672
6673	mas->node = mn;
6674	mas_ascend(mas);
6675	while (mas->node != MAS_NONE) {
6676		p = mas->node;
6677		p_min = mas->min;
6678		p_max = mas->max;
6679		mas_prev_node(mas, 0);
6680	}
6681
6682	if (p == MAS_NONE)
6683		return;
6684
6685	mas->node = p;
6686	mas->max = p_max;
6687	mas->min = p_min;
6688}
6689
6690/* Tree validations */
6691static void mt_dump_node(const struct maple_tree *mt, void *entry,
6692		unsigned long min, unsigned long max, unsigned int depth);
6693static void mt_dump_range(unsigned long min, unsigned long max,
6694			  unsigned int depth)
6695{
6696	static const char spaces[] = "                                ";
6697
6698	if (min == max)
6699		pr_info("%.*s%lu: ", depth * 2, spaces, min);
6700	else
6701		pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max);
6702}
6703
6704static void mt_dump_entry(void *entry, unsigned long min, unsigned long max,
6705			  unsigned int depth)
6706{
6707	mt_dump_range(min, max, depth);
6708
6709	if (xa_is_value(entry))
6710		pr_cont("value %ld (0x%lx) [%p]\n", xa_to_value(entry),
6711				xa_to_value(entry), entry);
6712	else if (xa_is_zero(entry))
6713		pr_cont("zero (%ld)\n", xa_to_internal(entry));
6714	else if (mt_is_reserved(entry))
6715		pr_cont("UNKNOWN ENTRY (%p)\n", entry);
6716	else
6717		pr_cont("%p\n", entry);
6718}
6719
6720static void mt_dump_range64(const struct maple_tree *mt, void *entry,
6721			unsigned long min, unsigned long max, unsigned int depth)
6722{
6723	struct maple_range_64 *node = &mte_to_node(entry)->mr64;
6724	bool leaf = mte_is_leaf(entry);
6725	unsigned long first = min;
6726	int i;
6727
6728	pr_cont(" contents: ");
6729	for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++)
6730		pr_cont("%p %lu ", node->slot[i], node->pivot[i]);
6731	pr_cont("%p\n", node->slot[i]);
6732	for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) {
6733		unsigned long last = max;
6734
6735		if (i < (MAPLE_RANGE64_SLOTS - 1))
6736			last = node->pivot[i];
6737		else if (!node->slot[i] && max != mt_max[mte_node_type(entry)])
6738			break;
6739		if (last == 0 && i > 0)
6740			break;
6741		if (leaf)
6742			mt_dump_entry(mt_slot(mt, node->slot, i),
6743					first, last, depth + 1);
6744		else if (node->slot[i])
6745			mt_dump_node(mt, mt_slot(mt, node->slot, i),
6746					first, last, depth + 1);
6747
6748		if (last == max)
6749			break;
6750		if (last > max) {
6751			pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n",
6752					node, last, max, i);
6753			break;
6754		}
6755		first = last + 1;
6756	}
6757}
6758
6759static void mt_dump_arange64(const struct maple_tree *mt, void *entry,
6760			unsigned long min, unsigned long max, unsigned int depth)
6761{
6762	struct maple_arange_64 *node = &mte_to_node(entry)->ma64;
6763	bool leaf = mte_is_leaf(entry);
6764	unsigned long first = min;
6765	int i;
6766
6767	pr_cont(" contents: ");
6768	for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++)
6769		pr_cont("%lu ", node->gap[i]);
6770	pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap);
6771	for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++)
6772		pr_cont("%p %lu ", node->slot[i], node->pivot[i]);
6773	pr_cont("%p\n", node->slot[i]);
6774	for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) {
6775		unsigned long last = max;
6776
6777		if (i < (MAPLE_ARANGE64_SLOTS - 1))
6778			last = node->pivot[i];
6779		else if (!node->slot[i])
6780			break;
6781		if (last == 0 && i > 0)
6782			break;
6783		if (leaf)
6784			mt_dump_entry(mt_slot(mt, node->slot, i),
6785					first, last, depth + 1);
6786		else if (node->slot[i])
6787			mt_dump_node(mt, mt_slot(mt, node->slot, i),
6788					first, last, depth + 1);
6789
6790		if (last == max)
6791			break;
6792		if (last > max) {
6793			pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n",
6794					node, last, max, i);
6795			break;
6796		}
6797		first = last + 1;
6798	}
6799}
6800
6801static void mt_dump_node(const struct maple_tree *mt, void *entry,
6802		unsigned long min, unsigned long max, unsigned int depth)
6803{
6804	struct maple_node *node = mte_to_node(entry);
6805	unsigned int type = mte_node_type(entry);
6806	unsigned int i;
6807
6808	mt_dump_range(min, max, depth);
6809
6810	pr_cont("node %p depth %d type %d parent %p", node, depth, type,
6811			node ? node->parent : NULL);
6812	switch (type) {
6813	case maple_dense:
6814		pr_cont("\n");
6815		for (i = 0; i < MAPLE_NODE_SLOTS; i++) {
6816			if (min + i > max)
6817				pr_cont("OUT OF RANGE: ");
6818			mt_dump_entry(mt_slot(mt, node->slot, i),
6819					min + i, min + i, depth);
6820		}
6821		break;
6822	case maple_leaf_64:
6823	case maple_range_64:
6824		mt_dump_range64(mt, entry, min, max, depth);
6825		break;
6826	case maple_arange_64:
6827		mt_dump_arange64(mt, entry, min, max, depth);
6828		break;
6829
6830	default:
6831		pr_cont(" UNKNOWN TYPE\n");
6832	}
6833}
6834
6835void mt_dump(const struct maple_tree *mt)
6836{
6837	void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt));
6838
6839	pr_info("maple_tree(%p) flags %X, height %u root %p\n",
6840		 mt, mt->ma_flags, mt_height(mt), entry);
6841	if (!xa_is_node(entry))
6842		mt_dump_entry(entry, 0, 0, 0);
6843	else if (entry)
6844		mt_dump_node(mt, entry, 0, mt_max[mte_node_type(entry)], 0);
6845}
6846EXPORT_SYMBOL_GPL(mt_dump);
6847
6848/*
6849 * Calculate the maximum gap in a node and check if that's what is reported in
6850 * the parent (unless root).
6851 */
6852static void mas_validate_gaps(struct ma_state *mas)
6853{
6854	struct maple_enode *mte = mas->node;
6855	struct maple_node *p_mn;
6856	unsigned long gap = 0, max_gap = 0;
6857	unsigned long p_end, p_start = mas->min;
6858	unsigned char p_slot;
6859	unsigned long *gaps = NULL;
6860	unsigned long *pivots = ma_pivots(mte_to_node(mte), mte_node_type(mte));
6861	int i;
6862
6863	if (ma_is_dense(mte_node_type(mte))) {
6864		for (i = 0; i < mt_slot_count(mte); i++) {
6865			if (mas_get_slot(mas, i)) {
6866				if (gap > max_gap)
6867					max_gap = gap;
6868				gap = 0;
6869				continue;
6870			}
6871			gap++;
6872		}
6873		goto counted;
6874	}
6875
6876	gaps = ma_gaps(mte_to_node(mte), mte_node_type(mte));
6877	for (i = 0; i < mt_slot_count(mte); i++) {
6878		p_end = mas_logical_pivot(mas, pivots, i, mte_node_type(mte));
6879
6880		if (!gaps) {
6881			if (mas_get_slot(mas, i)) {
6882				gap = 0;
6883				goto not_empty;
6884			}
6885
6886			gap += p_end - p_start + 1;
6887		} else {
6888			void *entry = mas_get_slot(mas, i);
6889
6890			gap = gaps[i];
6891			if (!entry) {
6892				if (gap != p_end - p_start + 1) {
6893					pr_err("%p[%u] -> %p %lu != %lu - %lu + 1\n",
6894						mas_mn(mas), i,
6895						mas_get_slot(mas, i), gap,
6896						p_end, p_start);
6897					mt_dump(mas->tree);
6898
6899					MT_BUG_ON(mas->tree,
6900						gap != p_end - p_start + 1);
6901				}
6902			} else {
6903				if (gap > p_end - p_start + 1) {
6904					pr_err("%p[%u] %lu >= %lu - %lu + 1 (%lu)\n",
6905					mas_mn(mas), i, gap, p_end, p_start,
6906					p_end - p_start + 1);
6907					MT_BUG_ON(mas->tree,
6908						gap > p_end - p_start + 1);
6909				}
6910			}
6911		}
6912
6913		if (gap > max_gap)
6914			max_gap = gap;
6915not_empty:
6916		p_start = p_end + 1;
6917		if (p_end >= mas->max)
6918			break;
6919	}
6920
6921counted:
6922	if (mte_is_root(mte))
6923		return;
6924
6925	p_slot = mte_parent_slot(mas->node);
6926	p_mn = mte_parent(mte);
6927	MT_BUG_ON(mas->tree, max_gap > mas->max);
6928	if (ma_gaps(p_mn, mas_parent_enum(mas, mte))[p_slot] != max_gap) {
6929		pr_err("gap %p[%u] != %lu\n", p_mn, p_slot, max_gap);
6930		mt_dump(mas->tree);
6931	}
6932
6933	MT_BUG_ON(mas->tree,
6934		  ma_gaps(p_mn, mas_parent_enum(mas, mte))[p_slot] != max_gap);
6935}
6936
6937static void mas_validate_parent_slot(struct ma_state *mas)
6938{
6939	struct maple_node *parent;
6940	struct maple_enode *node;
6941	enum maple_type p_type = mas_parent_enum(mas, mas->node);
6942	unsigned char p_slot = mte_parent_slot(mas->node);
6943	void __rcu **slots;
6944	int i;
6945
6946	if (mte_is_root(mas->node))
6947		return;
6948
6949	parent = mte_parent(mas->node);
6950	slots = ma_slots(parent, p_type);
6951	MT_BUG_ON(mas->tree, mas_mn(mas) == parent);
6952
6953	/* Check prev/next parent slot for duplicate node entry */
6954
6955	for (i = 0; i < mt_slots[p_type]; i++) {
6956		node = mas_slot(mas, slots, i);
6957		if (i == p_slot) {
6958			if (node != mas->node)
6959				pr_err("parent %p[%u] does not have %p\n",
6960					parent, i, mas_mn(mas));
6961			MT_BUG_ON(mas->tree, node != mas->node);
6962		} else if (node == mas->node) {
6963			pr_err("Invalid child %p at parent %p[%u] p_slot %u\n",
6964			       mas_mn(mas), parent, i, p_slot);
6965			MT_BUG_ON(mas->tree, node == mas->node);
6966		}
6967	}
6968}
6969
6970static void mas_validate_child_slot(struct ma_state *mas)
6971{
6972	enum maple_type type = mte_node_type(mas->node);
6973	void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
6974	unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type);
6975	struct maple_enode *child;
6976	unsigned char i;
6977
6978	if (mte_is_leaf(mas->node))
6979		return;
6980
6981	for (i = 0; i < mt_slots[type]; i++) {
6982		child = mas_slot(mas, slots, i);
6983		if (!pivots[i] || pivots[i] == mas->max)
6984			break;
6985
6986		if (!child)
6987			break;
6988
6989		if (mte_parent_slot(child) != i) {
6990			pr_err("Slot error at %p[%u]: child %p has pslot %u\n",
6991			       mas_mn(mas), i, mte_to_node(child),
6992			       mte_parent_slot(child));
6993			MT_BUG_ON(mas->tree, 1);
6994		}
6995
6996		if (mte_parent(child) != mte_to_node(mas->node)) {
6997			pr_err("child %p has parent %p not %p\n",
6998			       mte_to_node(child), mte_parent(child),
6999			       mte_to_node(mas->node));
7000			MT_BUG_ON(mas->tree, 1);
7001		}
7002	}
7003}
7004
7005/*
7006 * Validate all pivots are within mas->min and mas->max.
7007 */
7008static void mas_validate_limits(struct ma_state *mas)
7009{
7010	int i;
7011	unsigned long prev_piv = 0;
7012	enum maple_type type = mte_node_type(mas->node);
7013	void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
7014	unsigned long *pivots = ma_pivots(mas_mn(mas), type);
7015
7016	/* all limits are fine here. */
7017	if (mte_is_root(mas->node))
7018		return;
7019
7020	for (i = 0; i < mt_slots[type]; i++) {
7021		unsigned long piv;
7022
7023		piv = mas_safe_pivot(mas, pivots, i, type);
7024
7025		if (!piv && (i != 0))
7026			break;
7027
7028		if (!mte_is_leaf(mas->node)) {
7029			void *entry = mas_slot(mas, slots, i);
7030
7031			if (!entry)
7032				pr_err("%p[%u] cannot be null\n",
7033				       mas_mn(mas), i);
7034
7035			MT_BUG_ON(mas->tree, !entry);
7036		}
7037
7038		if (prev_piv > piv) {
7039			pr_err("%p[%u] piv %lu < prev_piv %lu\n",
7040				mas_mn(mas), i, piv, prev_piv);
7041			MT_BUG_ON(mas->tree, piv < prev_piv);
7042		}
7043
7044		if (piv < mas->min) {
7045			pr_err("%p[%u] %lu < %lu\n", mas_mn(mas), i,
7046				piv, mas->min);
7047			MT_BUG_ON(mas->tree, piv < mas->min);
7048		}
7049		if (piv > mas->max) {
7050			pr_err("%p[%u] %lu > %lu\n", mas_mn(mas), i,
7051				piv, mas->max);
7052			MT_BUG_ON(mas->tree, piv > mas->max);
7053		}
7054		prev_piv = piv;
7055		if (piv == mas->max)
7056			break;
7057	}
7058	for (i += 1; i < mt_slots[type]; i++) {
7059		void *entry = mas_slot(mas, slots, i);
7060
7061		if (entry && (i != mt_slots[type] - 1)) {
7062			pr_err("%p[%u] should not have entry %p\n", mas_mn(mas),
7063			       i, entry);
7064			MT_BUG_ON(mas->tree, entry != NULL);
7065		}
7066
7067		if (i < mt_pivots[type]) {
7068			unsigned long piv = pivots[i];
7069
7070			if (!piv)
7071				continue;
7072
7073			pr_err("%p[%u] should not have piv %lu\n",
7074			       mas_mn(mas), i, piv);
7075			MT_BUG_ON(mas->tree, i < mt_pivots[type] - 1);
7076		}
7077	}
7078}
7079
7080static void mt_validate_nulls(struct maple_tree *mt)
7081{
7082	void *entry, *last = (void *)1;
7083	unsigned char offset = 0;
7084	void __rcu **slots;
7085	MA_STATE(mas, mt, 0, 0);
7086
7087	mas_start(&mas);
7088	if (mas_is_none(&mas) || (mas.node == MAS_ROOT))
7089		return;
7090
7091	while (!mte_is_leaf(mas.node))
7092		mas_descend(&mas);
7093
7094	slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node));
7095	do {
7096		entry = mas_slot(&mas, slots, offset);
7097		if (!last && !entry) {
7098			pr_err("Sequential nulls end at %p[%u]\n",
7099				mas_mn(&mas), offset);
7100		}
7101		MT_BUG_ON(mt, !last && !entry);
7102		last = entry;
7103		if (offset == mas_data_end(&mas)) {
7104			mas_next_node(&mas, mas_mn(&mas), ULONG_MAX);
7105			if (mas_is_none(&mas))
7106				return;
7107			offset = 0;
7108			slots = ma_slots(mte_to_node(mas.node),
7109					 mte_node_type(mas.node));
7110		} else {
7111			offset++;
7112		}
7113
7114	} while (!mas_is_none(&mas));
7115}
7116
7117/*
7118 * validate a maple tree by checking:
7119 * 1. The limits (pivots are within mas->min to mas->max)
7120 * 2. The gap is correctly set in the parents
7121 */
7122void mt_validate(struct maple_tree *mt)
7123{
7124	unsigned char end;
7125
7126	MA_STATE(mas, mt, 0, 0);
7127	rcu_read_lock();
7128	mas_start(&mas);
7129	if (!mas_searchable(&mas))
7130		goto done;
7131
7132	mas_first_entry(&mas, mas_mn(&mas), ULONG_MAX, mte_node_type(mas.node));
7133	while (!mas_is_none(&mas)) {
7134		MT_BUG_ON(mas.tree, mte_dead_node(mas.node));
7135		if (!mte_is_root(mas.node)) {
7136			end = mas_data_end(&mas);
7137			if ((end < mt_min_slot_count(mas.node)) &&
7138			    (mas.max != ULONG_MAX)) {
7139				pr_err("Invalid size %u of %p\n", end,
7140				mas_mn(&mas));
7141				MT_BUG_ON(mas.tree, 1);
7142			}
7143
7144		}
7145		mas_validate_parent_slot(&mas);
7146		mas_validate_child_slot(&mas);
7147		mas_validate_limits(&mas);
7148		if (mt_is_alloc(mt))
7149			mas_validate_gaps(&mas);
7150		mas_dfs_postorder(&mas, ULONG_MAX);
7151	}
7152	mt_validate_nulls(mt);
7153done:
7154	rcu_read_unlock();
7155
7156}
7157EXPORT_SYMBOL_GPL(mt_validate);
7158
7159#endif /* CONFIG_DEBUG_MAPLE_TREE */