   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Resizable, Scalable, Concurrent Hash Table
   4 *
   5 * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
   6 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
   7 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
   8 *
   9 * Code partially derived from nft_hash
  10 * Rewritten with rehash code from br_multicast plus single list
  11 * pointer as suggested by Josh Triplett
  12 */
  13
  14#include <linux/atomic.h>
  15#include <linux/kernel.h>
  16#include <linux/init.h>
  17#include <linux/log2.h>
  18#include <linux/sched.h>
  19#include <linux/rculist.h>
  20#include <linux/slab.h>
  21#include <linux/vmalloc.h>
  22#include <linux/mm.h>
  23#include <linux/jhash.h>
  24#include <linux/random.h>
  25#include <linux/rhashtable.h>
  26#include <linux/err.h>
  27#include <linux/export.h>
  28
  29#define HASH_DEFAULT_SIZE	64UL
  30#define HASH_MIN_SIZE		4U
  31
  32union nested_table {
  33	union nested_table __rcu *table;
  34	struct rhash_lock_head *bucket;
  35};
  36
  37static u32 head_hashfn(struct rhashtable *ht,
  38		       const struct bucket_table *tbl,
  39		       const struct rhash_head *he)
  40{
  41	return rht_head_hashfn(ht, tbl, he, ht->p);
  42}
  43
  44#ifdef CONFIG_PROVE_LOCKING
  45#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
  46
  47int lockdep_rht_mutex_is_held(struct rhashtable *ht)
  48{
  49	return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
  50}
  51EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
  52
  53int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
  54{
  55	if (!debug_locks)
  56		return 1;
  57	if (unlikely(tbl->nest))
  58		return 1;
  59	return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]);
  60}
  61EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
  62#else
  63#define ASSERT_RHT_MUTEX(HT)
  64#endif
  65
  66static void nested_table_free(union nested_table *ntbl, unsigned int size)
  67{
  68	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
  69	const unsigned int len = 1 << shift;
  70	unsigned int i;
  71
  72	ntbl = rcu_dereference_raw(ntbl->table);
  73	if (!ntbl)
  74		return;
  75
  76	if (size > len) {
  77		size >>= shift;
  78		for (i = 0; i < len; i++)
  79			nested_table_free(ntbl + i, size);
  80	}
  81
  82	kfree(ntbl);
  83}
  84
  85static void nested_bucket_table_free(const struct bucket_table *tbl)
  86{
  87	unsigned int size = tbl->size >> tbl->nest;
  88	unsigned int len = 1 << tbl->nest;
  89	union nested_table *ntbl;
  90	unsigned int i;
  91
  92	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
  93
  94	for (i = 0; i < len; i++)
  95		nested_table_free(ntbl + i, size);
  96
  97	kfree(ntbl);
  98}
  99
 100static void bucket_table_free(const struct bucket_table *tbl)
 101{
 102	if (tbl->nest)
 103		nested_bucket_table_free(tbl);
 104
 105	kvfree(tbl);
 106}
 107
 108static void bucket_table_free_rcu(struct rcu_head *head)
 109{
 110	bucket_table_free(container_of(head, struct bucket_table, rcu));
 111}
 112
 113static union nested_table *nested_table_alloc(struct rhashtable *ht,
 114					      union nested_table __rcu **prev,
 115					      bool leaf)
 116{
 117	union nested_table *ntbl;
 118	int i;
 119
 120	ntbl = rcu_dereference(*prev);
 121	if (ntbl)
 122		return ntbl;
 123
 124	ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);
 125
 126	if (ntbl && leaf) {
 127		for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++)
 128			INIT_RHT_NULLS_HEAD(ntbl[i].bucket);
 129	}
 130
 131	if (cmpxchg((union nested_table **)prev, NULL, ntbl) == NULL)
 132		return ntbl;
 133	/* Raced with another thread. */
 134	kfree(ntbl);
 135	return rcu_dereference(*prev);
 136}
 137
 138static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
 139						      size_t nbuckets,
 140						      gfp_t gfp)
 141{
 142	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
 143	struct bucket_table *tbl;
 144	size_t size;
 145
 146	if (nbuckets < (1 << (shift + 1)))
 147		return NULL;
 148
 149	size = sizeof(*tbl) + sizeof(tbl->buckets[0]);
 150
 151	tbl = kzalloc(size, gfp);
 152	if (!tbl)
 153		return NULL;
 154
 155	if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
 156				false)) {
 157		kfree(tbl);
 158		return NULL;
 159	}
 160
 161	tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;
 162
 163	return tbl;
 164}
 165
 166static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 167					       size_t nbuckets,
 168					       gfp_t gfp)
 169{
 170	struct bucket_table *tbl = NULL;
 171	size_t size;
 172	int i;
 173	static struct lock_class_key __key;
 174
 175	tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp);
 176
 177	size = nbuckets;
 178
 179	if (tbl == NULL && (gfp & ~__GFP_NOFAIL) != GFP_KERNEL) {
 180		tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
 181		nbuckets = 0;
 182	}
 183
 184	if (tbl == NULL)
 185		return NULL;
 186
 187	lockdep_init_map(&tbl->dep_map, "rhashtable_bucket", &__key, 0);
 188
 189	tbl->size = size;
 190
 191	rcu_head_init(&tbl->rcu);
 192	INIT_LIST_HEAD(&tbl->walkers);
 193
 194	tbl->hash_rnd = get_random_u32();
 195
 196	for (i = 0; i < nbuckets; i++)
 197		INIT_RHT_NULLS_HEAD(tbl->buckets[i]);
 198
 199	return tbl;
 200}
 201
 202static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
 203						  struct bucket_table *tbl)
 204{
 205	struct bucket_table *new_tbl;
 206
 207	do {
 208		new_tbl = tbl;
 209		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 210	} while (tbl);
 211
 212	return new_tbl;
 213}
 214
 215static int rhashtable_rehash_one(struct rhashtable *ht,
 216				 struct rhash_lock_head **bkt,
 217				 unsigned int old_hash)
 218{
 219	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 220	struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
 221	int err = -EAGAIN;
 222	struct rhash_head *head, *next, *entry;
 223	struct rhash_head __rcu **pprev = NULL;
 224	unsigned int new_hash;
 225
 226	if (new_tbl->nest)
 227		goto out;
 228
 229	err = -ENOENT;
 230
 231	rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash),
 232			  old_tbl, old_hash) {
 233		err = 0;
 234		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
 235
 236		if (rht_is_a_nulls(next))
 237			break;
 238
 239		pprev = &entry->next;
 240	}
 241
 242	if (err)
 243		goto out;
 244
 245	new_hash = head_hashfn(ht, new_tbl, entry);
 246
 247	rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING);
 248
 249	head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash);
 250
 251	RCU_INIT_POINTER(entry->next, head);
 252
 253	rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry);
 254
 255	if (pprev)
 256		rcu_assign_pointer(*pprev, next);
 257	else
  258		/* Need to preserve the bit lock. */
 259		rht_assign_locked(bkt, next);
 260
 261out:
 262	return err;
 263}
 264
 265static int rhashtable_rehash_chain(struct rhashtable *ht,
 266				    unsigned int old_hash)
 267{
 268	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 269	struct rhash_lock_head **bkt = rht_bucket_var(old_tbl, old_hash);
 270	int err;
 271
 272	if (!bkt)
 273		return 0;
 274	rht_lock(old_tbl, bkt);
 275
 276	while (!(err = rhashtable_rehash_one(ht, bkt, old_hash)))
 277		;
 278
 279	if (err == -ENOENT)
 280		err = 0;
 281	rht_unlock(old_tbl, bkt);
 282
 283	return err;
 284}
 285
 286static int rhashtable_rehash_attach(struct rhashtable *ht,
 287				    struct bucket_table *old_tbl,
 288				    struct bucket_table *new_tbl)
 289{
 290	/* Make insertions go into the new, empty table right away. Deletions
 291	 * and lookups will be attempted in both tables until we synchronize.
 292	 * As cmpxchg() provides strong barriers, we do not need
 293	 * rcu_assign_pointer().
 294	 */
 295
 296	if (cmpxchg((struct bucket_table **)&old_tbl->future_tbl, NULL,
 297		    new_tbl) != NULL)
 298		return -EEXIST;
 299
 300	return 0;
 301}
 302
 303static int rhashtable_rehash_table(struct rhashtable *ht)
 304{
 305	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 306	struct bucket_table *new_tbl;
 307	struct rhashtable_walker *walker;
 308	unsigned int old_hash;
 309	int err;
 310
 311	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
 312	if (!new_tbl)
 313		return 0;
 314
 315	for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
 316		err = rhashtable_rehash_chain(ht, old_hash);
 317		if (err)
 318			return err;
 319		cond_resched();
 320	}
 321
 322	/* Publish the new table pointer. */
 323	rcu_assign_pointer(ht->tbl, new_tbl);
 324
 325	spin_lock(&ht->lock);
 326	list_for_each_entry(walker, &old_tbl->walkers, list)
 327		walker->tbl = NULL;
 328
 329	/* Wait for readers. All new readers will see the new
 330	 * table, and thus no references to the old table will
 331	 * remain.
 332	 * We do this inside the locked region so that
 333	 * rhashtable_walk_stop() can use rcu_head_after_call_rcu()
 334	 * to check if it should not re-link the table.
 335	 */
 336	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
 337	spin_unlock(&ht->lock);
 338
 339	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
 340}
 341
 342static int rhashtable_rehash_alloc(struct rhashtable *ht,
 343				   struct bucket_table *old_tbl,
 344				   unsigned int size)
 345{
 346	struct bucket_table *new_tbl;
 347	int err;
 348
 349	ASSERT_RHT_MUTEX(ht);
 350
 351	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
 352	if (new_tbl == NULL)
 353		return -ENOMEM;
 354
 355	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
 356	if (err)
 357		bucket_table_free(new_tbl);
 358
 359	return err;
 360}
 361
 362/**
 363 * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
 364 * @ht:		the hash table to shrink
 365 *
  366 * This function shrinks the hash table to fit, i.e., to the smallest
  367 * size that would not cause it to expand right away automatically.
 368 *
 369 * The caller must ensure that no concurrent resizing occurs by holding
 370 * ht->mutex.
 371 *
 372 * The caller must ensure that no concurrent table mutations take place.
 373 * It is however valid to have concurrent lookups if they are RCU protected.
 374 *
 375 * It is valid to have concurrent insertions and deletions protected by per
 376 * bucket locks or concurrent RCU protected lookups and traversals.
 377 */
 378static int rhashtable_shrink(struct rhashtable *ht)
 379{
 380	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 381	unsigned int nelems = atomic_read(&ht->nelems);
 382	unsigned int size = 0;
 383
 384	if (nelems)
 385		size = roundup_pow_of_two(nelems * 3 / 2);
 386	if (size < ht->p.min_size)
 387		size = ht->p.min_size;
 388
 389	if (old_tbl->size <= size)
 390		return 0;
 391
 392	if (rht_dereference(old_tbl->future_tbl, ht))
 393		return -EEXIST;
 394
 395	return rhashtable_rehash_alloc(ht, old_tbl, size);
 396}
 397
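/* A worked example of the sizing rule above (illustrative, not from the
 * original source): with nelems = 100, the target is
 * roundup_pow_of_two(100 * 3 / 2) = roundup_pow_of_two(150) = 256 buckets,
 * i.e. about 39% load -- below the 75% grow trigger and above the 30%
 * shrink trigger, so the table settles at this size.
 */
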
 398static void rht_deferred_worker(struct work_struct *work)
 399{
 400	struct rhashtable *ht;
 401	struct bucket_table *tbl;
 402	int err = 0;
 403
 404	ht = container_of(work, struct rhashtable, run_work);
 405	mutex_lock(&ht->mutex);
 406
 407	tbl = rht_dereference(ht->tbl, ht);
 408	tbl = rhashtable_last_table(ht, tbl);
 409
 410	if (rht_grow_above_75(ht, tbl))
 411		err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
 412	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
 413		err = rhashtable_shrink(ht);
 414	else if (tbl->nest)
 415		err = rhashtable_rehash_alloc(ht, tbl, tbl->size);
 416
 417	if (!err || err == -EEXIST) {
 418		int nerr;
 419
 420		nerr = rhashtable_rehash_table(ht);
 421		err = err ?: nerr;
 422	}
 423
 424	mutex_unlock(&ht->mutex);
 425
 426	if (err)
 427		schedule_work(&ht->run_work);
 428}
 429
 430static int rhashtable_insert_rehash(struct rhashtable *ht,
 431				    struct bucket_table *tbl)
 432{
 433	struct bucket_table *old_tbl;
 434	struct bucket_table *new_tbl;
 435	unsigned int size;
 436	int err;
 437
 438	old_tbl = rht_dereference_rcu(ht->tbl, ht);
 439
 440	size = tbl->size;
 441
 442	err = -EBUSY;
 443
 444	if (rht_grow_above_75(ht, tbl))
 445		size *= 2;
 446	/* Do not schedule more than one rehash */
 447	else if (old_tbl != tbl)
 448		goto fail;
 449
 450	err = -ENOMEM;
 451
 452	new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC | __GFP_NOWARN);
 453	if (new_tbl == NULL)
 454		goto fail;
 455
 456	err = rhashtable_rehash_attach(ht, tbl, new_tbl);
 457	if (err) {
 458		bucket_table_free(new_tbl);
 459		if (err == -EEXIST)
 460			err = 0;
 461	} else
 462		schedule_work(&ht->run_work);
 463
 464	return err;
 465
 466fail:
 467	/* Do not fail the insert if someone else did a rehash. */
 468	if (likely(rcu_access_pointer(tbl->future_tbl)))
 469		return 0;
 470
 471	/* Schedule async rehash to retry allocation in process context. */
 472	if (err == -ENOMEM)
 473		schedule_work(&ht->run_work);
 474
 475	return err;
 476}
 477
 478static void *rhashtable_lookup_one(struct rhashtable *ht,
 479				   struct rhash_lock_head **bkt,
 480				   struct bucket_table *tbl, unsigned int hash,
 481				   const void *key, struct rhash_head *obj)
 482{
 483	struct rhashtable_compare_arg arg = {
 484		.ht = ht,
 485		.key = key,
 486	};
 487	struct rhash_head __rcu **pprev = NULL;
 488	struct rhash_head *head;
 489	int elasticity;
 490
 491	elasticity = RHT_ELASTICITY;
 492	rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
 493		struct rhlist_head *list;
 494		struct rhlist_head *plist;
 495
 496		elasticity--;
 497		if (!key ||
 498		    (ht->p.obj_cmpfn ?
 499		     ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
 500		     rhashtable_compare(&arg, rht_obj(ht, head)))) {
 501			pprev = &head->next;
 502			continue;
 503		}
 504
 505		if (!ht->rhlist)
 506			return rht_obj(ht, head);
 507
 508		list = container_of(obj, struct rhlist_head, rhead);
 509		plist = container_of(head, struct rhlist_head, rhead);
 510
 511		RCU_INIT_POINTER(list->next, plist);
 512		head = rht_dereference_bucket(head->next, tbl, hash);
 513		RCU_INIT_POINTER(list->rhead.next, head);
 514		if (pprev)
 515			rcu_assign_pointer(*pprev, obj);
 516		else
 517			/* Need to preserve the bit lock */
 518			rht_assign_locked(bkt, obj);
 519
 520		return NULL;
 521	}
 522
 523	if (elasticity <= 0)
 524		return ERR_PTR(-EAGAIN);
 525
 526	return ERR_PTR(-ENOENT);
 527}
 528
 529static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
 530						  struct rhash_lock_head **bkt,
 531						  struct bucket_table *tbl,
 532						  unsigned int hash,
 533						  struct rhash_head *obj,
 534						  void *data)
 535{
 536	struct bucket_table *new_tbl;
 537	struct rhash_head *head;
 538
 539	if (!IS_ERR_OR_NULL(data))
 540		return ERR_PTR(-EEXIST);
 541
 542	if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
 543		return ERR_CAST(data);
 544
 545	new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 546	if (new_tbl)
 547		return new_tbl;
 548
 549	if (PTR_ERR(data) != -ENOENT)
 550		return ERR_CAST(data);
 551
 552	if (unlikely(rht_grow_above_max(ht, tbl)))
 553		return ERR_PTR(-E2BIG);
 554
 555	if (unlikely(rht_grow_above_100(ht, tbl)))
 556		return ERR_PTR(-EAGAIN);
 557
 558	head = rht_ptr(bkt, tbl, hash);
 559
 560	RCU_INIT_POINTER(obj->next, head);
 561	if (ht->rhlist) {
 562		struct rhlist_head *list;
 563
 564		list = container_of(obj, struct rhlist_head, rhead);
 565		RCU_INIT_POINTER(list->next, NULL);
 566	}
 567
 568	/* bkt is always the head of the list, so it holds
 569	 * the lock, which we need to preserve
 570	 */
 571	rht_assign_locked(bkt, obj);
 572
 573	atomic_inc(&ht->nelems);
 574	if (rht_grow_above_75(ht, tbl))
 575		schedule_work(&ht->run_work);
 576
 577	return NULL;
 578}
 579
 580static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
 581				   struct rhash_head *obj)
 582{
 583	struct bucket_table *new_tbl;
 584	struct bucket_table *tbl;
 585	struct rhash_lock_head **bkt;
 586	unsigned int hash;
 587	void *data;
 588
 589	new_tbl = rcu_dereference(ht->tbl);
 590
 591	do {
 592		tbl = new_tbl;
 593		hash = rht_head_hashfn(ht, tbl, obj, ht->p);
 594		if (rcu_access_pointer(tbl->future_tbl))
 595			/* Failure is OK */
 596			bkt = rht_bucket_var(tbl, hash);
 597		else
 598			bkt = rht_bucket_insert(ht, tbl, hash);
 599		if (bkt == NULL) {
 600			new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 601			data = ERR_PTR(-EAGAIN);
 602		} else {
 603			rht_lock(tbl, bkt);
 604			data = rhashtable_lookup_one(ht, bkt, tbl,
 605						     hash, key, obj);
 606			new_tbl = rhashtable_insert_one(ht, bkt, tbl,
 607							hash, obj, data);
 608			if (PTR_ERR(new_tbl) != -EEXIST)
 609				data = ERR_CAST(new_tbl);
 610
 611			rht_unlock(tbl, bkt);
 612		}
 613	} while (!IS_ERR_OR_NULL(new_tbl));
 614
 615	if (PTR_ERR(data) == -EAGAIN)
 616		data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
 617			       -EAGAIN);
 618
 619	return data;
 620}
 621
 622void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
 623			     struct rhash_head *obj)
 624{
 625	void *data;
 626
 627	do {
 628		rcu_read_lock();
 629		data = rhashtable_try_insert(ht, key, obj);
 630		rcu_read_unlock();
 631	} while (PTR_ERR(data) == -EAGAIN);
 632
 633	return data;
 634}
 635EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
 636
 637/**
 638 * rhashtable_walk_enter - Initialise an iterator
 639 * @ht:		Table to walk over
 640 * @iter:	Hash table Iterator
 641 *
 642 * This function prepares a hash table walk.
 643 *
 644 * Note that if you restart a walk after rhashtable_walk_stop you
 645 * may see the same object twice.  Also, you may miss objects if
 646 * there are removals in between rhashtable_walk_stop and the next
 647 * call to rhashtable_walk_start.
 648 *
 649 * For a completely stable walk you should construct your own data
 650 * structure outside the hash table.
 651 *
 652 * This function may be called from any process context, including
 653 * non-preemptable context, but cannot be called from softirq or
 654 * hardirq context.
 655 *
 656 * You must call rhashtable_walk_exit after this function returns.
 657 */
 658void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
 659{
 660	iter->ht = ht;
 661	iter->p = NULL;
 662	iter->slot = 0;
 663	iter->skip = 0;
 664	iter->end_of_table = 0;
 665
 666	spin_lock(&ht->lock);
 667	iter->walker.tbl =
 668		rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
 669	list_add(&iter->walker.list, &iter->walker.tbl->walkers);
 670	spin_unlock(&ht->lock);
 671}
 672EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
 673
 674/**
 675 * rhashtable_walk_exit - Free an iterator
 676 * @iter:	Hash table Iterator
 677 *
 678 * This function frees resources allocated by rhashtable_walk_enter.
 679 */
 680void rhashtable_walk_exit(struct rhashtable_iter *iter)
 681{
 682	spin_lock(&iter->ht->lock);
 683	if (iter->walker.tbl)
 684		list_del(&iter->walker.list);
 685	spin_unlock(&iter->ht->lock);
 686}
 687EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
 688
 689/**
 690 * rhashtable_walk_start_check - Start a hash table walk
 691 * @iter:	Hash table iterator
 692 *
 693 * Start a hash table walk at the current iterator position.  Note that we take
 694 * the RCU lock in all cases including when we return an error.  So you must
 695 * always call rhashtable_walk_stop to clean up.
 696 *
 697 * Returns zero if successful.
 698 *
  699 * Returns -EAGAIN if resize event occurred.  Note that the iterator
 700 * will rewind back to the beginning and you may use it immediately
 701 * by calling rhashtable_walk_next.
 702 *
 703 * rhashtable_walk_start is defined as an inline variant that returns
 704 * void. This is preferred in cases where the caller would ignore
 705 * resize events and always continue.
 706 */
 707int rhashtable_walk_start_check(struct rhashtable_iter *iter)
 708	__acquires(RCU)
 709{
 710	struct rhashtable *ht = iter->ht;
 711	bool rhlist = ht->rhlist;
 712
 713	rcu_read_lock();
 714
 715	spin_lock(&ht->lock);
 716	if (iter->walker.tbl)
 717		list_del(&iter->walker.list);
 718	spin_unlock(&ht->lock);
 719
 720	if (iter->end_of_table)
 721		return 0;
 722	if (!iter->walker.tbl) {
 723		iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
 724		iter->slot = 0;
 725		iter->skip = 0;
 726		return -EAGAIN;
 727	}
 728
 729	if (iter->p && !rhlist) {
 730		/*
 731		 * We need to validate that 'p' is still in the table, and
 732		 * if so, update 'skip'
 733		 */
 734		struct rhash_head *p;
 735		int skip = 0;
 736		rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
 737			skip++;
 738			if (p == iter->p) {
 739				iter->skip = skip;
 740				goto found;
 741			}
 742		}
 743		iter->p = NULL;
 744	} else if (iter->p && rhlist) {
 745		/* Need to validate that 'list' is still in the table, and
 746		 * if so, update 'skip' and 'p'.
 747		 */
 748		struct rhash_head *p;
 749		struct rhlist_head *list;
 750		int skip = 0;
 751		rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
 752			for (list = container_of(p, struct rhlist_head, rhead);
 753			     list;
 754			     list = rcu_dereference(list->next)) {
 755				skip++;
 756				if (list == iter->list) {
 757					iter->p = p;
 758					iter->skip = skip;
 759					goto found;
 760				}
 761			}
 762		}
 763		iter->p = NULL;
 764	}
 765found:
 766	return 0;
 767}
 768EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
 769
 770/**
 771 * __rhashtable_walk_find_next - Find the next element in a table (or the first
 772 * one in case of a new walk).
 773 *
 774 * @iter:	Hash table iterator
 775 *
 776 * Returns the found object or NULL when the end of the table is reached.
 777 *
 778 * Returns -EAGAIN if resize event occurred.
 779 */
 780static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter)
 781{
 782	struct bucket_table *tbl = iter->walker.tbl;
 783	struct rhlist_head *list = iter->list;
 784	struct rhashtable *ht = iter->ht;
 785	struct rhash_head *p = iter->p;
 786	bool rhlist = ht->rhlist;
 787
 788	if (!tbl)
 789		return NULL;
 790
 791	for (; iter->slot < tbl->size; iter->slot++) {
 792		int skip = iter->skip;
 793
 794		rht_for_each_rcu(p, tbl, iter->slot) {
 795			if (rhlist) {
 796				list = container_of(p, struct rhlist_head,
 797						    rhead);
 798				do {
 799					if (!skip)
 800						goto next;
 801					skip--;
 802					list = rcu_dereference(list->next);
 803				} while (list);
 804
 805				continue;
 806			}
 807			if (!skip)
 808				break;
 809			skip--;
 810		}
 811
 812next:
 813		if (!rht_is_a_nulls(p)) {
 814			iter->skip++;
 815			iter->p = p;
 816			iter->list = list;
 817			return rht_obj(ht, rhlist ? &list->rhead : p);
 818		}
 819
 820		iter->skip = 0;
 821	}
 822
 823	iter->p = NULL;
 824
 825	/* Ensure we see any new tables. */
 826	smp_rmb();
 827
 828	iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 829	if (iter->walker.tbl) {
 830		iter->slot = 0;
 831		iter->skip = 0;
 832		return ERR_PTR(-EAGAIN);
 833	} else {
 834		iter->end_of_table = true;
 835	}
 836
 837	return NULL;
 838}
 839
 840/**
 841 * rhashtable_walk_next - Return the next object and advance the iterator
 842 * @iter:	Hash table iterator
 843 *
 844 * Note that you must call rhashtable_walk_stop when you are finished
 845 * with the walk.
 846 *
 847 * Returns the next object or NULL when the end of the table is reached.
 848 *
 849 * Returns -EAGAIN if resize event occurred.  Note that the iterator
 850 * will rewind back to the beginning and you may continue to use it.
 851 */
 852void *rhashtable_walk_next(struct rhashtable_iter *iter)
 853{
 854	struct rhlist_head *list = iter->list;
 855	struct rhashtable *ht = iter->ht;
 856	struct rhash_head *p = iter->p;
 857	bool rhlist = ht->rhlist;
 858
 859	if (p) {
 860		if (!rhlist || !(list = rcu_dereference(list->next))) {
 861			p = rcu_dereference(p->next);
 862			list = container_of(p, struct rhlist_head, rhead);
 863		}
 864		if (!rht_is_a_nulls(p)) {
 865			iter->skip++;
 866			iter->p = p;
 867			iter->list = list;
 868			return rht_obj(ht, rhlist ? &list->rhead : p);
 869		}
 870
 871		/* At the end of this slot, switch to next one and then find
 872		 * next entry from that point.
 873		 */
 874		iter->skip = 0;
 875		iter->slot++;
 876	}
 877
 878	return __rhashtable_walk_find_next(iter);
 879}
 880EXPORT_SYMBOL_GPL(rhashtable_walk_next);
 881
 882/**
 883 * rhashtable_walk_peek - Return the next object but don't advance the iterator
 884 * @iter:	Hash table iterator
 885 *
 886 * Returns the next object or NULL when the end of the table is reached.
 887 *
 888 * Returns -EAGAIN if resize event occurred.  Note that the iterator
 889 * will rewind back to the beginning and you may continue to use it.
 890 */
 891void *rhashtable_walk_peek(struct rhashtable_iter *iter)
 892{
 893	struct rhlist_head *list = iter->list;
 894	struct rhashtable *ht = iter->ht;
 895	struct rhash_head *p = iter->p;
 896
 897	if (p)
 898		return rht_obj(ht, ht->rhlist ? &list->rhead : p);
 899
 900	/* No object found in current iter, find next one in the table. */
 901
 902	if (iter->skip) {
  903		/* A nonzero skip value points to the next entry in the table
  904		 * beyond the last one that was found. Decrement skip so
 905		 * we find the current value. __rhashtable_walk_find_next
 906		 * will restore the original value of skip assuming that
 907		 * the table hasn't changed.
 908		 */
 909		iter->skip--;
 910	}
 911
 912	return __rhashtable_walk_find_next(iter);
 913}
 914EXPORT_SYMBOL_GPL(rhashtable_walk_peek);
 915
 916/**
 917 * rhashtable_walk_stop - Finish a hash table walk
 918 * @iter:	Hash table iterator
 919 *
 920 * Finish a hash table walk.  Does not reset the iterator to the start of the
 921 * hash table.
 922 */
 923void rhashtable_walk_stop(struct rhashtable_iter *iter)
 924	__releases(RCU)
 925{
 926	struct rhashtable *ht;
 927	struct bucket_table *tbl = iter->walker.tbl;
 928
 929	if (!tbl)
 930		goto out;
 931
 932	ht = iter->ht;
 933
 934	spin_lock(&ht->lock);
 935	if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu))
 936		/* This bucket table is being freed, don't re-link it. */
 937		iter->walker.tbl = NULL;
 938	else
 939		list_add(&iter->walker.list, &tbl->walkers);
 940	spin_unlock(&ht->lock);
 941
 942out:
 943	rcu_read_unlock();
 944}
 945EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
 946
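/* Illustrative usage sketch of the walker API above (added for clarity, not
 * from the original source). It assumes a hypothetical "struct test_obj"
 * with an "int key" and an embedded "struct rhash_head node", stored in the
 * table "my_ht" as in the rhashtable_init() examples further below. On
 * -EAGAIN the iterator has already been rewound, so simply continue:
 *
 *	struct rhashtable_iter iter;
 *	struct test_obj *obj;
 *
 *	rhashtable_walk_enter(my_ht, &iter);
 *	rhashtable_walk_start(&iter);
 *
 *	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
 *		if (IS_ERR(obj)) {
 *			if (PTR_ERR(obj) == -EAGAIN)
 *				continue;
 *			break;
 *		}
 *		pr_info("key=%d\n", obj->key);
 *	}
 *
 *	rhashtable_walk_stop(&iter);
 *	rhashtable_walk_exit(&iter);
 */
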
 947static size_t rounded_hashtable_size(const struct rhashtable_params *params)
 948{
 949	size_t retsize;
 950
 951	if (params->nelem_hint)
 952		retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
 953			      (unsigned long)params->min_size);
 954	else
 955		retsize = max(HASH_DEFAULT_SIZE,
 956			      (unsigned long)params->min_size);
 957
 958	return retsize;
 959}
 960
 961static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
 962{
 963	return jhash2(key, length, seed);
 964}
 965
 966/**
 967 * rhashtable_init - initialize a new hash table
 968 * @ht:		hash table to be initialized
 969 * @params:	configuration parameters
 970 *
 971 * Initializes a new hash table based on the provided configuration
 972 * parameters. A table can be configured either with a variable or
 973 * fixed length key:
 974 *
 975 * Configuration Example 1: Fixed length keys
 976 * struct test_obj {
 977 *	int			key;
 978 *	void *			my_member;
 979 *	struct rhash_head	node;
 980 * };
 981 *
 982 * struct rhashtable_params params = {
 983 *	.head_offset = offsetof(struct test_obj, node),
 984 *	.key_offset = offsetof(struct test_obj, key),
 985 *	.key_len = sizeof(int),
 986 *	.hashfn = jhash,
 987 * };
 988 *
 989 * Configuration Example 2: Variable length keys
 990 * struct test_obj {
 991 *	[...]
 992 *	struct rhash_head	node;
 993 * };
 994 *
 995 * u32 my_hash_fn(const void *data, u32 len, u32 seed)
 996 * {
 997 *	struct test_obj *obj = data;
 998 *
 999 *	return [... hash ...];
1000 * }
1001 *
1002 * struct rhashtable_params params = {
1003 *	.head_offset = offsetof(struct test_obj, node),
1004 *	.hashfn = jhash,
1005 *	.obj_hashfn = my_hash_fn,
1006 * };
1007 */
1008int rhashtable_init(struct rhashtable *ht,
1009		    const struct rhashtable_params *params)
1010{
1011	struct bucket_table *tbl;
1012	size_t size;
1013
1014	if ((!params->key_len && !params->obj_hashfn) ||
1015	    (params->obj_hashfn && !params->obj_cmpfn))
1016		return -EINVAL;
1017
1018	memset(ht, 0, sizeof(*ht));
1019	mutex_init(&ht->mutex);
1020	spin_lock_init(&ht->lock);
1021	memcpy(&ht->p, params, sizeof(*params));
1022
1023	if (params->min_size)
1024		ht->p.min_size = roundup_pow_of_two(params->min_size);
1025
1026	/* Cap total entries at 2^31 to avoid nelems overflow. */
1027	ht->max_elems = 1u << 31;
1028
1029	if (params->max_size) {
1030		ht->p.max_size = rounddown_pow_of_two(params->max_size);
1031		if (ht->p.max_size < ht->max_elems / 2)
1032			ht->max_elems = ht->p.max_size * 2;
1033	}
1034
1035	ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
1036
1037	size = rounded_hashtable_size(&ht->p);
1038
1039	ht->key_len = ht->p.key_len;
1040	if (!params->hashfn) {
1041		ht->p.hashfn = jhash;
1042
1043		if (!(ht->key_len & (sizeof(u32) - 1))) {
1044			ht->key_len /= sizeof(u32);
1045			ht->p.hashfn = rhashtable_jhash2;
1046		}
1047	}
1048
1049	/*
 1050	 * This is API initialization and thus we need to guarantee the
1051	 * initial rhashtable allocation. Upon failure, retry with the
1052	 * smallest possible size with __GFP_NOFAIL semantics.
1053	 */
1054	tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
1055	if (unlikely(tbl == NULL)) {
1056		size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
1057		tbl = bucket_table_alloc(ht, size, GFP_KERNEL | __GFP_NOFAIL);
1058	}
1059
1060	atomic_set(&ht->nelems, 0);
1061
1062	RCU_INIT_POINTER(ht->tbl, tbl);
1063
1064	INIT_WORK(&ht->run_work, rht_deferred_worker);
1065
1066	return 0;
1067}
1068EXPORT_SYMBOL_GPL(rhashtable_init);
1069
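/* Illustrative usage sketch (added for clarity, not from the original
 * source), continuing Configuration Example 1 above. Error handling is
 * trimmed and "params" refers to the fixed-length-key parameters shown in
 * the kernel-doc:
 *
 *	struct rhashtable ht;
 *	struct test_obj *obj;
 *	int key = 42;
 *
 *	rhashtable_init(&ht, &params);
 *
 *	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
 *	obj->key = key;
 *	rhashtable_insert_fast(&ht, &obj->node, params);
 *
 *	rcu_read_lock();
 *	obj = rhashtable_lookup_fast(&ht, &key, params);
 *	rcu_read_unlock();
 *
 *	rhashtable_remove_fast(&ht, &obj->node, params);
 *	rhashtable_destroy(&ht);
 */
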
1070/**
1071 * rhltable_init - initialize a new hash list table
1072 * @hlt:	hash list table to be initialized
1073 * @params:	configuration parameters
1074 *
1075 * Initializes a new hash list table.
1076 *
1077 * See documentation for rhashtable_init.
1078 */
1079int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
1080{
1081	int err;
1082
1083	err = rhashtable_init(&hlt->ht, params);
1084	hlt->ht.rhlist = true;
1085	return err;
1086}
1087EXPORT_SYMBOL_GPL(rhltable_init);
1088
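/* Illustrative rhltable sketch (added for clarity, not from the original
 * source). Unlike an rhashtable, an rhltable accepts duplicate keys and
 * objects embed a "struct rhlist_head" instead of a "struct rhash_head".
 * "test_obj", its "lnode" member and "params" are hypothetical as in the
 * examples above; the helpers are from <linux/rhashtable.h>:
 *
 *	struct rhltable hlt;
 *	struct rhlist_head *h, *pos;
 *	struct test_obj *obj;
 *	int key = 42;
 *
 *	rhltable_init(&hlt, &params);
 *	rhltable_insert(&hlt, &obj->lnode, params);
 *
 *	rcu_read_lock();
 *	h = rhltable_lookup(&hlt, &key, params);
 *	rhl_for_each_entry_rcu(obj, pos, h, lnode)
 *		pr_info("duplicate with key %d\n", obj->key);
 *	rcu_read_unlock();
 */
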
1089static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
1090				void (*free_fn)(void *ptr, void *arg),
1091				void *arg)
1092{
1093	struct rhlist_head *list;
1094
1095	if (!ht->rhlist) {
1096		free_fn(rht_obj(ht, obj), arg);
1097		return;
1098	}
1099
1100	list = container_of(obj, struct rhlist_head, rhead);
1101	do {
1102		obj = &list->rhead;
1103		list = rht_dereference(list->next, ht);
1104		free_fn(rht_obj(ht, obj), arg);
1105	} while (list);
1106}
1107
1108/**
1109 * rhashtable_free_and_destroy - free elements and destroy hash table
1110 * @ht:		the hash table to destroy
1111 * @free_fn:	callback to release resources of element
1112 * @arg:	pointer passed to free_fn
1113 *
 1114 * Stops any pending async resize. If defined, invokes free_fn for each
 1115 * element to release resources. Please note that RCU protected
1116 * readers may still be accessing the elements. Releasing of resources
1117 * must occur in a compatible manner. Then frees the bucket array.
1118 *
1119 * This function will eventually sleep to wait for an async resize
 1120 * to complete. The caller is responsible for ensuring that no further
 1121 * write operations occur in parallel.
1122 */
1123void rhashtable_free_and_destroy(struct rhashtable *ht,
1124				 void (*free_fn)(void *ptr, void *arg),
1125				 void *arg)
1126{
1127	struct bucket_table *tbl, *next_tbl;
1128	unsigned int i;
1129
1130	cancel_work_sync(&ht->run_work);
1131
1132	mutex_lock(&ht->mutex);
1133	tbl = rht_dereference(ht->tbl, ht);
1134restart:
1135	if (free_fn) {
1136		for (i = 0; i < tbl->size; i++) {
1137			struct rhash_head *pos, *next;
1138
1139			cond_resched();
1140			for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)),
1141			     next = !rht_is_a_nulls(pos) ?
1142					rht_dereference(pos->next, ht) : NULL;
1143			     !rht_is_a_nulls(pos);
1144			     pos = next,
1145			     next = !rht_is_a_nulls(pos) ?
1146					rht_dereference(pos->next, ht) : NULL)
1147				rhashtable_free_one(ht, pos, free_fn, arg);
1148		}
1149	}
1150
1151	next_tbl = rht_dereference(tbl->future_tbl, ht);
1152	bucket_table_free(tbl);
1153	if (next_tbl) {
1154		tbl = next_tbl;
1155		goto restart;
1156	}
1157	mutex_unlock(&ht->mutex);
1158}
1159EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
1160
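/* Illustrative teardown sketch (added for clarity, not from the original
 * source). Once no readers or writers remain, a free_fn that simply
 * kfree()s each element is sufficient; if RCU readers may still hold
 * references, defer the release (e.g. with kfree_rcu()) instead:
 *
 *	static void free_obj(void *ptr, void *arg)
 *	{
 *		kfree(ptr);
 *	}
 *
 *	rhashtable_free_and_destroy(&ht, free_obj, NULL);
 */
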
1161void rhashtable_destroy(struct rhashtable *ht)
1162{
1163	return rhashtable_free_and_destroy(ht, NULL, NULL);
1164}
1165EXPORT_SYMBOL_GPL(rhashtable_destroy);
1166
1167struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl,
1168					     unsigned int hash)
1169{
1170	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
1171	unsigned int index = hash & ((1 << tbl->nest) - 1);
1172	unsigned int size = tbl->size >> tbl->nest;
1173	unsigned int subhash = hash;
1174	union nested_table *ntbl;
1175
1176	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
1177	ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash);
1178	subhash >>= tbl->nest;
1179
1180	while (ntbl && size > (1 << shift)) {
1181		index = subhash & ((1 << shift) - 1);
1182		ntbl = rht_dereference_bucket_rcu(ntbl[index].table,
1183						  tbl, hash);
1184		size >>= shift;
1185		subhash >>= shift;
1186	}
1187
1188	if (!ntbl)
1189		return NULL;
1190
1191	return &ntbl[subhash].bucket;
1192
1193}
1194EXPORT_SYMBOL_GPL(__rht_bucket_nested);
1195
1196struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl,
1197					   unsigned int hash)
1198{
1199	static struct rhash_lock_head *rhnull;
1200
1201	if (!rhnull)
1202		INIT_RHT_NULLS_HEAD(rhnull);
1203	return __rht_bucket_nested(tbl, hash) ?: &rhnull;
1204}
1205EXPORT_SYMBOL_GPL(rht_bucket_nested);
1206
1207struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht,
1208						  struct bucket_table *tbl,
1209						  unsigned int hash)
1210{
1211	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
1212	unsigned int index = hash & ((1 << tbl->nest) - 1);
1213	unsigned int size = tbl->size >> tbl->nest;
1214	union nested_table *ntbl;
1215
1216	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
1217	hash >>= tbl->nest;
1218	ntbl = nested_table_alloc(ht, &ntbl[index].table,
1219				  size <= (1 << shift));
1220
1221	while (ntbl && size > (1 << shift)) {
1222		index = hash & ((1 << shift) - 1);
1223		size >>= shift;
1224		hash >>= shift;
1225		ntbl = nested_table_alloc(ht, &ntbl[index].table,
1226					  size <= (1 << shift));
1227	}
1228
1229	if (!ntbl)
1230		return NULL;
1231
1232	return &ntbl[hash].bucket;
1233
1234}
1235EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);