Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
   1/*
   2 * Copyright (C) 2011 Red Hat, Inc.
   3 *
   4 * This file is released under the GPL.
   5 */
   6
   7#include "dm-space-map-common.h"
   8#include "dm-transaction-manager.h"
   9#include "dm-btree-internal.h"
  10#include "dm-persistent-data-internal.h"
  11
  12#include <linux/bitops.h>
  13#include <linux/device-mapper.h>
  14
  15#define DM_MSG_PREFIX "space map common"
  16
  17/*----------------------------------------------------------------*/
  18
  19/*
  20 * Index validator.
  21 */
  22#define INDEX_CSUM_XOR 160478
  23
  24static void index_prepare_for_write(struct dm_block_validator *v,
  25				    struct dm_block *b,
  26				    size_t block_size)
  27{
  28	struct disk_metadata_index *mi_le = dm_block_data(b);
  29
  30	mi_le->blocknr = cpu_to_le64(dm_block_location(b));
  31	mi_le->csum = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
  32						 block_size - sizeof(__le32),
  33						 INDEX_CSUM_XOR));
  34}
  35
  36static int index_check(struct dm_block_validator *v,
  37		       struct dm_block *b,
  38		       size_t block_size)
  39{
  40	struct disk_metadata_index *mi_le = dm_block_data(b);
  41	__le32 csum_disk;
  42
  43	if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) {
  44		DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu",
  45			    le64_to_cpu(mi_le->blocknr), dm_block_location(b));
  46		return -ENOTBLK;
  47	}
  48
  49	csum_disk = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
  50					       block_size - sizeof(__le32),
  51					       INDEX_CSUM_XOR));
  52	if (csum_disk != mi_le->csum) {
  53		DMERR_LIMIT("index_check failed: csum %u != wanted %u",
  54			    le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum));
  55		return -EILSEQ;
  56	}
  57
  58	return 0;
  59}
  60
  61static struct dm_block_validator index_validator = {
  62	.name = "index",
  63	.prepare_for_write = index_prepare_for_write,
  64	.check = index_check
  65};
  66
  67/*----------------------------------------------------------------*/
  68
  69/*
  70 * Bitmap validator
  71 */
  72#define BITMAP_CSUM_XOR 240779
  73
  74static void dm_bitmap_prepare_for_write(struct dm_block_validator *v,
  75					struct dm_block *b,
  76					size_t block_size)
  77{
  78	struct disk_bitmap_header *disk_header = dm_block_data(b);
  79
  80	disk_header->blocknr = cpu_to_le64(dm_block_location(b));
  81	disk_header->csum = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
  82						       block_size - sizeof(__le32),
  83						       BITMAP_CSUM_XOR));
  84}
  85
  86static int dm_bitmap_check(struct dm_block_validator *v,
  87			   struct dm_block *b,
  88			   size_t block_size)
  89{
  90	struct disk_bitmap_header *disk_header = dm_block_data(b);
  91	__le32 csum_disk;
  92
  93	if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) {
  94		DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu",
  95			    le64_to_cpu(disk_header->blocknr), dm_block_location(b));
  96		return -ENOTBLK;
  97	}
  98
  99	csum_disk = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
 100					       block_size - sizeof(__le32),
 101					       BITMAP_CSUM_XOR));
 102	if (csum_disk != disk_header->csum) {
 103		DMERR_LIMIT("bitmap check failed: csum %u != wanted %u",
 104			    le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum));
 105		return -EILSEQ;
 106	}
 107
 108	return 0;
 109}
 110
 111static struct dm_block_validator dm_sm_bitmap_validator = {
 112	.name = "sm_bitmap",
 113	.prepare_for_write = dm_bitmap_prepare_for_write,
 114	.check = dm_bitmap_check,
 115};
 116
 117/*----------------------------------------------------------------*/
 118
 119#define ENTRIES_PER_WORD 32
 120#define ENTRIES_SHIFT	5
 121
 122static void *dm_bitmap_data(struct dm_block *b)
 123{
 124	return dm_block_data(b) + sizeof(struct disk_bitmap_header);
 125}
 126
 127#define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL
 128
 129static unsigned dm_bitmap_word_used(void *addr, unsigned b)
 130{
 131	__le64 *words_le = addr;
 132	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
 133
 134	uint64_t bits = le64_to_cpu(*w_le);
 135	uint64_t mask = (bits + WORD_MASK_HIGH + 1) & WORD_MASK_HIGH;
 136
 137	return !(~bits & mask);
 138}
 139
 140static unsigned sm_lookup_bitmap(void *addr, unsigned b)
 141{
 142	__le64 *words_le = addr;
 143	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
 144	unsigned hi, lo;
 145
 146	b = (b & (ENTRIES_PER_WORD - 1)) << 1;
 147	hi = !!test_bit_le(b, (void *) w_le);
 148	lo = !!test_bit_le(b + 1, (void *) w_le);
 149	return (hi << 1) | lo;
 150}
 151
 152static void sm_set_bitmap(void *addr, unsigned b, unsigned val)
 153{
 154	__le64 *words_le = addr;
 155	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
 156
 157	b = (b & (ENTRIES_PER_WORD - 1)) << 1;
 158
 159	if (val & 2)
 160		__set_bit_le(b, (void *) w_le);
 161	else
 162		__clear_bit_le(b, (void *) w_le);
 163
 164	if (val & 1)
 165		__set_bit_le(b + 1, (void *) w_le);
 166	else
 167		__clear_bit_le(b + 1, (void *) w_le);
 168}
 169
 170static int sm_find_free(void *addr, unsigned begin, unsigned end,
 171			unsigned *result)
 172{
 173	while (begin < end) {
 174		if (!(begin & (ENTRIES_PER_WORD - 1)) &&
 175		    dm_bitmap_word_used(addr, begin)) {
 176			begin += ENTRIES_PER_WORD;
 177			continue;
 178		}
 179
 180		if (!sm_lookup_bitmap(addr, begin)) {
 181			*result = begin;
 182			return 0;
 183		}
 184
 185		begin++;
 186	}
 187
 188	return -ENOSPC;
 189}
 190
 191/*----------------------------------------------------------------*/
 192
 193static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm)
 194{
 195	memset(ll, 0, sizeof(struct ll_disk));
 196
 197	ll->tm = tm;
 198
 199	ll->bitmap_info.tm = tm;
 200	ll->bitmap_info.levels = 1;
 201
 202	/*
 203	 * Because the new bitmap blocks are created via a shadow
 204	 * operation, the old entry has already had its reference count
 205	 * decremented and we don't need the btree to do any bookkeeping.
 206	 */
 207	ll->bitmap_info.value_type.size = sizeof(struct disk_index_entry);
 208	ll->bitmap_info.value_type.inc = NULL;
 209	ll->bitmap_info.value_type.dec = NULL;
 210	ll->bitmap_info.value_type.equal = NULL;
 211
 212	ll->ref_count_info.tm = tm;
 213	ll->ref_count_info.levels = 1;
 214	ll->ref_count_info.value_type.size = sizeof(uint32_t);
 215	ll->ref_count_info.value_type.inc = NULL;
 216	ll->ref_count_info.value_type.dec = NULL;
 217	ll->ref_count_info.value_type.equal = NULL;
 218
 219	ll->block_size = dm_bm_block_size(dm_tm_get_bm(tm));
 220
 221	if (ll->block_size > (1 << 30)) {
 222		DMERR("block size too big to hold bitmaps");
 223		return -EINVAL;
 224	}
 225
 226	ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) *
 227		ENTRIES_PER_BYTE;
 228	ll->nr_blocks = 0;
 229	ll->bitmap_root = 0;
 230	ll->ref_count_root = 0;
 231	ll->bitmap_index_changed = false;
 232
 233	return 0;
 234}
 235
 236int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
 237{
 238	int r;
 239	dm_block_t i, nr_blocks, nr_indexes;
 240	unsigned old_blocks, blocks;
 241
 242	nr_blocks = ll->nr_blocks + extra_blocks;
 243	old_blocks = dm_sector_div_up(ll->nr_blocks, ll->entries_per_block);
 244	blocks = dm_sector_div_up(nr_blocks, ll->entries_per_block);
 245
 246	nr_indexes = dm_sector_div_up(nr_blocks, ll->entries_per_block);
 247	if (nr_indexes > ll->max_entries(ll)) {
 248		DMERR("space map too large");
 249		return -EINVAL;
 250	}
 251
 252	/*
 253	 * We need to set this before the dm_tm_new_block() call below.
 254	 */
 255	ll->nr_blocks = nr_blocks;
 256	for (i = old_blocks; i < blocks; i++) {
 257		struct dm_block *b;
 258		struct disk_index_entry idx;
 259
 260		r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b);
 261		if (r < 0)
 262			return r;
 263
 264		idx.blocknr = cpu_to_le64(dm_block_location(b));
 265
 266		dm_tm_unlock(ll->tm, b);
 267
 268		idx.nr_free = cpu_to_le32(ll->entries_per_block);
 269		idx.none_free_before = 0;
 270
 271		r = ll->save_ie(ll, i, &idx);
 272		if (r < 0)
 273			return r;
 274	}
 275
 276	return 0;
 277}
 278
 279int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
 280{
 281	int r;
 282	dm_block_t index = b;
 283	struct disk_index_entry ie_disk;
 284	struct dm_block *blk;
 285
 286	if (b >= ll->nr_blocks) {
 287		DMERR_LIMIT("metadata block out of bounds");
 288		return -EINVAL;
 289	}
 290
 291	b = do_div(index, ll->entries_per_block);
 292	r = ll->load_ie(ll, index, &ie_disk);
 293	if (r < 0)
 294		return r;
 295
 296	r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
 297			    &dm_sm_bitmap_validator, &blk);
 298	if (r < 0)
 299		return r;
 300
 301	*result = sm_lookup_bitmap(dm_bitmap_data(blk), b);
 302
 303	dm_tm_unlock(ll->tm, blk);
 304
 305	return 0;
 306}
 307
 308static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b,
 309				      uint32_t *result)
 310{
 311	__le32 le_rc;
 312	int r;
 313
 314	r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc);
 315	if (r < 0)
 316		return r;
 317
 318	*result = le32_to_cpu(le_rc);
 319
 320	return r;
 321}
 322
 323int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
 324{
 325	int r = sm_ll_lookup_bitmap(ll, b, result);
 326
 327	if (r)
 328		return r;
 329
 330	if (*result != 3)
 331		return r;
 332
 333	return sm_ll_lookup_big_ref_count(ll, b, result);
 334}
 335
 336int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
 337			  dm_block_t end, dm_block_t *result)
 338{
 339	int r;
 340	struct disk_index_entry ie_disk;
 341	dm_block_t i, index_begin = begin;
 342	dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block);
 343
 344	/*
 345	 * FIXME: Use shifts
 346	 */
 347	begin = do_div(index_begin, ll->entries_per_block);
 348	end = do_div(end, ll->entries_per_block);
 349	if (end == 0)
 350		end = ll->entries_per_block;
 351
 352	for (i = index_begin; i < index_end; i++, begin = 0) {
 353		struct dm_block *blk;
 354		unsigned position;
 355		uint32_t bit_end;
 356
 357		r = ll->load_ie(ll, i, &ie_disk);
 358		if (r < 0)
 359			return r;
 360
 361		if (le32_to_cpu(ie_disk.nr_free) == 0)
 362			continue;
 363
 364		r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
 365				    &dm_sm_bitmap_validator, &blk);
 366		if (r < 0)
 367			return r;
 368
 369		bit_end = (i == index_end - 1) ?  end : ll->entries_per_block;
 370
 371		r = sm_find_free(dm_bitmap_data(blk),
 372				 max_t(unsigned, begin, le32_to_cpu(ie_disk.none_free_before)),
 373				 bit_end, &position);
 374		if (r == -ENOSPC) {
 375			/*
 376			 * This might happen because we started searching
 377			 * part way through the bitmap.
 378			 */
 379			dm_tm_unlock(ll->tm, blk);
 380			continue;
 381		}
 382
 383		dm_tm_unlock(ll->tm, blk);
 384
 385		*result = i * ll->entries_per_block + (dm_block_t) position;
 386		return 0;
 387	}
 388
 389	return -ENOSPC;
 390}
 391
 392int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
 393	                         dm_block_t begin, dm_block_t end, dm_block_t *b)
 394{
 395	int r;
 396	uint32_t count;
 397
 398	do {
 399		r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b);
 400		if (r)
 401			break;
 402
 403		/* double check this block wasn't used in the old transaction */
 404		if (*b >= old_ll->nr_blocks)
 405			count = 0;
 406		else {
 407			r = sm_ll_lookup(old_ll, *b, &count);
 408			if (r)
 409				break;
 410
 411			if (count)
 412				begin = *b + 1;
 413		}
 414	} while (count);
 415
 416	return r;
 417}
 418
 419/*----------------------------------------------------------------*/
 420
 421int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
 422		 uint32_t ref_count, int32_t *nr_allocations)
 423{
 424	int r;
 425	uint32_t bit, old;
 426	struct dm_block *nb;
 427	dm_block_t index = b;
 428	struct disk_index_entry ie_disk;
 429	void *bm_le;
 430	int inc;
 431
 432	bit = do_div(index, ll->entries_per_block);
 433	r = ll->load_ie(ll, index, &ie_disk);
 434	if (r < 0)
 435		return r;
 436
 437	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ie_disk.blocknr),
 438			       &dm_sm_bitmap_validator, &nb, &inc);
 439	if (r < 0) {
 440		DMERR("dm_tm_shadow_block() failed");
 441		return r;
 442	}
 443	ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
 444	bm_le = dm_bitmap_data(nb);
 445
 446	old = sm_lookup_bitmap(bm_le, bit);
 447	if (old > 2) {
 448		r = sm_ll_lookup_big_ref_count(ll, b, &old);
 449		if (r < 0) {
 450			dm_tm_unlock(ll->tm, nb);
 451			return r;
 452		}
 453	}
 454
 455	if (r) {
 456		dm_tm_unlock(ll->tm, nb);
 457		return r;
 458	}
 459
 460	if (ref_count <= 2) {
 461		sm_set_bitmap(bm_le, bit, ref_count);
 462		dm_tm_unlock(ll->tm, nb);
 463
 464		if (old > 2) {
 465			r = dm_btree_remove(&ll->ref_count_info,
 466					    ll->ref_count_root,
 467					    &b, &ll->ref_count_root);
 468			if (r)
 469				return r;
 470		}
 471
 472	} else {
 473		__le32 le_rc = cpu_to_le32(ref_count);
 474
 475		sm_set_bitmap(bm_le, bit, 3);
 476		dm_tm_unlock(ll->tm, nb);
 477
 478		__dm_bless_for_disk(&le_rc);
 479		r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
 480				    &b, &le_rc, &ll->ref_count_root);
 481		if (r < 0) {
 482			DMERR("ref count insert failed");
 483			return r;
 484		}
 485	}
 486
 487	if (ref_count && !old) {
 488		*nr_allocations = 1;
 489		ll->nr_allocated++;
 490		le32_add_cpu(&ie_disk.nr_free, -1);
 491		if (le32_to_cpu(ie_disk.none_free_before) == bit)
 492			ie_disk.none_free_before = cpu_to_le32(bit + 1);
 493
 494	} else if (old && !ref_count) {
 495		*nr_allocations = -1;
 496		ll->nr_allocated--;
 497		le32_add_cpu(&ie_disk.nr_free, 1);
 498		ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
 499	} else
 500		*nr_allocations = 0;
 501
 502	return ll->save_ie(ll, index, &ie_disk);
 503}
 504
 505/*----------------------------------------------------------------*/
 506
 507/*
 508 * Holds useful intermediate results for the range based inc and dec
 509 * operations.
 510 */
 511struct inc_context {
 512	struct disk_index_entry ie_disk;
 513	struct dm_block *bitmap_block;
 514	void *bitmap;
 515
 516	struct dm_block *overflow_leaf;
 517};
 518
 519static inline void init_inc_context(struct inc_context *ic)
 520{
 521	ic->bitmap_block = NULL;
 522	ic->bitmap = NULL;
 523	ic->overflow_leaf = NULL;
 524}
 525
 526static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic)
 527{
 528	if (ic->bitmap_block)
 529		dm_tm_unlock(ll->tm, ic->bitmap_block);
 530	if (ic->overflow_leaf)
 531		dm_tm_unlock(ll->tm, ic->overflow_leaf);
 532}
 533
 534static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic)
 535{
 536	exit_inc_context(ll, ic);
 537	init_inc_context(ic);
 538}
 539
 540/*
 541 * Confirms a btree node contains a particular key at an index.
 542 */
 543static bool contains_key(struct btree_node *n, uint64_t key, int index)
 544{
 545	return index >= 0 &&
 546		index < le32_to_cpu(n->header.nr_entries) &&
 547		le64_to_cpu(n->keys[index]) == key;
 548}
 549
 550static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
 551{
 552	int r;
 553	int index;
 554	struct btree_node *n;
 555	__le32 *v_ptr;
 556	uint32_t rc;
 557
 558	/*
 559	 * bitmap_block needs to be unlocked because getting the
 560	 * overflow_leaf may need to allocate, and thus use the space map.
 561	 */
 562	reset_inc_context(ll, ic);
 563
 564	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
 565				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
 566	if (r < 0)
 567		return r;
 568
 569	n = dm_block_data(ic->overflow_leaf);
 570
 571	if (!contains_key(n, b, index)) {
 572		DMERR("overflow btree is missing an entry");
 573		return -EINVAL;
 574	}
 575
 576	v_ptr = value_ptr(n, index);
 577	rc = le32_to_cpu(*v_ptr) + 1;
 578	*v_ptr = cpu_to_le32(rc);
 579
 580	return 0;
 581}
 582
 583static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
 584{
 585	int index;
 586	struct btree_node *n;
 587	__le32 *v_ptr;
 588	uint32_t rc;
 589
 590	/*
 591	 * Do we already have the correct overflow leaf?
 592	 */
 593	if (ic->overflow_leaf) {
 594		n = dm_block_data(ic->overflow_leaf);
 595		index = lower_bound(n, b);
 596		if (contains_key(n, b, index)) {
 597			v_ptr = value_ptr(n, index);
 598			rc = le32_to_cpu(*v_ptr) + 1;
 599			*v_ptr = cpu_to_le32(rc);
 600
 601			return 0;
 602		}
 603	}
 604
 605	return __sm_ll_inc_overflow(ll, b, ic);
 606}
 607
 608static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic)
 609{
 610	int r, inc;
 611	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr),
 612			       &dm_sm_bitmap_validator, &ic->bitmap_block, &inc);
 613	if (r < 0) {
 614		DMERR("dm_tm_shadow_block() failed");
 615		return r;
 616	}
 617	ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block));
 618	ic->bitmap = dm_bitmap_data(ic->bitmap_block);
 619	return 0;
 620}
 621
 622/*
 623 * Once shadow_bitmap has been called, which always happens at the start of inc/dec,
 624 * we can reopen the bitmap with a simple write lock, rather than re calling
 625 * dm_tm_shadow_block().
 626 */
 627static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic)
 628{
 629	if (!ic->bitmap_block) {
 630		int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr),
 631					 &dm_sm_bitmap_validator, &ic->bitmap_block);
 632		if (r) {
 633			DMERR("unable to re-get write lock for bitmap");
 634			return r;
 635		}
 636		ic->bitmap = dm_bitmap_data(ic->bitmap_block);
 637	}
 638
 639	return 0;
 640}
 641
 642/*
 643 * Loops round incrementing entries in a single bitmap.
 644 */
 645static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b,
 646				   uint32_t bit, uint32_t bit_end,
 647				   int32_t *nr_allocations, dm_block_t *new_b,
 648				   struct inc_context *ic)
 649{
 650	int r;
 651	__le32 le_rc;
 652	uint32_t old;
 653
 654	for (; bit != bit_end; bit++, b++) {
 655		/*
 656		 * We only need to drop the bitmap if we need to find a new btree
 657		 * leaf for the overflow.  So if it was dropped last iteration,
 658		 * we now re-get it.
 659		 */
 660		r = ensure_bitmap(ll, ic);
 661		if (r)
 662			return r;
 663
 664		old = sm_lookup_bitmap(ic->bitmap, bit);
 665		switch (old) {
 666		case 0:
 667			/* inc bitmap, adjust nr_allocated */
 668			sm_set_bitmap(ic->bitmap, bit, 1);
 669			(*nr_allocations)++;
 670			ll->nr_allocated++;
 671			le32_add_cpu(&ic->ie_disk.nr_free, -1);
 672			if (le32_to_cpu(ic->ie_disk.none_free_before) == bit)
 673				ic->ie_disk.none_free_before = cpu_to_le32(bit + 1);
 674			break;
 675
 676		case 1:
 677			/* inc bitmap */
 678			sm_set_bitmap(ic->bitmap, bit, 2);
 679			break;
 680
 681		case 2:
 682			/* inc bitmap and insert into overflow */
 683			sm_set_bitmap(ic->bitmap, bit, 3);
 684			reset_inc_context(ll, ic);
 685
 686			le_rc = cpu_to_le32(3);
 687			__dm_bless_for_disk(&le_rc);
 688			r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
 689					    &b, &le_rc, &ll->ref_count_root);
 690			if (r < 0) {
 691				DMERR("ref count insert failed");
 692				return r;
 693			}
 694			break;
 695
 696		default:
 697			/*
 698			 * inc within the overflow tree only.
 699			 */
 700			r = sm_ll_inc_overflow(ll, b, ic);
 701			if (r < 0)
 702				return r;
 703		}
 704	}
 705
 706	*new_b = b;
 707	return 0;
 708}
 709
 710/*
 711 * Finds a bitmap that contains entries in the block range, and increments
 712 * them.
 713 */
 714static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
 715		       int32_t *nr_allocations, dm_block_t *new_b)
 716{
 717	int r;
 718	struct inc_context ic;
 719	uint32_t bit, bit_end;
 720	dm_block_t index = b;
 721
 722	init_inc_context(&ic);
 723
 724	bit = do_div(index, ll->entries_per_block);
 725	r = ll->load_ie(ll, index, &ic.ie_disk);
 726	if (r < 0)
 727		return r;
 728
 729	r = shadow_bitmap(ll, &ic);
 730	if (r)
 731		return r;
 732
 733	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
 734	r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic);
 735
 736	exit_inc_context(ll, &ic);
 737
 738	if (r)
 739		return r;
 740
 741	return ll->save_ie(ll, index, &ic.ie_disk);
 742}
 743
 744int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
 745	      int32_t *nr_allocations)
 746{
 747	*nr_allocations = 0;
 748	while (b != e) {
 749		int r = __sm_ll_inc(ll, b, e, nr_allocations, &b);
 750		if (r)
 751			return r;
 752	}
 753
 754	return 0;
 755}
 756
 757/*----------------------------------------------------------------*/
 758
 759static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b,
 760				struct inc_context *ic)
 761{
 762	reset_inc_context(ll, ic);
 763	return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root,
 764			       &b, &ll->ref_count_root);
 765}
 766
 767static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
 768				struct inc_context *ic, uint32_t *old_rc)
 769{
 770	int r;
 771	int index = -1;
 772	struct btree_node *n;
 773	__le32 *v_ptr;
 774	uint32_t rc;
 775
 776	reset_inc_context(ll, ic);
 777	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
 778				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
 779	if (r < 0)
 780		return r;
 781
 782	n = dm_block_data(ic->overflow_leaf);
 783
 784	if (!contains_key(n, b, index)) {
 785		DMERR("overflow btree is missing an entry");
 786		return -EINVAL;
 787	}
 788
 789	v_ptr = value_ptr(n, index);
 790	rc = le32_to_cpu(*v_ptr);
 791	*old_rc = rc;
 792
 793	if (rc == 3) {
 794		return __sm_ll_del_overflow(ll, b, ic);
 795	} else {
 796		rc--;
 797		*v_ptr = cpu_to_le32(rc);
 798		return 0;
 799	}
 800}
 801
 802static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
 803			      struct inc_context *ic, uint32_t *old_rc)
 804{
 805	/*
 806	 * Do we already have the correct overflow leaf?
 807	 */
 808	if (ic->overflow_leaf) {
 809		int index;
 810		struct btree_node *n;
 811		__le32 *v_ptr;
 812		uint32_t rc;
 813
 814		n = dm_block_data(ic->overflow_leaf);
 815		index = lower_bound(n, b);
 816		if (contains_key(n, b, index)) {
 817			v_ptr = value_ptr(n, index);
 818			rc = le32_to_cpu(*v_ptr);
 819			*old_rc = rc;
 820
 821			if (rc > 3) {
 822				rc--;
 823				*v_ptr = cpu_to_le32(rc);
 824				return 0;
 825			} else {
 826				return __sm_ll_del_overflow(ll, b, ic);
 827			}
 828
 829		}
 830	}
 831
 832	return __sm_ll_dec_overflow(ll, b, ic, old_rc);
 833}
 834
 835/*
 836 * Loops round incrementing entries in a single bitmap.
 837 */
 838static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b,
 839				   uint32_t bit, uint32_t bit_end,
 840				   struct inc_context *ic,
 841				   int32_t *nr_allocations, dm_block_t *new_b)
 842{
 843	int r;
 844	uint32_t old;
 845
 846	for (; bit != bit_end; bit++, b++) {
 847		/*
 848		 * We only need to drop the bitmap if we need to find a new btree
 849		 * leaf for the overflow.  So if it was dropped last iteration,
 850		 * we now re-get it.
 851		 */
 852		r = ensure_bitmap(ll, ic);
 853		if (r)
 854			return r;
 855
 856		old = sm_lookup_bitmap(ic->bitmap, bit);
 857		switch (old) {
 858		case 0:
 859			DMERR("unable to decrement block");
 860			return -EINVAL;
 861
 862		case 1:
 863			/* dec bitmap */
 864			sm_set_bitmap(ic->bitmap, bit, 0);
 865			(*nr_allocations)--;
 866			ll->nr_allocated--;
 867			le32_add_cpu(&ic->ie_disk.nr_free, 1);
 868			ic->ie_disk.none_free_before =
 869				cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit));
 870			break;
 871
 872		case 2:
 873			/* dec bitmap and insert into overflow */
 874			sm_set_bitmap(ic->bitmap, bit, 1);
 875			break;
 876
 877		case 3:
 878			r = sm_ll_dec_overflow(ll, b, ic, &old);
 879			if (r < 0)
 880				return r;
 881
 882			if (old == 3) {
 883				r = ensure_bitmap(ll, ic);
 884				if (r)
 885					return r;
 886
 887				sm_set_bitmap(ic->bitmap, bit, 2);
 888			}
 889			break;
 890		}
 891	}
 892
 893	*new_b = b;
 894	return 0;
 895}
 896
 897static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
 898		       int32_t *nr_allocations, dm_block_t *new_b)
 899{
 900	int r;
 901	uint32_t bit, bit_end;
 902	struct inc_context ic;
 903	dm_block_t index = b;
 904
 905	init_inc_context(&ic);
 906
 907	bit = do_div(index, ll->entries_per_block);
 908	r = ll->load_ie(ll, index, &ic.ie_disk);
 909	if (r < 0)
 910		return r;
 911
 912	r = shadow_bitmap(ll, &ic);
 913	if (r)
 914		return r;
 915
 916	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
 917	r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b);
 918	exit_inc_context(ll, &ic);
 919
 920	if (r)
 921		return r;
 922
 923	return ll->save_ie(ll, index, &ic.ie_disk);
 924}
 925
 926int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
 927	      int32_t *nr_allocations)
 928{
 929	*nr_allocations = 0;
 930	while (b != e) {
 931		int r = __sm_ll_dec(ll, b, e, nr_allocations, &b);
 932		if (r)
 933			return r;
 934	}
 935
 936	return 0;
 937}
 938
 939/*----------------------------------------------------------------*/
 940
 941int sm_ll_commit(struct ll_disk *ll)
 942{
 943	int r = 0;
 944
 945	if (ll->bitmap_index_changed) {
 946		r = ll->commit(ll);
 947		if (!r)
 948			ll->bitmap_index_changed = false;
 949	}
 950
 951	return r;
 952}
 953
 954/*----------------------------------------------------------------*/
 955
 956static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index,
 957			       struct disk_index_entry *ie)
 958{
 959	memcpy(ie, ll->mi_le.index + index, sizeof(*ie));
 960	return 0;
 961}
 962
 963static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index,
 964			       struct disk_index_entry *ie)
 965{
 966	ll->bitmap_index_changed = true;
 967	memcpy(ll->mi_le.index + index, ie, sizeof(*ie));
 968	return 0;
 969}
 970
 971static int metadata_ll_init_index(struct ll_disk *ll)
 972{
 973	int r;
 974	struct dm_block *b;
 975
 976	r = dm_tm_new_block(ll->tm, &index_validator, &b);
 977	if (r < 0)
 978		return r;
 979
 980	ll->bitmap_root = dm_block_location(b);
 981
 982	dm_tm_unlock(ll->tm, b);
 983
 984	return 0;
 985}
 986
 987static int metadata_ll_open(struct ll_disk *ll)
 988{
 989	int r;
 990	struct dm_block *block;
 991
 992	r = dm_tm_read_lock(ll->tm, ll->bitmap_root,
 993			    &index_validator, &block);
 994	if (r)
 995		return r;
 996
 997	memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le));
 998	dm_tm_unlock(ll->tm, block);
 999
1000	return 0;
1001}
1002
1003static dm_block_t metadata_ll_max_entries(struct ll_disk *ll)
1004{
1005	return MAX_METADATA_BITMAPS;
1006}
1007
1008static int metadata_ll_commit(struct ll_disk *ll)
1009{
1010	int r, inc;
1011	struct dm_block *b;
1012
1013	r = dm_tm_shadow_block(ll->tm, ll->bitmap_root, &index_validator, &b, &inc);
1014	if (r)
1015		return r;
1016
1017	memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
1018	ll->bitmap_root = dm_block_location(b);
1019
1020	dm_tm_unlock(ll->tm, b);
1021
1022	return 0;
1023}
1024
1025int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm)
1026{
1027	int r;
1028
1029	r = sm_ll_init(ll, tm);
1030	if (r < 0)
1031		return r;
1032
1033	ll->load_ie = metadata_ll_load_ie;
1034	ll->save_ie = metadata_ll_save_ie;
1035	ll->init_index = metadata_ll_init_index;
1036	ll->open_index = metadata_ll_open;
1037	ll->max_entries = metadata_ll_max_entries;
1038	ll->commit = metadata_ll_commit;
1039
1040	ll->nr_blocks = 0;
1041	ll->nr_allocated = 0;
1042
1043	r = ll->init_index(ll);
1044	if (r < 0)
1045		return r;
1046
1047	r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
1048	if (r < 0)
1049		return r;
1050
1051	return 0;
1052}
1053
1054int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
1055			void *root_le, size_t len)
1056{
1057	int r;
1058	struct disk_sm_root smr;
1059
1060	if (len < sizeof(struct disk_sm_root)) {
1061		DMERR("sm_metadata root too small");
1062		return -ENOMEM;
1063	}
1064
1065	/*
1066	 * We don't know the alignment of the root_le buffer, so need to
1067	 * copy into a new structure.
1068	 */
1069	memcpy(&smr, root_le, sizeof(smr));
1070
1071	r = sm_ll_init(ll, tm);
1072	if (r < 0)
1073		return r;
1074
1075	ll->load_ie = metadata_ll_load_ie;
1076	ll->save_ie = metadata_ll_save_ie;
1077	ll->init_index = metadata_ll_init_index;
1078	ll->open_index = metadata_ll_open;
1079	ll->max_entries = metadata_ll_max_entries;
1080	ll->commit = metadata_ll_commit;
1081
1082	ll->nr_blocks = le64_to_cpu(smr.nr_blocks);
1083	ll->nr_allocated = le64_to_cpu(smr.nr_allocated);
1084	ll->bitmap_root = le64_to_cpu(smr.bitmap_root);
1085	ll->ref_count_root = le64_to_cpu(smr.ref_count_root);
1086
1087	return ll->open_index(ll);
1088}
1089
1090/*----------------------------------------------------------------*/
1091
1092static inline int ie_cache_writeback(struct ll_disk *ll, struct ie_cache *iec)
1093{
1094	iec->dirty = false;
1095	__dm_bless_for_disk(iec->ie);
1096	return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
1097			       &iec->index, &iec->ie, &ll->bitmap_root);
1098}
1099
1100static inline unsigned hash_index(dm_block_t index)
1101{
1102	return dm_hash_block(index, IE_CACHE_MASK);
1103}
1104
1105static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index,
1106			   struct disk_index_entry *ie)
1107{
1108	int r;
1109	unsigned h = hash_index(index);
1110	struct ie_cache *iec = ll->ie_cache + h;
1111
1112	if (iec->valid) {
1113		if (iec->index == index) {
1114			memcpy(ie, &iec->ie, sizeof(*ie));
1115			return 0;
1116		}
1117
1118		if (iec->dirty) {
1119			r = ie_cache_writeback(ll, iec);
1120			if (r)
1121				return r;
1122		}
1123	}
1124
1125	r = dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
1126	if (!r) {
1127		iec->valid = true;
1128		iec->dirty = false;
1129		iec->index = index;
1130		memcpy(&iec->ie, ie, sizeof(*ie));
1131	}
1132
1133	return r;
1134}
1135
1136static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index,
1137			   struct disk_index_entry *ie)
1138{
1139	int r;
1140	unsigned h = hash_index(index);
1141	struct ie_cache *iec = ll->ie_cache + h;
1142
1143	ll->bitmap_index_changed = true;
1144	if (iec->valid) {
1145		if (iec->index == index) {
1146			memcpy(&iec->ie, ie, sizeof(*ie));
1147			iec->dirty = true;
1148			return 0;
1149		}
1150
1151		if (iec->dirty) {
1152			r = ie_cache_writeback(ll, iec);
1153			if (r)
1154				return r;
1155		}
1156	}
1157
1158	iec->valid = true;
1159	iec->dirty = true;
1160	iec->index = index;
1161	memcpy(&iec->ie, ie, sizeof(*ie));
1162	return 0;
1163}
1164
1165static int disk_ll_init_index(struct ll_disk *ll)
1166{
1167	unsigned i;
1168	for (i = 0; i < IE_CACHE_SIZE; i++) {
1169		struct ie_cache *iec = ll->ie_cache + i;
1170		iec->valid = false;
1171		iec->dirty = false;
1172	}
1173	return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root);
1174}
1175
1176static int disk_ll_open(struct ll_disk *ll)
1177{
1178	return 0;
1179}
1180
1181static dm_block_t disk_ll_max_entries(struct ll_disk *ll)
1182{
1183	return -1ULL;
1184}
1185
1186static int disk_ll_commit(struct ll_disk *ll)
1187{
1188	int r = 0;
1189	unsigned i;
1190
1191	for (i = 0; i < IE_CACHE_SIZE; i++) {
1192		struct ie_cache *iec = ll->ie_cache + i;
1193		if (iec->valid && iec->dirty)
1194			r = ie_cache_writeback(ll, iec);
1195	}
1196
1197	return r;
1198}
1199
1200int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm)
1201{
1202	int r;
1203
1204	r = sm_ll_init(ll, tm);
1205	if (r < 0)
1206		return r;
1207
1208	ll->load_ie = disk_ll_load_ie;
1209	ll->save_ie = disk_ll_save_ie;
1210	ll->init_index = disk_ll_init_index;
1211	ll->open_index = disk_ll_open;
1212	ll->max_entries = disk_ll_max_entries;
1213	ll->commit = disk_ll_commit;
1214
1215	ll->nr_blocks = 0;
1216	ll->nr_allocated = 0;
1217
1218	r = ll->init_index(ll);
1219	if (r < 0)
1220		return r;
1221
1222	r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
1223	if (r < 0)
1224		return r;
1225
1226	return 0;
1227}
1228
1229int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm,
1230		    void *root_le, size_t len)
1231{
1232	int r;
1233	struct disk_sm_root *smr = root_le;
1234
1235	if (len < sizeof(struct disk_sm_root)) {
1236		DMERR("sm_metadata root too small");
1237		return -ENOMEM;
1238	}
1239
1240	r = sm_ll_init(ll, tm);
1241	if (r < 0)
1242		return r;
1243
1244	ll->load_ie = disk_ll_load_ie;
1245	ll->save_ie = disk_ll_save_ie;
1246	ll->init_index = disk_ll_init_index;
1247	ll->open_index = disk_ll_open;
1248	ll->max_entries = disk_ll_max_entries;
1249	ll->commit = disk_ll_commit;
1250
1251	ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
1252	ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
1253	ll->bitmap_root = le64_to_cpu(smr->bitmap_root);
1254	ll->ref_count_root = le64_to_cpu(smr->ref_count_root);
1255
1256	return ll->open_index(ll);
1257}
1258
1259/*----------------------------------------------------------------*/