Loading...
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2011 Red Hat, Inc.
4 *
5 * This file is released under the GPL.
6 */
7
8#include "dm-space-map.h"
9#include "dm-space-map-common.h"
10#include "dm-space-map-metadata.h"
11
12#include <linux/list.h>
13#include <linux/slab.h>
14#include <linux/device-mapper.h>
15#include <linux/kernel.h>
16
17#define DM_MSG_PREFIX "space map metadata"
18
19/*----------------------------------------------------------------*/
20
21/*
22 * An edge triggered threshold.
23 */
24struct threshold {
25 bool threshold_set;
26 bool value_set;
27 dm_block_t threshold;
28 dm_block_t current_value;
29 dm_sm_threshold_fn fn;
30 void *context;
31};
32
33static void threshold_init(struct threshold *t)
34{
35 t->threshold_set = false;
36 t->value_set = false;
37}
38
39static void set_threshold(struct threshold *t, dm_block_t value,
40 dm_sm_threshold_fn fn, void *context)
41{
42 t->threshold_set = true;
43 t->threshold = value;
44 t->fn = fn;
45 t->context = context;
46}
47
48static bool below_threshold(struct threshold *t, dm_block_t value)
49{
50 return t->threshold_set && value <= t->threshold;
51}
52
53static bool threshold_already_triggered(struct threshold *t)
54{
55 return t->value_set && below_threshold(t, t->current_value);
56}
57
58static void check_threshold(struct threshold *t, dm_block_t value)
59{
60 if (below_threshold(t, value) &&
61 !threshold_already_triggered(t))
62 t->fn(t->context);
63
64 t->value_set = true;
65 t->current_value = value;
66}
67
68/*----------------------------------------------------------------*/
69
70/*
71 * Space map interface.
72 *
73 * The low level disk format is written using the standard btree and
74 * transaction manager. This means that performing disk operations may
75 * cause us to recurse into the space map in order to allocate new blocks.
76 * For this reason we have a pool of pre-allocated blocks large enough to
77 * service any metadata_ll_disk operation.
78 */
79
80/*
81 * FIXME: we should calculate this based on the size of the device.
82 * Only the metadata space map needs this functionality.
83 */
84#define MAX_RECURSIVE_ALLOCATIONS 1024
85
86enum block_op_type {
87 BOP_INC,
88 BOP_DEC
89};
90
91struct block_op {
92 enum block_op_type type;
93 dm_block_t b;
94 dm_block_t e;
95};
96
97struct bop_ring_buffer {
98 unsigned int begin;
99 unsigned int end;
100 struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
101};
102
103static void brb_init(struct bop_ring_buffer *brb)
104{
105 brb->begin = 0;
106 brb->end = 0;
107}
108
109static bool brb_empty(struct bop_ring_buffer *brb)
110{
111 return brb->begin == brb->end;
112}
113
114static unsigned int brb_next(struct bop_ring_buffer *brb, unsigned int old)
115{
116 unsigned int r = old + 1;
117
118 return r >= ARRAY_SIZE(brb->bops) ? 0 : r;
119}
120
121static int brb_push(struct bop_ring_buffer *brb,
122 enum block_op_type type, dm_block_t b, dm_block_t e)
123{
124 struct block_op *bop;
125 unsigned int next = brb_next(brb, brb->end);
126
127 /*
128 * We don't allow the last bop to be filled, this way we can
129 * differentiate between full and empty.
130 */
131 if (next == brb->begin)
132 return -ENOMEM;
133
134 bop = brb->bops + brb->end;
135 bop->type = type;
136 bop->b = b;
137 bop->e = e;
138
139 brb->end = next;
140
141 return 0;
142}
143
144static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result)
145{
146 struct block_op *bop;
147
148 if (brb_empty(brb))
149 return -ENODATA;
150
151 bop = brb->bops + brb->begin;
152 memcpy(result, bop, sizeof(*result));
153 return 0;
154}
155
156static int brb_pop(struct bop_ring_buffer *brb)
157{
158 if (brb_empty(brb))
159 return -ENODATA;
160
161 brb->begin = brb_next(brb, brb->begin);
162
163 return 0;
164}
165
166/*----------------------------------------------------------------*/
167
168struct sm_metadata {
169 struct dm_space_map sm;
170
171 struct ll_disk ll;
172 struct ll_disk old_ll;
173
174 dm_block_t begin;
175
176 unsigned int recursion_count;
177 unsigned int allocated_this_transaction;
178 struct bop_ring_buffer uncommitted;
179
180 struct threshold threshold;
181};
182
183static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b, dm_block_t e)
184{
185 int r = brb_push(&smm->uncommitted, type, b, e);
186
187 if (r) {
188 DMERR("too many recursive allocations");
189 return -ENOMEM;
190 }
191
192 return 0;
193}
194
195static int commit_bop(struct sm_metadata *smm, struct block_op *op)
196{
197 int r = 0;
198 int32_t nr_allocations;
199
200 switch (op->type) {
201 case BOP_INC:
202 r = sm_ll_inc(&smm->ll, op->b, op->e, &nr_allocations);
203 break;
204
205 case BOP_DEC:
206 r = sm_ll_dec(&smm->ll, op->b, op->e, &nr_allocations);
207 break;
208 }
209
210 return r;
211}
212
213static void in(struct sm_metadata *smm)
214{
215 smm->recursion_count++;
216}
217
218static int apply_bops(struct sm_metadata *smm)
219{
220 int r = 0;
221
222 while (!brb_empty(&smm->uncommitted)) {
223 struct block_op bop;
224
225 r = brb_peek(&smm->uncommitted, &bop);
226 if (r) {
227 DMERR("bug in bop ring buffer");
228 break;
229 }
230
231 r = commit_bop(smm, &bop);
232 if (r)
233 break;
234
235 brb_pop(&smm->uncommitted);
236 }
237
238 return r;
239}
240
241static int out(struct sm_metadata *smm)
242{
243 int r = 0;
244
245 /*
246 * If we're not recursing then very bad things are happening.
247 */
248 if (!smm->recursion_count) {
249 DMERR("lost track of recursion depth");
250 return -ENOMEM;
251 }
252
253 if (smm->recursion_count == 1)
254 r = apply_bops(smm);
255
256 smm->recursion_count--;
257
258 return r;
259}
260
261/*
262 * When using the out() function above, we often want to combine an error
263 * code for the operation run in the recursive context with that from
264 * out().
265 */
266static int combine_errors(int r1, int r2)
267{
268 return r1 ? r1 : r2;
269}
270
271static int recursing(struct sm_metadata *smm)
272{
273 return smm->recursion_count;
274}
275
276static void sm_metadata_destroy(struct dm_space_map *sm)
277{
278 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
279
280 kfree(smm);
281}
282
283static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
284{
285 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
286
287 *count = smm->ll.nr_blocks;
288
289 return 0;
290}
291
292static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
293{
294 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
295
296 *count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
297 smm->allocated_this_transaction;
298
299 return 0;
300}
301
302static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
303 uint32_t *result)
304{
305 int r;
306 unsigned int i;
307 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
308 unsigned int adjustment = 0;
309
310 /*
311 * We may have some uncommitted adjustments to add. This list
312 * should always be really short.
313 */
314 for (i = smm->uncommitted.begin;
315 i != smm->uncommitted.end;
316 i = brb_next(&smm->uncommitted, i)) {
317 struct block_op *op = smm->uncommitted.bops + i;
318
319 if (b < op->b || b >= op->e)
320 continue;
321
322 switch (op->type) {
323 case BOP_INC:
324 adjustment++;
325 break;
326
327 case BOP_DEC:
328 adjustment--;
329 break;
330 }
331 }
332
333 r = sm_ll_lookup(&smm->ll, b, result);
334 if (r)
335 return r;
336
337 *result += adjustment;
338
339 return 0;
340}
341
342static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
343 dm_block_t b, int *result)
344{
345 int r, adjustment = 0;
346 unsigned int i;
347 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
348 uint32_t rc;
349
350 /*
351 * We may have some uncommitted adjustments to add. This list
352 * should always be really short.
353 */
354 for (i = smm->uncommitted.begin;
355 i != smm->uncommitted.end;
356 i = brb_next(&smm->uncommitted, i)) {
357
358 struct block_op *op = smm->uncommitted.bops + i;
359
360 if (b < op->b || b >= op->e)
361 continue;
362
363 switch (op->type) {
364 case BOP_INC:
365 adjustment++;
366 break;
367
368 case BOP_DEC:
369 adjustment--;
370 break;
371 }
372 }
373
374 if (adjustment > 1) {
375 *result = 1;
376 return 0;
377 }
378
379 r = sm_ll_lookup_bitmap(&smm->ll, b, &rc);
380 if (r)
381 return r;
382
383 if (rc == 3)
384 /*
385 * We err on the side of caution, and always return true.
386 */
387 *result = 1;
388 else
389 *result = rc + adjustment > 1;
390
391 return 0;
392}
393
394static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
395 uint32_t count)
396{
397 int r, r2;
398 int32_t nr_allocations;
399 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
400
401 if (smm->recursion_count) {
402 DMERR("cannot recurse set_count()");
403 return -EINVAL;
404 }
405
406 in(smm);
407 r = sm_ll_insert(&smm->ll, b, count, &nr_allocations);
408 r2 = out(smm);
409
410 return combine_errors(r, r2);
411}
412
413static int sm_metadata_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
414{
415 int r, r2 = 0;
416 int32_t nr_allocations;
417 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
418
419 if (recursing(smm)) {
420 r = add_bop(smm, BOP_INC, b, e);
421 if (r)
422 return r;
423 } else {
424 in(smm);
425 r = sm_ll_inc(&smm->ll, b, e, &nr_allocations);
426 r2 = out(smm);
427 }
428
429 return combine_errors(r, r2);
430}
431
432static int sm_metadata_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
433{
434 int r, r2 = 0;
435 int32_t nr_allocations;
436 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
437
438 if (recursing(smm))
439 r = add_bop(smm, BOP_DEC, b, e);
440 else {
441 in(smm);
442 r = sm_ll_dec(&smm->ll, b, e, &nr_allocations);
443 r2 = out(smm);
444 }
445
446 return combine_errors(r, r2);
447}
448
449static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
450{
451 int r, r2 = 0;
452 int32_t nr_allocations;
453 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
454
455 /*
456 * Any block we allocate has to be free in both the old and current ll.
457 */
458 r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, smm->begin, smm->ll.nr_blocks, b);
459 if (r == -ENOSPC) {
460 /*
461 * There's no free block between smm->begin and the end of the metadata device.
462 * We search before smm->begin in case something has been freed.
463 */
464 r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, 0, smm->begin, b);
465 }
466
467 if (r)
468 return r;
469
470 smm->begin = *b + 1;
471
472 if (recursing(smm))
473 r = add_bop(smm, BOP_INC, *b, *b + 1);
474 else {
475 in(smm);
476 r = sm_ll_inc(&smm->ll, *b, *b + 1, &nr_allocations);
477 r2 = out(smm);
478 }
479
480 if (!r)
481 smm->allocated_this_transaction++;
482
483 return combine_errors(r, r2);
484}
485
486static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
487{
488 dm_block_t count;
489 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
490
491 int r = sm_metadata_new_block_(sm, b);
492
493 if (r) {
494 DMERR_LIMIT("unable to allocate new metadata block");
495 return r;
496 }
497
498 r = sm_metadata_get_nr_free(sm, &count);
499 if (r) {
500 DMERR_LIMIT("couldn't get free block count");
501 return r;
502 }
503
504 check_threshold(&smm->threshold, count);
505
506 return r;
507}
508
509static int sm_metadata_commit(struct dm_space_map *sm)
510{
511 int r;
512 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
513
514 r = sm_ll_commit(&smm->ll);
515 if (r)
516 return r;
517
518 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
519 smm->allocated_this_transaction = 0;
520
521 return 0;
522}
523
524static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
525 dm_block_t threshold,
526 dm_sm_threshold_fn fn,
527 void *context)
528{
529 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
530
531 set_threshold(&smm->threshold, threshold, fn, context);
532
533 return 0;
534}
535
536static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
537{
538 *result = sizeof(struct disk_sm_root);
539
540 return 0;
541}
542
543static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
544{
545 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
546 struct disk_sm_root root_le;
547
548 root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
549 root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
550 root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
551 root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);
552
553 if (max < sizeof(root_le))
554 return -ENOSPC;
555
556 memcpy(where_le, &root_le, sizeof(root_le));
557
558 return 0;
559}
560
561static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);
562
563static const struct dm_space_map ops = {
564 .destroy = sm_metadata_destroy,
565 .extend = sm_metadata_extend,
566 .get_nr_blocks = sm_metadata_get_nr_blocks,
567 .get_nr_free = sm_metadata_get_nr_free,
568 .get_count = sm_metadata_get_count,
569 .count_is_more_than_one = sm_metadata_count_is_more_than_one,
570 .set_count = sm_metadata_set_count,
571 .inc_blocks = sm_metadata_inc_blocks,
572 .dec_blocks = sm_metadata_dec_blocks,
573 .new_block = sm_metadata_new_block,
574 .commit = sm_metadata_commit,
575 .root_size = sm_metadata_root_size,
576 .copy_root = sm_metadata_copy_root,
577 .register_threshold_callback = sm_metadata_register_threshold_callback
578};
579
580/*----------------------------------------------------------------*/
581
582/*
583 * When a new space map is created that manages its own space. We use
584 * this tiny bootstrap allocator.
585 */
586static void sm_bootstrap_destroy(struct dm_space_map *sm)
587{
588}
589
590static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
591{
592 DMERR("bootstrap doesn't support extend");
593
594 return -EINVAL;
595}
596
597static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
598{
599 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
600
601 *count = smm->ll.nr_blocks;
602
603 return 0;
604}
605
606static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
607{
608 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
609
610 *count = smm->ll.nr_blocks - smm->begin;
611
612 return 0;
613}
614
615static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
616 uint32_t *result)
617{
618 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
619
620 *result = (b < smm->begin) ? 1 : 0;
621
622 return 0;
623}
624
625static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
626 dm_block_t b, int *result)
627{
628 *result = 0;
629
630 return 0;
631}
632
633static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
634 uint32_t count)
635{
636 DMERR("bootstrap doesn't support set_count");
637
638 return -EINVAL;
639}
640
641static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
642{
643 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
644
645 /*
646 * We know the entire device is unused.
647 */
648 if (smm->begin == smm->ll.nr_blocks)
649 return -ENOSPC;
650
651 *b = smm->begin++;
652
653 return 0;
654}
655
656static int sm_bootstrap_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
657{
658 int r;
659 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
660
661 r = add_bop(smm, BOP_INC, b, e);
662 if (r)
663 return r;
664
665 return 0;
666}
667
668static int sm_bootstrap_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
669{
670 int r;
671 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
672
673 r = add_bop(smm, BOP_DEC, b, e);
674 if (r)
675 return r;
676
677 return 0;
678}
679
680static int sm_bootstrap_commit(struct dm_space_map *sm)
681{
682 return 0;
683}
684
685static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
686{
687 DMERR("bootstrap doesn't support root_size");
688
689 return -EINVAL;
690}
691
692static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
693 size_t max)
694{
695 DMERR("bootstrap doesn't support copy_root");
696
697 return -EINVAL;
698}
699
700static const struct dm_space_map bootstrap_ops = {
701 .destroy = sm_bootstrap_destroy,
702 .extend = sm_bootstrap_extend,
703 .get_nr_blocks = sm_bootstrap_get_nr_blocks,
704 .get_nr_free = sm_bootstrap_get_nr_free,
705 .get_count = sm_bootstrap_get_count,
706 .count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
707 .set_count = sm_bootstrap_set_count,
708 .inc_blocks = sm_bootstrap_inc_blocks,
709 .dec_blocks = sm_bootstrap_dec_blocks,
710 .new_block = sm_bootstrap_new_block,
711 .commit = sm_bootstrap_commit,
712 .root_size = sm_bootstrap_root_size,
713 .copy_root = sm_bootstrap_copy_root,
714 .register_threshold_callback = NULL
715};
716
717/*----------------------------------------------------------------*/
718
719static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
720{
721 int r;
722 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
723 dm_block_t old_len = smm->ll.nr_blocks;
724
725 /*
726 * Flick into a mode where all blocks get allocated in the new area.
727 */
728 smm->begin = old_len;
729 memcpy(sm, &bootstrap_ops, sizeof(*sm));
730
731 /*
732 * Extend.
733 */
734 r = sm_ll_extend(&smm->ll, extra_blocks);
735 if (r)
736 goto out;
737
738 /*
739 * We repeatedly increment then commit until the commit doesn't
740 * allocate any new blocks.
741 */
742 do {
743 r = add_bop(smm, BOP_INC, old_len, smm->begin);
744 if (r)
745 goto out;
746
747 old_len = smm->begin;
748
749 r = apply_bops(smm);
750 if (r) {
751 DMERR("%s: apply_bops failed", __func__);
752 goto out;
753 }
754
755 r = sm_ll_commit(&smm->ll);
756 if (r)
757 goto out;
758
759 } while (old_len != smm->begin);
760
761out:
762 /*
763 * Switch back to normal behaviour.
764 */
765 memcpy(sm, &ops, sizeof(*sm));
766 return r;
767}
768
769/*----------------------------------------------------------------*/
770
771struct dm_space_map *dm_sm_metadata_init(void)
772{
773 struct sm_metadata *smm;
774
775 smm = kmalloc(sizeof(*smm), GFP_KERNEL);
776 if (!smm)
777 return ERR_PTR(-ENOMEM);
778
779 memcpy(&smm->sm, &ops, sizeof(smm->sm));
780
781 return &smm->sm;
782}
783
784int dm_sm_metadata_create(struct dm_space_map *sm,
785 struct dm_transaction_manager *tm,
786 dm_block_t nr_blocks,
787 dm_block_t superblock)
788{
789 int r;
790 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
791
792 smm->begin = superblock + 1;
793 smm->recursion_count = 0;
794 smm->allocated_this_transaction = 0;
795 brb_init(&smm->uncommitted);
796 threshold_init(&smm->threshold);
797
798 memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
799
800 r = sm_ll_new_metadata(&smm->ll, tm);
801 if (!r) {
802 if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
803 nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
804 r = sm_ll_extend(&smm->ll, nr_blocks);
805 }
806 memcpy(&smm->sm, &ops, sizeof(smm->sm));
807 if (r)
808 return r;
809
810 /*
811 * Now we need to update the newly created data structures with the
812 * allocated blocks that they were built from.
813 */
814 r = add_bop(smm, BOP_INC, superblock, smm->begin);
815 if (r)
816 return r;
817
818 r = apply_bops(smm);
819 if (r) {
820 DMERR("%s: apply_bops failed", __func__);
821 return r;
822 }
823
824 return sm_metadata_commit(sm);
825}
826
827int dm_sm_metadata_open(struct dm_space_map *sm,
828 struct dm_transaction_manager *tm,
829 void *root_le, size_t len)
830{
831 int r;
832 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
833
834 r = sm_ll_open_metadata(&smm->ll, tm, root_le, len);
835 if (r)
836 return r;
837
838 smm->begin = 0;
839 smm->recursion_count = 0;
840 smm->allocated_this_transaction = 0;
841 brb_init(&smm->uncommitted);
842 threshold_init(&smm->threshold);
843
844 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
845 return 0;
846}
1/*
2 * Copyright (C) 2011 Red Hat, Inc.
3 *
4 * This file is released under the GPL.
5 */
6
7#include "dm-space-map.h"
8#include "dm-space-map-common.h"
9#include "dm-space-map-metadata.h"
10
11#include <linux/list.h>
12#include <linux/slab.h>
13#include <linux/device-mapper.h>
14
15#define DM_MSG_PREFIX "space map metadata"
16
17/*----------------------------------------------------------------*/
18
19/*
20 * An edge triggered threshold.
21 */
22struct threshold {
23 bool threshold_set;
24 bool value_set;
25 dm_block_t threshold;
26 dm_block_t current_value;
27 dm_sm_threshold_fn fn;
28 void *context;
29};
30
31static void threshold_init(struct threshold *t)
32{
33 t->threshold_set = false;
34 t->value_set = false;
35}
36
37static void set_threshold(struct threshold *t, dm_block_t value,
38 dm_sm_threshold_fn fn, void *context)
39{
40 t->threshold_set = true;
41 t->threshold = value;
42 t->fn = fn;
43 t->context = context;
44}
45
46static bool below_threshold(struct threshold *t, dm_block_t value)
47{
48 return t->threshold_set && value <= t->threshold;
49}
50
51static bool threshold_already_triggered(struct threshold *t)
52{
53 return t->value_set && below_threshold(t, t->current_value);
54}
55
56static void check_threshold(struct threshold *t, dm_block_t value)
57{
58 if (below_threshold(t, value) &&
59 !threshold_already_triggered(t))
60 t->fn(t->context);
61
62 t->value_set = true;
63 t->current_value = value;
64}
65
66/*----------------------------------------------------------------*/
67
68/*
69 * Space map interface.
70 *
71 * The low level disk format is written using the standard btree and
72 * transaction manager. This means that performing disk operations may
73 * cause us to recurse into the space map in order to allocate new blocks.
74 * For this reason we have a pool of pre-allocated blocks large enough to
75 * service any metadata_ll_disk operation.
76 */
77
78/*
79 * FIXME: we should calculate this based on the size of the device.
80 * Only the metadata space map needs this functionality.
81 */
82#define MAX_RECURSIVE_ALLOCATIONS 1024
83
84enum block_op_type {
85 BOP_INC,
86 BOP_DEC
87};
88
89struct block_op {
90 enum block_op_type type;
91 dm_block_t block;
92};
93
94struct bop_ring_buffer {
95 unsigned begin;
96 unsigned end;
97 struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
98};
99
100static void brb_init(struct bop_ring_buffer *brb)
101{
102 brb->begin = 0;
103 brb->end = 0;
104}
105
106static bool brb_empty(struct bop_ring_buffer *brb)
107{
108 return brb->begin == brb->end;
109}
110
111static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
112{
113 unsigned r = old + 1;
114 return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r;
115}
116
117static int brb_push(struct bop_ring_buffer *brb,
118 enum block_op_type type, dm_block_t b)
119{
120 struct block_op *bop;
121 unsigned next = brb_next(brb, brb->end);
122
123 /*
124 * We don't allow the last bop to be filled, this way we can
125 * differentiate between full and empty.
126 */
127 if (next == brb->begin)
128 return -ENOMEM;
129
130 bop = brb->bops + brb->end;
131 bop->type = type;
132 bop->block = b;
133
134 brb->end = next;
135
136 return 0;
137}
138
139static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result)
140{
141 struct block_op *bop;
142
143 if (brb_empty(brb))
144 return -ENODATA;
145
146 bop = brb->bops + brb->begin;
147 result->type = bop->type;
148 result->block = bop->block;
149
150 return 0;
151}
152
153static int brb_pop(struct bop_ring_buffer *brb)
154{
155 if (brb_empty(brb))
156 return -ENODATA;
157
158 brb->begin = brb_next(brb, brb->begin);
159
160 return 0;
161}
162
163/*----------------------------------------------------------------*/
164
165struct sm_metadata {
166 struct dm_space_map sm;
167
168 struct ll_disk ll;
169 struct ll_disk old_ll;
170
171 dm_block_t begin;
172
173 unsigned recursion_count;
174 unsigned allocated_this_transaction;
175 struct bop_ring_buffer uncommitted;
176
177 struct threshold threshold;
178};
179
180static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
181{
182 int r = brb_push(&smm->uncommitted, type, b);
183
184 if (r) {
185 DMERR("too many recursive allocations");
186 return -ENOMEM;
187 }
188
189 return 0;
190}
191
192static int commit_bop(struct sm_metadata *smm, struct block_op *op)
193{
194 int r = 0;
195 enum allocation_event ev;
196
197 switch (op->type) {
198 case BOP_INC:
199 r = sm_ll_inc(&smm->ll, op->block, &ev);
200 break;
201
202 case BOP_DEC:
203 r = sm_ll_dec(&smm->ll, op->block, &ev);
204 break;
205 }
206
207 return r;
208}
209
210static void in(struct sm_metadata *smm)
211{
212 smm->recursion_count++;
213}
214
215static int apply_bops(struct sm_metadata *smm)
216{
217 int r = 0;
218
219 while (!brb_empty(&smm->uncommitted)) {
220 struct block_op bop;
221
222 r = brb_peek(&smm->uncommitted, &bop);
223 if (r) {
224 DMERR("bug in bop ring buffer");
225 break;
226 }
227
228 r = commit_bop(smm, &bop);
229 if (r)
230 break;
231
232 brb_pop(&smm->uncommitted);
233 }
234
235 return r;
236}
237
238static int out(struct sm_metadata *smm)
239{
240 int r = 0;
241
242 /*
243 * If we're not recursing then very bad things are happening.
244 */
245 if (!smm->recursion_count) {
246 DMERR("lost track of recursion depth");
247 return -ENOMEM;
248 }
249
250 if (smm->recursion_count == 1)
251 apply_bops(smm);
252
253 smm->recursion_count--;
254
255 return r;
256}
257
258/*
259 * When using the out() function above, we often want to combine an error
260 * code for the operation run in the recursive context with that from
261 * out().
262 */
263static int combine_errors(int r1, int r2)
264{
265 return r1 ? r1 : r2;
266}
267
268static int recursing(struct sm_metadata *smm)
269{
270 return smm->recursion_count;
271}
272
273static void sm_metadata_destroy(struct dm_space_map *sm)
274{
275 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
276
277 kfree(smm);
278}
279
280static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
281{
282 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
283
284 *count = smm->ll.nr_blocks;
285
286 return 0;
287}
288
289static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
290{
291 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
292
293 *count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
294 smm->allocated_this_transaction;
295
296 return 0;
297}
298
299static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
300 uint32_t *result)
301{
302 int r;
303 unsigned i;
304 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
305 unsigned adjustment = 0;
306
307 /*
308 * We may have some uncommitted adjustments to add. This list
309 * should always be really short.
310 */
311 for (i = smm->uncommitted.begin;
312 i != smm->uncommitted.end;
313 i = brb_next(&smm->uncommitted, i)) {
314 struct block_op *op = smm->uncommitted.bops + i;
315
316 if (op->block != b)
317 continue;
318
319 switch (op->type) {
320 case BOP_INC:
321 adjustment++;
322 break;
323
324 case BOP_DEC:
325 adjustment--;
326 break;
327 }
328 }
329
330 r = sm_ll_lookup(&smm->ll, b, result);
331 if (r)
332 return r;
333
334 *result += adjustment;
335
336 return 0;
337}
338
339static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
340 dm_block_t b, int *result)
341{
342 int r, adjustment = 0;
343 unsigned i;
344 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
345 uint32_t rc;
346
347 /*
348 * We may have some uncommitted adjustments to add. This list
349 * should always be really short.
350 */
351 for (i = smm->uncommitted.begin;
352 i != smm->uncommitted.end;
353 i = brb_next(&smm->uncommitted, i)) {
354
355 struct block_op *op = smm->uncommitted.bops + i;
356
357 if (op->block != b)
358 continue;
359
360 switch (op->type) {
361 case BOP_INC:
362 adjustment++;
363 break;
364
365 case BOP_DEC:
366 adjustment--;
367 break;
368 }
369 }
370
371 if (adjustment > 1) {
372 *result = 1;
373 return 0;
374 }
375
376 r = sm_ll_lookup_bitmap(&smm->ll, b, &rc);
377 if (r)
378 return r;
379
380 if (rc == 3)
381 /*
382 * We err on the side of caution, and always return true.
383 */
384 *result = 1;
385 else
386 *result = rc + adjustment > 1;
387
388 return 0;
389}
390
391static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
392 uint32_t count)
393{
394 int r, r2;
395 enum allocation_event ev;
396 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
397
398 if (smm->recursion_count) {
399 DMERR("cannot recurse set_count()");
400 return -EINVAL;
401 }
402
403 in(smm);
404 r = sm_ll_insert(&smm->ll, b, count, &ev);
405 r2 = out(smm);
406
407 return combine_errors(r, r2);
408}
409
410static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b)
411{
412 int r, r2 = 0;
413 enum allocation_event ev;
414 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
415
416 if (recursing(smm))
417 r = add_bop(smm, BOP_INC, b);
418 else {
419 in(smm);
420 r = sm_ll_inc(&smm->ll, b, &ev);
421 r2 = out(smm);
422 }
423
424 return combine_errors(r, r2);
425}
426
427static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
428{
429 int r, r2 = 0;
430 enum allocation_event ev;
431 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
432
433 if (recursing(smm))
434 r = add_bop(smm, BOP_DEC, b);
435 else {
436 in(smm);
437 r = sm_ll_dec(&smm->ll, b, &ev);
438 r2 = out(smm);
439 }
440
441 return combine_errors(r, r2);
442}
443
444static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
445{
446 int r, r2 = 0;
447 enum allocation_event ev;
448 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
449
450 r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b);
451 if (r)
452 return r;
453
454 smm->begin = *b + 1;
455
456 if (recursing(smm))
457 r = add_bop(smm, BOP_INC, *b);
458 else {
459 in(smm);
460 r = sm_ll_inc(&smm->ll, *b, &ev);
461 r2 = out(smm);
462 }
463
464 if (!r)
465 smm->allocated_this_transaction++;
466
467 return combine_errors(r, r2);
468}
469
470static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
471{
472 dm_block_t count;
473 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
474
475 int r = sm_metadata_new_block_(sm, b);
476 if (r) {
477 DMERR_LIMIT("unable to allocate new metadata block");
478 return r;
479 }
480
481 r = sm_metadata_get_nr_free(sm, &count);
482 if (r) {
483 DMERR_LIMIT("couldn't get free block count");
484 return r;
485 }
486
487 check_threshold(&smm->threshold, count);
488
489 return r;
490}
491
492static int sm_metadata_commit(struct dm_space_map *sm)
493{
494 int r;
495 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
496
497 r = sm_ll_commit(&smm->ll);
498 if (r)
499 return r;
500
501 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
502 smm->begin = 0;
503 smm->allocated_this_transaction = 0;
504
505 return 0;
506}
507
508static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
509 dm_block_t threshold,
510 dm_sm_threshold_fn fn,
511 void *context)
512{
513 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
514
515 set_threshold(&smm->threshold, threshold, fn, context);
516
517 return 0;
518}
519
520static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
521{
522 *result = sizeof(struct disk_sm_root);
523
524 return 0;
525}
526
527static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
528{
529 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
530 struct disk_sm_root root_le;
531
532 root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
533 root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
534 root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
535 root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);
536
537 if (max < sizeof(root_le))
538 return -ENOSPC;
539
540 memcpy(where_le, &root_le, sizeof(root_le));
541
542 return 0;
543}
544
545static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);
546
547static struct dm_space_map ops = {
548 .destroy = sm_metadata_destroy,
549 .extend = sm_metadata_extend,
550 .get_nr_blocks = sm_metadata_get_nr_blocks,
551 .get_nr_free = sm_metadata_get_nr_free,
552 .get_count = sm_metadata_get_count,
553 .count_is_more_than_one = sm_metadata_count_is_more_than_one,
554 .set_count = sm_metadata_set_count,
555 .inc_block = sm_metadata_inc_block,
556 .dec_block = sm_metadata_dec_block,
557 .new_block = sm_metadata_new_block,
558 .commit = sm_metadata_commit,
559 .root_size = sm_metadata_root_size,
560 .copy_root = sm_metadata_copy_root,
561 .register_threshold_callback = sm_metadata_register_threshold_callback
562};
563
564/*----------------------------------------------------------------*/
565
566/*
567 * When a new space map is created that manages its own space. We use
568 * this tiny bootstrap allocator.
569 */
570static void sm_bootstrap_destroy(struct dm_space_map *sm)
571{
572}
573
574static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
575{
576 DMERR("bootstrap doesn't support extend");
577
578 return -EINVAL;
579}
580
581static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
582{
583 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
584
585 *count = smm->ll.nr_blocks;
586
587 return 0;
588}
589
590static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
591{
592 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
593
594 *count = smm->ll.nr_blocks - smm->begin;
595
596 return 0;
597}
598
599static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
600 uint32_t *result)
601{
602 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
603
604 *result = (b < smm->begin) ? 1 : 0;
605
606 return 0;
607}
608
609static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
610 dm_block_t b, int *result)
611{
612 *result = 0;
613
614 return 0;
615}
616
617static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
618 uint32_t count)
619{
620 DMERR("bootstrap doesn't support set_count");
621
622 return -EINVAL;
623}
624
625static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
626{
627 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
628
629 /*
630 * We know the entire device is unused.
631 */
632 if (smm->begin == smm->ll.nr_blocks)
633 return -ENOSPC;
634
635 *b = smm->begin++;
636
637 return 0;
638}
639
640static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b)
641{
642 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
643
644 return add_bop(smm, BOP_INC, b);
645}
646
647static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b)
648{
649 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
650
651 return add_bop(smm, BOP_DEC, b);
652}
653
654static int sm_bootstrap_commit(struct dm_space_map *sm)
655{
656 return 0;
657}
658
659static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
660{
661 DMERR("bootstrap doesn't support root_size");
662
663 return -EINVAL;
664}
665
666static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
667 size_t max)
668{
669 DMERR("bootstrap doesn't support copy_root");
670
671 return -EINVAL;
672}
673
674static struct dm_space_map bootstrap_ops = {
675 .destroy = sm_bootstrap_destroy,
676 .extend = sm_bootstrap_extend,
677 .get_nr_blocks = sm_bootstrap_get_nr_blocks,
678 .get_nr_free = sm_bootstrap_get_nr_free,
679 .get_count = sm_bootstrap_get_count,
680 .count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
681 .set_count = sm_bootstrap_set_count,
682 .inc_block = sm_bootstrap_inc_block,
683 .dec_block = sm_bootstrap_dec_block,
684 .new_block = sm_bootstrap_new_block,
685 .commit = sm_bootstrap_commit,
686 .root_size = sm_bootstrap_root_size,
687 .copy_root = sm_bootstrap_copy_root,
688 .register_threshold_callback = NULL
689};
690
691/*----------------------------------------------------------------*/
692
693static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
694{
695 int r, i;
696 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
697 dm_block_t old_len = smm->ll.nr_blocks;
698
699 /*
700 * Flick into a mode where all blocks get allocated in the new area.
701 */
702 smm->begin = old_len;
703 memcpy(sm, &bootstrap_ops, sizeof(*sm));
704
705 /*
706 * Extend.
707 */
708 r = sm_ll_extend(&smm->ll, extra_blocks);
709 if (r)
710 goto out;
711
712 /*
713 * We repeatedly increment then commit until the commit doesn't
714 * allocate any new blocks.
715 */
716 do {
717 for (i = old_len; !r && i < smm->begin; i++)
718 r = add_bop(smm, BOP_INC, i);
719
720 if (r)
721 goto out;
722
723 old_len = smm->begin;
724
725 r = apply_bops(smm);
726 if (r) {
727 DMERR("%s: apply_bops failed", __func__);
728 goto out;
729 }
730
731 r = sm_ll_commit(&smm->ll);
732 if (r)
733 goto out;
734
735 } while (old_len != smm->begin);
736
737out:
738 /*
739 * Switch back to normal behaviour.
740 */
741 memcpy(sm, &ops, sizeof(*sm));
742 return r;
743}
744
745/*----------------------------------------------------------------*/
746
747struct dm_space_map *dm_sm_metadata_init(void)
748{
749 struct sm_metadata *smm;
750
751 smm = kmalloc(sizeof(*smm), GFP_KERNEL);
752 if (!smm)
753 return ERR_PTR(-ENOMEM);
754
755 memcpy(&smm->sm, &ops, sizeof(smm->sm));
756
757 return &smm->sm;
758}
759
760int dm_sm_metadata_create(struct dm_space_map *sm,
761 struct dm_transaction_manager *tm,
762 dm_block_t nr_blocks,
763 dm_block_t superblock)
764{
765 int r;
766 dm_block_t i;
767 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
768
769 smm->begin = superblock + 1;
770 smm->recursion_count = 0;
771 smm->allocated_this_transaction = 0;
772 brb_init(&smm->uncommitted);
773 threshold_init(&smm->threshold);
774
775 memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
776
777 r = sm_ll_new_metadata(&smm->ll, tm);
778 if (r)
779 return r;
780
781 if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
782 nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
783 r = sm_ll_extend(&smm->ll, nr_blocks);
784 if (r)
785 return r;
786
787 memcpy(&smm->sm, &ops, sizeof(smm->sm));
788
789 /*
790 * Now we need to update the newly created data structures with the
791 * allocated blocks that they were built from.
792 */
793 for (i = superblock; !r && i < smm->begin; i++)
794 r = add_bop(smm, BOP_INC, i);
795
796 if (r)
797 return r;
798
799 r = apply_bops(smm);
800 if (r) {
801 DMERR("%s: apply_bops failed", __func__);
802 return r;
803 }
804
805 return sm_metadata_commit(sm);
806}
807
808int dm_sm_metadata_open(struct dm_space_map *sm,
809 struct dm_transaction_manager *tm,
810 void *root_le, size_t len)
811{
812 int r;
813 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
814
815 r = sm_ll_open_metadata(&smm->ll, tm, root_le, len);
816 if (r)
817 return r;
818
819 smm->begin = 0;
820 smm->recursion_count = 0;
821 smm->allocated_this_transaction = 0;
822 brb_init(&smm->uncommitted);
823 threshold_init(&smm->threshold);
824
825 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
826 return 0;
827}