Loading...
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2011 Red Hat, Inc.
4 *
5 * This file is released under the GPL.
6 */
7
8#include "dm-space-map.h"
9#include "dm-space-map-common.h"
10#include "dm-space-map-metadata.h"
11
12#include <linux/list.h>
13#include <linux/slab.h>
14#include <linux/device-mapper.h>
15#include <linux/kernel.h>
16
17#define DM_MSG_PREFIX "space map metadata"
18
19/*----------------------------------------------------------------*/
20
21/*
22 * An edge triggered threshold.
23 */
24struct threshold {
25 bool threshold_set;
26 bool value_set;
27 dm_block_t threshold;
28 dm_block_t current_value;
29 dm_sm_threshold_fn fn;
30 void *context;
31};
32
33static void threshold_init(struct threshold *t)
34{
35 t->threshold_set = false;
36 t->value_set = false;
37}
38
39static void set_threshold(struct threshold *t, dm_block_t value,
40 dm_sm_threshold_fn fn, void *context)
41{
42 t->threshold_set = true;
43 t->threshold = value;
44 t->fn = fn;
45 t->context = context;
46}
47
48static bool below_threshold(struct threshold *t, dm_block_t value)
49{
50 return t->threshold_set && value <= t->threshold;
51}
52
53static bool threshold_already_triggered(struct threshold *t)
54{
55 return t->value_set && below_threshold(t, t->current_value);
56}
57
58static void check_threshold(struct threshold *t, dm_block_t value)
59{
60 if (below_threshold(t, value) &&
61 !threshold_already_triggered(t))
62 t->fn(t->context);
63
64 t->value_set = true;
65 t->current_value = value;
66}
67
68/*----------------------------------------------------------------*/
69
70/*
71 * Space map interface.
72 *
73 * The low level disk format is written using the standard btree and
74 * transaction manager. This means that performing disk operations may
75 * cause us to recurse into the space map in order to allocate new blocks.
76 * For this reason we have a pool of pre-allocated blocks large enough to
77 * service any metadata_ll_disk operation.
78 */
79
80/*
81 * FIXME: we should calculate this based on the size of the device.
82 * Only the metadata space map needs this functionality.
83 */
84#define MAX_RECURSIVE_ALLOCATIONS 1024
85
86enum block_op_type {
87 BOP_INC,
88 BOP_DEC
89};
90
91struct block_op {
92 enum block_op_type type;
93 dm_block_t b;
94 dm_block_t e;
95};
96
97struct bop_ring_buffer {
98 unsigned int begin;
99 unsigned int end;
100 struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
101};
102
103static void brb_init(struct bop_ring_buffer *brb)
104{
105 brb->begin = 0;
106 brb->end = 0;
107}
108
109static bool brb_empty(struct bop_ring_buffer *brb)
110{
111 return brb->begin == brb->end;
112}
113
114static unsigned int brb_next(struct bop_ring_buffer *brb, unsigned int old)
115{
116 unsigned int r = old + 1;
117
118 return r >= ARRAY_SIZE(brb->bops) ? 0 : r;
119}
120
121static int brb_push(struct bop_ring_buffer *brb,
122 enum block_op_type type, dm_block_t b, dm_block_t e)
123{
124 struct block_op *bop;
125 unsigned int next = brb_next(brb, brb->end);
126
127 /*
128 * We don't allow the last bop to be filled, this way we can
129 * differentiate between full and empty.
130 */
131 if (next == brb->begin)
132 return -ENOMEM;
133
134 bop = brb->bops + brb->end;
135 bop->type = type;
136 bop->b = b;
137 bop->e = e;
138
139 brb->end = next;
140
141 return 0;
142}
143
144static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result)
145{
146 struct block_op *bop;
147
148 if (brb_empty(brb))
149 return -ENODATA;
150
151 bop = brb->bops + brb->begin;
152 memcpy(result, bop, sizeof(*result));
153 return 0;
154}
155
156static int brb_pop(struct bop_ring_buffer *brb)
157{
158 if (brb_empty(brb))
159 return -ENODATA;
160
161 brb->begin = brb_next(brb, brb->begin);
162
163 return 0;
164}
165
166/*----------------------------------------------------------------*/
167
168struct sm_metadata {
169 struct dm_space_map sm;
170
171 struct ll_disk ll;
172 struct ll_disk old_ll;
173
174 dm_block_t begin;
175
176 unsigned int recursion_count;
177 unsigned int allocated_this_transaction;
178 struct bop_ring_buffer uncommitted;
179
180 struct threshold threshold;
181};
182
183static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b, dm_block_t e)
184{
185 int r = brb_push(&smm->uncommitted, type, b, e);
186
187 if (r) {
188 DMERR("too many recursive allocations");
189 return -ENOMEM;
190 }
191
192 return 0;
193}
194
195static int commit_bop(struct sm_metadata *smm, struct block_op *op)
196{
197 int r = 0;
198 int32_t nr_allocations;
199
200 switch (op->type) {
201 case BOP_INC:
202 r = sm_ll_inc(&smm->ll, op->b, op->e, &nr_allocations);
203 break;
204
205 case BOP_DEC:
206 r = sm_ll_dec(&smm->ll, op->b, op->e, &nr_allocations);
207 break;
208 }
209
210 return r;
211}
212
213static void in(struct sm_metadata *smm)
214{
215 smm->recursion_count++;
216}
217
218static int apply_bops(struct sm_metadata *smm)
219{
220 int r = 0;
221
222 while (!brb_empty(&smm->uncommitted)) {
223 struct block_op bop;
224
225 r = brb_peek(&smm->uncommitted, &bop);
226 if (r) {
227 DMERR("bug in bop ring buffer");
228 break;
229 }
230
231 r = commit_bop(smm, &bop);
232 if (r)
233 break;
234
235 brb_pop(&smm->uncommitted);
236 }
237
238 return r;
239}
240
241static int out(struct sm_metadata *smm)
242{
243 int r = 0;
244
245 /*
246 * If we're not recursing then very bad things are happening.
247 */
248 if (!smm->recursion_count) {
249 DMERR("lost track of recursion depth");
250 return -ENOMEM;
251 }
252
253 if (smm->recursion_count == 1)
254 r = apply_bops(smm);
255
256 smm->recursion_count--;
257
258 return r;
259}
260
261/*
262 * When using the out() function above, we often want to combine an error
263 * code for the operation run in the recursive context with that from
264 * out().
265 */
266static int combine_errors(int r1, int r2)
267{
268 return r1 ? r1 : r2;
269}
270
271static int recursing(struct sm_metadata *smm)
272{
273 return smm->recursion_count;
274}
275
276static void sm_metadata_destroy(struct dm_space_map *sm)
277{
278 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
279
280 kfree(smm);
281}
282
283static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
284{
285 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
286
287 *count = smm->ll.nr_blocks;
288
289 return 0;
290}
291
292static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
293{
294 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
295
296 *count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
297 smm->allocated_this_transaction;
298
299 return 0;
300}
301
302static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
303 uint32_t *result)
304{
305 int r;
306 unsigned int i;
307 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
308 unsigned int adjustment = 0;
309
310 /*
311 * We may have some uncommitted adjustments to add. This list
312 * should always be really short.
313 */
314 for (i = smm->uncommitted.begin;
315 i != smm->uncommitted.end;
316 i = brb_next(&smm->uncommitted, i)) {
317 struct block_op *op = smm->uncommitted.bops + i;
318
319 if (b < op->b || b >= op->e)
320 continue;
321
322 switch (op->type) {
323 case BOP_INC:
324 adjustment++;
325 break;
326
327 case BOP_DEC:
328 adjustment--;
329 break;
330 }
331 }
332
333 r = sm_ll_lookup(&smm->ll, b, result);
334 if (r)
335 return r;
336
337 *result += adjustment;
338
339 return 0;
340}
341
342static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
343 dm_block_t b, int *result)
344{
345 int r, adjustment = 0;
346 unsigned int i;
347 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
348 uint32_t rc;
349
350 /*
351 * We may have some uncommitted adjustments to add. This list
352 * should always be really short.
353 */
354 for (i = smm->uncommitted.begin;
355 i != smm->uncommitted.end;
356 i = brb_next(&smm->uncommitted, i)) {
357
358 struct block_op *op = smm->uncommitted.bops + i;
359
360 if (b < op->b || b >= op->e)
361 continue;
362
363 switch (op->type) {
364 case BOP_INC:
365 adjustment++;
366 break;
367
368 case BOP_DEC:
369 adjustment--;
370 break;
371 }
372 }
373
374 if (adjustment > 1) {
375 *result = 1;
376 return 0;
377 }
378
379 r = sm_ll_lookup_bitmap(&smm->ll, b, &rc);
380 if (r)
381 return r;
382
383 if (rc == 3)
384 /*
385 * We err on the side of caution, and always return true.
386 */
387 *result = 1;
388 else
389 *result = rc + adjustment > 1;
390
391 return 0;
392}
393
394static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
395 uint32_t count)
396{
397 int r, r2;
398 int32_t nr_allocations;
399 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
400
401 if (smm->recursion_count) {
402 DMERR("cannot recurse set_count()");
403 return -EINVAL;
404 }
405
406 in(smm);
407 r = sm_ll_insert(&smm->ll, b, count, &nr_allocations);
408 r2 = out(smm);
409
410 return combine_errors(r, r2);
411}
412
413static int sm_metadata_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
414{
415 int r, r2 = 0;
416 int32_t nr_allocations;
417 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
418
419 if (recursing(smm)) {
420 r = add_bop(smm, BOP_INC, b, e);
421 if (r)
422 return r;
423 } else {
424 in(smm);
425 r = sm_ll_inc(&smm->ll, b, e, &nr_allocations);
426 r2 = out(smm);
427 }
428
429 return combine_errors(r, r2);
430}
431
432static int sm_metadata_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
433{
434 int r, r2 = 0;
435 int32_t nr_allocations;
436 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
437
438 if (recursing(smm))
439 r = add_bop(smm, BOP_DEC, b, e);
440 else {
441 in(smm);
442 r = sm_ll_dec(&smm->ll, b, e, &nr_allocations);
443 r2 = out(smm);
444 }
445
446 return combine_errors(r, r2);
447}
448
449static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
450{
451 int r, r2 = 0;
452 int32_t nr_allocations;
453 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
454
455 /*
456 * Any block we allocate has to be free in both the old and current ll.
457 */
458 r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, smm->begin, smm->ll.nr_blocks, b);
459 if (r == -ENOSPC) {
460 /*
461 * There's no free block between smm->begin and the end of the metadata device.
462 * We search before smm->begin in case something has been freed.
463 */
464 r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, 0, smm->begin, b);
465 }
466
467 if (r)
468 return r;
469
470 smm->begin = *b + 1;
471
472 if (recursing(smm))
473 r = add_bop(smm, BOP_INC, *b, *b + 1);
474 else {
475 in(smm);
476 r = sm_ll_inc(&smm->ll, *b, *b + 1, &nr_allocations);
477 r2 = out(smm);
478 }
479
480 if (!r)
481 smm->allocated_this_transaction++;
482
483 return combine_errors(r, r2);
484}
485
486static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
487{
488 dm_block_t count;
489 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
490
491 int r = sm_metadata_new_block_(sm, b);
492
493 if (r) {
494 DMERR_LIMIT("unable to allocate new metadata block");
495 return r;
496 }
497
498 r = sm_metadata_get_nr_free(sm, &count);
499 if (r) {
500 DMERR_LIMIT("couldn't get free block count");
501 return r;
502 }
503
504 check_threshold(&smm->threshold, count);
505
506 return r;
507}
508
509static int sm_metadata_commit(struct dm_space_map *sm)
510{
511 int r;
512 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
513
514 r = sm_ll_commit(&smm->ll);
515 if (r)
516 return r;
517
518 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
519 smm->allocated_this_transaction = 0;
520
521 return 0;
522}
523
524static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
525 dm_block_t threshold,
526 dm_sm_threshold_fn fn,
527 void *context)
528{
529 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
530
531 set_threshold(&smm->threshold, threshold, fn, context);
532
533 return 0;
534}
535
536static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
537{
538 *result = sizeof(struct disk_sm_root);
539
540 return 0;
541}
542
543static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
544{
545 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
546 struct disk_sm_root root_le;
547
548 root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
549 root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
550 root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
551 root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);
552
553 if (max < sizeof(root_le))
554 return -ENOSPC;
555
556 memcpy(where_le, &root_le, sizeof(root_le));
557
558 return 0;
559}
560
561static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);
562
563static const struct dm_space_map ops = {
564 .destroy = sm_metadata_destroy,
565 .extend = sm_metadata_extend,
566 .get_nr_blocks = sm_metadata_get_nr_blocks,
567 .get_nr_free = sm_metadata_get_nr_free,
568 .get_count = sm_metadata_get_count,
569 .count_is_more_than_one = sm_metadata_count_is_more_than_one,
570 .set_count = sm_metadata_set_count,
571 .inc_blocks = sm_metadata_inc_blocks,
572 .dec_blocks = sm_metadata_dec_blocks,
573 .new_block = sm_metadata_new_block,
574 .commit = sm_metadata_commit,
575 .root_size = sm_metadata_root_size,
576 .copy_root = sm_metadata_copy_root,
577 .register_threshold_callback = sm_metadata_register_threshold_callback
578};
579
580/*----------------------------------------------------------------*/
581
582/*
583 * When a new space map is created that manages its own space. We use
584 * this tiny bootstrap allocator.
585 */
586static void sm_bootstrap_destroy(struct dm_space_map *sm)
587{
588}
589
590static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
591{
592 DMERR("bootstrap doesn't support extend");
593
594 return -EINVAL;
595}
596
597static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
598{
599 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
600
601 *count = smm->ll.nr_blocks;
602
603 return 0;
604}
605
606static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
607{
608 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
609
610 *count = smm->ll.nr_blocks - smm->begin;
611
612 return 0;
613}
614
615static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
616 uint32_t *result)
617{
618 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
619
620 *result = (b < smm->begin) ? 1 : 0;
621
622 return 0;
623}
624
625static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
626 dm_block_t b, int *result)
627{
628 *result = 0;
629
630 return 0;
631}
632
633static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
634 uint32_t count)
635{
636 DMERR("bootstrap doesn't support set_count");
637
638 return -EINVAL;
639}
640
641static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
642{
643 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
644
645 /*
646 * We know the entire device is unused.
647 */
648 if (smm->begin == smm->ll.nr_blocks)
649 return -ENOSPC;
650
651 *b = smm->begin++;
652
653 return 0;
654}
655
656static int sm_bootstrap_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
657{
658 int r;
659 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
660
661 r = add_bop(smm, BOP_INC, b, e);
662 if (r)
663 return r;
664
665 return 0;
666}
667
668static int sm_bootstrap_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
669{
670 int r;
671 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
672
673 r = add_bop(smm, BOP_DEC, b, e);
674 if (r)
675 return r;
676
677 return 0;
678}
679
680static int sm_bootstrap_commit(struct dm_space_map *sm)
681{
682 return 0;
683}
684
685static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
686{
687 DMERR("bootstrap doesn't support root_size");
688
689 return -EINVAL;
690}
691
692static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
693 size_t max)
694{
695 DMERR("bootstrap doesn't support copy_root");
696
697 return -EINVAL;
698}
699
700static const struct dm_space_map bootstrap_ops = {
701 .destroy = sm_bootstrap_destroy,
702 .extend = sm_bootstrap_extend,
703 .get_nr_blocks = sm_bootstrap_get_nr_blocks,
704 .get_nr_free = sm_bootstrap_get_nr_free,
705 .get_count = sm_bootstrap_get_count,
706 .count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
707 .set_count = sm_bootstrap_set_count,
708 .inc_blocks = sm_bootstrap_inc_blocks,
709 .dec_blocks = sm_bootstrap_dec_blocks,
710 .new_block = sm_bootstrap_new_block,
711 .commit = sm_bootstrap_commit,
712 .root_size = sm_bootstrap_root_size,
713 .copy_root = sm_bootstrap_copy_root,
714 .register_threshold_callback = NULL
715};
716
717/*----------------------------------------------------------------*/
718
719static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
720{
721 int r;
722 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
723 dm_block_t old_len = smm->ll.nr_blocks;
724
725 /*
726 * Flick into a mode where all blocks get allocated in the new area.
727 */
728 smm->begin = old_len;
729 memcpy(sm, &bootstrap_ops, sizeof(*sm));
730
731 /*
732 * Extend.
733 */
734 r = sm_ll_extend(&smm->ll, extra_blocks);
735 if (r)
736 goto out;
737
738 /*
739 * We repeatedly increment then commit until the commit doesn't
740 * allocate any new blocks.
741 */
742 do {
743 r = add_bop(smm, BOP_INC, old_len, smm->begin);
744 if (r)
745 goto out;
746
747 old_len = smm->begin;
748
749 r = apply_bops(smm);
750 if (r) {
751 DMERR("%s: apply_bops failed", __func__);
752 goto out;
753 }
754
755 r = sm_ll_commit(&smm->ll);
756 if (r)
757 goto out;
758
759 } while (old_len != smm->begin);
760
761out:
762 /*
763 * Switch back to normal behaviour.
764 */
765 memcpy(sm, &ops, sizeof(*sm));
766 return r;
767}
768
769/*----------------------------------------------------------------*/
770
771struct dm_space_map *dm_sm_metadata_init(void)
772{
773 struct sm_metadata *smm;
774
775 smm = kmalloc(sizeof(*smm), GFP_KERNEL);
776 if (!smm)
777 return ERR_PTR(-ENOMEM);
778
779 memcpy(&smm->sm, &ops, sizeof(smm->sm));
780
781 return &smm->sm;
782}
783
784int dm_sm_metadata_create(struct dm_space_map *sm,
785 struct dm_transaction_manager *tm,
786 dm_block_t nr_blocks,
787 dm_block_t superblock)
788{
789 int r;
790 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
791
792 smm->begin = superblock + 1;
793 smm->recursion_count = 0;
794 smm->allocated_this_transaction = 0;
795 brb_init(&smm->uncommitted);
796 threshold_init(&smm->threshold);
797
798 memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
799
800 r = sm_ll_new_metadata(&smm->ll, tm);
801 if (!r) {
802 if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
803 nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
804 r = sm_ll_extend(&smm->ll, nr_blocks);
805 }
806 memcpy(&smm->sm, &ops, sizeof(smm->sm));
807 if (r)
808 return r;
809
810 /*
811 * Now we need to update the newly created data structures with the
812 * allocated blocks that they were built from.
813 */
814 r = add_bop(smm, BOP_INC, superblock, smm->begin);
815 if (r)
816 return r;
817
818 r = apply_bops(smm);
819 if (r) {
820 DMERR("%s: apply_bops failed", __func__);
821 return r;
822 }
823
824 return sm_metadata_commit(sm);
825}
826
827int dm_sm_metadata_open(struct dm_space_map *sm,
828 struct dm_transaction_manager *tm,
829 void *root_le, size_t len)
830{
831 int r;
832 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
833
834 r = sm_ll_open_metadata(&smm->ll, tm, root_le, len);
835 if (r)
836 return r;
837
838 smm->begin = 0;
839 smm->recursion_count = 0;
840 smm->allocated_this_transaction = 0;
841 brb_init(&smm->uncommitted);
842 threshold_init(&smm->threshold);
843
844 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
845 return 0;
846}
1/*
2 * Copyright (C) 2011 Red Hat, Inc.
3 *
4 * This file is released under the GPL.
5 */
6
7#include "dm-space-map.h"
8#include "dm-space-map-common.h"
9#include "dm-space-map-metadata.h"
10
11#include <linux/list.h>
12#include <linux/slab.h>
13#include <linux/device-mapper.h>
14#include <linux/kernel.h>
15
16#define DM_MSG_PREFIX "space map metadata"
17
18/*----------------------------------------------------------------*/
19
20/*
21 * An edge triggered threshold.
22 */
23struct threshold {
24 bool threshold_set;
25 bool value_set;
26 dm_block_t threshold;
27 dm_block_t current_value;
28 dm_sm_threshold_fn fn;
29 void *context;
30};
31
32static void threshold_init(struct threshold *t)
33{
34 t->threshold_set = false;
35 t->value_set = false;
36}
37
38static void set_threshold(struct threshold *t, dm_block_t value,
39 dm_sm_threshold_fn fn, void *context)
40{
41 t->threshold_set = true;
42 t->threshold = value;
43 t->fn = fn;
44 t->context = context;
45}
46
47static bool below_threshold(struct threshold *t, dm_block_t value)
48{
49 return t->threshold_set && value <= t->threshold;
50}
51
52static bool threshold_already_triggered(struct threshold *t)
53{
54 return t->value_set && below_threshold(t, t->current_value);
55}
56
57static void check_threshold(struct threshold *t, dm_block_t value)
58{
59 if (below_threshold(t, value) &&
60 !threshold_already_triggered(t))
61 t->fn(t->context);
62
63 t->value_set = true;
64 t->current_value = value;
65}
66
67/*----------------------------------------------------------------*/
68
69/*
70 * Space map interface.
71 *
72 * The low level disk format is written using the standard btree and
73 * transaction manager. This means that performing disk operations may
74 * cause us to recurse into the space map in order to allocate new blocks.
75 * For this reason we have a pool of pre-allocated blocks large enough to
76 * service any metadata_ll_disk operation.
77 */
78
79/*
80 * FIXME: we should calculate this based on the size of the device.
81 * Only the metadata space map needs this functionality.
82 */
83#define MAX_RECURSIVE_ALLOCATIONS 1024
84
85enum block_op_type {
86 BOP_INC,
87 BOP_DEC
88};
89
90struct block_op {
91 enum block_op_type type;
92 dm_block_t b;
93 dm_block_t e;
94};
95
96struct bop_ring_buffer {
97 unsigned begin;
98 unsigned end;
99 struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
100};
101
102static void brb_init(struct bop_ring_buffer *brb)
103{
104 brb->begin = 0;
105 brb->end = 0;
106}
107
108static bool brb_empty(struct bop_ring_buffer *brb)
109{
110 return brb->begin == brb->end;
111}
112
113static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
114{
115 unsigned r = old + 1;
116 return r >= ARRAY_SIZE(brb->bops) ? 0 : r;
117}
118
119static int brb_push(struct bop_ring_buffer *brb,
120 enum block_op_type type, dm_block_t b, dm_block_t e)
121{
122 struct block_op *bop;
123 unsigned next = brb_next(brb, brb->end);
124
125 /*
126 * We don't allow the last bop to be filled, this way we can
127 * differentiate between full and empty.
128 */
129 if (next == brb->begin)
130 return -ENOMEM;
131
132 bop = brb->bops + brb->end;
133 bop->type = type;
134 bop->b = b;
135 bop->e = e;
136
137 brb->end = next;
138
139 return 0;
140}
141
142static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result)
143{
144 struct block_op *bop;
145
146 if (brb_empty(brb))
147 return -ENODATA;
148
149 bop = brb->bops + brb->begin;
150 memcpy(result, bop, sizeof(*result));
151 return 0;
152}
153
154static int brb_pop(struct bop_ring_buffer *brb)
155{
156 if (brb_empty(brb))
157 return -ENODATA;
158
159 brb->begin = brb_next(brb, brb->begin);
160
161 return 0;
162}
163
164/*----------------------------------------------------------------*/
165
166struct sm_metadata {
167 struct dm_space_map sm;
168
169 struct ll_disk ll;
170 struct ll_disk old_ll;
171
172 dm_block_t begin;
173
174 unsigned recursion_count;
175 unsigned allocated_this_transaction;
176 struct bop_ring_buffer uncommitted;
177
178 struct threshold threshold;
179};
180
181static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b, dm_block_t e)
182{
183 int r = brb_push(&smm->uncommitted, type, b, e);
184 if (r) {
185 DMERR("too many recursive allocations");
186 return -ENOMEM;
187 }
188
189 return 0;
190}
191
192static int commit_bop(struct sm_metadata *smm, struct block_op *op)
193{
194 int r = 0;
195 int32_t nr_allocations;
196
197 switch (op->type) {
198 case BOP_INC:
199 r = sm_ll_inc(&smm->ll, op->b, op->e, &nr_allocations);
200 break;
201
202 case BOP_DEC:
203 r = sm_ll_dec(&smm->ll, op->b, op->e, &nr_allocations);
204 break;
205 }
206
207 return r;
208}
209
210static void in(struct sm_metadata *smm)
211{
212 smm->recursion_count++;
213}
214
215static int apply_bops(struct sm_metadata *smm)
216{
217 int r = 0;
218
219 while (!brb_empty(&smm->uncommitted)) {
220 struct block_op bop;
221
222 r = brb_peek(&smm->uncommitted, &bop);
223 if (r) {
224 DMERR("bug in bop ring buffer");
225 break;
226 }
227
228 r = commit_bop(smm, &bop);
229 if (r)
230 break;
231
232 brb_pop(&smm->uncommitted);
233 }
234
235 return r;
236}
237
238static int out(struct sm_metadata *smm)
239{
240 int r = 0;
241
242 /*
243 * If we're not recursing then very bad things are happening.
244 */
245 if (!smm->recursion_count) {
246 DMERR("lost track of recursion depth");
247 return -ENOMEM;
248 }
249
250 if (smm->recursion_count == 1)
251 r = apply_bops(smm);
252
253 smm->recursion_count--;
254
255 return r;
256}
257
258/*
259 * When using the out() function above, we often want to combine an error
260 * code for the operation run in the recursive context with that from
261 * out().
262 */
263static int combine_errors(int r1, int r2)
264{
265 return r1 ? r1 : r2;
266}
267
268static int recursing(struct sm_metadata *smm)
269{
270 return smm->recursion_count;
271}
272
273static void sm_metadata_destroy(struct dm_space_map *sm)
274{
275 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
276
277 kfree(smm);
278}
279
280static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
281{
282 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
283
284 *count = smm->ll.nr_blocks;
285
286 return 0;
287}
288
289static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
290{
291 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
292
293 *count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
294 smm->allocated_this_transaction;
295
296 return 0;
297}
298
299static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
300 uint32_t *result)
301{
302 int r;
303 unsigned i;
304 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
305 unsigned adjustment = 0;
306
307 /*
308 * We may have some uncommitted adjustments to add. This list
309 * should always be really short.
310 */
311 for (i = smm->uncommitted.begin;
312 i != smm->uncommitted.end;
313 i = brb_next(&smm->uncommitted, i)) {
314 struct block_op *op = smm->uncommitted.bops + i;
315
316 if (b < op->b || b >= op->e)
317 continue;
318
319 switch (op->type) {
320 case BOP_INC:
321 adjustment++;
322 break;
323
324 case BOP_DEC:
325 adjustment--;
326 break;
327 }
328 }
329
330 r = sm_ll_lookup(&smm->ll, b, result);
331 if (r)
332 return r;
333
334 *result += adjustment;
335
336 return 0;
337}
338
339static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
340 dm_block_t b, int *result)
341{
342 int r, adjustment = 0;
343 unsigned i;
344 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
345 uint32_t rc;
346
347 /*
348 * We may have some uncommitted adjustments to add. This list
349 * should always be really short.
350 */
351 for (i = smm->uncommitted.begin;
352 i != smm->uncommitted.end;
353 i = brb_next(&smm->uncommitted, i)) {
354
355 struct block_op *op = smm->uncommitted.bops + i;
356
357 if (b < op->b || b >= op->e)
358 continue;
359
360 switch (op->type) {
361 case BOP_INC:
362 adjustment++;
363 break;
364
365 case BOP_DEC:
366 adjustment--;
367 break;
368 }
369 }
370
371 if (adjustment > 1) {
372 *result = 1;
373 return 0;
374 }
375
376 r = sm_ll_lookup_bitmap(&smm->ll, b, &rc);
377 if (r)
378 return r;
379
380 if (rc == 3)
381 /*
382 * We err on the side of caution, and always return true.
383 */
384 *result = 1;
385 else
386 *result = rc + adjustment > 1;
387
388 return 0;
389}
390
391static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
392 uint32_t count)
393{
394 int r, r2;
395 int32_t nr_allocations;
396 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
397
398 if (smm->recursion_count) {
399 DMERR("cannot recurse set_count()");
400 return -EINVAL;
401 }
402
403 in(smm);
404 r = sm_ll_insert(&smm->ll, b, count, &nr_allocations);
405 r2 = out(smm);
406
407 return combine_errors(r, r2);
408}
409
410static int sm_metadata_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
411{
412 int r, r2 = 0;
413 int32_t nr_allocations;
414 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
415
416 if (recursing(smm)) {
417 r = add_bop(smm, BOP_INC, b, e);
418 if (r)
419 return r;
420 } else {
421 in(smm);
422 r = sm_ll_inc(&smm->ll, b, e, &nr_allocations);
423 r2 = out(smm);
424 }
425
426 return combine_errors(r, r2);
427}
428
429static int sm_metadata_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
430{
431 int r, r2 = 0;
432 int32_t nr_allocations;
433 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
434
435 if (recursing(smm))
436 r = add_bop(smm, BOP_DEC, b, e);
437 else {
438 in(smm);
439 r = sm_ll_dec(&smm->ll, b, e, &nr_allocations);
440 r2 = out(smm);
441 }
442
443 return combine_errors(r, r2);
444}
445
446static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
447{
448 int r, r2 = 0;
449 int32_t nr_allocations;
450 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
451
452 /*
453 * Any block we allocate has to be free in both the old and current ll.
454 */
455 r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, smm->begin, smm->ll.nr_blocks, b);
456 if (r == -ENOSPC) {
457 /*
458 * There's no free block between smm->begin and the end of the metadata device.
459 * We search before smm->begin in case something has been freed.
460 */
461 r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, 0, smm->begin, b);
462 }
463
464 if (r)
465 return r;
466
467 smm->begin = *b + 1;
468
469 if (recursing(smm))
470 r = add_bop(smm, BOP_INC, *b, *b + 1);
471 else {
472 in(smm);
473 r = sm_ll_inc(&smm->ll, *b, *b + 1, &nr_allocations);
474 r2 = out(smm);
475 }
476
477 if (!r)
478 smm->allocated_this_transaction++;
479
480 return combine_errors(r, r2);
481}
482
483static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
484{
485 dm_block_t count;
486 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
487
488 int r = sm_metadata_new_block_(sm, b);
489 if (r) {
490 DMERR_LIMIT("unable to allocate new metadata block");
491 return r;
492 }
493
494 r = sm_metadata_get_nr_free(sm, &count);
495 if (r) {
496 DMERR_LIMIT("couldn't get free block count");
497 return r;
498 }
499
500 check_threshold(&smm->threshold, count);
501
502 return r;
503}
504
505static int sm_metadata_commit(struct dm_space_map *sm)
506{
507 int r;
508 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
509
510 r = sm_ll_commit(&smm->ll);
511 if (r)
512 return r;
513
514 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
515 smm->allocated_this_transaction = 0;
516
517 return 0;
518}
519
520static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
521 dm_block_t threshold,
522 dm_sm_threshold_fn fn,
523 void *context)
524{
525 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
526
527 set_threshold(&smm->threshold, threshold, fn, context);
528
529 return 0;
530}
531
532static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
533{
534 *result = sizeof(struct disk_sm_root);
535
536 return 0;
537}
538
539static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
540{
541 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
542 struct disk_sm_root root_le;
543
544 root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
545 root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
546 root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
547 root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);
548
549 if (max < sizeof(root_le))
550 return -ENOSPC;
551
552 memcpy(where_le, &root_le, sizeof(root_le));
553
554 return 0;
555}
556
557static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);
558
559static const struct dm_space_map ops = {
560 .destroy = sm_metadata_destroy,
561 .extend = sm_metadata_extend,
562 .get_nr_blocks = sm_metadata_get_nr_blocks,
563 .get_nr_free = sm_metadata_get_nr_free,
564 .get_count = sm_metadata_get_count,
565 .count_is_more_than_one = sm_metadata_count_is_more_than_one,
566 .set_count = sm_metadata_set_count,
567 .inc_blocks = sm_metadata_inc_blocks,
568 .dec_blocks = sm_metadata_dec_blocks,
569 .new_block = sm_metadata_new_block,
570 .commit = sm_metadata_commit,
571 .root_size = sm_metadata_root_size,
572 .copy_root = sm_metadata_copy_root,
573 .register_threshold_callback = sm_metadata_register_threshold_callback
574};
575
576/*----------------------------------------------------------------*/
577
578/*
579 * When a new space map is created that manages its own space. We use
580 * this tiny bootstrap allocator.
581 */
582static void sm_bootstrap_destroy(struct dm_space_map *sm)
583{
584}
585
586static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
587{
588 DMERR("bootstrap doesn't support extend");
589
590 return -EINVAL;
591}
592
593static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
594{
595 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
596
597 *count = smm->ll.nr_blocks;
598
599 return 0;
600}
601
602static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
603{
604 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
605
606 *count = smm->ll.nr_blocks - smm->begin;
607
608 return 0;
609}
610
611static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
612 uint32_t *result)
613{
614 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
615
616 *result = (b < smm->begin) ? 1 : 0;
617
618 return 0;
619}
620
621static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
622 dm_block_t b, int *result)
623{
624 *result = 0;
625
626 return 0;
627}
628
629static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
630 uint32_t count)
631{
632 DMERR("bootstrap doesn't support set_count");
633
634 return -EINVAL;
635}
636
637static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
638{
639 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
640
641 /*
642 * We know the entire device is unused.
643 */
644 if (smm->begin == smm->ll.nr_blocks)
645 return -ENOSPC;
646
647 *b = smm->begin++;
648
649 return 0;
650}
651
652static int sm_bootstrap_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
653{
654 int r;
655 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
656
657 r = add_bop(smm, BOP_INC, b, e);
658 if (r)
659 return r;
660
661 return 0;
662}
663
664static int sm_bootstrap_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
665{
666 int r;
667 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
668
669 r = add_bop(smm, BOP_DEC, b, e);
670 if (r)
671 return r;
672
673 return 0;
674}
675
676static int sm_bootstrap_commit(struct dm_space_map *sm)
677{
678 return 0;
679}
680
681static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
682{
683 DMERR("bootstrap doesn't support root_size");
684
685 return -EINVAL;
686}
687
688static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
689 size_t max)
690{
691 DMERR("bootstrap doesn't support copy_root");
692
693 return -EINVAL;
694}
695
696static const struct dm_space_map bootstrap_ops = {
697 .destroy = sm_bootstrap_destroy,
698 .extend = sm_bootstrap_extend,
699 .get_nr_blocks = sm_bootstrap_get_nr_blocks,
700 .get_nr_free = sm_bootstrap_get_nr_free,
701 .get_count = sm_bootstrap_get_count,
702 .count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
703 .set_count = sm_bootstrap_set_count,
704 .inc_blocks = sm_bootstrap_inc_blocks,
705 .dec_blocks = sm_bootstrap_dec_blocks,
706 .new_block = sm_bootstrap_new_block,
707 .commit = sm_bootstrap_commit,
708 .root_size = sm_bootstrap_root_size,
709 .copy_root = sm_bootstrap_copy_root,
710 .register_threshold_callback = NULL
711};
712
713/*----------------------------------------------------------------*/
714
715static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
716{
717 int r;
718 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
719 dm_block_t old_len = smm->ll.nr_blocks;
720
721 /*
722 * Flick into a mode where all blocks get allocated in the new area.
723 */
724 smm->begin = old_len;
725 memcpy(sm, &bootstrap_ops, sizeof(*sm));
726
727 /*
728 * Extend.
729 */
730 r = sm_ll_extend(&smm->ll, extra_blocks);
731 if (r)
732 goto out;
733
734 /*
735 * We repeatedly increment then commit until the commit doesn't
736 * allocate any new blocks.
737 */
738 do {
739 r = add_bop(smm, BOP_INC, old_len, smm->begin);
740 if (r)
741 goto out;
742
743 old_len = smm->begin;
744
745 r = apply_bops(smm);
746 if (r) {
747 DMERR("%s: apply_bops failed", __func__);
748 goto out;
749 }
750
751 r = sm_ll_commit(&smm->ll);
752 if (r)
753 goto out;
754
755 } while (old_len != smm->begin);
756
757out:
758 /*
759 * Switch back to normal behaviour.
760 */
761 memcpy(sm, &ops, sizeof(*sm));
762 return r;
763}
764
765/*----------------------------------------------------------------*/
766
767struct dm_space_map *dm_sm_metadata_init(void)
768{
769 struct sm_metadata *smm;
770
771 smm = kmalloc(sizeof(*smm), GFP_KERNEL);
772 if (!smm)
773 return ERR_PTR(-ENOMEM);
774
775 memcpy(&smm->sm, &ops, sizeof(smm->sm));
776
777 return &smm->sm;
778}
779
780int dm_sm_metadata_create(struct dm_space_map *sm,
781 struct dm_transaction_manager *tm,
782 dm_block_t nr_blocks,
783 dm_block_t superblock)
784{
785 int r;
786 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
787
788 smm->begin = superblock + 1;
789 smm->recursion_count = 0;
790 smm->allocated_this_transaction = 0;
791 brb_init(&smm->uncommitted);
792 threshold_init(&smm->threshold);
793
794 memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
795
796 r = sm_ll_new_metadata(&smm->ll, tm);
797 if (!r) {
798 if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
799 nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
800 r = sm_ll_extend(&smm->ll, nr_blocks);
801 }
802 memcpy(&smm->sm, &ops, sizeof(smm->sm));
803 if (r)
804 return r;
805
806 /*
807 * Now we need to update the newly created data structures with the
808 * allocated blocks that they were built from.
809 */
810 r = add_bop(smm, BOP_INC, superblock, smm->begin);
811 if (r)
812 return r;
813
814 r = apply_bops(smm);
815 if (r) {
816 DMERR("%s: apply_bops failed", __func__);
817 return r;
818 }
819
820 return sm_metadata_commit(sm);
821}
822
823int dm_sm_metadata_open(struct dm_space_map *sm,
824 struct dm_transaction_manager *tm,
825 void *root_le, size_t len)
826{
827 int r;
828 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
829
830 r = sm_ll_open_metadata(&smm->ll, tm, root_le, len);
831 if (r)
832 return r;
833
834 smm->begin = 0;
835 smm->recursion_count = 0;
836 smm->allocated_this_transaction = 0;
837 brb_init(&smm->uncommitted);
838 threshold_init(&smm->threshold);
839
840 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
841 return 0;
842}