// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_inode.h"
#include "xfs_refcount.h"
#include "xfs_extent_busy.h"
#include "xfs_health.h"
#include "xfs_bmap.h"
#include "xfs_ialloc.h"
#include "xfs_ag.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"

/*
 * Free Space Btree Repair
 * =======================
 *
 * The reverse mappings are supposed to record all space usage for the entire
 * AG.  Therefore, we can recreate the free extent records in an AG by looking
 * for gaps in the physical extents recorded in the rmapbt.  These records are
 * staged in @free_records.  Identifying the gaps is more difficult on a
 * reflink filesystem because rmap records are allowed to overlap.
 *
 * Because the final step of building a new index is to free the space used by
 * the old index, repair needs to find that space.  Unfortunately, all
 * structures that live in the free space (bnobt, cntbt, rmapbt, agfl) share
 * the same rmapbt owner code (OWN_AG), so this is not straightforward.
 *
 * The scan of the reverse mapping information records the space used by
 * OWN_AG in @old_allocbt_blocks, which (at this stage) is somewhat misnamed.
 * While walking the rmapbt records, we create a second bitmap
 * @not_allocbt_blocks to record all visited rmap btree blocks and all blocks
 * owned by the AGFL.
 *
 * After that is where the definition of old_allocbt_blocks shifts.  This
 * expression identifies possible former bnobt/cntbt blocks:
 *
 *	(OWN_AG blocks) & ~(rmapbt blocks | agfl blocks);
 *
 * Substituting from the definitions above, that becomes:
 *
 *	old_allocbt_blocks & ~not_allocbt_blocks
 *
 * The OWN_AG bitmap itself isn't needed after this point, so what we really
 * do instead is:
 *
 *	old_allocbt_blocks &= ~not_allocbt_blocks;
 *
 * After this point, @old_allocbt_blocks is a bitmap of alleged former
 * bnobt/cntbt blocks.  The xagb_bitmap_disunion operation modifies its first
 * parameter in place to avoid copying records around.
 *
 * Next, some of the space described by @free_records is diverted to the newbt
 * reservation and used to format new btree blocks.  The remaining records are
 * written to the new btree indices.  We reconstruct both bnobt and cntbt at
 * the same time since we've already done all the work.
 *
 * We use the prefix 'xrep_abt' here because we regenerate both free space
 * allocation btrees at the same time.
 */

struct xrep_abt {
	/* Blocks owned by the rmapbt or the agfl. */
	struct xagb_bitmap	not_allocbt_blocks;

	/* All OWN_AG blocks. */
	struct xagb_bitmap	old_allocbt_blocks;

	/*
	 * New bnobt information.  All btree block reservations are added to
	 * the reservation list in new_bnobt.
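	 *
	 * Both the bnobt and the cntbt bulk loaders draw their blocks from
	 * this one reservation list via the shared xrep_abt_claim_block
	 * callback; @new_cntbt carries only the cntbt's fake root and bload
	 * geometry.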
	 */
	struct xrep_newbt	new_bnobt;

	/* new cntbt information */
	struct xrep_newbt	new_cntbt;

	/* Free space extents. */
	struct xfarray		*free_records;

	struct xfs_scrub	*sc;

	/* Number of non-null records in @free_records. */
	uint64_t		nr_real_records;

	/* get_records()'s position in the free space record array. */
	xfarray_idx_t		array_cur;

	/*
	 * Next block we anticipate seeing in the rmap records.  If the next
	 * rmap record is greater than next_agbno, we have found unused space.
	 */
	xfs_agblock_t		next_agbno;

	/* Number of free blocks in this AG. */
	xfs_agblock_t		nr_blocks;

	/* Longest free extent we found in the AG. */
	xfs_agblock_t		longest;
};

/* Set up to repair AG free space btrees. */
int
xrep_setup_ag_allocbt(
	struct xfs_scrub	*sc)
{
	unsigned int		busy_gen;

	/*
	 * Make sure the busy extent list is clear because we can't put
	 * extents on there twice.
	 */
	busy_gen = READ_ONCE(sc->sa.pag->pagb_gen);
	if (xfs_extent_busy_list_empty(sc->sa.pag))
		return 0;

	return xfs_extent_busy_flush(sc->tp, sc->sa.pag, busy_gen, 0);
}

/* Check for any obvious conflicts in the free extent. */
STATIC int
xrep_abt_check_free_ext(
	struct xfs_scrub	*sc,
	const struct xfs_alloc_rec_incore *rec)
{
	enum xbtree_recpacking	outcome;
	int			error;

	if (xfs_alloc_check_irec(sc->sa.pag, rec) != NULL)
		return -EFSCORRUPTED;

	/* Must not be an inode chunk. */
	error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
			rec->ar_startblock, rec->ar_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	/* Must not be shared or CoW staging. */
	if (sc->sa.refc_cur) {
		error = xfs_refcount_has_records(sc->sa.refc_cur,
				XFS_REFC_DOMAIN_SHARED, rec->ar_startblock,
				rec->ar_blockcount, &outcome);
		if (error)
			return error;
		if (outcome != XBTREE_RECPACKING_EMPTY)
			return -EFSCORRUPTED;

		error = xfs_refcount_has_records(sc->sa.refc_cur,
				XFS_REFC_DOMAIN_COW, rec->ar_startblock,
				rec->ar_blockcount, &outcome);
		if (error)
			return error;
		if (outcome != XBTREE_RECPACKING_EMPTY)
			return -EFSCORRUPTED;
	}

	return 0;
}

/*
 * Stash a free space record for all the space since the last bno we found
 * all the way up to @end.
 */
static int
xrep_abt_stash(
	struct xrep_abt		*ra,
	xfs_agblock_t		end)
{
	struct xfs_alloc_rec_incore arec = {
		.ar_startblock	= ra->next_agbno,
		.ar_blockcount	= end - ra->next_agbno,
	};
	struct xfs_scrub	*sc = ra->sc;
	int			error = 0;

	if (xchk_should_terminate(sc, &error))
		return error;

	error = xrep_abt_check_free_ext(ra->sc, &arec);
	if (error)
		return error;

	trace_xrep_abt_found(sc->mp, sc->sa.pag->pag_agno, &arec);

	error = xfarray_append(ra->free_records, &arec);
	if (error)
		return error;

	ra->nr_blocks += arec.ar_blockcount;
	return 0;
}

/* Record extents that aren't in use from gaps in the rmap records. */
STATIC int
xrep_abt_walk_rmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *rec,
	void			*priv)
{
	struct xrep_abt		*ra = priv;
	int			error;

	/* Record all the OWN_AG blocks... */
	if (rec->rm_owner == XFS_RMAP_OWN_AG) {
		error = xagb_bitmap_set(&ra->old_allocbt_blocks,
				rec->rm_startblock, rec->rm_blockcount);
		if (error)
			return error;
	}

	/* ...and all the rmapbt blocks... */
	error = xagb_bitmap_set_btcur_path(&ra->not_allocbt_blocks, cur);
	if (error)
		return error;

	/* ...and all the free space. */
	if (rec->rm_startblock > ra->next_agbno) {
		error = xrep_abt_stash(ra, rec->rm_startblock);
		if (error)
			return error;
	}

	/*
	 * rmap records can overlap on reflink filesystems, so project
	 * next_agbno as far out into the AG space as we currently know about.
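	 *
	 * For example, if one mapping covers agbnos 10-29 and an overlapping
	 * mapping covers 15-39, the second record starts before next_agbno
	 * (30) but must still push it out to 40, hence the max_t() below.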
	 */
	ra->next_agbno = max_t(xfs_agblock_t, ra->next_agbno,
			rec->rm_startblock + rec->rm_blockcount);
	return 0;
}

/* Collect an AGFL block for the not-to-release list. */
static int
xrep_abt_walk_agfl(
	struct xfs_mount	*mp,
	xfs_agblock_t		agbno,
	void			*priv)
{
	struct xrep_abt		*ra = priv;

	return xagb_bitmap_set(&ra->not_allocbt_blocks, agbno, 1);
}

/*
 * Compare two free space extents by block number.  We want to sort in order
 * of increasing block number.
 */
static int
xrep_bnobt_extent_cmp(
	const void		*a,
	const void		*b)
{
	const struct xfs_alloc_rec_incore *ap = a;
	const struct xfs_alloc_rec_incore *bp = b;

	if (ap->ar_startblock > bp->ar_startblock)
		return 1;
	else if (ap->ar_startblock < bp->ar_startblock)
		return -1;
	return 0;
}

/*
 * Re-sort the free extents by block number so that we can put the records
 * into the bnobt in the correct order.  Make sure the records do not overlap
 * in physical space.
 */
STATIC int
xrep_bnobt_sort_records(
	struct xrep_abt		*ra)
{
	struct xfs_alloc_rec_incore arec;
	xfarray_idx_t		cur = XFARRAY_CURSOR_INIT;
	xfs_agblock_t		next_agbno = 0;
	int			error;

	error = xfarray_sort(ra->free_records, xrep_bnobt_extent_cmp, 0);
	if (error)
		return error;

	while ((error = xfarray_iter(ra->free_records, &cur, &arec)) == 1) {
		if (arec.ar_startblock < next_agbno)
			return -EFSCORRUPTED;

		next_agbno = arec.ar_startblock + arec.ar_blockcount;
	}

	return error;
}

/*
 * Compare two free space extents by length and then block number.  We want
 * to sort first in order of increasing length and then in order of increasing
 * block number.
 */
static int
xrep_cntbt_extent_cmp(
	const void		*a,
	const void		*b)
{
	const struct xfs_alloc_rec_incore *ap = a;
	const struct xfs_alloc_rec_incore *bp = b;

	if (ap->ar_blockcount > bp->ar_blockcount)
		return 1;
	else if (ap->ar_blockcount < bp->ar_blockcount)
		return -1;
	return xrep_bnobt_extent_cmp(a, b);
}

/*
 * Sort the free extents by length so that we can put the records into the
 * cntbt in the correct order.  Don't let userspace kill us if we're resorting
 * after allocating btree blocks.
 */
STATIC int
xrep_cntbt_sort_records(
	struct xrep_abt		*ra,
	bool			is_resort)
{
	return xfarray_sort(ra->free_records, xrep_cntbt_extent_cmp,
			is_resort ? 0 : XFARRAY_SORT_KILLABLE);
}

/*
 * Iterate all reverse mappings to find (1) the gaps between rmap records (all
 * unowned space), (2) the OWN_AG extents (which encompass the free space
 * btrees, the rmapbt, and the agfl), (3) the rmapbt blocks, and (4) the AGFL
 * blocks.  The free space is (1) + (2) - (3) - (4).
 */
STATIC int
xrep_abt_find_freespace(
	struct xrep_abt		*ra)
{
	struct xfs_scrub	*sc = ra->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_agf		*agf = sc->sa.agf_bp->b_addr;
	struct xfs_buf		*agfl_bp;
	xfs_agblock_t		agend;
	int			error;

	xagb_bitmap_init(&ra->not_allocbt_blocks);

	xrep_ag_btcur_init(sc, &sc->sa);

	/*
	 * Iterate all the reverse mappings to find gaps in the physical
	 * mappings, all the OWN_AG blocks, and all the rmapbt extents.
	 */
	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_abt_walk_rmap, ra);
	if (error)
		goto err;

	/* Insert a record for space between the last rmap and EOAG. */
	agend = be32_to_cpu(agf->agf_length);
	if (ra->next_agbno < agend) {
		error = xrep_abt_stash(ra, agend);
		if (error)
			goto err;
	}

	/* Collect all the AGFL blocks. */
	error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
	if (error)
		goto err;

	error = xfs_agfl_walk(mp, agf, agfl_bp, xrep_abt_walk_agfl, ra);
	if (error)
		goto err_agfl;

	/*
	 * Compute the old bnobt/cntbt blocks.
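	 * This is the "old_allocbt_blocks &= ~not_allocbt_blocks" step of the
	 * algorithm described in the comment at the top of this file.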
	 */
	error = xagb_bitmap_disunion(&ra->old_allocbt_blocks,
			&ra->not_allocbt_blocks);
	if (error)
		goto err_agfl;

	ra->nr_real_records = xfarray_length(ra->free_records);
err_agfl:
	xfs_trans_brelse(sc->tp, agfl_bp);
err:
	xchk_ag_btcur_free(&sc->sa);
	xagb_bitmap_destroy(&ra->not_allocbt_blocks);
	return error;
}

/*
 * We're going to use the observed free space records to reserve blocks for
 * the new free space btrees, so we play an iterative game where we try to
 * converge on the number of blocks we need:
 *
 * 1. Estimate how many blocks we'll need to store the records.
 * 2. If the first free record has more blocks than we need, we're done.
 *    We will have to re-sort the records prior to building the cntbt.
 * 3. If that record has exactly the number of blocks we need, null out the
 *    record.  We're done.
 * 4. Otherwise, we still need more blocks.  Null out the record, subtract its
 *    length from the number of blocks we need, and go back to step 1.
 *
 * Fortunately, we don't have to do any transaction work to play this game, so
 * we don't have to tear down the staging cursors.
 */
STATIC int
xrep_abt_reserve_space(
	struct xrep_abt		*ra,
	struct xfs_btree_cur	*bno_cur,
	struct xfs_btree_cur	*cnt_cur,
	bool			*needs_resort)
{
	struct xfs_scrub	*sc = ra->sc;
	xfarray_idx_t		record_nr;
	unsigned int		allocated = 0;
	int			error = 0;

	record_nr = xfarray_length(ra->free_records) - 1;
	do {
		struct xfs_alloc_rec_incore arec;
		uint64_t		required;
		unsigned int		desired;
		unsigned int		len;

		/* Compute how many blocks we'll need. */
		error = xfs_btree_bload_compute_geometry(cnt_cur,
				&ra->new_cntbt.bload, ra->nr_real_records);
		if (error)
			break;

		error = xfs_btree_bload_compute_geometry(bno_cur,
				&ra->new_bnobt.bload, ra->nr_real_records);
		if (error)
			break;

		/* How many btree blocks do we need to store all records? */
		required = ra->new_bnobt.bload.nr_blocks +
			   ra->new_cntbt.bload.nr_blocks;
		ASSERT(required < INT_MAX);

		/* If we've reserved enough blocks, we're done. */
		if (allocated >= required)
			break;

		desired = required - allocated;

		/* We need space but there's none left; bye! */
		if (ra->nr_real_records == 0) {
			error = -ENOSPC;
			break;
		}

		/* Grab the first record from the list. */
		error = xfarray_load(ra->free_records, record_nr, &arec);
		if (error)
			break;

		ASSERT(arec.ar_blockcount <= UINT_MAX);
		len = min_t(unsigned int, arec.ar_blockcount, desired);

		trace_xrep_newbt_alloc_ag_blocks(sc->mp, sc->sa.pag->pag_agno,
				arec.ar_startblock, len, XFS_RMAP_OWN_AG);

		error = xrep_newbt_add_extent(&ra->new_bnobt, sc->sa.pag,
				arec.ar_startblock, len);
		if (error)
			break;
		allocated += len;
		ra->nr_blocks -= len;

		if (arec.ar_blockcount > desired) {
			/*
			 * Record has more space than we need.  The number of
			 * free records doesn't change, so shrink the free
			 * record, inform the caller that the records are no
			 * longer sorted by length, and exit.
			 */
			arec.ar_startblock += desired;
			arec.ar_blockcount -= desired;
			error = xfarray_store(ra->free_records, record_nr,
					&arec);
			if (error)
				break;

			*needs_resort = true;
			return 0;
		}

		/*
		 * We're going to use up the entire record, so unset it and
		 * move on to the next one.  This changes the number of free
		 * records (but doesn't break the sorting order), so we must
		 * go around the loop once more to re-run _bload_init.
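		 * Fewer records can shrink the computed btree geometry, so
		 * the estimate of required blocks must be refreshed before we
		 * can decide whether the reservation is large enough.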
		 */
		error = xfarray_unset(ra->free_records, record_nr);
		if (error)
			break;

		ra->nr_real_records--;
		record_nr--;
	} while (1);

	return error;
}

STATIC int
xrep_abt_dispose_one(
	struct xrep_abt		*ra,
	struct xrep_newbt_resv	*resv)
{
	struct xfs_scrub	*sc = ra->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	xfs_agblock_t		free_agbno = resv->agbno + resv->used;
	xfs_extlen_t		free_aglen = resv->len - resv->used;
	int			error;

	ASSERT(pag == resv->pag);

	/* Add a deferred rmap for each extent we used. */
	if (resv->used > 0)
		xfs_rmap_alloc_extent(sc->tp, pag->pag_agno, resv->agbno,
				resv->used, XFS_RMAP_OWN_AG);

	/*
	 * For each reserved btree block we didn't use, add it to the free
	 * space btree.  We didn't touch fdblocks when we reserved them, so
	 * we don't touch it now.
	 */
	if (free_aglen == 0)
		return 0;

	trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno, free_agbno,
			free_aglen, ra->new_bnobt.oinfo.oi_owner);

	error = __xfs_free_extent(sc->tp, resv->pag, free_agbno, free_aglen,
			&ra->new_bnobt.oinfo, XFS_AG_RESV_IGNORE, true);
	if (error)
		return error;

	return xrep_defer_finish(sc);
}

/*
 * Deal with all the space we reserved.  Blocks that were allocated for the
 * free space btrees need to have a (deferred) rmap added for the OWN_AG
 * allocation, and blocks that didn't get used can be freed via the usual
 * (deferred) means.
 */
STATIC void
xrep_abt_dispose_reservations(
	struct xrep_abt		*ra,
	int			error)
{
	struct xrep_newbt_resv	*resv, *n;

	if (error)
		goto junkit;

	list_for_each_entry_safe(resv, n, &ra->new_bnobt.resv_list, list) {
		error = xrep_abt_dispose_one(ra, resv);
		if (error)
			goto junkit;
	}

junkit:
	list_for_each_entry_safe(resv, n, &ra->new_bnobt.resv_list, list) {
		xfs_perag_put(resv->pag);
		list_del(&resv->list);
		kfree(resv);
	}

	xrep_newbt_cancel(&ra->new_bnobt);
	xrep_newbt_cancel(&ra->new_cntbt);
}

/* Retrieve free space data for bulk load. */
STATIC int
xrep_abt_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xfs_alloc_rec_incore *arec = &cur->bc_rec.a;
	struct xrep_abt		*ra = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		error = xfarray_load_next(ra->free_records, &ra->array_cur,
				arec);
		if (error)
			return error;

		ra->longest = max(ra->longest, arec->ar_blockcount);

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Feed one of the new btree blocks to the bulk loader. */
STATIC int
xrep_abt_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_abt		*ra = priv;

	return xrep_newbt_claim_block(cur, &ra->new_bnobt, ptr);
}

/*
 * Reset the AGF counters to reflect the free space btrees that we just
 * rebuilt, then reinitialize the per-AG data.
 */
STATIC int
xrep_abt_reset_counters(
	struct xrep_abt		*ra)
{
	struct xfs_scrub	*sc = ra->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	struct xfs_agf		*agf = sc->sa.agf_bp->b_addr;
	unsigned int		freesp_btreeblks = 0;

	/*
	 * Compute the contribution to agf_btreeblks for the new free space
	 * btrees.  This is the computed btree size minus anything we didn't
	 * use.
	 */
	freesp_btreeblks += ra->new_bnobt.bload.nr_blocks - 1;
	freesp_btreeblks += ra->new_cntbt.bload.nr_blocks - 1;

	freesp_btreeblks -= xrep_newbt_unused_blocks(&ra->new_bnobt);
	freesp_btreeblks -= xrep_newbt_unused_blocks(&ra->new_cntbt);

	/*
	 * The AGF header contains extra information related to the free space
	 * btrees, so we must update those fields here.
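	 *
	 * agf_btreeblks counts the blocks of the bnobt, cntbt, and rmapbt
	 * minus each tree's root block, which is why one block is subtracted
	 * per btree in the arithmetic above and below.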
	 */
	agf->agf_btreeblks = cpu_to_be32(freesp_btreeblks +
				(be32_to_cpu(agf->agf_rmap_blocks) - 1));
	agf->agf_freeblks = cpu_to_be32(ra->nr_blocks);
	agf->agf_longest = cpu_to_be32(ra->longest);
	xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_BTREEBLKS |
						 XFS_AGF_LONGEST |
						 XFS_AGF_FREEBLKS);

	/*
	 * After we commit the new btree to disk, it is possible that the
	 * process to reap the old btree blocks will race with the AIL trying
	 * to checkpoint the old btree blocks into the filesystem.  If the new
	 * tree is shorter than the old one, the allocbt write verifier will
	 * fail and the AIL will shut down the filesystem.
	 *
	 * To avoid this, save the old incore btree height values as the alt
	 * height values before re-initializing the perag info from the
	 * updated AGF to capture all the new values.
	 */
	pag->pagf_repair_bno_level = pag->pagf_bno_level;
	pag->pagf_repair_cnt_level = pag->pagf_cnt_level;

	/* Reinitialize with the values we just logged. */
	return xrep_reinit_pagf(sc);
}

/*
 * Use the collected free space information to stage new free space btrees.
 * If this is successful we'll return with the new btree root information
 * logged to the repair transaction but not yet committed.
 */
STATIC int
xrep_abt_build_new_trees(
	struct xrep_abt		*ra)
{
	struct xfs_scrub	*sc = ra->sc;
	struct xfs_btree_cur	*bno_cur;
	struct xfs_btree_cur	*cnt_cur;
	struct xfs_perag	*pag = sc->sa.pag;
	bool			needs_resort = false;
	int			error;

	/*
	 * Sort the free extents by length so that we can set up the free
	 * space btrees in as few extents as possible.  This reduces the
	 * amount of deferred rmap / free work we have to do at the end.
	 */
	error = xrep_cntbt_sort_records(ra, false);
	if (error)
		return error;

	/*
	 * Prepare to construct the new btree by reserving disk space for the
	 * new btree and setting up all the accounting information we'll need
	 * to root the new btree while it's under construction and before we
	 * attach it to the AG header.
	 */
	xrep_newbt_init_bare(&ra->new_bnobt, sc);
	xrep_newbt_init_bare(&ra->new_cntbt, sc);

	ra->new_bnobt.bload.get_records = xrep_abt_get_records;
	ra->new_cntbt.bload.get_records = xrep_abt_get_records;

	ra->new_bnobt.bload.claim_block = xrep_abt_claim_block;
	ra->new_cntbt.bload.claim_block = xrep_abt_claim_block;

	/* Allocate cursors for the staged btrees. */
	bno_cur = xfs_bnobt_init_cursor(sc->mp, NULL, NULL, pag);
	xfs_btree_stage_afakeroot(bno_cur, &ra->new_bnobt.afake);

	cnt_cur = xfs_cntbt_init_cursor(sc->mp, NULL, NULL, pag);
	xfs_btree_stage_afakeroot(cnt_cur, &ra->new_cntbt.afake);

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		goto err_cur;

	/* Reserve the space we'll need for the new btrees. */
	error = xrep_abt_reserve_space(ra, bno_cur, cnt_cur, &needs_resort);
	if (error)
		goto err_cur;

	/*
	 * If we need to re-sort the free extents by length, do so, so that we
	 * can put the records into the cntbt in the correct order.
	 */
	if (needs_resort) {
		error = xrep_cntbt_sort_records(ra, needs_resort);
		if (error)
			goto err_cur;
	}

	/*
	 * Due to btree slack factors, it's possible for a new btree to be one
	 * level taller than the old btree.  Update the alternate incore btree
	 * height so that we don't trip the verifiers when writing the new
	 * btree blocks to disk.
	 */
	pag->pagf_repair_bno_level = ra->new_bnobt.bload.btree_height;
	pag->pagf_repair_cnt_level = ra->new_cntbt.bload.btree_height;

	/*
	 * Load the free space by length tree.
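	 * The records were sorted by length by xrep_cntbt_sort_records above,
	 * so the cntbt can be bulk loaded straight from the record array;
	 * afterwards the records are re-sorted by block number for the bnobt
	 * load.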
	 */
	ra->array_cur = XFARRAY_CURSOR_INIT;
	ra->longest = 0;
	error = xfs_btree_bload(cnt_cur, &ra->new_cntbt.bload, ra);
	if (error)
		goto err_levels;

	error = xrep_bnobt_sort_records(ra);
	if (error)
		return error;

	/* Load the free space by block number tree. */
	ra->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(bno_cur, &ra->new_bnobt.bload, ra);
	if (error)
		goto err_levels;

	/*
	 * Install the new btrees in the AG header.  After this point the old
	 * btrees are no longer accessible and the new trees are live.
	 */
	xfs_allocbt_commit_staged_btree(bno_cur, sc->tp, sc->sa.agf_bp);
	xfs_btree_del_cursor(bno_cur, 0);
	xfs_allocbt_commit_staged_btree(cnt_cur, sc->tp, sc->sa.agf_bp);
	xfs_btree_del_cursor(cnt_cur, 0);

	/* Reset the AGF counters now that we've changed the btree shape. */
	error = xrep_abt_reset_counters(ra);
	if (error)
		goto err_newbt;

	/* Dispose of any unused blocks and the accounting information. */
	xrep_abt_dispose_reservations(ra, error);

	return xrep_roll_ag_trans(sc);

err_levels:
	pag->pagf_repair_bno_level = 0;
	pag->pagf_repair_cnt_level = 0;
err_cur:
	xfs_btree_del_cursor(cnt_cur, error);
	xfs_btree_del_cursor(bno_cur, error);
err_newbt:
	xrep_abt_dispose_reservations(ra, error);
	return error;
}

/*
 * Now that we've logged the roots of the new btrees, invalidate all of the
 * old blocks and free them.
 */
STATIC int
xrep_abt_remove_old_trees(
	struct xrep_abt		*ra)
{
	struct xfs_perag	*pag = ra->sc->sa.pag;
	int			error;

	/* Free the old btree blocks if they're not in use. */
	error = xrep_reap_agblocks(ra->sc, &ra->old_allocbt_blocks,
			&XFS_RMAP_OINFO_AG, XFS_AG_RESV_IGNORE);
	if (error)
		return error;

	/*
	 * Now that we've zapped all the old allocbt blocks we can turn off
	 * the alternate height mechanism.
	 */
	pag->pagf_repair_bno_level = 0;
	pag->pagf_repair_cnt_level = 0;
	return 0;
}

/* Repair the freespace btrees for some AG. */
int
xrep_allocbt(
	struct xfs_scrub	*sc)
{
	struct xrep_abt		*ra;
	struct xfs_mount	*mp = sc->mp;
	char			*descr;
	int			error;

	/* We require the rmapbt to rebuild anything. */
	if (!xfs_has_rmapbt(mp))
		return -EOPNOTSUPP;

	ra = kzalloc(sizeof(struct xrep_abt), XCHK_GFP_FLAGS);
	if (!ra)
		return -ENOMEM;
	ra->sc = sc;

	/* We rebuild both data structures. */
	sc->sick_mask = XFS_SICK_AG_BNOBT | XFS_SICK_AG_CNTBT;

	/*
	 * Make sure the busy extent list is clear because we can't put
	 * extents on there twice.  In theory we cleared this before we
	 * started, but let's not risk the filesystem.
	 */
	if (!xfs_extent_busy_list_empty(sc->sa.pag)) {
		error = -EDEADLOCK;
		goto out_ra;
	}

	/* Set up enough storage to handle maximally fragmented free space. */
	descr = xchk_xfile_ag_descr(sc, "free space records");
	error = xfarray_create(descr, mp->m_sb.sb_agblocks / 2,
			sizeof(struct xfs_alloc_rec_incore),
			&ra->free_records);
	kfree(descr);
	if (error)
		goto out_ra;

	/* Collect the free space data and find the old btree blocks. */
	xagb_bitmap_init(&ra->old_allocbt_blocks);
	error = xrep_abt_find_freespace(ra);
	if (error)
		goto out_bitmap;

	/* Rebuild the free space information. */
	error = xrep_abt_build_new_trees(ra);
	if (error)
		goto out_bitmap;

	/* Kill the old trees. */
	error = xrep_abt_remove_old_trees(ra);
	if (error)
		goto out_bitmap;

out_bitmap:
	xagb_bitmap_destroy(&ra->old_allocbt_blocks);
	xfarray_destroy(ra->free_records);
out_ra:
	kfree(ra);
	return error;
}

/*
 * Make sure both btrees are ok after we've rebuilt them.
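 * Since both trees were regenerated from the same record set, both scrub
 * types are re-run here to confirm that the new structures are clean.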
 */
int
xrep_revalidate_allocbt(
	struct xfs_scrub	*sc)
{
	__u32			old_type = sc->sm->sm_type;
	int			error;

	/*
	 * We must update sm_type temporarily so that the tree-to-tree cross
	 * reference checks will work in the correct direction, and also so
	 * that tracing will report correctly if there are more errors.
	 */
	sc->sm->sm_type = XFS_SCRUB_TYPE_BNOBT;
	error = xchk_allocbt(sc);
	if (error)
		goto out;

	sc->sm->sm_type = XFS_SCRUB_TYPE_CNTBT;
	error = xchk_allocbt(sc);
out:
	sc->sm->sm_type = old_type;
	return error;
}
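
/*
 * For reference (an illustrative sketch, not part of the original source):
 * userspace reaches xrep_allocbt() by invoking the metadata scrub ioctl with
 * a bnobt or cntbt scrub request and the repair flag set, roughly:
 *
 *	struct xfs_scrub_metadata sm = {
 *		.sm_type	= XFS_SCRUB_TYPE_BNOBT,
 *		.sm_agno	= agno,
 *		.sm_flags	= XFS_SCRUB_IFLAG_REPAIR,
 *	};
 *	error = ioctl(fd, XFS_IOC_SCRUB_METADATA, &sm);
 *
 * The xfs_scrub(8) utility issues such requests on the administrator's
 * behalf.
 */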