// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_defer.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/newbt.h"

/*
 * Estimate proper slack values for a btree that's being reloaded.
 *
 * Under most circumstances, we'll take whatever default loading value the
 * btree bulk loading code calculates for us. However, there are some
 * exceptions to this rule:
 *
 * (0) If someone turned one of the debug knobs.
 * (1) If this is a per-AG btree and the AG has less than 10% space free.
 * (2) If this is an inode btree and the FS has less than 10% space free.
 *
 * In any of these cases, format the new btree blocks almost completely full
 * to minimize space usage.
 */
static void
xrep_newbt_estimate_slack(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xfs_btree_bload	*bload = &xnr->bload;
	uint64_t		free;
	uint64_t		sz;

	/*
	 * The xfs_globals values are set to -1 (i.e. take the bload defaults)
	 * unless someone has set them otherwise, so we just pull the values
	 * here.
	 */
	bload->leaf_slack = xfs_globals.bload_leaf_slack;
	bload->node_slack = xfs_globals.bload_node_slack;

	if (sc->ops->type == ST_PERAG) {
		free = sc->sa.pag->pagf_freeblks;
		sz = xfs_ag_block_count(sc->mp, sc->sa.pag->pag_agno);
	} else {
		free = percpu_counter_sum(&sc->mp->m_fdblocks);
		sz = sc->mp->m_sb.sb_dblocks;
	}

	/* No further changes if there's more than 10% free space left. */
	if (free >= div_u64(sz, 10))
		return;

	/*
	 * We're low on space; load the btrees as tightly as possible. Leave
	 * a couple of open slots in each btree block so that we don't end up
	 * splitting the btrees like crazy after a mount.
	 */
	if (bload->leaf_slack < 0)
		bload->leaf_slack = 2;
	if (bload->node_slack < 0)
		bload->node_slack = 2;
}

/* Initialize accounting resources for staging a new AG btree. */
void
xrep_newbt_init_ag(
	struct xrep_newbt		*xnr,
	struct xfs_scrub		*sc,
	const struct xfs_owner_info	*oinfo,
	xfs_fsblock_t			alloc_hint,
	enum xfs_ag_resv_type		resv)
{
	memset(xnr, 0, sizeof(struct xrep_newbt));
	xnr->sc = sc;
	xnr->oinfo = *oinfo; /* structure copy */
	xnr->alloc_hint = alloc_hint;
	xnr->resv = resv;
	INIT_LIST_HEAD(&xnr->resv_list);
	xnr->bload.max_dirty = XFS_B_TO_FSBT(sc->mp, 256U << 10); /* 256K */
	xrep_newbt_estimate_slack(xnr);
}

/* Initialize accounting resources for staging a new inode fork btree. */
int
xrep_newbt_init_inode(
	struct xrep_newbt		*xnr,
	struct xfs_scrub		*sc,
	int				whichfork,
	const struct xfs_owner_info	*oinfo)
{
	struct xfs_ifork		*ifp;

	ifp = kmem_cache_zalloc(xfs_ifork_cache, XCHK_GFP_FLAGS);
	if (!ifp)
		return -ENOMEM;

	xrep_newbt_init_ag(xnr, sc, oinfo,
			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
			XFS_AG_RESV_NONE);
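	/*
	 * The btree bulk loader stages the new root in this fake inode
	 * fork until the repair function commits it to the real fork.
	 */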
	xnr->ifake.if_fork = ifp;
	xnr->ifake.if_fork_size = xfs_inode_fork_size(sc->ip, whichfork);
	return 0;
}

/*
 * Initialize accounting resources for staging a new btree. Callers are
 * expected to add their own reservations (and clean them up) manually.
 */
void
xrep_newbt_init_bare(
	struct xrep_newbt	*xnr,
	struct xfs_scrub	*sc)
{
	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
			XFS_AG_RESV_NONE);
}

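/*
 * Typical use of these helpers, as a sketch (error handling and the btree
 * bulk load itself elided):
 *
 *	xrep_newbt_init_ag(&xnr, sc, oinfo, alloc_hint, resv);
 *	error = xrep_newbt_alloc_blocks(&xnr, nr_blocks);
 *	... bulk load the staged btree, drawing blocks through
 *	    xrep_newbt_claim_block ...
 *	error = xrep_newbt_commit(&xnr);
 *
 * On failure, xrep_newbt_cancel releases the reservations instead.
 */
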
/*
 * Designate specific blocks to be used to build our new btree. @pag must be
 * a passive reference.
 */
STATIC int
xrep_newbt_add_blocks(
	struct xrep_newbt	*xnr,
	struct xfs_perag	*pag,
	const struct xfs_alloc_arg *args)
{
	struct xfs_mount	*mp = xnr->sc->mp;
	struct xrep_newbt_resv	*resv;
	int			error;

	resv = kmalloc(sizeof(struct xrep_newbt_resv), XCHK_GFP_FLAGS);
	if (!resv)
		return -ENOMEM;

	INIT_LIST_HEAD(&resv->list);
	resv->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
	resv->len = args->len;
	resv->used = 0;
	resv->pag = xfs_perag_hold(pag);

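	/*
	 * Blocks that we allocated ourselves (args->tp != NULL) get
	 * scheduled for automatic reaping so that a crash or a cancelled
	 * repair cannot leak them.  Extents added via
	 * xrep_newbt_add_extent pass a NULL transaction, so the caller
	 * must reap them manually.
	 */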
	if (args->tp) {
		ASSERT(xnr->oinfo.oi_offset == 0);

		error = xfs_alloc_schedule_autoreap(args, true, &resv->autoreap);
		if (error)
			goto out_pag;
	}

	list_add_tail(&resv->list, &xnr->resv_list);
	return 0;
out_pag:
	xfs_perag_put(resv->pag);
	kfree(resv);
	return error;
}

/*
 * Add an extent to the new btree reservation pool. Callers are required to
 * reap this reservation manually if the repair is cancelled. @pag must be a
 * passive reference.
 */
int
xrep_newbt_add_extent(
	struct xrep_newbt	*xnr,
	struct xfs_perag	*pag,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	struct xfs_mount	*mp = xnr->sc->mp;
	struct xfs_alloc_arg	args = {
		.tp		= NULL, /* no autoreap */
		.oinfo		= xnr->oinfo,
		.fsbno		= XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno),
		.len		= len,
		.resv		= xnr->resv,
	};

	return xrep_newbt_add_blocks(xnr, pag, &args);
}

/* Don't let our allocation hint take us beyond this AG */
static inline void
xrep_newbt_validate_ag_alloc_hint(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;
	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(sc->mp, xnr->alloc_hint);

	if (agno == sc->sa.pag->pag_agno &&
	    xfs_verify_fsbno(sc->mp, xnr->alloc_hint))
		return;

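	/*
	 * Otherwise aim the hint at the first block in this AG beyond the
	 * fixed AG headers (superblock, AGF, AGI, and AGFL).
	 */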
	xnr->alloc_hint = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
					 XFS_AGFL_BLOCK(sc->mp) + 1);
}

/* Allocate disk space for a new per-AG btree. */
STATIC int
xrep_newbt_alloc_ag_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xfs_mount	*mp = sc->mp;
	int			error = 0;

	ASSERT(sc->sa.pag != NULL);

	while (nr_blocks > 0) {
		struct xfs_alloc_arg	args = {
			.tp		= sc->tp,
			.mp		= mp,
			.oinfo		= xnr->oinfo,
			.minlen		= 1,
			.maxlen		= nr_blocks,
			.prod		= 1,
			.resv		= xnr->resv,
		};
		xfs_agnumber_t		agno;

		xrep_newbt_validate_ag_alloc_hint(xnr);

		if (xnr->alloc_vextent)
			error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
		else
			error = xfs_alloc_vextent_near_bno(&args,
					xnr->alloc_hint);
		if (error)
			return error;
		if (args.fsbno == NULLFSBLOCK)
			return -ENOSPC;

		agno = XFS_FSB_TO_AGNO(mp, args.fsbno);

		trace_xrep_newbt_alloc_ag_blocks(mp, agno,
				XFS_FSB_TO_AGBNO(mp, args.fsbno), args.len,
				xnr->oinfo.oi_owner);

		if (agno != sc->sa.pag->pag_agno) {
			ASSERT(agno == sc->sa.pag->pag_agno);
			return -EFSCORRUPTED;
		}

		error = xrep_newbt_add_blocks(xnr, sc->sa.pag, &args);
		if (error)
			return error;

		nr_blocks -= args.len;
		xnr->alloc_hint = args.fsbno + args.len;

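		/*
		 * Finish deferred work and roll the transaction so the
		 * autoreap intents logged above are committed before we
		 * allocate again.
		 */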
		error = xrep_defer_finish(sc);
		if (error)
			return error;
	}

	return 0;
}

/* Don't let our allocation hint take us beyond EOFS */
static inline void
xrep_newbt_validate_file_alloc_hint(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;

	if (xfs_verify_fsbno(sc->mp, xnr->alloc_hint))
		return;

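	/* Otherwise restart the search at the first usable block of AG 0. */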
	xnr->alloc_hint = XFS_AGB_TO_FSB(sc->mp, 0, XFS_AGFL_BLOCK(sc->mp) + 1);
}

/* Allocate disk space for our new file-based btree. */
STATIC int
xrep_newbt_alloc_file_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xfs_mount	*mp = sc->mp;
	int			error = 0;

	while (nr_blocks > 0) {
		struct xfs_alloc_arg	args = {
			.tp		= sc->tp,
			.mp		= mp,
			.oinfo		= xnr->oinfo,
			.minlen		= 1,
			.maxlen		= nr_blocks,
			.prod		= 1,
			.resv		= xnr->resv,
		};
		struct xfs_perag	*pag;
		xfs_agnumber_t		agno;

		xrep_newbt_validate_file_alloc_hint(xnr);

		if (xnr->alloc_vextent)
			error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
		else
			error = xfs_alloc_vextent_start_ag(&args,
					xnr->alloc_hint);
		if (error)
			return error;
		if (args.fsbno == NULLFSBLOCK)
			return -ENOSPC;

		agno = XFS_FSB_TO_AGNO(mp, args.fsbno);

		trace_xrep_newbt_alloc_file_blocks(mp, agno,
				XFS_FSB_TO_AGBNO(mp, args.fsbno), args.len,
				xnr->oinfo.oi_owner);

		pag = xfs_perag_get(mp, agno);
		if (!pag) {
			ASSERT(0);
			return -EFSCORRUPTED;
		}

		error = xrep_newbt_add_blocks(xnr, pag, &args);
		xfs_perag_put(pag);
		if (error)
			return error;

		nr_blocks -= args.len;
		xnr->alloc_hint = args.fsbno + args.len;

		error = xrep_defer_finish(sc);
		if (error)
			return error;
	}

	return 0;
}

/* Allocate disk space for our new btree. */
int
xrep_newbt_alloc_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	if (xnr->sc->ip)
		return xrep_newbt_alloc_file_blocks(xnr, nr_blocks);
	return xrep_newbt_alloc_ag_blocks(xnr, nr_blocks);
}

/*
 * Free the unused part of a space extent that was reserved for a new ondisk
 * structure. Returns the number of EFIs logged or a negative errno.
 */
STATIC int
xrep_newbt_free_extent(
	struct xrep_newbt	*xnr,
	struct xrep_newbt_resv	*resv,
	bool			btree_committed)
{
	struct xfs_scrub	*sc = xnr->sc;
	xfs_agblock_t		free_agbno = resv->agbno;
	xfs_extlen_t		free_aglen = resv->len;
	xfs_fsblock_t		fsbno;
	int			error;

	if (!btree_committed || resv->used == 0) {
		/*
		 * If we're not committing a new btree or we didn't use the
		 * space reservation, let the existing EFI free the entire
		 * space extent.
		 */
		trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno,
				free_agbno, free_aglen, xnr->oinfo.oi_owner);
		xfs_alloc_commit_autoreap(sc->tp, &resv->autoreap);
		return 1;
	}

	/*
	 * We used space and committed the btree. Cancel the autoreap, remove
	 * the written blocks from the reservation, and possibly log a new EFI
	 * to free any unused reservation space.
	 */
	xfs_alloc_cancel_autoreap(sc->tp, &resv->autoreap);
	free_agbno += resv->used;
	free_aglen -= resv->used;

	if (free_aglen == 0)
		return 0;

	trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno, free_agbno,
			free_aglen, xnr->oinfo.oi_owner);

	ASSERT(xnr->resv != XFS_AG_RESV_AGFL);
	ASSERT(xnr->resv != XFS_AG_RESV_IGNORE);

	/*
	 * Use EFIs to free the reservations. This reduces the chance
	 * that we leak blocks if the system goes down.
	 */
	fsbno = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno, free_agbno);
	error = xfs_free_extent_later(sc->tp, fsbno, free_aglen, &xnr->oinfo,
			xnr->resv, true);
	if (error)
		return error;

	return 1;
}

/* Free all the accounting info and disk space we reserved for a new btree. */
STATIC int
xrep_newbt_free(
	struct xrep_newbt	*xnr,
	bool			btree_committed)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xrep_newbt_resv	*resv, *n;
	unsigned int		freed = 0;
	int			error = 0;

	/*
	 * If the filesystem already went down, we can't free the blocks. Skip
	 * ahead to freeing the incore metadata because we can't fix anything.
	 */
	if (xfs_is_shutdown(sc->mp))
		goto junkit;

	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
		int ret;

		ret = xrep_newbt_free_extent(xnr, resv, btree_committed);
		list_del(&resv->list);
		xfs_perag_put(resv->pag);
		kfree(resv);
		if (ret < 0) {
			error = ret;
			goto junkit;
		}

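		/*
		 * Tally the EFIs logged so far; once we hit the batch
		 * limit, finish the deferred frees so that a long chain
		 * of unfinished intent items doesn't overrun the log
		 * reservation.
		 */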
		freed += ret;
		if (freed >= XREP_MAX_ITRUNCATE_EFIS) {
			error = xrep_defer_finish(sc);
			if (error)
				goto junkit;
			freed = 0;
		}
	}

	if (freed)
		error = xrep_defer_finish(sc);

junkit:
	/*
	 * If we still have reservations attached to @newbt, cleanup must have
	 * failed and the filesystem is about to go down. Clean up the incore
	 * reservations and try to commit to freeing the space we used.
	 */
	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
		xfs_alloc_commit_autoreap(sc->tp, &resv->autoreap);
		list_del(&resv->list);
		xfs_perag_put(resv->pag);
		kfree(resv);
	}

	if (sc->ip) {
		kmem_cache_free(xfs_ifork_cache, xnr->ifake.if_fork);
		xnr->ifake.if_fork = NULL;
	}

	return error;
}

/*
 * Free all the accounting info and unused disk space allocations after
 * committing a new btree.
 */
int
xrep_newbt_commit(
	struct xrep_newbt	*xnr)
{
	return xrep_newbt_free(xnr, true);
}

/*
 * Free all the accounting info and all of the disk space we reserved for a new
 * btree that we're not going to commit. We want to try to roll things back
 * cleanly for things like ENOSPC midway through allocation.
 */
void
xrep_newbt_cancel(
	struct xrep_newbt	*xnr)
{
	xrep_newbt_free(xnr, false);
}

/* Feed one of the reserved btree blocks to the bulk loader. */
int
xrep_newbt_claim_block(
	struct xfs_btree_cur	*cur,
	struct xrep_newbt	*xnr,
	union xfs_btree_ptr	*ptr)
{
	struct xrep_newbt_resv	*resv;
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_agblock_t		agbno;

	/*
	 * The first item in the list should always have a free block unless
	 * we're completely out.
	 */
	resv = list_first_entry(&xnr->resv_list, struct xrep_newbt_resv, list);
	if (resv->used == resv->len)
		return -ENOSPC;

	/*
	 * Peel off a block from the start of the reservation. We allocate
	 * blocks in order to place blocks on disk in increasing record or key
	 * order. The block reservations tend to end up on the list in
	 * decreasing order, which hopefully results in leaf blocks ending up
	 * together.
	 */
	agbno = resv->agbno + resv->used;
	resv->used++;

	/* If we used all the blocks in this reservation, move it to the end. */
	if (resv->used == resv->len)
		list_move_tail(&resv->list, &xnr->resv_list);

	trace_xrep_newbt_claim_block(mp, resv->pag->pag_agno, agbno, 1,
			xnr->oinfo.oi_owner);

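	/*
	 * Inode-rooted btrees address blocks by 64-bit filesystem block
	 * number; per-AG btrees store only the 32-bit AG block number.
	 */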
	if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
		ptr->l = cpu_to_be64(XFS_AGB_TO_FSB(mp, resv->pag->pag_agno,
				agbno));
	else
		ptr->s = cpu_to_be32(agbno);

	/* Relog all the EFIs. */
	return xrep_defer_finish(xnr->sc);
}

/* How many reserved blocks are unused? */
unsigned int
xrep_newbt_unused_blocks(
	struct xrep_newbt	*xnr)
{
	struct xrep_newbt_resv	*resv;
	unsigned int		unused = 0;

	list_for_each_entry(resv, &xnr->resv_list, list)
		unused += resv->len - resv->used;
	return unused;
}