// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_defer.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/newbt.h"

/*
 * Estimate proper slack values for a btree that's being reloaded.
 *
 * Under most circumstances, we'll take whatever default loading value the
 * btree bulk loading code calculates for us. However, there are some
 * exceptions to this rule:
 *
 * (0) If someone turned one of the debug knobs.
 * (1) If this is a per-AG btree and the AG has less than 10% space free.
 * (2) If this is an inode btree and the FS has less than 10% space free.
 *
 * In any of these cases, format the new btree blocks almost completely full
 * to minimize space usage.
 */
static void
xrep_newbt_estimate_slack(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xfs_btree_bload	*bload = &xnr->bload;
	uint64_t		free;
	uint64_t		sz;

	/*
	 * The xfs_globals values are set to -1 (i.e. take the bload defaults)
	 * unless someone has set them otherwise, so we just pull the values
	 * here.
	 */
	bload->leaf_slack = xfs_globals.bload_leaf_slack;
	bload->node_slack = xfs_globals.bload_node_slack;

	if (sc->ops->type == ST_PERAG) {
		free = sc->sa.pag->pagf_freeblks;
		sz = xfs_ag_block_count(sc->mp, sc->sa.pag->pag_agno);
	} else {
		free = percpu_counter_sum(&sc->mp->m_fdblocks);
		sz = sc->mp->m_sb.sb_dblocks;
	}

	/* No further changes if there's more than 10% free space left. */
	if (free >= div_u64(sz, 10))
		return;

	/*
	 * We're low on space; load the btrees as tightly as possible. Leave
	 * a couple of open slots in each btree block so that we don't end up
	 * splitting the btrees like crazy after a mount.
	 */
	if (bload->leaf_slack < 0)
		bload->leaf_slack = 2;
	if (bload->node_slack < 0)
		bload->node_slack = 2;
}

/* Initialize accounting resources for staging a new AG btree. */
void
xrep_newbt_init_ag(
	struct xrep_newbt		*xnr,
	struct xfs_scrub		*sc,
	const struct xfs_owner_info	*oinfo,
	xfs_fsblock_t			alloc_hint,
	enum xfs_ag_resv_type		resv)
{
	memset(xnr, 0, sizeof(struct xrep_newbt));
	xnr->sc = sc;
	xnr->oinfo = *oinfo; /* structure copy */
	xnr->alloc_hint = alloc_hint;
	xnr->resv = resv;
	INIT_LIST_HEAD(&xnr->resv_list);
	xnr->bload.max_dirty = XFS_B_TO_FSBT(sc->mp, 256U << 10); /* 256K */
	xrep_newbt_estimate_slack(xnr);
}

/* Initialize accounting resources for staging a new inode fork btree. */
int
xrep_newbt_init_inode(
	struct xrep_newbt		*xnr,
	struct xfs_scrub		*sc,
	int				whichfork,
	const struct xfs_owner_info	*oinfo)
{
	struct xfs_ifork		*ifp;

	ifp = kmem_cache_zalloc(xfs_ifork_cache, XCHK_GFP_FLAGS);
	if (!ifp)
		return -ENOMEM;

	xrep_newbt_init_ag(xnr, sc, oinfo,
			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
			XFS_AG_RESV_NONE);
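	/*
	 * The btree bulk loader stages the new root in this fake inode
	 * fork until the repair function commits it to the real fork.
	 */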
	xnr->ifake.if_fork = ifp;
	xnr->ifake.if_fork_size = xfs_inode_fork_size(sc->ip, whichfork);
	return 0;
}

/*
 * Initialize accounting resources for staging a new btree. Callers are
 * expected to add their own reservations (and clean them up) manually.
 */
void
xrep_newbt_init_bare(
	struct xrep_newbt	*xnr,
	struct xfs_scrub	*sc)
{
	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
			XFS_AG_RESV_NONE);
}

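/*
 * Typical use of these helpers, as a sketch (error handling and the btree
 * bulk load itself elided):
 *
 *	xrep_newbt_init_ag(&xnr, sc, oinfo, alloc_hint, resv);
 *	error = xrep_newbt_alloc_blocks(&xnr, nr_blocks);
 *	... bulk load the staged btree, drawing blocks through
 *	    xrep_newbt_claim_block ...
 *	error = xrep_newbt_commit(&xnr);
 *
 * On failure, xrep_newbt_cancel releases the reservations instead.
 */
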
/*
 * Designate specific blocks to be used to build our new btree. @pag must be
 * a passive reference.
 */
STATIC int
xrep_newbt_add_blocks(
	struct xrep_newbt	*xnr,
	struct xfs_perag	*pag,
	const struct xfs_alloc_arg *args)
{
	struct xfs_mount	*mp = xnr->sc->mp;
	struct xrep_newbt_resv	*resv;
	int			error;

	resv = kmalloc(sizeof(struct xrep_newbt_resv), XCHK_GFP_FLAGS);
	if (!resv)
		return -ENOMEM;

	INIT_LIST_HEAD(&resv->list);
	resv->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
	resv->len = args->len;
	resv->used = 0;
	resv->pag = xfs_perag_hold(pag);

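	/*
	 * Blocks that we allocated ourselves (args->tp != NULL) get
	 * scheduled for automatic reaping so that a crash or a cancelled
	 * repair cannot leak them.  Extents added via
	 * xrep_newbt_add_extent pass a NULL transaction, so the caller
	 * must reap them manually.
	 */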
	if (args->tp) {
		ASSERT(xnr->oinfo.oi_offset == 0);

		error = xfs_alloc_schedule_autoreap(args, true, &resv->autoreap);
		if (error)
			goto out_pag;
	}

	list_add_tail(&resv->list, &xnr->resv_list);
	return 0;
out_pag:
	xfs_perag_put(resv->pag);
	kfree(resv);
	return error;
}

/*
 * Add an extent to the new btree reservation pool. Callers are required to
 * reap this reservation manually if the repair is cancelled. @pag must be a
 * passive reference.
 */
int
xrep_newbt_add_extent(
	struct xrep_newbt	*xnr,
	struct xfs_perag	*pag,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	struct xfs_mount	*mp = xnr->sc->mp;
	struct xfs_alloc_arg	args = {
		.tp		= NULL, /* no autoreap */
		.oinfo		= xnr->oinfo,
		.fsbno		= XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno),
		.len		= len,
		.resv		= xnr->resv,
	};

	return xrep_newbt_add_blocks(xnr, pag, &args);
}

/* Don't let our allocation hint take us beyond this AG */
static inline void
xrep_newbt_validate_ag_alloc_hint(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;
	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(sc->mp, xnr->alloc_hint);

	if (agno == sc->sa.pag->pag_agno &&
	    xfs_verify_fsbno(sc->mp, xnr->alloc_hint))
		return;

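	/*
	 * Otherwise aim the hint at the first block in this AG beyond the
	 * fixed AG headers (superblock, AGF, AGI, and AGFL).
	 */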
	xnr->alloc_hint = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
					 XFS_AGFL_BLOCK(sc->mp) + 1);
}

/* Allocate disk space for a new per-AG btree. */
STATIC int
xrep_newbt_alloc_ag_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xfs_mount	*mp = sc->mp;
	int			error = 0;

	ASSERT(sc->sa.pag != NULL);

	while (nr_blocks > 0) {
		struct xfs_alloc_arg	args = {
			.tp		= sc->tp,
			.mp		= mp,
			.oinfo		= xnr->oinfo,
			.minlen		= 1,
			.maxlen		= nr_blocks,
			.prod		= 1,
			.resv		= xnr->resv,
		};
		xfs_agnumber_t		agno;

		xrep_newbt_validate_ag_alloc_hint(xnr);

		if (xnr->alloc_vextent)
			error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
		else
			error = xfs_alloc_vextent_near_bno(&args,
					xnr->alloc_hint);
		if (error)
			return error;
		if (args.fsbno == NULLFSBLOCK)
			return -ENOSPC;

		agno = XFS_FSB_TO_AGNO(mp, args.fsbno);

		trace_xrep_newbt_alloc_ag_blocks(mp, agno,
				XFS_FSB_TO_AGBNO(mp, args.fsbno), args.len,
				xnr->oinfo.oi_owner);

		if (agno != sc->sa.pag->pag_agno) {
			ASSERT(agno == sc->sa.pag->pag_agno);
			return -EFSCORRUPTED;
		}

		error = xrep_newbt_add_blocks(xnr, sc->sa.pag, &args);
		if (error)
			return error;

		nr_blocks -= args.len;
		xnr->alloc_hint = args.fsbno + args.len;

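		/*
		 * Finish deferred work and roll the transaction so the
		 * autoreap intents logged above are committed before we
		 * allocate again.
		 */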
		error = xrep_defer_finish(sc);
		if (error)
			return error;
	}

	return 0;
}

/* Don't let our allocation hint take us beyond EOFS */
static inline void
xrep_newbt_validate_file_alloc_hint(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;

	if (xfs_verify_fsbno(sc->mp, xnr->alloc_hint))
		return;

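	/* Otherwise restart the search at the first usable block of AG 0. */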
	xnr->alloc_hint = XFS_AGB_TO_FSB(sc->mp, 0, XFS_AGFL_BLOCK(sc->mp) + 1);
}

/* Allocate disk space for our new file-based btree. */
STATIC int
xrep_newbt_alloc_file_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xfs_mount	*mp = sc->mp;
	int			error = 0;

	while (nr_blocks > 0) {
		struct xfs_alloc_arg	args = {
			.tp		= sc->tp,
			.mp		= mp,
			.oinfo		= xnr->oinfo,
			.minlen		= 1,
			.maxlen		= nr_blocks,
			.prod		= 1,
			.resv		= xnr->resv,
		};
		struct xfs_perag	*pag;
		xfs_agnumber_t		agno;

		xrep_newbt_validate_file_alloc_hint(xnr);

		if (xnr->alloc_vextent)
			error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
		else
			error = xfs_alloc_vextent_start_ag(&args,
					xnr->alloc_hint);
		if (error)
			return error;
		if (args.fsbno == NULLFSBLOCK)
			return -ENOSPC;

		agno = XFS_FSB_TO_AGNO(mp, args.fsbno);

		trace_xrep_newbt_alloc_file_blocks(mp, agno,
				XFS_FSB_TO_AGBNO(mp, args.fsbno), args.len,
				xnr->oinfo.oi_owner);

		pag = xfs_perag_get(mp, agno);
		if (!pag) {
			ASSERT(0);
			return -EFSCORRUPTED;
		}

		error = xrep_newbt_add_blocks(xnr, pag, &args);
		xfs_perag_put(pag);
		if (error)
			return error;

		nr_blocks -= args.len;
		xnr->alloc_hint = args.fsbno + args.len;

		error = xrep_defer_finish(sc);
		if (error)
			return error;
	}

	return 0;
}

/* Allocate disk space for our new btree. */
int
xrep_newbt_alloc_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	if (xnr->sc->ip)
		return xrep_newbt_alloc_file_blocks(xnr, nr_blocks);
	return xrep_newbt_alloc_ag_blocks(xnr, nr_blocks);
}

/*
 * Free the unused part of a space extent that was reserved for a new ondisk
 * structure. Returns the number of EFIs logged or a negative errno.
 */
STATIC int
xrep_newbt_free_extent(
	struct xrep_newbt	*xnr,
	struct xrep_newbt_resv	*resv,
	bool			btree_committed)
{
	struct xfs_scrub	*sc = xnr->sc;
	xfs_agblock_t		free_agbno = resv->agbno;
	xfs_extlen_t		free_aglen = resv->len;
	xfs_fsblock_t		fsbno;
	int			error;

	if (!btree_committed || resv->used == 0) {
		/*
		 * If we're not committing a new btree or we didn't use the
		 * space reservation, let the existing EFI free the entire
		 * space extent.
		 */
		trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno,
				free_agbno, free_aglen, xnr->oinfo.oi_owner);
		xfs_alloc_commit_autoreap(sc->tp, &resv->autoreap);
		return 1;
	}

	/*
	 * We used space and committed the btree. Cancel the autoreap, remove
	 * the written blocks from the reservation, and possibly log a new EFI
	 * to free any unused reservation space.
	 */
	xfs_alloc_cancel_autoreap(sc->tp, &resv->autoreap);
	free_agbno += resv->used;
	free_aglen -= resv->used;

	if (free_aglen == 0)
		return 0;

	trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno, free_agbno,
			free_aglen, xnr->oinfo.oi_owner);

	ASSERT(xnr->resv != XFS_AG_RESV_AGFL);
	ASSERT(xnr->resv != XFS_AG_RESV_IGNORE);

	/*
	 * Use EFIs to free the reservations. This reduces the chance
	 * that we leak blocks if the system goes down.
	 */
	fsbno = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno, free_agbno);
	error = xfs_free_extent_later(sc->tp, fsbno, free_aglen, &xnr->oinfo,
			xnr->resv, true);
	if (error)
		return error;

	return 1;
}

/* Free all the accounting info and disk space we reserved for a new btree. */
STATIC int
xrep_newbt_free(
	struct xrep_newbt	*xnr,
	bool			btree_committed)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xrep_newbt_resv	*resv, *n;
	unsigned int		freed = 0;
	int			error = 0;

	/*
	 * If the filesystem already went down, we can't free the blocks. Skip
	 * ahead to freeing the incore metadata because we can't fix anything.
	 */
	if (xfs_is_shutdown(sc->mp))
		goto junkit;

	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
		int ret;

		ret = xrep_newbt_free_extent(xnr, resv, btree_committed);
		list_del(&resv->list);
		xfs_perag_put(resv->pag);
		kfree(resv);
		if (ret < 0) {
			error = ret;
			goto junkit;
		}

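		/*
		 * Tally the EFIs logged so far; once we hit the batch
		 * limit, finish the deferred frees so that a long chain
		 * of unfinished intent items doesn't overrun the log
		 * reservation.
		 */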
		freed += ret;
		if (freed >= XREP_MAX_ITRUNCATE_EFIS) {
			error = xrep_defer_finish(sc);
			if (error)
				goto junkit;
			freed = 0;
		}
	}

	if (freed)
		error = xrep_defer_finish(sc);

junkit:
	/*
	 * If we still have reservations attached to @newbt, cleanup must have
	 * failed and the filesystem is about to go down. Clean up the incore
	 * reservations and try to commit to freeing the space we used.
	 */
	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
		xfs_alloc_commit_autoreap(sc->tp, &resv->autoreap);
		list_del(&resv->list);
		xfs_perag_put(resv->pag);
		kfree(resv);
	}

	if (sc->ip) {
		kmem_cache_free(xfs_ifork_cache, xnr->ifake.if_fork);
		xnr->ifake.if_fork = NULL;
	}

	return error;
}

/*
 * Free all the accounting info and unused disk space allocations after
 * committing a new btree.
 */
int
xrep_newbt_commit(
	struct xrep_newbt	*xnr)
{
	return xrep_newbt_free(xnr, true);
}

/*
 * Free all the accounting info and all of the disk space we reserved for a new
 * btree that we're not going to commit. We want to try to roll things back
 * cleanly for things like ENOSPC midway through allocation.
 */
void
xrep_newbt_cancel(
	struct xrep_newbt	*xnr)
{
	xrep_newbt_free(xnr, false);
}

/* Feed one of the reserved btree blocks to the bulk loader. */
int
xrep_newbt_claim_block(
	struct xfs_btree_cur	*cur,
	struct xrep_newbt	*xnr,
	union xfs_btree_ptr	*ptr)
{
	struct xrep_newbt_resv	*resv;
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_agblock_t		agbno;

	/*
	 * The first item in the list should always have a free block unless
	 * we're completely out.
	 */
	resv = list_first_entry(&xnr->resv_list, struct xrep_newbt_resv, list);
	if (resv->used == resv->len)
		return -ENOSPC;

	/*
	 * Peel off a block from the start of the reservation. We allocate
	 * blocks in order to place blocks on disk in increasing record or key
	 * order. The block reservations tend to end up on the list in
	 * decreasing order, which hopefully results in leaf blocks ending up
	 * together.
	 */
	agbno = resv->agbno + resv->used;
	resv->used++;

	/* If we used all the blocks in this reservation, move it to the end. */
	if (resv->used == resv->len)
		list_move_tail(&resv->list, &xnr->resv_list);

	trace_xrep_newbt_claim_block(mp, resv->pag->pag_agno, agbno, 1,
			xnr->oinfo.oi_owner);

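	/*
	 * Inode-rooted btrees address blocks by 64-bit filesystem block
	 * number; per-AG btrees store only the 32-bit AG block number.
	 */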
	if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
		ptr->l = cpu_to_be64(XFS_AGB_TO_FSB(mp, resv->pag->pag_agno,
				agbno));
	else
		ptr->s = cpu_to_be32(agbno);

	/* Relog all the EFIs. */
	return xrep_defer_finish(xnr->sc);
}

/* How many reserved blocks are unused? */
unsigned int
xrep_newbt_unused_blocks(
	struct xrep_newbt	*xnr)
{
	struct xrep_newbt_resv	*resv;
	unsigned int		unused = 0;

	list_for_each_entry(resv, &xnr->resv_list, list)
		unused += resv->len - resv->used;
	return unused;
}