xfs_discard.c - fs/xfs/xfs_discard.c - Linux diff v6.13.7

  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Copyright (C) 2010, 2023 Red Hat, Inc.
  4 * All Rights Reserved.
  5 */
  6#include "xfs.h"
  7#include "xfs_shared.h"
  8#include "xfs_format.h"
  9#include "xfs_log_format.h"
 10#include "xfs_trans_resv.h"
 11#include "xfs_trans.h"
 12#include "xfs_mount.h"
 13#include "xfs_btree.h"
 14#include "xfs_alloc_btree.h"
 15#include "xfs_alloc.h"
 16#include "xfs_discard.h"
 17#include "xfs_error.h"
 18#include "xfs_extent_busy.h"
 19#include "xfs_trace.h"
 20#include "xfs_log.h"
 21#include "xfs_ag.h"
 22#include "xfs_health.h"
 23#include "xfs_rtbitmap.h"
 24#include "xfs_rtgroup.h"
 25
 26/*
 27 * Notes on an efficient, low latency fstrim algorithm
 28 *
 29 * We need to walk the filesystem free space and issue discards on the free
 30 * space that meet the search criteria (size and location). We cannot issue
 31 * discards on extents that might be in use, or are so recently in use they are
 32 * still marked as busy. To serialise against extent state changes whilst we are
 33 * gathering extents to trim, we must hold the AGF lock to lock out other
 34 * allocations and extent free operations that might change extent state.
 35 *
 36 * However, we cannot just hold the AGF for the entire AG free space walk whilst
 37 * we issue discards on each free space that is found. Storage devices can have
 38 * extremely slow discard implementations (e.g. ceph RBD) and so walking a
 39 * couple of million free extents and issuing synchronous discards on each
 40 * extent can take a *long* time. Whilst we are doing this walk, nothing else
 41 * can access the AGF, and we can stall transactions and hence the log whilst
 42 * modifications wait for the AGF lock to be released. This can lead to hung
 43 * tasks kicking the hung task timer and rebooting the system. This is bad.
 44 *
 45 * Hence we need to take a leaf from the bulkstat playbook. It takes the AGI
 46 * lock, gathers a range of inode cluster buffers that are allocated, drops the
 47 * AGI lock and then reads all the inode cluster buffers and processes them. It
 48 * loops doing this, using a cursor to keep track of where it is up to in the AG
 49 * for each iteration to restart the INOBT lookup from.
 50 *
 51 * We can't do this exactly with free space - once we drop the AGF lock, the
 52 * state of the free extent is out of our control and we cannot run a discard
 53 * safely on it in this situation. Unless, of course, we've marked the free
 54 * extent as busy and undergoing a discard operation whilst we held the AGF
 55 * locked.
 56 *
 57 * This is exactly how online discard works - free extents are marked busy when
 58 * they are freed, and once the extent free has been committed to the journal,
 59 * the busy extent record is marked as "undergoing discard" and the discard is
 60 * then issued on the free extent. Once the discard completes, the busy extent
 61 * record is removed and the extent is able to be allocated again.
 62 *
 63 * In the context of fstrim, if we find a free extent we need to discard, we
 64 * don't have to discard it immediately. All we need to do is record that free
 65 * extent as being busy and under discard, and all the allocation routines will
 66 * now avoid trying to allocate it. Hence if we mark the extent as busy under
 67 * the AGF lock, we can safely discard it without holding the AGF lock because
 68 * nothing will attempt to allocate that free space until the discard completes.
 69 *
 70 * This also allows us to issue discards asynchronously like we do with online
 71 * discard, and so for fast devices fstrim will run much faster as we can have
 72 * multiple discard operations in flight at once, as well as pipeline the free
 73 * extent search so that it overlaps in flight discard IO.
 74 */
 75
 76#define XFS_DISCARD_MAX_EXAMINE	(100)
 77
 78struct workqueue_struct *xfs_discard_wq;
 79
 80static void
 81xfs_discard_endio_work(
 82	struct work_struct	*work)
 83{
 84	struct xfs_busy_extents	*extents =
 85		container_of(work, struct xfs_busy_extents, endio_work);
 86
 87	xfs_extent_busy_clear(&extents->extent_list, false);
 88	kfree(extents->owner);
 89}
 90
 91/*
 92 * Queue up the actual completion to a thread to avoid IRQ-safe locking for
 93 * pagb_lock.
 94 */
 95static void
 96xfs_discard_endio(
 97	struct bio		*bio)
 98{
 99	struct xfs_busy_extents	*extents = bio->bi_private;
100
101	INIT_WORK(&extents->endio_work, xfs_discard_endio_work);
102	queue_work(xfs_discard_wq, &extents->endio_work);
103	bio_put(bio);
104}
105
106static inline struct block_device *
107xfs_group_bdev(
108	const struct xfs_group	*xg)
109{
110	struct xfs_mount	*mp = xg->xg_mount;
111
112	switch (xg->xg_type) {
113	case XG_TYPE_AG:
114		return mp->m_ddev_targp->bt_bdev;
115	case XG_TYPE_RTG:
116		return mp->m_rtdev_targp->bt_bdev;
117	default:
118		ASSERT(0);
119		break;
120	}
121	return NULL;
122}
123
124/*
125 * Walk the discard list and issue discards on all the busy extents in the
126 * list. We plug and chain the bios so that we only need a single completion
127 * call to clear all the busy extents once the discards are complete.
128 */
129int
130xfs_discard_extents(
131	struct xfs_mount	*mp,
132	struct xfs_busy_extents	*extents)
133{
134	struct xfs_extent_busy	*busyp;
135	struct bio		*bio = NULL;
136	struct blk_plug		plug;
137	int			error = 0;
138
139	blk_start_plug(&plug);
140	list_for_each_entry(busyp, &extents->extent_list, list) {
141		trace_xfs_discard_extent(busyp->group, busyp->bno,
142				busyp->length);
143
144		error = __blkdev_issue_discard(xfs_group_bdev(busyp->group),
145				xfs_gbno_to_daddr(busyp->group, busyp->bno),
146				XFS_FSB_TO_BB(mp, busyp->length),
147				GFP_KERNEL, &bio);
148		if (error && error != -EOPNOTSUPP) {
149			xfs_info(mp,
150	 "discard failed for extent [0x%llx,%u], error %d",
151				 (unsigned long long)busyp->bno,
152				 busyp->length,
153				 error);
154			break;
155		}
156	}
157
158	if (bio) {
159		bio->bi_private = extents;
160		bio->bi_end_io = xfs_discard_endio;
161		submit_bio(bio);
162	} else {
163		xfs_discard_endio_work(&extents->endio_work);
164	}
165	blk_finish_plug(&plug);
166
167	return error;
168}
169
170struct xfs_trim_cur {
171	xfs_agblock_t	start;
172	xfs_extlen_t	count;
173	xfs_agblock_t	end;
174	xfs_extlen_t	minlen;
175	bool		by_bno;
176};
177
178static int
179xfs_trim_gather_extents(
180	struct xfs_perag	*pag,
181	struct xfs_trim_cur	*tcur,
182	struct xfs_busy_extents	*extents)
 
 
 
 
183{
184	struct xfs_mount	*mp = pag_mount(pag);
185	struct xfs_trans	*tp;
186	struct xfs_btree_cur	*cur;
187	struct xfs_buf		*agbp;
188	int			error;
189	int			i;
190	int			batch = XFS_DISCARD_MAX_EXAMINE;
191
192	/*
193	 * Force out the log.  This means any transactions that might have freed
194	 * space before we take the AGF buffer lock are now on disk, and the
195	 * volatile disk cache is flushed.
196	 */
197	xfs_log_force(mp, XFS_LOG_SYNC);
198
199	error = xfs_trans_alloc_empty(mp, &tp);
200	if (error)
201		return error;
202
203	error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
204	if (error)
205		goto out_trans_cancel;
206
207	if (tcur->by_bno) {
208		/* sub-AG discard request always starts at tcur->start */
209		cur = xfs_bnobt_init_cursor(mp, tp, agbp, pag);
210		error = xfs_alloc_lookup_le(cur, tcur->start, 0, &i);
211		if (!error && !i)
212			error = xfs_alloc_lookup_ge(cur, tcur->start, 0, &i);
213	} else if (tcur->start == 0) {
214		/* first time through a by-len starts with max length */
215		cur = xfs_cntbt_init_cursor(mp, tp, agbp, pag);
216		error = xfs_alloc_lookup_ge(cur, 0, tcur->count, &i);
217	} else {
218		/* nth time through a by-len starts where we left off */
219		cur = xfs_cntbt_init_cursor(mp, tp, agbp, pag);
220		error = xfs_alloc_lookup_le(cur, tcur->start, tcur->count, &i);
221	}
222	if (error)
223		goto out_del_cursor;
224	if (i == 0) {
225		/* nothing of that length left in the AG, we are done */
226		tcur->count = 0;
227		goto out_del_cursor;
228	}
229
230	/*
231	 * Loop until we are done with all extents that are large
232	 * enough to be worth discarding or we hit batch limits.
233	 */
234	while (i) {
235		xfs_agblock_t	fbno;
236		xfs_extlen_t	flen;
 
 
237
238		error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
239		if (error)
240			break;
241		if (XFS_IS_CORRUPT(mp, i != 1)) {
242			xfs_btree_mark_sick(cur);
243			error = -EFSCORRUPTED;
244			break;
245		}
246
247		if (--batch <= 0) {
248			/*
249			 * Update the cursor to point at this extent so we
250			 * restart the next batch from this extent.
251			 */
252			tcur->start = fbno;
253			tcur->count = flen;
254			break;
255		}
256
257		/*
258		 * If the extent is entirely outside of the range we are
259		 * supposed to skip it.  Do not bother to trim down partially
260		 * overlapping ranges for now.
261		 */
262		if (fbno + flen < tcur->start) {
263			trace_xfs_discard_exclude(pag_group(pag), fbno, flen);
264			goto next_extent;
265		}
266		if (fbno > tcur->end) {
267			trace_xfs_discard_exclude(pag_group(pag), fbno, flen);
268			if (tcur->by_bno) {
269				tcur->count = 0;
270				break;
271			}
272			goto next_extent;
273		}
274
275		/* Trim the extent returned to the range we want. */
276		if (fbno < tcur->start) {
277			flen -= tcur->start - fbno;
278			fbno = tcur->start;
 
 
 
279		}
280		if (fbno + flen > tcur->end + 1)
281			flen = tcur->end - fbno + 1;
282
283		/* Too small?  Give up. */
284		if (flen < tcur->minlen) {
285			trace_xfs_discard_toosmall(pag_group(pag), fbno, flen);
286			if (tcur->by_bno)
287				goto next_extent;
288			tcur->count = 0;
289			break;
 
290		}
291
292		/*
293		 * If any blocks in the range are still busy, skip the
294		 * discard and try again the next time.
295		 */
296		if (xfs_extent_busy_search(pag_group(pag), fbno, flen)) {
297			trace_xfs_discard_busy(pag_group(pag), fbno, flen);
298			goto next_extent;
299		}
300
301		xfs_extent_busy_insert_discard(pag_group(pag), fbno, flen,
302				&extents->extent_list);
 
303next_extent:
304		if (tcur->by_bno)
305			error = xfs_btree_increment(cur, 0, &i);
306		else
307			error = xfs_btree_decrement(cur, 0, &i);
308		if (error)
309			break;
310
311		/*
312		 * If there's no more records in the tree, we are done. Set the
313		 * cursor block count to 0 to indicate to the caller that there
314		 * is no more extents to search.
315		 */
316		if (i == 0)
317			tcur->count = 0;
318	}
319
320	/*
321	 * If there was an error, release all the gathered busy extents because
322	 * we aren't going to issue a discard on them any more.
323	 */
324	if (error)
325		xfs_extent_busy_clear(&extents->extent_list, false);
326out_del_cursor:
327	xfs_btree_del_cursor(cur, error);
328out_trans_cancel:
329	xfs_trans_cancel(tp);
330	return error;
331}
332
333static bool
334xfs_trim_should_stop(void)
335{
336	return fatal_signal_pending(current) || freezing(current);
337}
338
339/*
340 * Iterate the free list gathering extents and discarding them. We need a cursor
341 * for the repeated iteration of gather/discard loop, so use the longest extent
342 * we found in the last batch as the key to start the next.
343 */
344static int
345xfs_trim_perag_extents(
346	struct xfs_perag	*pag,
347	xfs_agblock_t		start,
348	xfs_agblock_t		end,
349	xfs_extlen_t		minlen)
 
350{
351	struct xfs_trim_cur	tcur = {
352		.start		= start,
353		.count		= pag->pagf_longest,
354		.end		= end,
355		.minlen		= minlen,
356	};
357	int			error = 0;
358
359	if (start != 0 || end != pag_group(pag)->xg_block_count)
360		tcur.by_bno = true;
361
362	do {
363		struct xfs_busy_extents	*extents;
364
365		extents = kzalloc(sizeof(*extents), GFP_KERNEL);
366		if (!extents) {
367			error = -ENOMEM;
368			break;
369		}
370
 
371		extents->owner = extents;
372		INIT_LIST_HEAD(&extents->extent_list);
373
374		error = xfs_trim_gather_extents(pag, &tcur, extents);
 
375		if (error) {
376			kfree(extents);
377			break;
378		}
379
380		/*
381		 * We hand the extent list to the discard function here so the
382		 * discarded extents can be removed from the busy extent list.
383		 * This allows the discards to run asynchronously with gathering
384		 * the next round of extents to discard.
385		 *
386		 * However, we must ensure that we do not reference the extent
387		 * list  after this function call, as it may have been freed by
388		 * the time control returns to us.
389		 */
390		error = xfs_discard_extents(pag_mount(pag), extents);
391		if (error)
392			break;
393
394		if (xfs_trim_should_stop())
395			break;
396
397	} while (tcur.count != 0);
398
399	return error;
400
401}
402
403static int
404xfs_trim_datadev_extents(
405	struct xfs_mount	*mp,
406	xfs_daddr_t		start,
407	xfs_daddr_t		end,
408	xfs_extlen_t		minlen)
409{
410	xfs_agnumber_t		start_agno, end_agno;
411	xfs_agblock_t		start_agbno, end_agbno;
412	struct xfs_perag	*pag = NULL;
413	xfs_daddr_t		ddev_end;
414	int			last_error = 0, error;
415
416	ddev_end = min_t(xfs_daddr_t, end,
417			 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1);
418
419	start_agno = xfs_daddr_to_agno(mp, start);
420	start_agbno = xfs_daddr_to_agbno(mp, start);
421	end_agno = xfs_daddr_to_agno(mp, ddev_end);
422	end_agbno = xfs_daddr_to_agbno(mp, ddev_end);
423
424	while ((pag = xfs_perag_next_range(mp, pag, start_agno, end_agno))) {
425		xfs_agblock_t	agend = pag_group(pag)->xg_block_count;
426
427		if (pag_agno(pag) == end_agno)
428			agend = end_agbno;
429		error = xfs_trim_perag_extents(pag, start_agbno, agend, minlen);
430		if (error)
431			last_error = error;
432
433		if (xfs_trim_should_stop()) {
434			xfs_perag_rele(pag);
435			break;
436		}
437		start_agbno = 0;
438	}
439
440	return last_error;
441}
442
443#ifdef CONFIG_XFS_RT
444struct xfs_trim_rtdev {
445	/* list of rt extents to free */
446	struct list_head	extent_list;
447
448	/* minimum length that caller allows us to trim */
449	xfs_rtblock_t		minlen_fsb;
450
451	/* restart point for the rtbitmap walk */
452	xfs_rtxnum_t		restart_rtx;
453
454	/* stopping point for the current rtbitmap walk */
455	xfs_rtxnum_t		stop_rtx;
456};
457
458struct xfs_rtx_busy {
459	struct list_head	list;
460	xfs_rtblock_t		bno;
461	xfs_rtblock_t		length;
462};
463
464static void
465xfs_discard_free_rtdev_extents(
466	struct xfs_trim_rtdev	*tr)
467{
468	struct xfs_rtx_busy	*busyp, *n;
469
470	list_for_each_entry_safe(busyp, n, &tr->extent_list, list) {
471		list_del_init(&busyp->list);
472		kfree(busyp);
473	}
474}
475
476/*
477 * Walk the discard list and issue discards on all the busy extents in the
478 * list. We plug and chain the bios so that we only need a single completion
479 * call to clear all the busy extents once the discards are complete.
480 */
481static int
482xfs_discard_rtdev_extents(
483	struct xfs_mount	*mp,
484	struct xfs_trim_rtdev	*tr)
485{
486	struct block_device	*bdev = mp->m_rtdev_targp->bt_bdev;
487	struct xfs_rtx_busy	*busyp;
488	struct bio		*bio = NULL;
489	struct blk_plug		plug;
490	xfs_rtblock_t		start = NULLRTBLOCK, length = 0;
491	int			error = 0;
492
493	blk_start_plug(&plug);
494	list_for_each_entry(busyp, &tr->extent_list, list) {
495		if (start == NULLRTBLOCK)
496			start = busyp->bno;
497		length += busyp->length;
498
499		trace_xfs_discard_rtextent(mp, busyp->bno, busyp->length);
500
501		error = __blkdev_issue_discard(bdev,
502				xfs_rtb_to_daddr(mp, busyp->bno),
503				XFS_FSB_TO_BB(mp, busyp->length),
504				GFP_NOFS, &bio);
505		if (error)
506			break;
507	}
508	xfs_discard_free_rtdev_extents(tr);
509
510	if (bio) {
511		error = submit_bio_wait(bio);
512		if (error == -EOPNOTSUPP)
513			error = 0;
514		if (error)
515			xfs_info(mp,
516	 "discard failed for rtextent [0x%llx,%llu], error %d",
517				 (unsigned long long)start,
518				 (unsigned long long)length,
519				 error);
520		bio_put(bio);
521	}
522	blk_finish_plug(&plug);
523
524	return error;
525}
526
527static int
528xfs_trim_gather_rtextent(
529	struct xfs_rtgroup		*rtg,
530	struct xfs_trans		*tp,
531	const struct xfs_rtalloc_rec	*rec,
532	void				*priv)
533{
534	struct xfs_trim_rtdev		*tr = priv;
535	struct xfs_rtx_busy		*busyp;
536	xfs_rtblock_t			rbno, rlen;
537
538	if (rec->ar_startext > tr->stop_rtx) {
539		/*
540		 * If we've scanned a large number of rtbitmap blocks, update
541		 * the cursor to point at this extent so we restart the next
542		 * batch from this extent.
543		 */
544		tr->restart_rtx = rec->ar_startext;
545		return -ECANCELED;
546	}
547
548	rbno = xfs_rtx_to_rtb(rtg, rec->ar_startext);
549	rlen = xfs_rtbxlen_to_blen(rtg_mount(rtg), rec->ar_extcount);
550
551	/* Ignore too small. */
552	if (rlen < tr->minlen_fsb) {
553		trace_xfs_discard_rttoosmall(rtg_mount(rtg), rbno, rlen);
554		return 0;
555	}
556
557	busyp = kzalloc(sizeof(struct xfs_rtx_busy), GFP_KERNEL);
558	if (!busyp)
559		return -ENOMEM;
560
561	busyp->bno = rbno;
562	busyp->length = rlen;
563	INIT_LIST_HEAD(&busyp->list);
564	list_add_tail(&busyp->list, &tr->extent_list);
565
566	tr->restart_rtx = rec->ar_startext + rec->ar_extcount;
567	return 0;
568}
569
570/* Trim extents on an !rtgroups realtime device */
571static int
572xfs_trim_rtextents(
573	struct xfs_rtgroup	*rtg,
574	xfs_rtxnum_t		low,
575	xfs_rtxnum_t		high,
576	xfs_daddr_t		minlen)
577{
578	struct xfs_mount	*mp = rtg_mount(rtg);
579	struct xfs_trim_rtdev	tr = {
580		.minlen_fsb	= XFS_BB_TO_FSB(mp, minlen),
581		.extent_list	= LIST_HEAD_INIT(tr.extent_list),
582	};
583	struct xfs_trans	*tp;
584	int			error;
585
586	error = xfs_trans_alloc_empty(mp, &tp);
587	if (error)
588		return error;
589
590	/*
591	 * Walk the free ranges between low and high.  The query_range function
592	 * trims the extents returned.
593	 */
594	do {
595		tr.stop_rtx = low + xfs_rtbitmap_rtx_per_rbmblock(mp);
596		xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
597		error = xfs_rtalloc_query_range(rtg, tp, low, high,
598				xfs_trim_gather_rtextent, &tr);
599
600		if (error == -ECANCELED)
601			error = 0;
602		if (error) {
603			xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
604			xfs_discard_free_rtdev_extents(&tr);
605			break;
606		}
607
608		if (list_empty(&tr.extent_list)) {
609			xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
610			break;
611		}
612
613		error = xfs_discard_rtdev_extents(mp, &tr);
614		xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
615		if (error)
616			break;
617
618		low = tr.restart_rtx;
619	} while (!xfs_trim_should_stop() && low <= high);
620
621	xfs_trans_cancel(tp);
622	return error;
623}
624
625struct xfs_trim_rtgroup {
626	/* list of rtgroup extents to free */
627	struct xfs_busy_extents	*extents;
628
629	/* minimum length that caller allows us to trim */
630	xfs_rtblock_t		minlen_fsb;
631
632	/* restart point for the rtbitmap walk */
633	xfs_rtxnum_t		restart_rtx;
634
635	/* number of extents to examine before stopping to issue discard ios */
636	int			batch;
637
638	/* number of extents queued for discard */
639	int			queued;
640};
641
642static int
643xfs_trim_gather_rtgroup_extent(
644	struct xfs_rtgroup		*rtg,
645	struct xfs_trans		*tp,
646	const struct xfs_rtalloc_rec	*rec,
647	void				*priv)
648{
649	struct xfs_trim_rtgroup		*tr = priv;
650	xfs_rgblock_t			rgbno;
651	xfs_extlen_t			len;
652
653	if (--tr->batch <= 0) {
654		/*
655		 * If we've checked a large number of extents, update the
656		 * cursor to point at this extent so we restart the next batch
657		 * from this extent.
658		 */
659		tr->restart_rtx = rec->ar_startext;
660		return -ECANCELED;
661	}
662
663	rgbno = xfs_rtx_to_rgbno(rtg, rec->ar_startext);
664	len = xfs_rtxlen_to_extlen(rtg_mount(rtg), rec->ar_extcount);
665
666	/* Ignore too small. */
667	if (len < tr->minlen_fsb) {
668		trace_xfs_discard_toosmall(rtg_group(rtg), rgbno, len);
669		return 0;
670	}
671
672	/*
673	 * If any blocks in the range are still busy, skip the discard and try
674	 * again the next time.
675	 */
676	if (xfs_extent_busy_search(rtg_group(rtg), rgbno, len)) {
677		trace_xfs_discard_busy(rtg_group(rtg), rgbno, len);
678		return 0;
679	}
680
681	xfs_extent_busy_insert_discard(rtg_group(rtg), rgbno, len,
682			&tr->extents->extent_list);
683
684	tr->queued++;
685	tr->restart_rtx = rec->ar_startext + rec->ar_extcount;
686	return 0;
687}
688
689/* Trim extents in this rtgroup using the busy extent machinery. */
690static int
691xfs_trim_rtgroup_extents(
692	struct xfs_rtgroup	*rtg,
693	xfs_rtxnum_t		low,
694	xfs_rtxnum_t		high,
695	xfs_daddr_t		minlen)
696{
697	struct xfs_mount	*mp = rtg_mount(rtg);
698	struct xfs_trim_rtgroup	tr = {
699		.minlen_fsb	= XFS_BB_TO_FSB(mp, minlen),
700	};
701	struct xfs_trans	*tp;
702	int			error;
703
704	error = xfs_trans_alloc_empty(mp, &tp);
705	if (error)
706		return error;
707
708	/*
709	 * Walk the free ranges between low and high.  The query_range function
710	 * trims the extents returned.
711	 */
712	do {
713		tr.extents = kzalloc(sizeof(*tr.extents), GFP_KERNEL);
714		if (!tr.extents) {
715			error = -ENOMEM;
716			break;
717		}
718
719		tr.queued = 0;
720		tr.batch = XFS_DISCARD_MAX_EXAMINE;
721		tr.extents->owner = tr.extents;
722		INIT_LIST_HEAD(&tr.extents->extent_list);
723
724		xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
725		error = xfs_rtalloc_query_range(rtg, tp, low, high,
726				xfs_trim_gather_rtgroup_extent, &tr);
727		xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
728		if (error == -ECANCELED)
729			error = 0;
730		if (error) {
731			kfree(tr.extents);
732			break;
733		}
734
735		if (!tr.queued)
736			break;
737
738		/*
739		 * We hand the extent list to the discard function here so the
740		 * discarded extents can be removed from the busy extent list.
741		 * This allows the discards to run asynchronously with
742		 * gathering the next round of extents to discard.
743		 *
744		 * However, we must ensure that we do not reference the extent
745		 * list  after this function call, as it may have been freed by
746		 * the time control returns to us.
747		 */
748		error = xfs_discard_extents(rtg_mount(rtg), tr.extents);
749		if (error)
750			break;
751
752		low = tr.restart_rtx;
753	} while (!xfs_trim_should_stop() && low <= high);
754
755	xfs_trans_cancel(tp);
756	return error;
757}
758
759static int
760xfs_trim_rtdev_extents(
761	struct xfs_mount	*mp,
762	xfs_daddr_t		start,
763	xfs_daddr_t		end,
764	xfs_daddr_t		minlen)
765{
766	xfs_rtblock_t		start_rtbno, end_rtbno;
767	xfs_rtxnum_t		start_rtx, end_rtx;
768	xfs_rgnumber_t		start_rgno, end_rgno;
769	xfs_daddr_t		daddr_offset;
770	int			last_error = 0, error;
771	struct xfs_rtgroup	*rtg = NULL;
772
773	/* Shift the start and end downwards to match the rt device. */
774	daddr_offset = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
775	if (start > daddr_offset)
776		start -= daddr_offset;
777	else
778		start = 0;
779	start_rtbno = xfs_daddr_to_rtb(mp, start);
780	start_rtx = xfs_rtb_to_rtx(mp, start_rtbno);
781	start_rgno = xfs_rtb_to_rgno(mp, start_rtbno);
782
783	if (end <= daddr_offset)
784		return 0;
785	else
786		end -= daddr_offset;
787	end_rtbno = xfs_daddr_to_rtb(mp, end);
788	end_rtx = xfs_rtb_to_rtx(mp, end_rtbno + mp->m_sb.sb_rextsize - 1);
789	end_rgno = xfs_rtb_to_rgno(mp, end_rtbno);
790
791	while ((rtg = xfs_rtgroup_next_range(mp, rtg, start_rgno, end_rgno))) {
792		xfs_rtxnum_t	rtg_end = rtg->rtg_extents;
793
794		if (rtg_rgno(rtg) == end_rgno)
795			rtg_end = min(rtg_end, end_rtx);
796
797		if (xfs_has_rtgroups(mp))
798			error = xfs_trim_rtgroup_extents(rtg, start_rtx,
799					rtg_end, minlen);
800		else
801			error = xfs_trim_rtextents(rtg, start_rtx, rtg_end,
802					minlen);
803		if (error)
804			last_error = error;
805
806		if (xfs_trim_should_stop()) {
807			xfs_rtgroup_rele(rtg);
808			break;
809		}
810		start_rtx = 0;
811	}
812
813	return last_error;
814}
815#else
816# define xfs_trim_rtdev_extents(...)	(-EOPNOTSUPP)
817#endif /* CONFIG_XFS_RT */
818
819/*
820 * trim a range of the filesystem.
821 *
822 * Note: the parameters passed from userspace are byte ranges into the
823 * filesystem which does not match to the format we use for filesystem block
824 * addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format
825 * is a linear address range. Hence we need to use DADDR based conversions and
826 * comparisons for determining the correct offset and regions to trim.
827 *
828 * The realtime device is mapped into the FITRIM "address space" immediately
829 * after the data device.
830 */
831int
832xfs_ioc_trim(
833	struct xfs_mount		*mp,
834	struct fstrim_range __user	*urange)
835{
 
836	unsigned int		granularity =
837		bdev_discard_granularity(mp->m_ddev_targp->bt_bdev);
838	struct block_device	*rt_bdev = NULL;
839	struct fstrim_range	range;
840	xfs_daddr_t		start, end;
841	xfs_extlen_t		minlen;
842	xfs_rfsblock_t		max_blocks;
843	int			error, last_error = 0;
844
845	if (!capable(CAP_SYS_ADMIN))
846		return -EPERM;
847	if (mp->m_rtdev_targp &&
848	    bdev_max_discard_sectors(mp->m_rtdev_targp->bt_bdev))
849		rt_bdev = mp->m_rtdev_targp->bt_bdev;
850	if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev) && !rt_bdev)
851		return -EOPNOTSUPP;
852
853	if (rt_bdev)
854		granularity = max(granularity,
855				  bdev_discard_granularity(rt_bdev));
856
857	/*
858	 * We haven't recovered the log, so we cannot use our bnobt-guided
859	 * storage zapping commands.
860	 */
861	if (xfs_has_norecovery(mp))
862		return -EROFS;
863
864	if (copy_from_user(&range, urange, sizeof(range)))
865		return -EFAULT;
866
867	range.minlen = max_t(u64, granularity, range.minlen);
868	minlen = XFS_B_TO_FSB(mp, range.minlen);
869
870	/*
871	 * Truncating down the len isn't actually quite correct, but using
872	 * BBTOB would mean we trivially get overflows for values
873	 * of ULLONG_MAX or slightly lower.  And ULLONG_MAX is the default
874	 * used by the fstrim application.  In the end it really doesn't
875	 * matter as trimming blocks is an advisory interface.
876	 */
877	max_blocks = mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks;
878	if (range.start >= XFS_FSB_TO_B(mp, max_blocks) ||
879	    range.minlen > XFS_FSB_TO_B(mp, mp->m_ag_max_usable) ||
880	    range.len < mp->m_sb.sb_blocksize)
881		return -EINVAL;
882
883	start = BTOBB(range.start);
884	end = start + BTOBBT(range.len) - 1;
885
886	if (bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev)) {
887		error = xfs_trim_datadev_extents(mp, start, end, minlen);
888		if (error)
889			last_error = error;
890	}
891
892	if (rt_bdev && !xfs_trim_should_stop()) {
893		error = xfs_trim_rtdev_extents(mp, start, end, minlen);
 
 
894		if (error)
895			last_error = error;
 
 
 
 
 
896	}
897
898	if (last_error)
899		return last_error;
900
901	range.len = min_t(unsigned long long, range.len,
902			  XFS_FSB_TO_B(mp, max_blocks) - range.start);
903	if (copy_to_user(urange, &range, sizeof(range)))
904		return -EFAULT;
905	return 0;
906}

  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Copyright (C) 2010, 2023 Red Hat, Inc.
  4 * All Rights Reserved.
  5 */
  6#include "xfs.h"
  7#include "xfs_shared.h"
  8#include "xfs_format.h"
  9#include "xfs_log_format.h"
 10#include "xfs_trans_resv.h"
 11#include "xfs_trans.h"
 12#include "xfs_mount.h"
 13#include "xfs_btree.h"
 14#include "xfs_alloc_btree.h"
 15#include "xfs_alloc.h"
 16#include "xfs_discard.h"
 17#include "xfs_error.h"
 18#include "xfs_extent_busy.h"
 19#include "xfs_trace.h"
 20#include "xfs_log.h"
 21#include "xfs_ag.h"
 22#include "xfs_health.h"
 
 
 23
 24/*
 25 * Notes on an efficient, low latency fstrim algorithm
 26 *
 27 * We need to walk the filesystem free space and issue discards on the free
 28 * space that meet the search criteria (size and location). We cannot issue
 29 * discards on extents that might be in use, or are so recently in use they are
 30 * still marked as busy. To serialise against extent state changes whilst we are
 31 * gathering extents to trim, we must hold the AGF lock to lock out other
 32 * allocations and extent free operations that might change extent state.
 33 *
 34 * However, we cannot just hold the AGF for the entire AG free space walk whilst
 35 * we issue discards on each free space that is found. Storage devices can have
 36 * extremely slow discard implementations (e.g. ceph RBD) and so walking a
 37 * couple of million free extents and issuing synchronous discards on each
 38 * extent can take a *long* time. Whilst we are doing this walk, nothing else
 39 * can access the AGF, and we can stall transactions and hence the log whilst
 40 * modifications wait for the AGF lock to be released. This can lead to hung
 41 * tasks kicking the hung task timer and rebooting the system. This is bad.
 42 *
 43 * Hence we need to take a leaf from the bulkstat playbook. It takes the AGI
 44 * lock, gathers a range of inode cluster buffers that are allocated, drops the
 45 * AGI lock and then reads all the inode cluster buffers and processes them. It
 46 * loops doing this, using a cursor to keep track of where it is up to in the AG
 47 * for each iteration to restart the INOBT lookup from.
 48 *
 49 * We can't do this exactly with free space - once we drop the AGF lock, the
 50 * state of the free extent is out of our control and we cannot run a discard
 51 * safely on it in this situation. Unless, of course, we've marked the free
 52 * extent as busy and undergoing a discard operation whilst we held the AGF
 53 * locked.
 54 *
 55 * This is exactly how online discard works - free extents are marked busy when
 56 * they are freed, and once the extent free has been committed to the journal,
 57 * the busy extent record is marked as "undergoing discard" and the discard is
 58 * then issued on the free extent. Once the discard completes, the busy extent
 59 * record is removed and the extent is able to be allocated again.
 60 *
 61 * In the context of fstrim, if we find a free extent we need to discard, we
 62 * don't have to discard it immediately. All we need to do is record that free
 63 * extent as being busy and under discard, and all the allocation routines will
 64 * now avoid trying to allocate it. Hence if we mark the extent as busy under
 65 * the AGF lock, we can safely discard it without holding the AGF lock because
 66 * nothing will attempt to allocate that free space until the discard completes.
 67 *
 68 * This also allows us to issue discards asynchronously like we do with online
 69 * discard, and so for fast devices fstrim will run much faster as we can have
 70 * multiple discard operations in flight at once, as well as pipeline the free
 71 * extent search so that it overlaps in flight discard IO.
 72 */
 73
 
 
 74struct workqueue_struct *xfs_discard_wq;
 75
 76static void
 77xfs_discard_endio_work(
 78	struct work_struct	*work)
 79{
 80	struct xfs_busy_extents	*extents =
 81		container_of(work, struct xfs_busy_extents, endio_work);
 82
 83	xfs_extent_busy_clear(extents->mount, &extents->extent_list, false);
 84	kfree(extents->owner);
 85}
 86
 87/*
 88 * Queue up the actual completion to a thread to avoid IRQ-safe locking for
 89 * pagb_lock.
 90 */
 91static void
 92xfs_discard_endio(
 93	struct bio		*bio)
 94{
 95	struct xfs_busy_extents	*extents = bio->bi_private;
 96
 97	INIT_WORK(&extents->endio_work, xfs_discard_endio_work);
 98	queue_work(xfs_discard_wq, &extents->endio_work);
 99	bio_put(bio);
100}
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102/*
103 * Walk the discard list and issue discards on all the busy extents in the
104 * list. We plug and chain the bios so that we only need a single completion
105 * call to clear all the busy extents once the discards are complete.
106 */
107int
108xfs_discard_extents(
109	struct xfs_mount	*mp,
110	struct xfs_busy_extents	*extents)
111{
112	struct xfs_extent_busy	*busyp;
113	struct bio		*bio = NULL;
114	struct blk_plug		plug;
115	int			error = 0;
116
117	blk_start_plug(&plug);
118	list_for_each_entry(busyp, &extents->extent_list, list) {
119		trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
120					 busyp->length);
121
122		error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
123				XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
124				XFS_FSB_TO_BB(mp, busyp->length),
125				GFP_KERNEL, &bio);
126		if (error && error != -EOPNOTSUPP) {
127			xfs_info(mp,
128	 "discard failed for extent [0x%llx,%u], error %d",
129				 (unsigned long long)busyp->bno,
130				 busyp->length,
131				 error);
132			break;
133		}
134	}
135
136	if (bio) {
137		bio->bi_private = extents;
138		bio->bi_end_io = xfs_discard_endio;
139		submit_bio(bio);
140	} else {
141		xfs_discard_endio_work(&extents->endio_work);
142	}
143	blk_finish_plug(&plug);
144
145	return error;
146}
147
 
 
 
 
 
 
 
148
149static int
150xfs_trim_gather_extents(
151	struct xfs_perag	*pag,
152	xfs_daddr_t		start,
153	xfs_daddr_t		end,
154	xfs_daddr_t		minlen,
155	struct xfs_alloc_rec_incore *tcur,
156	struct xfs_busy_extents	*extents,
157	uint64_t		*blocks_trimmed)
158{
159	struct xfs_mount	*mp = pag->pag_mount;
160	struct xfs_trans	*tp;
161	struct xfs_btree_cur	*cur;
162	struct xfs_buf		*agbp;
163	int			error;
164	int			i;
165	int			batch = 100;
166
167	/*
168	 * Force out the log.  This means any transactions that might have freed
169	 * space before we take the AGF buffer lock are now on disk, and the
170	 * volatile disk cache is flushed.
171	 */
172	xfs_log_force(mp, XFS_LOG_SYNC);
173
174	error = xfs_trans_alloc_empty(mp, &tp);
175	if (error)
176		return error;
177
178	error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
179	if (error)
180		goto out_trans_cancel;
181
182	cur = xfs_cntbt_init_cursor(mp, tp, agbp, pag);
183
184	/*
185	 * Look up the extent length requested in the AGF and start with it.
186	 */
187	if (tcur->ar_startblock == NULLAGBLOCK)
188		error = xfs_alloc_lookup_ge(cur, 0, tcur->ar_blockcount, &i);
189	else
190		error = xfs_alloc_lookup_le(cur, tcur->ar_startblock,
191				tcur->ar_blockcount, &i);
 
 
 
 
 
192	if (error)
193		goto out_del_cursor;
194	if (i == 0) {
195		/* nothing of that length left in the AG, we are done */
196		tcur->ar_blockcount = 0;
197		goto out_del_cursor;
198	}
199
200	/*
201	 * Loop until we are done with all extents that are large
202	 * enough to be worth discarding or we hit batch limits.
203	 */
204	while (i) {
205		xfs_agblock_t	fbno;
206		xfs_extlen_t	flen;
207		xfs_daddr_t	dbno;
208		xfs_extlen_t	dlen;
209
210		error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
211		if (error)
212			break;
213		if (XFS_IS_CORRUPT(mp, i != 1)) {
214			xfs_btree_mark_sick(cur);
215			error = -EFSCORRUPTED;
216			break;
217		}
218
219		if (--batch <= 0) {
220			/*
221			 * Update the cursor to point at this extent so we
222			 * restart the next batch from this extent.
223			 */
224			tcur->ar_startblock = fbno;
225			tcur->ar_blockcount = flen;
226			break;
227		}
228
229		/*
230		 * use daddr format for all range/len calculations as that is
231		 * the format the range/len variables are supplied in by
232		 * userspace.
233		 */
234		dbno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, fbno);
235		dlen = XFS_FSB_TO_BB(mp, flen);
 
 
 
 
 
 
 
 
 
 
236
237		/*
238		 * Too small?  Give up.
239		 */
240		if (dlen < minlen) {
241			trace_xfs_discard_toosmall(mp, pag->pag_agno, fbno, flen);
242			tcur->ar_blockcount = 0;
243			break;
244		}
 
 
245
246		/*
247		 * If the extent is entirely outside of the range we are
248		 * supposed to discard skip it.  Do not bother to trim
249		 * down partially overlapping ranges for now.
250		 */
251		if (dbno + dlen < start || dbno > end) {
252			trace_xfs_discard_exclude(mp, pag->pag_agno, fbno, flen);
253			goto next_extent;
254		}
255
256		/*
257		 * If any blocks in the range are still busy, skip the
258		 * discard and try again the next time.
259		 */
260		if (xfs_extent_busy_search(mp, pag, fbno, flen)) {
261			trace_xfs_discard_busy(mp, pag->pag_agno, fbno, flen);
262			goto next_extent;
263		}
264
265		xfs_extent_busy_insert_discard(pag, fbno, flen,
266				&extents->extent_list);
267		*blocks_trimmed += flen;
268next_extent:
269		error = xfs_btree_decrement(cur, 0, &i);
 
 
 
270		if (error)
271			break;
272
273		/*
274		 * If there's no more records in the tree, we are done. Set the
275		 * cursor block count to 0 to indicate to the caller that there
276		 * is no more extents to search.
277		 */
278		if (i == 0)
279			tcur->ar_blockcount = 0;
280	}
281
282	/*
283	 * If there was an error, release all the gathered busy extents because
284	 * we aren't going to issue a discard on them any more.
285	 */
286	if (error)
287		xfs_extent_busy_clear(mp, &extents->extent_list, false);
288out_del_cursor:
289	xfs_btree_del_cursor(cur, error);
290out_trans_cancel:
291	xfs_trans_cancel(tp);
292	return error;
293}
294
295static bool
296xfs_trim_should_stop(void)
297{
298	return fatal_signal_pending(current) || freezing(current);
299}
300
301/*
302 * Iterate the free list gathering extents and discarding them. We need a cursor
303 * for the repeated iteration of gather/discard loop, so use the longest extent
304 * we found in the last batch as the key to start the next.
305 */
306static int
307xfs_trim_extents(
308	struct xfs_perag	*pag,
309	xfs_daddr_t		start,
310	xfs_daddr_t		end,
311	xfs_daddr_t		minlen,
312	uint64_t		*blocks_trimmed)
313{
314	struct xfs_alloc_rec_incore tcur = {
315		.ar_blockcount = pag->pagf_longest,
316		.ar_startblock = NULLAGBLOCK,
 
 
317	};
318	int			error = 0;
319
 
 
 
320	do {
321		struct xfs_busy_extents	*extents;
322
323		extents = kzalloc(sizeof(*extents), GFP_KERNEL);
324		if (!extents) {
325			error = -ENOMEM;
326			break;
327		}
328
329		extents->mount = pag->pag_mount;
330		extents->owner = extents;
331		INIT_LIST_HEAD(&extents->extent_list);
332
333		error = xfs_trim_gather_extents(pag, start, end, minlen,
334				&tcur, extents, blocks_trimmed);
335		if (error) {
336			kfree(extents);
337			break;
338		}
339
340		/*
341		 * We hand the extent list to the discard function here so the
342		 * discarded extents can be removed from the busy extent list.
343		 * This allows the discards to run asynchronously with gathering
344		 * the next round of extents to discard.
345		 *
346		 * However, we must ensure that we do not reference the extent
347		 * list  after this function call, as it may have been freed by
348		 * the time control returns to us.
349		 */
350		error = xfs_discard_extents(pag->pag_mount, extents);
351		if (error)
352			break;
353
354		if (xfs_trim_should_stop())
355			break;
356
357	} while (tcur.ar_blockcount != 0);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359	return error;
 
360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361}
 
 
 
362
363/*
364 * trim a range of the filesystem.
365 *
366 * Note: the parameters passed from userspace are byte ranges into the
367 * filesystem which does not match to the format we use for filesystem block
368 * addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format
369 * is a linear address range. Hence we need to use DADDR based conversions and
370 * comparisons for determining the correct offset and regions to trim.
 
 
 
371 */
372int
373xfs_ioc_trim(
374	struct xfs_mount		*mp,
375	struct fstrim_range __user	*urange)
376{
377	struct xfs_perag	*pag;
378	unsigned int		granularity =
379		bdev_discard_granularity(mp->m_ddev_targp->bt_bdev);
 
380	struct fstrim_range	range;
381	xfs_daddr_t		start, end, minlen;
382	xfs_agnumber_t		agno;
383	uint64_t		blocks_trimmed = 0;
384	int			error, last_error = 0;
385
386	if (!capable(CAP_SYS_ADMIN))
387		return -EPERM;
388	if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev))
 
 
 
389		return -EOPNOTSUPP;
390
 
 
 
 
391	/*
392	 * We haven't recovered the log, so we cannot use our bnobt-guided
393	 * storage zapping commands.
394	 */
395	if (xfs_has_norecovery(mp))
396		return -EROFS;
397
398	if (copy_from_user(&range, urange, sizeof(range)))
399		return -EFAULT;
400
401	range.minlen = max_t(u64, granularity, range.minlen);
402	minlen = BTOBB(range.minlen);
 
403	/*
404	 * Truncating down the len isn't actually quite correct, but using
405	 * BBTOB would mean we trivially get overflows for values
406	 * of ULLONG_MAX or slightly lower.  And ULLONG_MAX is the default
407	 * used by the fstrim application.  In the end it really doesn't
408	 * matter as trimming blocks is an advisory interface.
409	 */
410	if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
 
411	    range.minlen > XFS_FSB_TO_B(mp, mp->m_ag_max_usable) ||
412	    range.len < mp->m_sb.sb_blocksize)
413		return -EINVAL;
414
415	start = BTOBB(range.start);
416	end = start + BTOBBT(range.len) - 1;
417
418	if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
419		end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1;
 
 
 
420
421	agno = xfs_daddr_to_agno(mp, start);
422	for_each_perag_range(mp, agno, xfs_daddr_to_agno(mp, end), pag) {
423		error = xfs_trim_extents(pag, start, end, minlen,
424					  &blocks_trimmed);
425		if (error)
426			last_error = error;
427
428		if (xfs_trim_should_stop()) {
429			xfs_perag_rele(pag);
430			break;
431		}
432	}
433
434	if (last_error)
435		return last_error;
436
437	range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
 
438	if (copy_to_user(urange, &range, sizeof(range)))
439		return -EFAULT;
440	return 0;
441}