Linux Audio

Check our new training course

Loading...
v6.8
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Copyright (C) 2010, 2023 Red Hat, Inc.
  4 * All Rights Reserved.
  5 */
  6#include "xfs.h"
  7#include "xfs_shared.h"
  8#include "xfs_format.h"
  9#include "xfs_log_format.h"
 10#include "xfs_trans_resv.h"
 
 11#include "xfs_mount.h"
 12#include "xfs_btree.h"
 13#include "xfs_alloc_btree.h"
 14#include "xfs_alloc.h"
 15#include "xfs_discard.h"
 16#include "xfs_error.h"
 17#include "xfs_extent_busy.h"
 18#include "xfs_trace.h"
 19#include "xfs_log.h"
 20#include "xfs_ag.h"
 
 
 
 21
 22/*
 23 * Notes on an efficient, low latency fstrim algorithm
 24 *
 25 * We need to walk the filesystem free space and issue discards on the free
 26 * space that meet the search criteria (size and location). We cannot issue
 27 * discards on extents that might be in use, or are so recently in use they are
 28 * still marked as busy. To serialise against extent state changes whilst we are
 29 * gathering extents to trim, we must hold the AGF lock to lock out other
 30 * allocations and extent free operations that might change extent state.
 31 *
 32 * However, we cannot just hold the AGF for the entire AG free space walk whilst
 33 * we issue discards on each free space that is found. Storage devices can have
 34 * extremely slow discard implementations (e.g. ceph RBD) and so walking a
 35 * couple of million free extents and issuing synchronous discards on each
 36 * extent can take a *long* time. Whilst we are doing this walk, nothing else
 37 * can access the AGF, and we can stall transactions and hence the log whilst
 38 * modifications wait for the AGF lock to be released. This can lead to hung
 39 * tasks kicking the hung task timer and rebooting the system. This is bad.
 40 *
 41 * Hence we need to take a leaf from the bulkstat playbook. It takes the AGI
 42 * lock, gathers a range of inode cluster buffers that are allocated, drops the
 43 * AGI lock and then reads all the inode cluster buffers and processes them. It
 44 * loops doing this, using a cursor to keep track of where it is up to in the AG
 45 * for each iteration to restart the INOBT lookup from.
 46 *
 47 * We can't do this exactly with free space - once we drop the AGF lock, the
 48 * state of the free extent is out of our control and we cannot run a discard
 49 * safely on it in this situation. Unless, of course, we've marked the free
 50 * extent as busy and undergoing a discard operation whilst we held the AGF
 51 * locked.
 52 *
 53 * This is exactly how online discard works - free extents are marked busy when
 54 * they are freed, and once the extent free has been committed to the journal,
 55 * the busy extent record is marked as "undergoing discard" and the discard is
 56 * then issued on the free extent. Once the discard completes, the busy extent
 57 * record is removed and the extent is able to be allocated again.
 58 *
 59 * In the context of fstrim, if we find a free extent we need to discard, we
 60 * don't have to discard it immediately. All we need to do is record that free
 61 * extent as being busy and under discard, and all the allocation routines will
 62 * now avoid trying to allocate it. Hence if we mark the extent as busy under
 63 * the AGF lock, we can safely discard it without holding the AGF lock because
 64 * nothing will attempt to allocate that free space until the discard completes.
 65 *
 66 * This also allows us to issue discards asynchronously like we do with online
 67 * discard, and so for fast devices fstrim will run much faster as we can have
 68 * multiple discard operations in flight at once, as well as pipeline the free
 69 * extent search so that it overlaps in flight discard IO.
 70 */
 71
 
 
 72struct workqueue_struct *xfs_discard_wq;
 73
 74static void
 75xfs_discard_endio_work(
 76	struct work_struct	*work)
 77{
 78	struct xfs_busy_extents	*extents =
 79		container_of(work, struct xfs_busy_extents, endio_work);
 80
 81	xfs_extent_busy_clear(extents->mount, &extents->extent_list, false);
 82	kmem_free(extents->owner);
 83}
 84
 85/*
 86 * Queue up the actual completion to a thread to avoid IRQ-safe locking for
 87 * pagb_lock.
 88 */
 89static void
 90xfs_discard_endio(
 91	struct bio		*bio)
 92{
 93	struct xfs_busy_extents	*extents = bio->bi_private;
 94
 95	INIT_WORK(&extents->endio_work, xfs_discard_endio_work);
 96	queue_work(xfs_discard_wq, &extents->endio_work);
 97	bio_put(bio);
 98}
 99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100/*
101 * Walk the discard list and issue discards on all the busy extents in the
102 * list. We plug and chain the bios so that we only need a single completion
103 * call to clear all the busy extents once the discards are complete.
104 */
105int
106xfs_discard_extents(
107	struct xfs_mount	*mp,
108	struct xfs_busy_extents	*extents)
109{
110	struct xfs_extent_busy	*busyp;
111	struct bio		*bio = NULL;
112	struct blk_plug		plug;
113	int			error = 0;
114
115	blk_start_plug(&plug);
116	list_for_each_entry(busyp, &extents->extent_list, list) {
117		trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
118					 busyp->length);
119
120		error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
121				XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
122				XFS_FSB_TO_BB(mp, busyp->length),
123				GFP_NOFS, &bio);
124		if (error && error != -EOPNOTSUPP) {
125			xfs_info(mp,
126	 "discard failed for extent [0x%llx,%u], error %d",
127				 (unsigned long long)busyp->bno,
128				 busyp->length,
129				 error);
130			break;
131		}
132	}
133
134	if (bio) {
135		bio->bi_private = extents;
136		bio->bi_end_io = xfs_discard_endio;
137		submit_bio(bio);
138	} else {
139		xfs_discard_endio_work(&extents->endio_work);
140	}
141	blk_finish_plug(&plug);
142
143	return error;
144}
145
 
 
 
 
 
 
 
146
147static int
148xfs_trim_gather_extents(
149	struct xfs_perag	*pag,
150	xfs_daddr_t		start,
151	xfs_daddr_t		end,
152	xfs_daddr_t		minlen,
153	struct xfs_alloc_rec_incore *tcur,
154	struct xfs_busy_extents	*extents,
155	uint64_t		*blocks_trimmed)
156{
157	struct xfs_mount	*mp = pag->pag_mount;
 
158	struct xfs_btree_cur	*cur;
159	struct xfs_buf		*agbp;
160	int			error;
161	int			i;
162	int			batch = 100;
163
164	/*
165	 * Force out the log.  This means any transactions that might have freed
166	 * space before we take the AGF buffer lock are now on disk, and the
167	 * volatile disk cache is flushed.
168	 */
169	xfs_log_force(mp, XFS_LOG_SYNC);
170
171	error = xfs_alloc_read_agf(pag, NULL, 0, &agbp);
172	if (error)
173		return error;
174
175	cur = xfs_allocbt_init_cursor(mp, NULL, agbp, pag, XFS_BTNUM_CNT);
 
 
176
177	/*
178	 * Look up the extent length requested in the AGF and start with it.
179	 */
180	if (tcur->ar_startblock == NULLAGBLOCK)
181		error = xfs_alloc_lookup_ge(cur, 0, tcur->ar_blockcount, &i);
182	else
183		error = xfs_alloc_lookup_le(cur, tcur->ar_startblock,
184				tcur->ar_blockcount, &i);
 
 
 
 
 
 
 
185	if (error)
186		goto out_del_cursor;
187	if (i == 0) {
188		/* nothing of that length left in the AG, we are done */
189		tcur->ar_blockcount = 0;
190		goto out_del_cursor;
191	}
192
193	/*
194	 * Loop until we are done with all extents that are large
195	 * enough to be worth discarding or we hit batch limits.
196	 */
197	while (i) {
198		xfs_agblock_t	fbno;
199		xfs_extlen_t	flen;
200		xfs_daddr_t	dbno;
201		xfs_extlen_t	dlen;
202
203		error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
204		if (error)
205			break;
206		if (XFS_IS_CORRUPT(mp, i != 1)) {
 
207			error = -EFSCORRUPTED;
208			break;
209		}
210
211		if (--batch <= 0) {
212			/*
213			 * Update the cursor to point at this extent so we
214			 * restart the next batch from this extent.
215			 */
216			tcur->ar_startblock = fbno;
217			tcur->ar_blockcount = flen;
218			break;
219		}
220
221		/*
222		 * use daddr format for all range/len calculations as that is
223		 * the format the range/len variables are supplied in by
224		 * userspace.
225		 */
226		dbno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, fbno);
227		dlen = XFS_FSB_TO_BB(mp, flen);
 
 
 
 
 
 
 
 
 
 
228
229		/*
230		 * Too small?  Give up.
231		 */
232		if (dlen < minlen) {
233			trace_xfs_discard_toosmall(mp, pag->pag_agno, fbno, flen);
234			tcur->ar_blockcount = 0;
235			break;
236		}
 
 
237
238		/*
239		 * If the extent is entirely outside of the range we are
240		 * supposed to discard skip it.  Do not bother to trim
241		 * down partially overlapping ranges for now.
242		 */
243		if (dbno + dlen < start || dbno > end) {
244			trace_xfs_discard_exclude(mp, pag->pag_agno, fbno, flen);
245			goto next_extent;
246		}
247
248		/*
249		 * If any blocks in the range are still busy, skip the
250		 * discard and try again the next time.
251		 */
252		if (xfs_extent_busy_search(mp, pag, fbno, flen)) {
253			trace_xfs_discard_busy(mp, pag->pag_agno, fbno, flen);
254			goto next_extent;
255		}
256
257		xfs_extent_busy_insert_discard(pag, fbno, flen,
258				&extents->extent_list);
259		*blocks_trimmed += flen;
260next_extent:
261		error = xfs_btree_decrement(cur, 0, &i);
 
 
 
262		if (error)
263			break;
264
265		/*
266		 * If there's no more records in the tree, we are done. Set the
267		 * cursor block count to 0 to indicate to the caller that there
268		 * is no more extents to search.
269		 */
270		if (i == 0)
271			tcur->ar_blockcount = 0;
272	}
273
274	/*
275	 * If there was an error, release all the gathered busy extents because
276	 * we aren't going to issue a discard on them any more.
277	 */
278	if (error)
279		xfs_extent_busy_clear(mp, &extents->extent_list, false);
280out_del_cursor:
281	xfs_btree_del_cursor(cur, error);
282	xfs_buf_relse(agbp);
 
283	return error;
284}
285
286static bool
287xfs_trim_should_stop(void)
288{
289	return fatal_signal_pending(current) || freezing(current);
290}
291
292/*
293 * Iterate the free list gathering extents and discarding them. We need a cursor
294 * for the repeated iteration of gather/discard loop, so use the longest extent
295 * we found in the last batch as the key to start the next.
296 */
297static int
298xfs_trim_extents(
299	struct xfs_perag	*pag,
300	xfs_daddr_t		start,
301	xfs_daddr_t		end,
302	xfs_daddr_t		minlen,
303	uint64_t		*blocks_trimmed)
304{
305	struct xfs_alloc_rec_incore tcur = {
306		.ar_blockcount = pag->pagf_longest,
307		.ar_startblock = NULLAGBLOCK,
 
 
308	};
309	int			error = 0;
310
 
 
 
311	do {
312		struct xfs_busy_extents	*extents;
313
314		extents = kzalloc(sizeof(*extents), GFP_KERNEL);
315		if (!extents) {
316			error = -ENOMEM;
317			break;
318		}
319
320		extents->mount = pag->pag_mount;
321		extents->owner = extents;
322		INIT_LIST_HEAD(&extents->extent_list);
323
324		error = xfs_trim_gather_extents(pag, start, end, minlen,
325				&tcur, extents, blocks_trimmed);
326		if (error) {
327			kfree(extents);
328			break;
329		}
330
331		/*
332		 * We hand the extent list to the discard function here so the
333		 * discarded extents can be removed from the busy extent list.
334		 * This allows the discards to run asynchronously with gathering
335		 * the next round of extents to discard.
336		 *
337		 * However, we must ensure that we do not reference the extent
338		 * list  after this function call, as it may have been freed by
339		 * the time control returns to us.
340		 */
341		error = xfs_discard_extents(pag->pag_mount, extents);
342		if (error)
343			break;
344
345		if (xfs_trim_should_stop())
346			break;
347
348	} while (tcur.ar_blockcount != 0);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
 
 
 
 
350	return error;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
 
352}
 
 
 
353
354/*
355 * trim a range of the filesystem.
356 *
357 * Note: the parameters passed from userspace are byte ranges into the
358 * filesystem which does not match to the format we use for filesystem block
359 * addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format
360 * is a linear address range. Hence we need to use DADDR based conversions and
361 * comparisons for determining the correct offset and regions to trim.
 
 
 
362 */
363int
364xfs_ioc_trim(
365	struct xfs_mount		*mp,
366	struct fstrim_range __user	*urange)
367{
368	struct xfs_perag	*pag;
369	unsigned int		granularity =
370		bdev_discard_granularity(mp->m_ddev_targp->bt_bdev);
 
371	struct fstrim_range	range;
372	xfs_daddr_t		start, end, minlen;
373	xfs_agnumber_t		agno;
374	uint64_t		blocks_trimmed = 0;
375	int			error, last_error = 0;
376
377	if (!capable(CAP_SYS_ADMIN))
378		return -EPERM;
379	if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev))
 
 
 
380		return -EOPNOTSUPP;
381
 
 
 
 
382	/*
383	 * We haven't recovered the log, so we cannot use our bnobt-guided
384	 * storage zapping commands.
385	 */
386	if (xfs_has_norecovery(mp))
387		return -EROFS;
388
389	if (copy_from_user(&range, urange, sizeof(range)))
390		return -EFAULT;
391
392	range.minlen = max_t(u64, granularity, range.minlen);
393	minlen = BTOBB(range.minlen);
 
394	/*
395	 * Truncating down the len isn't actually quite correct, but using
396	 * BBTOB would mean we trivially get overflows for values
397	 * of ULLONG_MAX or slightly lower.  And ULLONG_MAX is the default
398	 * used by the fstrim application.  In the end it really doesn't
399	 * matter as trimming blocks is an advisory interface.
400	 */
401	if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
 
402	    range.minlen > XFS_FSB_TO_B(mp, mp->m_ag_max_usable) ||
403	    range.len < mp->m_sb.sb_blocksize)
404		return -EINVAL;
405
406	start = BTOBB(range.start);
407	end = start + BTOBBT(range.len) - 1;
408
409	if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
410		end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1;
411
412	agno = xfs_daddr_to_agno(mp, start);
413	for_each_perag_range(mp, agno, xfs_daddr_to_agno(mp, end), pag) {
414		error = xfs_trim_extents(pag, start, end, minlen,
415					  &blocks_trimmed);
416		if (error)
417			last_error = error;
 
418
419		if (xfs_trim_should_stop()) {
420			xfs_perag_rele(pag);
421			break;
422		}
423	}
424
425	if (last_error)
426		return last_error;
427
428	range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
 
429	if (copy_to_user(urange, &range, sizeof(range)))
430		return -EFAULT;
431	return 0;
432}
v6.13.7
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Copyright (C) 2010, 2023 Red Hat, Inc.
  4 * All Rights Reserved.
  5 */
  6#include "xfs.h"
  7#include "xfs_shared.h"
  8#include "xfs_format.h"
  9#include "xfs_log_format.h"
 10#include "xfs_trans_resv.h"
 11#include "xfs_trans.h"
 12#include "xfs_mount.h"
 13#include "xfs_btree.h"
 14#include "xfs_alloc_btree.h"
 15#include "xfs_alloc.h"
 16#include "xfs_discard.h"
 17#include "xfs_error.h"
 18#include "xfs_extent_busy.h"
 19#include "xfs_trace.h"
 20#include "xfs_log.h"
 21#include "xfs_ag.h"
 22#include "xfs_health.h"
 23#include "xfs_rtbitmap.h"
 24#include "xfs_rtgroup.h"
 25
 26/*
 27 * Notes on an efficient, low latency fstrim algorithm
 28 *
 29 * We need to walk the filesystem free space and issue discards on the free
 30 * space that meet the search criteria (size and location). We cannot issue
 31 * discards on extents that might be in use, or are so recently in use they are
 32 * still marked as busy. To serialise against extent state changes whilst we are
 33 * gathering extents to trim, we must hold the AGF lock to lock out other
 34 * allocations and extent free operations that might change extent state.
 35 *
 36 * However, we cannot just hold the AGF for the entire AG free space walk whilst
 37 * we issue discards on each free space that is found. Storage devices can have
 38 * extremely slow discard implementations (e.g. ceph RBD) and so walking a
 39 * couple of million free extents and issuing synchronous discards on each
 40 * extent can take a *long* time. Whilst we are doing this walk, nothing else
 41 * can access the AGF, and we can stall transactions and hence the log whilst
 42 * modifications wait for the AGF lock to be released. This can lead to hung
 43 * tasks kicking the hung task timer and rebooting the system. This is bad.
 44 *
 45 * Hence we need to take a leaf from the bulkstat playbook. It takes the AGI
 46 * lock, gathers a range of inode cluster buffers that are allocated, drops the
 47 * AGI lock and then reads all the inode cluster buffers and processes them. It
 48 * loops doing this, using a cursor to keep track of where it is up to in the AG
 49 * for each iteration to restart the INOBT lookup from.
 50 *
 51 * We can't do this exactly with free space - once we drop the AGF lock, the
 52 * state of the free extent is out of our control and we cannot run a discard
 53 * safely on it in this situation. Unless, of course, we've marked the free
 54 * extent as busy and undergoing a discard operation whilst we held the AGF
 55 * locked.
 56 *
 57 * This is exactly how online discard works - free extents are marked busy when
 58 * they are freed, and once the extent free has been committed to the journal,
 59 * the busy extent record is marked as "undergoing discard" and the discard is
 60 * then issued on the free extent. Once the discard completes, the busy extent
 61 * record is removed and the extent is able to be allocated again.
 62 *
 63 * In the context of fstrim, if we find a free extent we need to discard, we
 64 * don't have to discard it immediately. All we need to do is record that free
 65 * extent as being busy and under discard, and all the allocation routines will
 66 * now avoid trying to allocate it. Hence if we mark the extent as busy under
 67 * the AGF lock, we can safely discard it without holding the AGF lock because
 68 * nothing will attempt to allocate that free space until the discard completes.
 69 *
 70 * This also allows us to issue discards asynchronously like we do with online
 71 * discard, and so for fast devices fstrim will run much faster as we can have
 72 * multiple discard operations in flight at once, as well as pipeline the free
 73 * extent search so that it overlaps in flight discard IO.
 74 */
 75
 76#define XFS_DISCARD_MAX_EXAMINE	(100)
 77
 78struct workqueue_struct *xfs_discard_wq;
 79
 80static void
 81xfs_discard_endio_work(
 82	struct work_struct	*work)
 83{
 84	struct xfs_busy_extents	*extents =
 85		container_of(work, struct xfs_busy_extents, endio_work);
 86
 87	xfs_extent_busy_clear(&extents->extent_list, false);
 88	kfree(extents->owner);
 89}
 90
 91/*
 92 * Queue up the actual completion to a thread to avoid IRQ-safe locking for
 93 * pagb_lock.
 94 */
 95static void
 96xfs_discard_endio(
 97	struct bio		*bio)
 98{
 99	struct xfs_busy_extents	*extents = bio->bi_private;
100
101	INIT_WORK(&extents->endio_work, xfs_discard_endio_work);
102	queue_work(xfs_discard_wq, &extents->endio_work);
103	bio_put(bio);
104}
105
106static inline struct block_device *
107xfs_group_bdev(
108	const struct xfs_group	*xg)
109{
110	struct xfs_mount	*mp = xg->xg_mount;
111
112	switch (xg->xg_type) {
113	case XG_TYPE_AG:
114		return mp->m_ddev_targp->bt_bdev;
115	case XG_TYPE_RTG:
116		return mp->m_rtdev_targp->bt_bdev;
117	default:
118		ASSERT(0);
119		break;
120	}
121	return NULL;
122}
123
124/*
125 * Walk the discard list and issue discards on all the busy extents in the
126 * list. We plug and chain the bios so that we only need a single completion
127 * call to clear all the busy extents once the discards are complete.
128 */
129int
130xfs_discard_extents(
131	struct xfs_mount	*mp,
132	struct xfs_busy_extents	*extents)
133{
134	struct xfs_extent_busy	*busyp;
135	struct bio		*bio = NULL;
136	struct blk_plug		plug;
137	int			error = 0;
138
139	blk_start_plug(&plug);
140	list_for_each_entry(busyp, &extents->extent_list, list) {
141		trace_xfs_discard_extent(busyp->group, busyp->bno,
142				busyp->length);
143
144		error = __blkdev_issue_discard(xfs_group_bdev(busyp->group),
145				xfs_gbno_to_daddr(busyp->group, busyp->bno),
146				XFS_FSB_TO_BB(mp, busyp->length),
147				GFP_KERNEL, &bio);
148		if (error && error != -EOPNOTSUPP) {
149			xfs_info(mp,
150	 "discard failed for extent [0x%llx,%u], error %d",
151				 (unsigned long long)busyp->bno,
152				 busyp->length,
153				 error);
154			break;
155		}
156	}
157
158	if (bio) {
159		bio->bi_private = extents;
160		bio->bi_end_io = xfs_discard_endio;
161		submit_bio(bio);
162	} else {
163		xfs_discard_endio_work(&extents->endio_work);
164	}
165	blk_finish_plug(&plug);
166
167	return error;
168}
169
170struct xfs_trim_cur {
171	xfs_agblock_t	start;
172	xfs_extlen_t	count;
173	xfs_agblock_t	end;
174	xfs_extlen_t	minlen;
175	bool		by_bno;
176};
177
178static int
179xfs_trim_gather_extents(
180	struct xfs_perag	*pag,
181	struct xfs_trim_cur	*tcur,
182	struct xfs_busy_extents	*extents)
 
 
 
 
183{
184	struct xfs_mount	*mp = pag_mount(pag);
185	struct xfs_trans	*tp;
186	struct xfs_btree_cur	*cur;
187	struct xfs_buf		*agbp;
188	int			error;
189	int			i;
190	int			batch = XFS_DISCARD_MAX_EXAMINE;
191
192	/*
193	 * Force out the log.  This means any transactions that might have freed
194	 * space before we take the AGF buffer lock are now on disk, and the
195	 * volatile disk cache is flushed.
196	 */
197	xfs_log_force(mp, XFS_LOG_SYNC);
198
199	error = xfs_trans_alloc_empty(mp, &tp);
200	if (error)
201		return error;
202
203	error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
204	if (error)
205		goto out_trans_cancel;
206
207	if (tcur->by_bno) {
208		/* sub-AG discard request always starts at tcur->start */
209		cur = xfs_bnobt_init_cursor(mp, tp, agbp, pag);
210		error = xfs_alloc_lookup_le(cur, tcur->start, 0, &i);
211		if (!error && !i)
212			error = xfs_alloc_lookup_ge(cur, tcur->start, 0, &i);
213	} else if (tcur->start == 0) {
214		/* first time through a by-len starts with max length */
215		cur = xfs_cntbt_init_cursor(mp, tp, agbp, pag);
216		error = xfs_alloc_lookup_ge(cur, 0, tcur->count, &i);
217	} else {
218		/* nth time through a by-len starts where we left off */
219		cur = xfs_cntbt_init_cursor(mp, tp, agbp, pag);
220		error = xfs_alloc_lookup_le(cur, tcur->start, tcur->count, &i);
221	}
222	if (error)
223		goto out_del_cursor;
224	if (i == 0) {
225		/* nothing of that length left in the AG, we are done */
226		tcur->count = 0;
227		goto out_del_cursor;
228	}
229
230	/*
231	 * Loop until we are done with all extents that are large
232	 * enough to be worth discarding or we hit batch limits.
233	 */
234	while (i) {
235		xfs_agblock_t	fbno;
236		xfs_extlen_t	flen;
 
 
237
238		error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
239		if (error)
240			break;
241		if (XFS_IS_CORRUPT(mp, i != 1)) {
242			xfs_btree_mark_sick(cur);
243			error = -EFSCORRUPTED;
244			break;
245		}
246
247		if (--batch <= 0) {
248			/*
249			 * Update the cursor to point at this extent so we
250			 * restart the next batch from this extent.
251			 */
252			tcur->start = fbno;
253			tcur->count = flen;
254			break;
255		}
256
257		/*
258		 * If the extent is entirely outside of the range we are
259		 * supposed to skip it.  Do not bother to trim down partially
260		 * overlapping ranges for now.
261		 */
262		if (fbno + flen < tcur->start) {
263			trace_xfs_discard_exclude(pag_group(pag), fbno, flen);
264			goto next_extent;
265		}
266		if (fbno > tcur->end) {
267			trace_xfs_discard_exclude(pag_group(pag), fbno, flen);
268			if (tcur->by_bno) {
269				tcur->count = 0;
270				break;
271			}
272			goto next_extent;
273		}
274
275		/* Trim the extent returned to the range we want. */
276		if (fbno < tcur->start) {
277			flen -= tcur->start - fbno;
278			fbno = tcur->start;
 
 
 
279		}
280		if (fbno + flen > tcur->end + 1)
281			flen = tcur->end - fbno + 1;
282
283		/* Too small?  Give up. */
284		if (flen < tcur->minlen) {
285			trace_xfs_discard_toosmall(pag_group(pag), fbno, flen);
286			if (tcur->by_bno)
287				goto next_extent;
288			tcur->count = 0;
289			break;
 
290		}
291
292		/*
293		 * If any blocks in the range are still busy, skip the
294		 * discard and try again the next time.
295		 */
296		if (xfs_extent_busy_search(pag_group(pag), fbno, flen)) {
297			trace_xfs_discard_busy(pag_group(pag), fbno, flen);
298			goto next_extent;
299		}
300
301		xfs_extent_busy_insert_discard(pag_group(pag), fbno, flen,
302				&extents->extent_list);
 
303next_extent:
304		if (tcur->by_bno)
305			error = xfs_btree_increment(cur, 0, &i);
306		else
307			error = xfs_btree_decrement(cur, 0, &i);
308		if (error)
309			break;
310
311		/*
312		 * If there's no more records in the tree, we are done. Set the
313		 * cursor block count to 0 to indicate to the caller that there
314		 * is no more extents to search.
315		 */
316		if (i == 0)
317			tcur->count = 0;
318	}
319
320	/*
321	 * If there was an error, release all the gathered busy extents because
322	 * we aren't going to issue a discard on them any more.
323	 */
324	if (error)
325		xfs_extent_busy_clear(&extents->extent_list, false);
326out_del_cursor:
327	xfs_btree_del_cursor(cur, error);
328out_trans_cancel:
329	xfs_trans_cancel(tp);
330	return error;
331}
332
333static bool
334xfs_trim_should_stop(void)
335{
336	return fatal_signal_pending(current) || freezing(current);
337}
338
339/*
340 * Iterate the free list gathering extents and discarding them. We need a cursor
341 * for the repeated iteration of gather/discard loop, so use the longest extent
342 * we found in the last batch as the key to start the next.
343 */
344static int
345xfs_trim_perag_extents(
346	struct xfs_perag	*pag,
347	xfs_agblock_t		start,
348	xfs_agblock_t		end,
349	xfs_extlen_t		minlen)
 
350{
351	struct xfs_trim_cur	tcur = {
352		.start		= start,
353		.count		= pag->pagf_longest,
354		.end		= end,
355		.minlen		= minlen,
356	};
357	int			error = 0;
358
359	if (start != 0 || end != pag_group(pag)->xg_block_count)
360		tcur.by_bno = true;
361
362	do {
363		struct xfs_busy_extents	*extents;
364
365		extents = kzalloc(sizeof(*extents), GFP_KERNEL);
366		if (!extents) {
367			error = -ENOMEM;
368			break;
369		}
370
 
371		extents->owner = extents;
372		INIT_LIST_HEAD(&extents->extent_list);
373
374		error = xfs_trim_gather_extents(pag, &tcur, extents);
 
375		if (error) {
376			kfree(extents);
377			break;
378		}
379
380		/*
381		 * We hand the extent list to the discard function here so the
382		 * discarded extents can be removed from the busy extent list.
383		 * This allows the discards to run asynchronously with gathering
384		 * the next round of extents to discard.
385		 *
386		 * However, we must ensure that we do not reference the extent
387		 * list  after this function call, as it may have been freed by
388		 * the time control returns to us.
389		 */
390		error = xfs_discard_extents(pag_mount(pag), extents);
391		if (error)
392			break;
393
394		if (xfs_trim_should_stop())
395			break;
396
397	} while (tcur.count != 0);
398
399	return error;
400
401}
402
403static int
404xfs_trim_datadev_extents(
405	struct xfs_mount	*mp,
406	xfs_daddr_t		start,
407	xfs_daddr_t		end,
408	xfs_extlen_t		minlen)
409{
410	xfs_agnumber_t		start_agno, end_agno;
411	xfs_agblock_t		start_agbno, end_agbno;
412	struct xfs_perag	*pag = NULL;
413	xfs_daddr_t		ddev_end;
414	int			last_error = 0, error;
415
416	ddev_end = min_t(xfs_daddr_t, end,
417			 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1);
418
419	start_agno = xfs_daddr_to_agno(mp, start);
420	start_agbno = xfs_daddr_to_agbno(mp, start);
421	end_agno = xfs_daddr_to_agno(mp, ddev_end);
422	end_agbno = xfs_daddr_to_agbno(mp, ddev_end);
423
424	while ((pag = xfs_perag_next_range(mp, pag, start_agno, end_agno))) {
425		xfs_agblock_t	agend = pag_group(pag)->xg_block_count;
426
427		if (pag_agno(pag) == end_agno)
428			agend = end_agbno;
429		error = xfs_trim_perag_extents(pag, start_agbno, agend, minlen);
430		if (error)
431			last_error = error;
432
433		if (xfs_trim_should_stop()) {
434			xfs_perag_rele(pag);
435			break;
436		}
437		start_agbno = 0;
438	}
439
440	return last_error;
441}
442
443#ifdef CONFIG_XFS_RT
444struct xfs_trim_rtdev {
445	/* list of rt extents to free */
446	struct list_head	extent_list;
447
448	/* minimum length that caller allows us to trim */
449	xfs_rtblock_t		minlen_fsb;
450
451	/* restart point for the rtbitmap walk */
452	xfs_rtxnum_t		restart_rtx;
453
454	/* stopping point for the current rtbitmap walk */
455	xfs_rtxnum_t		stop_rtx;
456};
457
458struct xfs_rtx_busy {
459	struct list_head	list;
460	xfs_rtblock_t		bno;
461	xfs_rtblock_t		length;
462};
463
464static void
465xfs_discard_free_rtdev_extents(
466	struct xfs_trim_rtdev	*tr)
467{
468	struct xfs_rtx_busy	*busyp, *n;
469
470	list_for_each_entry_safe(busyp, n, &tr->extent_list, list) {
471		list_del_init(&busyp->list);
472		kfree(busyp);
473	}
474}
475
476/*
477 * Walk the discard list and issue discards on all the busy extents in the
478 * list. We plug and chain the bios so that we only need a single completion
479 * call to clear all the busy extents once the discards are complete.
480 */
481static int
482xfs_discard_rtdev_extents(
483	struct xfs_mount	*mp,
484	struct xfs_trim_rtdev	*tr)
485{
486	struct block_device	*bdev = mp->m_rtdev_targp->bt_bdev;
487	struct xfs_rtx_busy	*busyp;
488	struct bio		*bio = NULL;
489	struct blk_plug		plug;
490	xfs_rtblock_t		start = NULLRTBLOCK, length = 0;
491	int			error = 0;
492
493	blk_start_plug(&plug);
494	list_for_each_entry(busyp, &tr->extent_list, list) {
495		if (start == NULLRTBLOCK)
496			start = busyp->bno;
497		length += busyp->length;
498
499		trace_xfs_discard_rtextent(mp, busyp->bno, busyp->length);
500
501		error = __blkdev_issue_discard(bdev,
502				xfs_rtb_to_daddr(mp, busyp->bno),
503				XFS_FSB_TO_BB(mp, busyp->length),
504				GFP_NOFS, &bio);
505		if (error)
506			break;
507	}
508	xfs_discard_free_rtdev_extents(tr);
509
510	if (bio) {
511		error = submit_bio_wait(bio);
512		if (error == -EOPNOTSUPP)
513			error = 0;
514		if (error)
515			xfs_info(mp,
516	 "discard failed for rtextent [0x%llx,%llu], error %d",
517				 (unsigned long long)start,
518				 (unsigned long long)length,
519				 error);
520		bio_put(bio);
521	}
522	blk_finish_plug(&plug);
523
524	return error;
525}
526
527static int
528xfs_trim_gather_rtextent(
529	struct xfs_rtgroup		*rtg,
530	struct xfs_trans		*tp,
531	const struct xfs_rtalloc_rec	*rec,
532	void				*priv)
533{
534	struct xfs_trim_rtdev		*tr = priv;
535	struct xfs_rtx_busy		*busyp;
536	xfs_rtblock_t			rbno, rlen;
537
538	if (rec->ar_startext > tr->stop_rtx) {
539		/*
540		 * If we've scanned a large number of rtbitmap blocks, update
541		 * the cursor to point at this extent so we restart the next
542		 * batch from this extent.
543		 */
544		tr->restart_rtx = rec->ar_startext;
545		return -ECANCELED;
546	}
547
548	rbno = xfs_rtx_to_rtb(rtg, rec->ar_startext);
549	rlen = xfs_rtbxlen_to_blen(rtg_mount(rtg), rec->ar_extcount);
550
551	/* Ignore too small. */
552	if (rlen < tr->minlen_fsb) {
553		trace_xfs_discard_rttoosmall(rtg_mount(rtg), rbno, rlen);
554		return 0;
555	}
556
557	busyp = kzalloc(sizeof(struct xfs_rtx_busy), GFP_KERNEL);
558	if (!busyp)
559		return -ENOMEM;
560
561	busyp->bno = rbno;
562	busyp->length = rlen;
563	INIT_LIST_HEAD(&busyp->list);
564	list_add_tail(&busyp->list, &tr->extent_list);
565
566	tr->restart_rtx = rec->ar_startext + rec->ar_extcount;
567	return 0;
568}
569
570/* Trim extents on an !rtgroups realtime device */
571static int
572xfs_trim_rtextents(
573	struct xfs_rtgroup	*rtg,
574	xfs_rtxnum_t		low,
575	xfs_rtxnum_t		high,
576	xfs_daddr_t		minlen)
577{
578	struct xfs_mount	*mp = rtg_mount(rtg);
579	struct xfs_trim_rtdev	tr = {
580		.minlen_fsb	= XFS_BB_TO_FSB(mp, minlen),
581		.extent_list	= LIST_HEAD_INIT(tr.extent_list),
582	};
583	struct xfs_trans	*tp;
584	int			error;
585
586	error = xfs_trans_alloc_empty(mp, &tp);
587	if (error)
588		return error;
589
590	/*
591	 * Walk the free ranges between low and high.  The query_range function
592	 * trims the extents returned.
593	 */
594	do {
595		tr.stop_rtx = low + xfs_rtbitmap_rtx_per_rbmblock(mp);
596		xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
597		error = xfs_rtalloc_query_range(rtg, tp, low, high,
598				xfs_trim_gather_rtextent, &tr);
599
600		if (error == -ECANCELED)
601			error = 0;
602		if (error) {
603			xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
604			xfs_discard_free_rtdev_extents(&tr);
605			break;
606		}
607
608		if (list_empty(&tr.extent_list)) {
609			xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
610			break;
611		}
612
613		error = xfs_discard_rtdev_extents(mp, &tr);
614		xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
615		if (error)
616			break;
617
618		low = tr.restart_rtx;
619	} while (!xfs_trim_should_stop() && low <= high);
620
621	xfs_trans_cancel(tp);
622	return error;
623}
624
625struct xfs_trim_rtgroup {
626	/* list of rtgroup extents to free */
627	struct xfs_busy_extents	*extents;
628
629	/* minimum length that caller allows us to trim */
630	xfs_rtblock_t		minlen_fsb;
631
632	/* restart point for the rtbitmap walk */
633	xfs_rtxnum_t		restart_rtx;
634
635	/* number of extents to examine before stopping to issue discard ios */
636	int			batch;
637
638	/* number of extents queued for discard */
639	int			queued;
640};
641
642static int
643xfs_trim_gather_rtgroup_extent(
644	struct xfs_rtgroup		*rtg,
645	struct xfs_trans		*tp,
646	const struct xfs_rtalloc_rec	*rec,
647	void				*priv)
648{
649	struct xfs_trim_rtgroup		*tr = priv;
650	xfs_rgblock_t			rgbno;
651	xfs_extlen_t			len;
652
653	if (--tr->batch <= 0) {
654		/*
655		 * If we've checked a large number of extents, update the
656		 * cursor to point at this extent so we restart the next batch
657		 * from this extent.
658		 */
659		tr->restart_rtx = rec->ar_startext;
660		return -ECANCELED;
661	}
662
663	rgbno = xfs_rtx_to_rgbno(rtg, rec->ar_startext);
664	len = xfs_rtxlen_to_extlen(rtg_mount(rtg), rec->ar_extcount);
665
666	/* Ignore too small. */
667	if (len < tr->minlen_fsb) {
668		trace_xfs_discard_toosmall(rtg_group(rtg), rgbno, len);
669		return 0;
670	}
671
672	/*
673	 * If any blocks in the range are still busy, skip the discard and try
674	 * again the next time.
675	 */
676	if (xfs_extent_busy_search(rtg_group(rtg), rgbno, len)) {
677		trace_xfs_discard_busy(rtg_group(rtg), rgbno, len);
678		return 0;
679	}
680
681	xfs_extent_busy_insert_discard(rtg_group(rtg), rgbno, len,
682			&tr->extents->extent_list);
683
684	tr->queued++;
685	tr->restart_rtx = rec->ar_startext + rec->ar_extcount;
686	return 0;
687}
688
689/* Trim extents in this rtgroup using the busy extent machinery. */
690static int
691xfs_trim_rtgroup_extents(
692	struct xfs_rtgroup	*rtg,
693	xfs_rtxnum_t		low,
694	xfs_rtxnum_t		high,
695	xfs_daddr_t		minlen)
696{
697	struct xfs_mount	*mp = rtg_mount(rtg);
698	struct xfs_trim_rtgroup	tr = {
699		.minlen_fsb	= XFS_BB_TO_FSB(mp, minlen),
700	};
701	struct xfs_trans	*tp;
702	int			error;
703
704	error = xfs_trans_alloc_empty(mp, &tp);
705	if (error)
706		return error;
707
708	/*
709	 * Walk the free ranges between low and high.  The query_range function
710	 * trims the extents returned.
711	 */
712	do {
713		tr.extents = kzalloc(sizeof(*tr.extents), GFP_KERNEL);
714		if (!tr.extents) {
715			error = -ENOMEM;
716			break;
717		}
718
719		tr.queued = 0;
720		tr.batch = XFS_DISCARD_MAX_EXAMINE;
721		tr.extents->owner = tr.extents;
722		INIT_LIST_HEAD(&tr.extents->extent_list);
723
724		xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
725		error = xfs_rtalloc_query_range(rtg, tp, low, high,
726				xfs_trim_gather_rtgroup_extent, &tr);
727		xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
728		if (error == -ECANCELED)
729			error = 0;
730		if (error) {
731			kfree(tr.extents);
732			break;
733		}
734
735		if (!tr.queued)
736			break;
737
738		/*
739		 * We hand the extent list to the discard function here so the
740		 * discarded extents can be removed from the busy extent list.
741		 * This allows the discards to run asynchronously with
742		 * gathering the next round of extents to discard.
743		 *
744		 * However, we must ensure that we do not reference the extent
745		 * list  after this function call, as it may have been freed by
746		 * the time control returns to us.
747		 */
748		error = xfs_discard_extents(rtg_mount(rtg), tr.extents);
749		if (error)
750			break;
751
752		low = tr.restart_rtx;
753	} while (!xfs_trim_should_stop() && low <= high);
754
755	xfs_trans_cancel(tp);
756	return error;
757}
758
759static int
760xfs_trim_rtdev_extents(
761	struct xfs_mount	*mp,
762	xfs_daddr_t		start,
763	xfs_daddr_t		end,
764	xfs_daddr_t		minlen)
765{
766	xfs_rtblock_t		start_rtbno, end_rtbno;
767	xfs_rtxnum_t		start_rtx, end_rtx;
768	xfs_rgnumber_t		start_rgno, end_rgno;
769	xfs_daddr_t		daddr_offset;
770	int			last_error = 0, error;
771	struct xfs_rtgroup	*rtg = NULL;
772
773	/* Shift the start and end downwards to match the rt device. */
774	daddr_offset = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
775	if (start > daddr_offset)
776		start -= daddr_offset;
777	else
778		start = 0;
779	start_rtbno = xfs_daddr_to_rtb(mp, start);
780	start_rtx = xfs_rtb_to_rtx(mp, start_rtbno);
781	start_rgno = xfs_rtb_to_rgno(mp, start_rtbno);
782
783	if (end <= daddr_offset)
784		return 0;
785	else
786		end -= daddr_offset;
787	end_rtbno = xfs_daddr_to_rtb(mp, end);
788	end_rtx = xfs_rtb_to_rtx(mp, end_rtbno + mp->m_sb.sb_rextsize - 1);
789	end_rgno = xfs_rtb_to_rgno(mp, end_rtbno);
790
791	while ((rtg = xfs_rtgroup_next_range(mp, rtg, start_rgno, end_rgno))) {
792		xfs_rtxnum_t	rtg_end = rtg->rtg_extents;
793
794		if (rtg_rgno(rtg) == end_rgno)
795			rtg_end = min(rtg_end, end_rtx);
796
797		if (xfs_has_rtgroups(mp))
798			error = xfs_trim_rtgroup_extents(rtg, start_rtx,
799					rtg_end, minlen);
800		else
801			error = xfs_trim_rtextents(rtg, start_rtx, rtg_end,
802					minlen);
803		if (error)
804			last_error = error;
805
806		if (xfs_trim_should_stop()) {
807			xfs_rtgroup_rele(rtg);
808			break;
809		}
810		start_rtx = 0;
811	}
812
813	return last_error;
814}
815#else
816# define xfs_trim_rtdev_extents(...)	(-EOPNOTSUPP)
817#endif /* CONFIG_XFS_RT */
818
819/*
820 * trim a range of the filesystem.
821 *
822 * Note: the parameters passed from userspace are byte ranges into the
823 * filesystem which does not match to the format we use for filesystem block
824 * addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format
825 * is a linear address range. Hence we need to use DADDR based conversions and
826 * comparisons for determining the correct offset and regions to trim.
827 *
828 * The realtime device is mapped into the FITRIM "address space" immediately
829 * after the data device.
830 */
831int
832xfs_ioc_trim(
833	struct xfs_mount		*mp,
834	struct fstrim_range __user	*urange)
835{
 
836	unsigned int		granularity =
837		bdev_discard_granularity(mp->m_ddev_targp->bt_bdev);
838	struct block_device	*rt_bdev = NULL;
839	struct fstrim_range	range;
840	xfs_daddr_t		start, end;
841	xfs_extlen_t		minlen;
842	xfs_rfsblock_t		max_blocks;
843	int			error, last_error = 0;
844
845	if (!capable(CAP_SYS_ADMIN))
846		return -EPERM;
847	if (mp->m_rtdev_targp &&
848	    bdev_max_discard_sectors(mp->m_rtdev_targp->bt_bdev))
849		rt_bdev = mp->m_rtdev_targp->bt_bdev;
850	if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev) && !rt_bdev)
851		return -EOPNOTSUPP;
852
853	if (rt_bdev)
854		granularity = max(granularity,
855				  bdev_discard_granularity(rt_bdev));
856
857	/*
858	 * We haven't recovered the log, so we cannot use our bnobt-guided
859	 * storage zapping commands.
860	 */
861	if (xfs_has_norecovery(mp))
862		return -EROFS;
863
864	if (copy_from_user(&range, urange, sizeof(range)))
865		return -EFAULT;
866
867	range.minlen = max_t(u64, granularity, range.minlen);
868	minlen = XFS_B_TO_FSB(mp, range.minlen);
869
870	/*
871	 * Truncating down the len isn't actually quite correct, but using
872	 * BBTOB would mean we trivially get overflows for values
873	 * of ULLONG_MAX or slightly lower.  And ULLONG_MAX is the default
874	 * used by the fstrim application.  In the end it really doesn't
875	 * matter as trimming blocks is an advisory interface.
876	 */
877	max_blocks = mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks;
878	if (range.start >= XFS_FSB_TO_B(mp, max_blocks) ||
879	    range.minlen > XFS_FSB_TO_B(mp, mp->m_ag_max_usable) ||
880	    range.len < mp->m_sb.sb_blocksize)
881		return -EINVAL;
882
883	start = BTOBB(range.start);
884	end = start + BTOBBT(range.len) - 1;
885
886	if (bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev)) {
887		error = xfs_trim_datadev_extents(mp, start, end, minlen);
 
 
 
 
 
888		if (error)
889			last_error = error;
890	}
891
892	if (rt_bdev && !xfs_trim_should_stop()) {
893		error = xfs_trim_rtdev_extents(mp, start, end, minlen);
894		if (error)
895			last_error = error;
896	}
897
898	if (last_error)
899		return last_error;
900
901	range.len = min_t(unsigned long long, range.len,
902			  XFS_FSB_TO_B(mp, max_blocks) - range.start);
903	if (copy_to_user(urange, &range, sizeof(range)))
904		return -EFAULT;
905	return 0;
906}