Linux Audio

Check our new training course

Loading...
v6.9.4
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
  4 * Author: Darrick J. Wong <djwong@kernel.org>
  5 */
  6#include "xfs.h"
  7#include "xfs_fs.h"
  8#include "xfs_shared.h"
  9#include "xfs_format.h"
 10#include "xfs_trans_resv.h"
 11#include "xfs_mount.h"
 12#include "xfs_log_format.h"
 13#include "xfs_trans.h"
 14#include "xfs_inode.h"
 15#include "xfs_quota.h"
 16#include "xfs_qm.h"
 17#include "xfs_icache.h"
 18#include "xfs_bmap_util.h"
 19#include "xfs_ialloc.h"
 20#include "xfs_ag.h"
 21#include "scrub/scrub.h"
 22#include "scrub/common.h"
 23#include "scrub/repair.h"
 24#include "scrub/xfile.h"
 25#include "scrub/xfarray.h"
 26#include "scrub/iscan.h"
 27#include "scrub/quota.h"
 28#include "scrub/quotacheck.h"
 29#include "scrub/trace.h"
 30
 31/*
 32 * Live Quotacheck
 33 * ===============
 34 *
 35 * Quota counters are "summary" metadata, in the sense that they are computed
 36 * as the summation of the block usage counts for every file on the filesystem.
 37 * Therefore, we compute the correct icount, bcount, and rtbcount values by
 38 * creating a shadow quota counter structure and walking every inode.
 39 */
 40
 41/* Track the quota deltas for a dquot in a transaction. */
 42struct xqcheck_dqtrx {
 43	xfs_dqtype_t		q_type;
 44	xfs_dqid_t		q_id;
 45
 46	int64_t			icount_delta;
 47
 48	int64_t			bcount_delta;
 49	int64_t			delbcnt_delta;
 50
 51	int64_t			rtbcount_delta;
 52	int64_t			delrtb_delta;
 53};
 54
 55#define XQCHECK_MAX_NR_DQTRXS	(XFS_QM_TRANS_DQTYPES * XFS_QM_TRANS_MAXDQS)
 56
 57/*
 58 * Track the quota deltas for all dquots attached to a transaction if the
 59 * quota deltas are being applied to an inode that we already scanned.
 60 */
 61struct xqcheck_dqacct {
 62	struct rhash_head	hash;
 63	uintptr_t		tx_id;
 64	struct xqcheck_dqtrx	dqtrx[XQCHECK_MAX_NR_DQTRXS];
 65	unsigned int		refcount;
 66};
 67
 68/* Free a shadow dquot accounting structure. */
 69static void
 70xqcheck_dqacct_free(
 71	void			*ptr,
 72	void			*arg)
 73{
 74	struct xqcheck_dqacct	*dqa = ptr;
 75
 76	kfree(dqa);
 77}
 78
 79/* Set us up to scrub quota counters. */
 80int
 81xchk_setup_quotacheck(
 82	struct xfs_scrub	*sc)
 83{
 84	if (!XFS_IS_QUOTA_ON(sc->mp))
 85		return -ENOENT;
 86
 87	xchk_fsgates_enable(sc, XCHK_FSGATES_QUOTA);
 88
 89	sc->buf = kzalloc(sizeof(struct xqcheck), XCHK_GFP_FLAGS);
 90	if (!sc->buf)
 91		return -ENOMEM;
 92
 93	return xchk_setup_fs(sc);
 94}
 95
 96/*
 97 * Part 1: Collecting dquot resource usage counts.  For each xfs_dquot attached
 98 * to each inode, we create a shadow dquot, and compute the inode count and add
 99 * the data/rt block usage from what we see.
100 *
101 * To avoid false corruption reports in part 2, any failure in this part must
102 * set the INCOMPLETE flag even when a negative errno is returned.  This care
103 * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
104 * ECANCELED) that are absorbed into a scrub state flag update by
105 * xchk_*_process_error.  Scrub and repair share the same incore data
106 * structures, so the INCOMPLETE flag is critical to prevent a repair based on
107 * insufficient information.
108 *
109 * Because we are scanning a live filesystem, it's possible that another thread
110 * will try to update the quota counters for an inode that we've already
111 * scanned.  This will cause our counts to be incorrect.  Therefore, we hook
112 * the live transaction code in two places: (1) when the callers update the
113 * per-transaction dqtrx structure to log quota counter updates; and (2) when
114 * transaction commit actually logs those updates to the incore dquot.  By
115 * shadowing transaction updates in this manner, live quotacheck can ensure
116 * by locking the dquot and the shadow structure that its own copies are not
117 * out of date.  Because the hook code runs in a different process context from
118 * the scrub code and the scrub state flags are not accessed atomically,
119 * failures in the hook code must abort the iscan and the scrubber must notice
120 * the aborted scan and set the incomplete flag.
121 *
122 * Note that we use srcu notifier hooks to minimize the overhead when live
123 * quotacheck is /not/ running.
124 */
125
126/* Update an incore dquot counter information from a live update. */
127static int
128xqcheck_update_incore_counts(
129	struct xqcheck		*xqc,
130	struct xfarray		*counts,
131	xfs_dqid_t		id,
132	int64_t			inodes,
133	int64_t			nblks,
134	int64_t			rtblks)
135{
136	struct xqcheck_dquot	xcdq;
137	int			error;
138
139	error = xfarray_load_sparse(counts, id, &xcdq);
140	if (error)
141		return error;
142
143	xcdq.flags |= XQCHECK_DQUOT_WRITTEN;
144	xcdq.icount += inodes;
145	xcdq.bcount += nblks;
146	xcdq.rtbcount += rtblks;
147
148	error = xfarray_store(counts, id, &xcdq);
149	if (error == -EFBIG) {
150		/*
151		 * EFBIG means we tried to store data at too high a byte offset
152		 * in the sparse array.  IOWs, we cannot complete the check and
153		 * must notify userspace that the check was incomplete.
154		 */
155		error = -ECANCELED;
156	}
157	return error;
158}
159
160/* Decide if this is the shadow dquot accounting structure for a transaction. */
161static int
162xqcheck_dqacct_obj_cmpfn(
163	struct rhashtable_compare_arg	*arg,
164	const void			*obj)
165{
166	const uintptr_t			*tx_idp = arg->key;
167	const struct xqcheck_dqacct	*dqa = obj;
168
169	if (dqa->tx_id != *tx_idp)
170		return 1;
171	return 0;
172}
173
174static const struct rhashtable_params xqcheck_dqacct_hash_params = {
175	.min_size		= 32,
176	.key_len		= sizeof(uintptr_t),
177	.key_offset		= offsetof(struct xqcheck_dqacct, tx_id),
178	.head_offset		= offsetof(struct xqcheck_dqacct, hash),
179	.automatic_shrinking	= true,
180	.obj_cmpfn		= xqcheck_dqacct_obj_cmpfn,
181};
182
183/* Find a shadow dqtrx slot for the given dquot. */
184STATIC struct xqcheck_dqtrx *
185xqcheck_get_dqtrx(
186	struct xqcheck_dqacct	*dqa,
187	xfs_dqtype_t		q_type,
188	xfs_dqid_t		q_id)
189{
190	int			i;
191
192	for (i = 0; i < XQCHECK_MAX_NR_DQTRXS; i++) {
193		if (dqa->dqtrx[i].q_type == 0 ||
194		    (dqa->dqtrx[i].q_type == q_type &&
195		     dqa->dqtrx[i].q_id == q_id))
196			return &dqa->dqtrx[i];
197	}
198
199	return NULL;
200}
201
202/*
203 * Create and fill out a quota delta tracking structure to shadow the updates
204 * going on in the regular quota code.
205 */
206static int
207xqcheck_mod_live_ino_dqtrx(
208	struct notifier_block		*nb,
209	unsigned long			action,
210	void				*data)
211{
212	struct xfs_mod_ino_dqtrx_params *p = data;
213	struct xqcheck			*xqc;
214	struct xqcheck_dqacct		*dqa;
215	struct xqcheck_dqtrx		*dqtrx;
216	int				error;
217
218	xqc = container_of(nb, struct xqcheck, qhook.mod_hook.nb);
219
220	/* Skip quota reservation fields. */
221	switch (action) {
222	case XFS_TRANS_DQ_BCOUNT:
223	case XFS_TRANS_DQ_DELBCOUNT:
224	case XFS_TRANS_DQ_ICOUNT:
225	case XFS_TRANS_DQ_RTBCOUNT:
226	case XFS_TRANS_DQ_DELRTBCOUNT:
227		break;
228	default:
229		return NOTIFY_DONE;
230	}
231
232	/* Ignore dqtrx updates for quota types we don't care about. */
233	switch (p->q_type) {
234	case XFS_DQTYPE_USER:
235		if (!xqc->ucounts)
236			return NOTIFY_DONE;
237		break;
238	case XFS_DQTYPE_GROUP:
239		if (!xqc->gcounts)
240			return NOTIFY_DONE;
241		break;
242	case XFS_DQTYPE_PROJ:
243		if (!xqc->pcounts)
244			return NOTIFY_DONE;
245		break;
246	default:
247		return NOTIFY_DONE;
248	}
249
250	/* Skip inodes that haven't been scanned yet. */
251	if (!xchk_iscan_want_live_update(&xqc->iscan, p->ino))
252		return NOTIFY_DONE;
253
254	/* Make a shadow quota accounting tracker for this transaction. */
255	mutex_lock(&xqc->lock);
256	dqa = rhashtable_lookup_fast(&xqc->shadow_dquot_acct, &p->tx_id,
257			xqcheck_dqacct_hash_params);
258	if (!dqa) {
259		dqa = kzalloc(sizeof(struct xqcheck_dqacct), XCHK_GFP_FLAGS);
260		if (!dqa)
261			goto out_abort;
262
263		dqa->tx_id = p->tx_id;
264		error = rhashtable_insert_fast(&xqc->shadow_dquot_acct,
265				&dqa->hash, xqcheck_dqacct_hash_params);
266		if (error)
267			goto out_abort;
268	}
269
270	/* Find the shadow dqtrx (or an empty slot) here. */
271	dqtrx = xqcheck_get_dqtrx(dqa, p->q_type, p->q_id);
272	if (!dqtrx)
273		goto out_abort;
274	if (dqtrx->q_type == 0) {
275		dqtrx->q_type = p->q_type;
276		dqtrx->q_id = p->q_id;
277		dqa->refcount++;
278	}
279
280	/* Update counter */
281	switch (action) {
282	case XFS_TRANS_DQ_BCOUNT:
283		dqtrx->bcount_delta += p->delta;
284		break;
285	case XFS_TRANS_DQ_DELBCOUNT:
286		dqtrx->delbcnt_delta += p->delta;
287		break;
288	case XFS_TRANS_DQ_ICOUNT:
289		dqtrx->icount_delta += p->delta;
290		break;
291	case XFS_TRANS_DQ_RTBCOUNT:
292		dqtrx->rtbcount_delta += p->delta;
293		break;
294	case XFS_TRANS_DQ_DELRTBCOUNT:
295		dqtrx->delrtb_delta += p->delta;
296		break;
297	}
298
299	mutex_unlock(&xqc->lock);
300	return NOTIFY_DONE;
301
302out_abort:
303	xchk_iscan_abort(&xqc->iscan);
304	mutex_unlock(&xqc->lock);
305	return NOTIFY_DONE;
306}
307
308/*
309 * Apply the transaction quota deltas to our shadow quota accounting info when
310 * the regular quota code are doing the same.
311 */
312static int
313xqcheck_apply_live_dqtrx(
314	struct notifier_block		*nb,
315	unsigned long			action,
316	void				*data)
317{
318	struct xfs_apply_dqtrx_params	*p = data;
319	struct xqcheck			*xqc;
320	struct xqcheck_dqacct		*dqa;
321	struct xqcheck_dqtrx		*dqtrx;
322	struct xfarray			*counts;
323	int				error;
324
325	xqc = container_of(nb, struct xqcheck, qhook.apply_hook.nb);
326
327	/* Map the dquot type to an incore counter object. */
328	switch (p->q_type) {
329	case XFS_DQTYPE_USER:
330		counts = xqc->ucounts;
331		break;
332	case XFS_DQTYPE_GROUP:
333		counts = xqc->gcounts;
334		break;
335	case XFS_DQTYPE_PROJ:
336		counts = xqc->pcounts;
337		break;
338	default:
339		return NOTIFY_DONE;
340	}
341
342	if (xchk_iscan_aborted(&xqc->iscan) || counts == NULL)
343		return NOTIFY_DONE;
344
345	/*
346	 * Find the shadow dqtrx for this transaction and dquot, if any deltas
347	 * need to be applied here.  If not, we're finished early.
348	 */
349	mutex_lock(&xqc->lock);
350	dqa = rhashtable_lookup_fast(&xqc->shadow_dquot_acct, &p->tx_id,
351			xqcheck_dqacct_hash_params);
352	if (!dqa)
353		goto out_unlock;
354	dqtrx = xqcheck_get_dqtrx(dqa, p->q_type, p->q_id);
355	if (!dqtrx || dqtrx->q_type == 0)
356		goto out_unlock;
357
358	/* Update our shadow dquot if we're committing. */
359	if (action == XFS_APPLY_DQTRX_COMMIT) {
360		error = xqcheck_update_incore_counts(xqc, counts, p->q_id,
361				dqtrx->icount_delta,
362				dqtrx->bcount_delta + dqtrx->delbcnt_delta,
363				dqtrx->rtbcount_delta + dqtrx->delrtb_delta);
364		if (error)
365			goto out_abort;
366	}
367
368	/* Free the shadow accounting structure if that was the last user. */
369	dqa->refcount--;
370	if (dqa->refcount == 0) {
371		error = rhashtable_remove_fast(&xqc->shadow_dquot_acct,
372				&dqa->hash, xqcheck_dqacct_hash_params);
373		if (error)
374			goto out_abort;
375		xqcheck_dqacct_free(dqa, NULL);
376	}
377
378	mutex_unlock(&xqc->lock);
379	return NOTIFY_DONE;
380
381out_abort:
382	xchk_iscan_abort(&xqc->iscan);
383out_unlock:
384	mutex_unlock(&xqc->lock);
385	return NOTIFY_DONE;
386}
387
388/* Record this inode's quota usage in our shadow quota counter data. */
389STATIC int
390xqcheck_collect_inode(
391	struct xqcheck		*xqc,
392	struct xfs_inode	*ip)
393{
394	struct xfs_trans	*tp = xqc->sc->tp;
395	xfs_filblks_t		nblks, rtblks;
396	uint			ilock_flags = 0;
397	xfs_dqid_t		id;
398	bool			isreg = S_ISREG(VFS_I(ip)->i_mode);
399	int			error = 0;
400
401	if (xfs_is_quota_inode(&tp->t_mountp->m_sb, ip->i_ino)) {
 
402		/*
403		 * Quota files are never counted towards quota, so we do not
404		 * need to take the lock.
 
 
405		 */
406		xchk_iscan_mark_visited(&xqc->iscan, ip);
407		return 0;
408	}
409
410	/* Figure out the data / rt device block counts. */
411	xfs_ilock(ip, XFS_IOLOCK_SHARED);
412	if (isreg)
413		xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
414	if (XFS_IS_REALTIME_INODE(ip)) {
415		/*
416		 * Read in the data fork for rt files so that _count_blocks
417		 * can count the number of blocks allocated from the rt volume.
418		 * Inodes do not track that separately.
419		 */
420		ilock_flags = xfs_ilock_data_map_shared(ip);
421		error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
422		if (error)
423			goto out_abort;
424	} else {
425		ilock_flags = XFS_ILOCK_SHARED;
426		xfs_ilock(ip, XFS_ILOCK_SHARED);
427	}
428	xfs_inode_count_blocks(tp, ip, &nblks, &rtblks);
429
430	if (xchk_iscan_aborted(&xqc->iscan)) {
431		error = -ECANCELED;
432		goto out_incomplete;
433	}
434
435	/* Update the shadow dquot counters. */
436	mutex_lock(&xqc->lock);
437	if (xqc->ucounts) {
438		id = xfs_qm_id_for_quotatype(ip, XFS_DQTYPE_USER);
439		error = xqcheck_update_incore_counts(xqc, xqc->ucounts, id, 1,
440				nblks, rtblks);
441		if (error)
442			goto out_mutex;
443	}
444
445	if (xqc->gcounts) {
446		id = xfs_qm_id_for_quotatype(ip, XFS_DQTYPE_GROUP);
447		error = xqcheck_update_incore_counts(xqc, xqc->gcounts, id, 1,
448				nblks, rtblks);
449		if (error)
450			goto out_mutex;
451	}
452
453	if (xqc->pcounts) {
454		id = xfs_qm_id_for_quotatype(ip, XFS_DQTYPE_PROJ);
455		error = xqcheck_update_incore_counts(xqc, xqc->pcounts, id, 1,
456				nblks, rtblks);
457		if (error)
458			goto out_mutex;
459	}
460	mutex_unlock(&xqc->lock);
461
462	xchk_iscan_mark_visited(&xqc->iscan, ip);
463	goto out_ilock;
464
465out_mutex:
466	mutex_unlock(&xqc->lock);
467out_abort:
468	xchk_iscan_abort(&xqc->iscan);
469out_incomplete:
470	xchk_set_incomplete(xqc->sc);
471out_ilock:
472	xfs_iunlock(ip, ilock_flags);
473	if (isreg)
474		xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
475	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
476	return error;
477}
478
479/* Walk all the allocated inodes and run a quota scan on them. */
480STATIC int
481xqcheck_collect_counts(
482	struct xqcheck		*xqc)
483{
484	struct xfs_scrub	*sc = xqc->sc;
485	struct xfs_inode	*ip;
486	int			error;
487
488	/*
489	 * Set up for a potentially lengthy filesystem scan by reducing our
490	 * transaction resource usage for the duration.  Specifically:
491	 *
492	 * Cancel the transaction to release the log grant space while we scan
493	 * the filesystem.
494	 *
495	 * Create a new empty transaction to eliminate the possibility of the
496	 * inode scan deadlocking on cyclical metadata.
497	 *
498	 * We pass the empty transaction to the file scanning function to avoid
499	 * repeatedly cycling empty transactions.  This can be done without
500	 * risk of deadlock between sb_internal and the IOLOCK (we take the
501	 * IOLOCK to quiesce the file before scanning) because empty
502	 * transactions do not take sb_internal.
503	 */
504	xchk_trans_cancel(sc);
505	error = xchk_trans_alloc_empty(sc);
506	if (error)
507		return error;
508
509	while ((error = xchk_iscan_iter(&xqc->iscan, &ip)) == 1) {
510		error = xqcheck_collect_inode(xqc, ip);
511		xchk_irele(sc, ip);
512		if (error)
513			break;
514
515		if (xchk_should_terminate(sc, &error))
516			break;
517	}
518	xchk_iscan_iter_finish(&xqc->iscan);
519	if (error) {
520		xchk_set_incomplete(sc);
521		/*
522		 * If we couldn't grab an inode that was busy with a state
523		 * change, change the error code so that we exit to userspace
524		 * as quickly as possible.
525		 */
526		if (error == -EBUSY)
527			return -ECANCELED;
528		return error;
529	}
530
531	/*
532	 * Switch out for a real transaction in preparation for building a new
533	 * tree.
534	 */
535	xchk_trans_cancel(sc);
536	return xchk_setup_fs(sc);
537}
538
539/*
540 * Part 2: Comparing dquot resource counters.  Walk each xfs_dquot, comparing
541 * the resource usage counters against our shadow dquots; and then walk each
542 * shadow dquot (that wasn't covered in the first part), comparing it against
543 * the xfs_dquot.
544 */
545
546/*
547 * Check the dquot data against what we observed.  Caller must hold the dquot
548 * lock.
549 */
550STATIC int
551xqcheck_compare_dquot(
552	struct xqcheck		*xqc,
553	xfs_dqtype_t		dqtype,
554	struct xfs_dquot	*dq)
555{
556	struct xqcheck_dquot	xcdq;
557	struct xfarray		*counts = xqcheck_counters_for(xqc, dqtype);
558	int			error;
559
560	if (xchk_iscan_aborted(&xqc->iscan)) {
561		xchk_set_incomplete(xqc->sc);
562		return -ECANCELED;
563	}
564
565	mutex_lock(&xqc->lock);
566	error = xfarray_load_sparse(counts, dq->q_id, &xcdq);
567	if (error)
568		goto out_unlock;
569
570	if (xcdq.icount != dq->q_ino.count)
571		xchk_qcheck_set_corrupt(xqc->sc, dqtype, dq->q_id);
572
573	if (xcdq.bcount != dq->q_blk.count)
574		xchk_qcheck_set_corrupt(xqc->sc, dqtype, dq->q_id);
575
576	if (xcdq.rtbcount != dq->q_rtb.count)
577		xchk_qcheck_set_corrupt(xqc->sc, dqtype, dq->q_id);
578
579	xcdq.flags |= (XQCHECK_DQUOT_COMPARE_SCANNED | XQCHECK_DQUOT_WRITTEN);
580	error = xfarray_store(counts, dq->q_id, &xcdq);
581	if (error == -EFBIG) {
582		/*
583		 * EFBIG means we tried to store data at too high a byte offset
584		 * in the sparse array.  IOWs, we cannot complete the check and
585		 * must notify userspace that the check was incomplete.  This
586		 * should never happen outside of the collection phase.
587		 */
588		xchk_set_incomplete(xqc->sc);
589		error = -ECANCELED;
590	}
591	mutex_unlock(&xqc->lock);
592	if (error)
593		return error;
594
595	if (xqc->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
596		return -ECANCELED;
597
598	return 0;
599
600out_unlock:
601	mutex_unlock(&xqc->lock);
602	return error;
603}
604
605/*
606 * Walk all the observed dquots, and make sure there's a matching incore
607 * dquot and that its counts match ours.
608 */
609STATIC int
610xqcheck_walk_observations(
611	struct xqcheck		*xqc,
612	xfs_dqtype_t		dqtype)
613{
614	struct xqcheck_dquot	xcdq;
615	struct xfs_dquot	*dq;
616	struct xfarray		*counts = xqcheck_counters_for(xqc, dqtype);
617	xfarray_idx_t		cur = XFARRAY_CURSOR_INIT;
618	int			error;
619
620	mutex_lock(&xqc->lock);
621	while ((error = xfarray_iter(counts, &cur, &xcdq)) == 1) {
622		xfs_dqid_t	id = cur - 1;
623
624		if (xcdq.flags & XQCHECK_DQUOT_COMPARE_SCANNED)
625			continue;
626
627		mutex_unlock(&xqc->lock);
628
629		error = xfs_qm_dqget(xqc->sc->mp, id, dqtype, false, &dq);
630		if (error == -ENOENT) {
631			xchk_qcheck_set_corrupt(xqc->sc, dqtype, id);
632			return 0;
633		}
634		if (error)
635			return error;
636
637		error = xqcheck_compare_dquot(xqc, dqtype, dq);
638		xfs_qm_dqput(dq);
639		if (error)
640			return error;
641
642		if (xchk_should_terminate(xqc->sc, &error))
643			return error;
644
645		mutex_lock(&xqc->lock);
646	}
647	mutex_unlock(&xqc->lock);
648
649	return error;
650}
651
652/* Compare the quota counters we observed against the live dquots. */
653STATIC int
654xqcheck_compare_dqtype(
655	struct xqcheck		*xqc,
656	xfs_dqtype_t		dqtype)
657{
658	struct xchk_dqiter	cursor = { };
659	struct xfs_scrub	*sc = xqc->sc;
660	struct xfs_dquot	*dq;
661	int			error;
662
663	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
664		return 0;
665
666	/* If the quota CHKD flag is cleared, we need to repair this quota. */
667	if (!(xfs_quota_chkd_flag(dqtype) & sc->mp->m_qflags)) {
668		xchk_qcheck_set_corrupt(xqc->sc, dqtype, 0);
669		return 0;
670	}
671
672	/* Compare what we observed against the actual dquots. */
673	xchk_dqiter_init(&cursor, sc, dqtype);
674	while ((error = xchk_dquot_iter(&cursor, &dq)) == 1) {
675		error = xqcheck_compare_dquot(xqc, dqtype, dq);
676		xfs_qm_dqput(dq);
677		if (error)
678			break;
679	}
680	if (error)
681		return error;
682
683	/* Walk all the observed dquots and compare to the incore ones. */
684	return xqcheck_walk_observations(xqc, dqtype);
685}
686
687/* Tear down everything associated with a quotacheck. */
688static void
689xqcheck_teardown_scan(
690	void			*priv)
691{
692	struct xqcheck		*xqc = priv;
693	struct xfs_quotainfo	*qi = xqc->sc->mp->m_quotainfo;
694
695	/* Discourage any hook functions that might be running. */
696	xchk_iscan_abort(&xqc->iscan);
697
698	/*
699	 * As noted above, the apply hook is responsible for cleaning up the
700	 * shadow dquot accounting data when a transaction completes.  The mod
701	 * hook must be removed before the apply hook so that we don't
702	 * mistakenly leave an active shadow account for the mod hook to get
703	 * its hands on.  No hooks should be running after these functions
704	 * return.
705	 */
706	xfs_dqtrx_hook_del(qi, &xqc->qhook);
707
708	if (xqc->shadow_dquot_acct.key_len) {
709		rhashtable_free_and_destroy(&xqc->shadow_dquot_acct,
710				xqcheck_dqacct_free, NULL);
711		xqc->shadow_dquot_acct.key_len = 0;
712	}
713
714	if (xqc->pcounts) {
715		xfarray_destroy(xqc->pcounts);
716		xqc->pcounts = NULL;
717	}
718
719	if (xqc->gcounts) {
720		xfarray_destroy(xqc->gcounts);
721		xqc->gcounts = NULL;
722	}
723
724	if (xqc->ucounts) {
725		xfarray_destroy(xqc->ucounts);
726		xqc->ucounts = NULL;
727	}
728
729	xchk_iscan_teardown(&xqc->iscan);
730	mutex_destroy(&xqc->lock);
731	xqc->sc = NULL;
732}
733
734/*
735 * Scan all inodes in the entire filesystem to generate quota counter data.
736 * If the scan is successful, the quota data will be left alive for a repair.
737 * If any error occurs, we'll tear everything down.
738 */
739STATIC int
740xqcheck_setup_scan(
741	struct xfs_scrub	*sc,
742	struct xqcheck		*xqc)
743{
744	char			*descr;
745	struct xfs_quotainfo	*qi = sc->mp->m_quotainfo;
746	unsigned long long	max_dquots = XFS_DQ_ID_MAX + 1ULL;
747	int			error;
748
749	ASSERT(xqc->sc == NULL);
750	xqc->sc = sc;
751
752	mutex_init(&xqc->lock);
753
754	/* Retry iget every tenth of a second for up to 30 seconds. */
755	xchk_iscan_start(sc, 30000, 100, &xqc->iscan);
756
757	error = -ENOMEM;
758	if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_USER)) {
759		descr = xchk_xfile_descr(sc, "user dquot records");
760		error = xfarray_create(descr, max_dquots,
761				sizeof(struct xqcheck_dquot), &xqc->ucounts);
762		kfree(descr);
763		if (error)
764			goto out_teardown;
765	}
766
767	if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_GROUP)) {
768		descr = xchk_xfile_descr(sc, "group dquot records");
769		error = xfarray_create(descr, max_dquots,
770				sizeof(struct xqcheck_dquot), &xqc->gcounts);
771		kfree(descr);
772		if (error)
773			goto out_teardown;
774	}
775
776	if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_PROJ)) {
777		descr = xchk_xfile_descr(sc, "project dquot records");
778		error = xfarray_create(descr, max_dquots,
779				sizeof(struct xqcheck_dquot), &xqc->pcounts);
780		kfree(descr);
781		if (error)
782			goto out_teardown;
783	}
784
785	/*
786	 * Set up hash table to map transactions to our internal shadow dqtrx
787	 * structures.
788	 */
789	error = rhashtable_init(&xqc->shadow_dquot_acct,
790			&xqcheck_dqacct_hash_params);
791	if (error)
792		goto out_teardown;
793
794	/*
795	 * Hook into the quota code.  The hook only triggers for inodes that
796	 * were already scanned, and the scanner thread takes each inode's
797	 * ILOCK, which means that any in-progress inode updates will finish
798	 * before we can scan the inode.
799	 *
800	 * The apply hook (which removes the shadow dquot accounting struct)
801	 * must be installed before the mod hook so that we never fail to catch
802	 * the end of a quota update sequence and leave stale shadow data.
803	 */
804	ASSERT(sc->flags & XCHK_FSGATES_QUOTA);
805	xfs_dqtrx_hook_setup(&xqc->qhook, xqcheck_mod_live_ino_dqtrx,
806			xqcheck_apply_live_dqtrx);
807
808	error = xfs_dqtrx_hook_add(qi, &xqc->qhook);
809	if (error)
810		goto out_teardown;
811
812	/* Use deferred cleanup to pass the quota count data to repair. */
813	sc->buf_cleanup = xqcheck_teardown_scan;
814	return 0;
815
816out_teardown:
817	xqcheck_teardown_scan(xqc);
818	return error;
819}
820
821/* Scrub all counters for a given quota type. */
822int
823xchk_quotacheck(
824	struct xfs_scrub	*sc)
825{
826	struct xqcheck		*xqc = sc->buf;
827	int			error = 0;
828
829	/* Check quota counters on the live filesystem. */
830	error = xqcheck_setup_scan(sc, xqc);
831	if (error)
832		return error;
833
834	/* Walk all inodes, picking up quota information. */
835	error = xqcheck_collect_counts(xqc);
836	if (!xchk_xref_process_error(sc, 0, 0, &error))
837		return error;
838
839	/* Fail fast if we're not playing with a full dataset. */
840	if (xchk_iscan_aborted(&xqc->iscan))
841		xchk_set_incomplete(sc);
842	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
843		return 0;
844
845	/* Compare quota counters. */
846	if (xqc->ucounts) {
847		error = xqcheck_compare_dqtype(xqc, XFS_DQTYPE_USER);
848		if (!xchk_xref_process_error(sc, 0, 0, &error))
849			return error;
850	}
851	if (xqc->gcounts) {
852		error = xqcheck_compare_dqtype(xqc, XFS_DQTYPE_GROUP);
853		if (!xchk_xref_process_error(sc, 0, 0, &error))
854			return error;
855	}
856	if (xqc->pcounts) {
857		error = xqcheck_compare_dqtype(xqc, XFS_DQTYPE_PROJ);
858		if (!xchk_xref_process_error(sc, 0, 0, &error))
859			return error;
860	}
861
862	/* Check one last time for an incomplete dataset. */
863	if (xchk_iscan_aborted(&xqc->iscan))
864		xchk_set_incomplete(sc);
865
866	return 0;
867}
v6.13.7
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
  4 * Author: Darrick J. Wong <djwong@kernel.org>
  5 */
  6#include "xfs.h"
  7#include "xfs_fs.h"
  8#include "xfs_shared.h"
  9#include "xfs_format.h"
 10#include "xfs_trans_resv.h"
 11#include "xfs_mount.h"
 12#include "xfs_log_format.h"
 13#include "xfs_trans.h"
 14#include "xfs_inode.h"
 15#include "xfs_quota.h"
 16#include "xfs_qm.h"
 17#include "xfs_icache.h"
 18#include "xfs_bmap_util.h"
 19#include "xfs_ialloc.h"
 20#include "xfs_ag.h"
 21#include "scrub/scrub.h"
 22#include "scrub/common.h"
 23#include "scrub/repair.h"
 24#include "scrub/xfile.h"
 25#include "scrub/xfarray.h"
 26#include "scrub/iscan.h"
 27#include "scrub/quota.h"
 28#include "scrub/quotacheck.h"
 29#include "scrub/trace.h"
 30
 31/*
 32 * Live Quotacheck
 33 * ===============
 34 *
 35 * Quota counters are "summary" metadata, in the sense that they are computed
 36 * as the summation of the block usage counts for every file on the filesystem.
 37 * Therefore, we compute the correct icount, bcount, and rtbcount values by
 38 * creating a shadow quota counter structure and walking every inode.
 39 */
 40
 41/* Track the quota deltas for a dquot in a transaction. */
 42struct xqcheck_dqtrx {
 43	xfs_dqtype_t		q_type;
 44	xfs_dqid_t		q_id;
 45
 46	int64_t			icount_delta;
 47
 48	int64_t			bcount_delta;
 49	int64_t			delbcnt_delta;
 50
 51	int64_t			rtbcount_delta;
 52	int64_t			delrtb_delta;
 53};
 54
 55#define XQCHECK_MAX_NR_DQTRXS	(XFS_QM_TRANS_DQTYPES * XFS_QM_TRANS_MAXDQS)
 56
 57/*
 58 * Track the quota deltas for all dquots attached to a transaction if the
 59 * quota deltas are being applied to an inode that we already scanned.
 60 */
 61struct xqcheck_dqacct {
 62	struct rhash_head	hash;
 63	uintptr_t		tx_id;
 64	struct xqcheck_dqtrx	dqtrx[XQCHECK_MAX_NR_DQTRXS];
 65	unsigned int		refcount;
 66};
 67
 68/* Free a shadow dquot accounting structure. */
 69static void
 70xqcheck_dqacct_free(
 71	void			*ptr,
 72	void			*arg)
 73{
 74	struct xqcheck_dqacct	*dqa = ptr;
 75
 76	kfree(dqa);
 77}
 78
 79/* Set us up to scrub quota counters. */
 80int
 81xchk_setup_quotacheck(
 82	struct xfs_scrub	*sc)
 83{
 84	if (!XFS_IS_QUOTA_ON(sc->mp))
 85		return -ENOENT;
 86
 87	xchk_fsgates_enable(sc, XCHK_FSGATES_QUOTA);
 88
 89	sc->buf = kzalloc(sizeof(struct xqcheck), XCHK_GFP_FLAGS);
 90	if (!sc->buf)
 91		return -ENOMEM;
 92
 93	return xchk_setup_fs(sc);
 94}
 95
 96/*
 97 * Part 1: Collecting dquot resource usage counts.  For each xfs_dquot attached
 98 * to each inode, we create a shadow dquot, and compute the inode count and add
 99 * the data/rt block usage from what we see.
100 *
101 * To avoid false corruption reports in part 2, any failure in this part must
102 * set the INCOMPLETE flag even when a negative errno is returned.  This care
103 * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
104 * ECANCELED) that are absorbed into a scrub state flag update by
105 * xchk_*_process_error.  Scrub and repair share the same incore data
106 * structures, so the INCOMPLETE flag is critical to prevent a repair based on
107 * insufficient information.
108 *
109 * Because we are scanning a live filesystem, it's possible that another thread
110 * will try to update the quota counters for an inode that we've already
111 * scanned.  This will cause our counts to be incorrect.  Therefore, we hook
112 * the live transaction code in two places: (1) when the callers update the
113 * per-transaction dqtrx structure to log quota counter updates; and (2) when
114 * transaction commit actually logs those updates to the incore dquot.  By
115 * shadowing transaction updates in this manner, live quotacheck can ensure
116 * by locking the dquot and the shadow structure that its own copies are not
117 * out of date.  Because the hook code runs in a different process context from
118 * the scrub code and the scrub state flags are not accessed atomically,
119 * failures in the hook code must abort the iscan and the scrubber must notice
120 * the aborted scan and set the incomplete flag.
121 *
122 * Note that we use srcu notifier hooks to minimize the overhead when live
123 * quotacheck is /not/ running.
124 */
125
126/* Update an incore dquot counter information from a live update. */
127static int
128xqcheck_update_incore_counts(
129	struct xqcheck		*xqc,
130	struct xfarray		*counts,
131	xfs_dqid_t		id,
132	int64_t			inodes,
133	int64_t			nblks,
134	int64_t			rtblks)
135{
136	struct xqcheck_dquot	xcdq;
137	int			error;
138
139	error = xfarray_load_sparse(counts, id, &xcdq);
140	if (error)
141		return error;
142
143	xcdq.flags |= XQCHECK_DQUOT_WRITTEN;
144	xcdq.icount += inodes;
145	xcdq.bcount += nblks;
146	xcdq.rtbcount += rtblks;
147
148	error = xfarray_store(counts, id, &xcdq);
149	if (error == -EFBIG) {
150		/*
151		 * EFBIG means we tried to store data at too high a byte offset
152		 * in the sparse array.  IOWs, we cannot complete the check and
153		 * must notify userspace that the check was incomplete.
154		 */
155		error = -ECANCELED;
156	}
157	return error;
158}
159
160/* Decide if this is the shadow dquot accounting structure for a transaction. */
161static int
162xqcheck_dqacct_obj_cmpfn(
163	struct rhashtable_compare_arg	*arg,
164	const void			*obj)
165{
166	const uintptr_t			*tx_idp = arg->key;
167	const struct xqcheck_dqacct	*dqa = obj;
168
169	if (dqa->tx_id != *tx_idp)
170		return 1;
171	return 0;
172}
173
174static const struct rhashtable_params xqcheck_dqacct_hash_params = {
175	.min_size		= 32,
176	.key_len		= sizeof(uintptr_t),
177	.key_offset		= offsetof(struct xqcheck_dqacct, tx_id),
178	.head_offset		= offsetof(struct xqcheck_dqacct, hash),
179	.automatic_shrinking	= true,
180	.obj_cmpfn		= xqcheck_dqacct_obj_cmpfn,
181};
182
183/* Find a shadow dqtrx slot for the given dquot. */
184STATIC struct xqcheck_dqtrx *
185xqcheck_get_dqtrx(
186	struct xqcheck_dqacct	*dqa,
187	xfs_dqtype_t		q_type,
188	xfs_dqid_t		q_id)
189{
190	int			i;
191
192	for (i = 0; i < XQCHECK_MAX_NR_DQTRXS; i++) {
193		if (dqa->dqtrx[i].q_type == 0 ||
194		    (dqa->dqtrx[i].q_type == q_type &&
195		     dqa->dqtrx[i].q_id == q_id))
196			return &dqa->dqtrx[i];
197	}
198
199	return NULL;
200}
201
202/*
203 * Create and fill out a quota delta tracking structure to shadow the updates
204 * going on in the regular quota code.
205 */
206static int
207xqcheck_mod_live_ino_dqtrx(
208	struct notifier_block		*nb,
209	unsigned long			action,
210	void				*data)
211{
212	struct xfs_mod_ino_dqtrx_params *p = data;
213	struct xqcheck			*xqc;
214	struct xqcheck_dqacct		*dqa;
215	struct xqcheck_dqtrx		*dqtrx;
216	int				error;
217
218	xqc = container_of(nb, struct xqcheck, qhook.mod_hook.nb);
219
220	/* Skip quota reservation fields. */
221	switch (action) {
222	case XFS_TRANS_DQ_BCOUNT:
223	case XFS_TRANS_DQ_DELBCOUNT:
224	case XFS_TRANS_DQ_ICOUNT:
225	case XFS_TRANS_DQ_RTBCOUNT:
226	case XFS_TRANS_DQ_DELRTBCOUNT:
227		break;
228	default:
229		return NOTIFY_DONE;
230	}
231
232	/* Ignore dqtrx updates for quota types we don't care about. */
233	switch (p->q_type) {
234	case XFS_DQTYPE_USER:
235		if (!xqc->ucounts)
236			return NOTIFY_DONE;
237		break;
238	case XFS_DQTYPE_GROUP:
239		if (!xqc->gcounts)
240			return NOTIFY_DONE;
241		break;
242	case XFS_DQTYPE_PROJ:
243		if (!xqc->pcounts)
244			return NOTIFY_DONE;
245		break;
246	default:
247		return NOTIFY_DONE;
248	}
249
250	/* Skip inodes that haven't been scanned yet. */
251	if (!xchk_iscan_want_live_update(&xqc->iscan, p->ino))
252		return NOTIFY_DONE;
253
254	/* Make a shadow quota accounting tracker for this transaction. */
255	mutex_lock(&xqc->lock);
256	dqa = rhashtable_lookup_fast(&xqc->shadow_dquot_acct, &p->tx_id,
257			xqcheck_dqacct_hash_params);
258	if (!dqa) {
259		dqa = kzalloc(sizeof(struct xqcheck_dqacct), XCHK_GFP_FLAGS);
260		if (!dqa)
261			goto out_abort;
262
263		dqa->tx_id = p->tx_id;
264		error = rhashtable_insert_fast(&xqc->shadow_dquot_acct,
265				&dqa->hash, xqcheck_dqacct_hash_params);
266		if (error)
267			goto out_abort;
268	}
269
270	/* Find the shadow dqtrx (or an empty slot) here. */
271	dqtrx = xqcheck_get_dqtrx(dqa, p->q_type, p->q_id);
272	if (!dqtrx)
273		goto out_abort;
274	if (dqtrx->q_type == 0) {
275		dqtrx->q_type = p->q_type;
276		dqtrx->q_id = p->q_id;
277		dqa->refcount++;
278	}
279
280	/* Update counter */
281	switch (action) {
282	case XFS_TRANS_DQ_BCOUNT:
283		dqtrx->bcount_delta += p->delta;
284		break;
285	case XFS_TRANS_DQ_DELBCOUNT:
286		dqtrx->delbcnt_delta += p->delta;
287		break;
288	case XFS_TRANS_DQ_ICOUNT:
289		dqtrx->icount_delta += p->delta;
290		break;
291	case XFS_TRANS_DQ_RTBCOUNT:
292		dqtrx->rtbcount_delta += p->delta;
293		break;
294	case XFS_TRANS_DQ_DELRTBCOUNT:
295		dqtrx->delrtb_delta += p->delta;
296		break;
297	}
298
299	mutex_unlock(&xqc->lock);
300	return NOTIFY_DONE;
301
302out_abort:
303	xchk_iscan_abort(&xqc->iscan);
304	mutex_unlock(&xqc->lock);
305	return NOTIFY_DONE;
306}
307
308/*
309 * Apply the transaction quota deltas to our shadow quota accounting info when
310 * the regular quota code are doing the same.
311 */
312static int
313xqcheck_apply_live_dqtrx(
314	struct notifier_block		*nb,
315	unsigned long			action,
316	void				*data)
317{
318	struct xfs_apply_dqtrx_params	*p = data;
319	struct xqcheck			*xqc;
320	struct xqcheck_dqacct		*dqa;
321	struct xqcheck_dqtrx		*dqtrx;
322	struct xfarray			*counts;
323	int				error;
324
325	xqc = container_of(nb, struct xqcheck, qhook.apply_hook.nb);
326
327	/* Map the dquot type to an incore counter object. */
328	switch (p->q_type) {
329	case XFS_DQTYPE_USER:
330		counts = xqc->ucounts;
331		break;
332	case XFS_DQTYPE_GROUP:
333		counts = xqc->gcounts;
334		break;
335	case XFS_DQTYPE_PROJ:
336		counts = xqc->pcounts;
337		break;
338	default:
339		return NOTIFY_DONE;
340	}
341
342	if (xchk_iscan_aborted(&xqc->iscan) || counts == NULL)
343		return NOTIFY_DONE;
344
345	/*
346	 * Find the shadow dqtrx for this transaction and dquot, if any deltas
347	 * need to be applied here.  If not, we're finished early.
348	 */
349	mutex_lock(&xqc->lock);
350	dqa = rhashtable_lookup_fast(&xqc->shadow_dquot_acct, &p->tx_id,
351			xqcheck_dqacct_hash_params);
352	if (!dqa)
353		goto out_unlock;
354	dqtrx = xqcheck_get_dqtrx(dqa, p->q_type, p->q_id);
355	if (!dqtrx || dqtrx->q_type == 0)
356		goto out_unlock;
357
358	/* Update our shadow dquot if we're committing. */
359	if (action == XFS_APPLY_DQTRX_COMMIT) {
360		error = xqcheck_update_incore_counts(xqc, counts, p->q_id,
361				dqtrx->icount_delta,
362				dqtrx->bcount_delta + dqtrx->delbcnt_delta,
363				dqtrx->rtbcount_delta + dqtrx->delrtb_delta);
364		if (error)
365			goto out_abort;
366	}
367
368	/* Free the shadow accounting structure if that was the last user. */
369	dqa->refcount--;
370	if (dqa->refcount == 0) {
371		error = rhashtable_remove_fast(&xqc->shadow_dquot_acct,
372				&dqa->hash, xqcheck_dqacct_hash_params);
373		if (error)
374			goto out_abort;
375		xqcheck_dqacct_free(dqa, NULL);
376	}
377
378	mutex_unlock(&xqc->lock);
379	return NOTIFY_DONE;
380
381out_abort:
382	xchk_iscan_abort(&xqc->iscan);
383out_unlock:
384	mutex_unlock(&xqc->lock);
385	return NOTIFY_DONE;
386}
387
388/* Record this inode's quota usage in our shadow quota counter data. */
389STATIC int
390xqcheck_collect_inode(
391	struct xqcheck		*xqc,
392	struct xfs_inode	*ip)
393{
394	struct xfs_trans	*tp = xqc->sc->tp;
395	xfs_filblks_t		nblks, rtblks;
396	uint			ilock_flags = 0;
397	xfs_dqid_t		id;
398	bool			isreg = S_ISREG(VFS_I(ip)->i_mode);
399	int			error = 0;
400
401	if (xfs_is_metadir_inode(ip) ||
402	    xfs_is_quota_inode(&tp->t_mountp->m_sb, ip->i_ino)) {
403		/*
404		 * Quota files are never counted towards quota, so we do not
405		 * need to take the lock.  Files do not switch between the
406		 * metadata and regular directory trees without a reallocation,
407		 * so we do not need to ILOCK them either.
408		 */
409		xchk_iscan_mark_visited(&xqc->iscan, ip);
410		return 0;
411	}
412
413	/* Figure out the data / rt device block counts. */
414	xfs_ilock(ip, XFS_IOLOCK_SHARED);
415	if (isreg)
416		xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
417	if (XFS_IS_REALTIME_INODE(ip)) {
418		/*
419		 * Read in the data fork for rt files so that _count_blocks
420		 * can count the number of blocks allocated from the rt volume.
421		 * Inodes do not track that separately.
422		 */
423		ilock_flags = xfs_ilock_data_map_shared(ip);
424		error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
425		if (error)
426			goto out_abort;
427	} else {
428		ilock_flags = XFS_ILOCK_SHARED;
429		xfs_ilock(ip, XFS_ILOCK_SHARED);
430	}
431	xfs_inode_count_blocks(tp, ip, &nblks, &rtblks);
432
433	if (xchk_iscan_aborted(&xqc->iscan)) {
434		error = -ECANCELED;
435		goto out_incomplete;
436	}
437
438	/* Update the shadow dquot counters. */
439	mutex_lock(&xqc->lock);
440	if (xqc->ucounts) {
441		id = xfs_qm_id_for_quotatype(ip, XFS_DQTYPE_USER);
442		error = xqcheck_update_incore_counts(xqc, xqc->ucounts, id, 1,
443				nblks, rtblks);
444		if (error)
445			goto out_mutex;
446	}
447
448	if (xqc->gcounts) {
449		id = xfs_qm_id_for_quotatype(ip, XFS_DQTYPE_GROUP);
450		error = xqcheck_update_incore_counts(xqc, xqc->gcounts, id, 1,
451				nblks, rtblks);
452		if (error)
453			goto out_mutex;
454	}
455
456	if (xqc->pcounts) {
457		id = xfs_qm_id_for_quotatype(ip, XFS_DQTYPE_PROJ);
458		error = xqcheck_update_incore_counts(xqc, xqc->pcounts, id, 1,
459				nblks, rtblks);
460		if (error)
461			goto out_mutex;
462	}
463	mutex_unlock(&xqc->lock);
464
465	xchk_iscan_mark_visited(&xqc->iscan, ip);
466	goto out_ilock;
467
468out_mutex:
469	mutex_unlock(&xqc->lock);
470out_abort:
471	xchk_iscan_abort(&xqc->iscan);
472out_incomplete:
473	xchk_set_incomplete(xqc->sc);
474out_ilock:
475	xfs_iunlock(ip, ilock_flags);
476	if (isreg)
477		xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
478	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
479	return error;
480}
481
482/* Walk all the allocated inodes and run a quota scan on them. */
483STATIC int
484xqcheck_collect_counts(
485	struct xqcheck		*xqc)
486{
487	struct xfs_scrub	*sc = xqc->sc;
488	struct xfs_inode	*ip;
489	int			error;
490
491	/*
492	 * Set up for a potentially lengthy filesystem scan by reducing our
493	 * transaction resource usage for the duration.  Specifically:
494	 *
495	 * Cancel the transaction to release the log grant space while we scan
496	 * the filesystem.
497	 *
498	 * Create a new empty transaction to eliminate the possibility of the
499	 * inode scan deadlocking on cyclical metadata.
500	 *
501	 * We pass the empty transaction to the file scanning function to avoid
502	 * repeatedly cycling empty transactions.  This can be done without
503	 * risk of deadlock between sb_internal and the IOLOCK (we take the
504	 * IOLOCK to quiesce the file before scanning) because empty
505	 * transactions do not take sb_internal.
506	 */
507	xchk_trans_cancel(sc);
508	error = xchk_trans_alloc_empty(sc);
509	if (error)
510		return error;
511
512	while ((error = xchk_iscan_iter(&xqc->iscan, &ip)) == 1) {
513		error = xqcheck_collect_inode(xqc, ip);
514		xchk_irele(sc, ip);
515		if (error)
516			break;
517
518		if (xchk_should_terminate(sc, &error))
519			break;
520	}
521	xchk_iscan_iter_finish(&xqc->iscan);
522	if (error) {
523		xchk_set_incomplete(sc);
524		/*
525		 * If we couldn't grab an inode that was busy with a state
526		 * change, change the error code so that we exit to userspace
527		 * as quickly as possible.
528		 */
529		if (error == -EBUSY)
530			return -ECANCELED;
531		return error;
532	}
533
534	/*
535	 * Switch out for a real transaction in preparation for building a new
536	 * tree.
537	 */
538	xchk_trans_cancel(sc);
539	return xchk_setup_fs(sc);
540}
541
542/*
543 * Part 2: Comparing dquot resource counters.  Walk each xfs_dquot, comparing
544 * the resource usage counters against our shadow dquots; and then walk each
545 * shadow dquot (that wasn't covered in the first part), comparing it against
546 * the xfs_dquot.
547 */
548
549/*
550 * Check the dquot data against what we observed.  Caller must hold the dquot
551 * lock.
552 */
553STATIC int
554xqcheck_compare_dquot(
555	struct xqcheck		*xqc,
556	xfs_dqtype_t		dqtype,
557	struct xfs_dquot	*dq)
558{
559	struct xqcheck_dquot	xcdq;
560	struct xfarray		*counts = xqcheck_counters_for(xqc, dqtype);
561	int			error;
562
563	if (xchk_iscan_aborted(&xqc->iscan)) {
564		xchk_set_incomplete(xqc->sc);
565		return -ECANCELED;
566	}
567
568	mutex_lock(&xqc->lock);
569	error = xfarray_load_sparse(counts, dq->q_id, &xcdq);
570	if (error)
571		goto out_unlock;
572
573	if (xcdq.icount != dq->q_ino.count)
574		xchk_qcheck_set_corrupt(xqc->sc, dqtype, dq->q_id);
575
576	if (xcdq.bcount != dq->q_blk.count)
577		xchk_qcheck_set_corrupt(xqc->sc, dqtype, dq->q_id);
578
579	if (xcdq.rtbcount != dq->q_rtb.count)
580		xchk_qcheck_set_corrupt(xqc->sc, dqtype, dq->q_id);
581
582	xcdq.flags |= (XQCHECK_DQUOT_COMPARE_SCANNED | XQCHECK_DQUOT_WRITTEN);
583	error = xfarray_store(counts, dq->q_id, &xcdq);
584	if (error == -EFBIG) {
585		/*
586		 * EFBIG means we tried to store data at too high a byte offset
587		 * in the sparse array.  IOWs, we cannot complete the check and
588		 * must notify userspace that the check was incomplete.  This
589		 * should never happen outside of the collection phase.
590		 */
591		xchk_set_incomplete(xqc->sc);
592		error = -ECANCELED;
593	}
594	mutex_unlock(&xqc->lock);
595	if (error)
596		return error;
597
598	if (xqc->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
599		return -ECANCELED;
600
601	return 0;
602
603out_unlock:
604	mutex_unlock(&xqc->lock);
605	return error;
606}
607
608/*
609 * Walk all the observed dquots, and make sure there's a matching incore
610 * dquot and that its counts match ours.
611 */
612STATIC int
613xqcheck_walk_observations(
614	struct xqcheck		*xqc,
615	xfs_dqtype_t		dqtype)
616{
617	struct xqcheck_dquot	xcdq;
618	struct xfs_dquot	*dq;
619	struct xfarray		*counts = xqcheck_counters_for(xqc, dqtype);
620	xfarray_idx_t		cur = XFARRAY_CURSOR_INIT;
621	int			error;
622
623	mutex_lock(&xqc->lock);
624	while ((error = xfarray_iter(counts, &cur, &xcdq)) == 1) {
625		xfs_dqid_t	id = cur - 1;
626
627		if (xcdq.flags & XQCHECK_DQUOT_COMPARE_SCANNED)
628			continue;
629
630		mutex_unlock(&xqc->lock);
631
632		error = xfs_qm_dqget(xqc->sc->mp, id, dqtype, false, &dq);
633		if (error == -ENOENT) {
634			xchk_qcheck_set_corrupt(xqc->sc, dqtype, id);
635			return 0;
636		}
637		if (error)
638			return error;
639
640		error = xqcheck_compare_dquot(xqc, dqtype, dq);
641		xfs_qm_dqput(dq);
642		if (error)
643			return error;
644
645		if (xchk_should_terminate(xqc->sc, &error))
646			return error;
647
648		mutex_lock(&xqc->lock);
649	}
650	mutex_unlock(&xqc->lock);
651
652	return error;
653}
654
655/* Compare the quota counters we observed against the live dquots. */
656STATIC int
657xqcheck_compare_dqtype(
658	struct xqcheck		*xqc,
659	xfs_dqtype_t		dqtype)
660{
661	struct xchk_dqiter	cursor = { };
662	struct xfs_scrub	*sc = xqc->sc;
663	struct xfs_dquot	*dq;
664	int			error;
665
666	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
667		return 0;
668
669	/* If the quota CHKD flag is cleared, we need to repair this quota. */
670	if (!(xfs_quota_chkd_flag(dqtype) & sc->mp->m_qflags)) {
671		xchk_qcheck_set_corrupt(xqc->sc, dqtype, 0);
672		return 0;
673	}
674
675	/* Compare what we observed against the actual dquots. */
676	xchk_dqiter_init(&cursor, sc, dqtype);
677	while ((error = xchk_dquot_iter(&cursor, &dq)) == 1) {
678		error = xqcheck_compare_dquot(xqc, dqtype, dq);
679		xfs_qm_dqput(dq);
680		if (error)
681			break;
682	}
683	if (error)
684		return error;
685
686	/* Walk all the observed dquots and compare to the incore ones. */
687	return xqcheck_walk_observations(xqc, dqtype);
688}
689
690/* Tear down everything associated with a quotacheck. */
691static void
692xqcheck_teardown_scan(
693	void			*priv)
694{
695	struct xqcheck		*xqc = priv;
696	struct xfs_quotainfo	*qi = xqc->sc->mp->m_quotainfo;
697
698	/* Discourage any hook functions that might be running. */
699	xchk_iscan_abort(&xqc->iscan);
700
701	/*
702	 * As noted above, the apply hook is responsible for cleaning up the
703	 * shadow dquot accounting data when a transaction completes.  The mod
704	 * hook must be removed before the apply hook so that we don't
705	 * mistakenly leave an active shadow account for the mod hook to get
706	 * its hands on.  No hooks should be running after these functions
707	 * return.
708	 */
709	xfs_dqtrx_hook_del(qi, &xqc->qhook);
710
711	if (xqc->shadow_dquot_acct.key_len) {
712		rhashtable_free_and_destroy(&xqc->shadow_dquot_acct,
713				xqcheck_dqacct_free, NULL);
714		xqc->shadow_dquot_acct.key_len = 0;
715	}
716
717	if (xqc->pcounts) {
718		xfarray_destroy(xqc->pcounts);
719		xqc->pcounts = NULL;
720	}
721
722	if (xqc->gcounts) {
723		xfarray_destroy(xqc->gcounts);
724		xqc->gcounts = NULL;
725	}
726
727	if (xqc->ucounts) {
728		xfarray_destroy(xqc->ucounts);
729		xqc->ucounts = NULL;
730	}
731
732	xchk_iscan_teardown(&xqc->iscan);
733	mutex_destroy(&xqc->lock);
734	xqc->sc = NULL;
735}
736
737/*
738 * Scan all inodes in the entire filesystem to generate quota counter data.
739 * If the scan is successful, the quota data will be left alive for a repair.
740 * If any error occurs, we'll tear everything down.
741 */
742STATIC int
743xqcheck_setup_scan(
744	struct xfs_scrub	*sc,
745	struct xqcheck		*xqc)
746{
747	char			*descr;
748	struct xfs_quotainfo	*qi = sc->mp->m_quotainfo;
749	unsigned long long	max_dquots = XFS_DQ_ID_MAX + 1ULL;
750	int			error;
751
752	ASSERT(xqc->sc == NULL);
753	xqc->sc = sc;
754
755	mutex_init(&xqc->lock);
756
757	/* Retry iget every tenth of a second for up to 30 seconds. */
758	xchk_iscan_start(sc, 30000, 100, &xqc->iscan);
759
760	error = -ENOMEM;
761	if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_USER)) {
762		descr = xchk_xfile_descr(sc, "user dquot records");
763		error = xfarray_create(descr, max_dquots,
764				sizeof(struct xqcheck_dquot), &xqc->ucounts);
765		kfree(descr);
766		if (error)
767			goto out_teardown;
768	}
769
770	if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_GROUP)) {
771		descr = xchk_xfile_descr(sc, "group dquot records");
772		error = xfarray_create(descr, max_dquots,
773				sizeof(struct xqcheck_dquot), &xqc->gcounts);
774		kfree(descr);
775		if (error)
776			goto out_teardown;
777	}
778
779	if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_PROJ)) {
780		descr = xchk_xfile_descr(sc, "project dquot records");
781		error = xfarray_create(descr, max_dquots,
782				sizeof(struct xqcheck_dquot), &xqc->pcounts);
783		kfree(descr);
784		if (error)
785			goto out_teardown;
786	}
787
788	/*
789	 * Set up hash table to map transactions to our internal shadow dqtrx
790	 * structures.
791	 */
792	error = rhashtable_init(&xqc->shadow_dquot_acct,
793			&xqcheck_dqacct_hash_params);
794	if (error)
795		goto out_teardown;
796
797	/*
798	 * Hook into the quota code.  The hook only triggers for inodes that
799	 * were already scanned, and the scanner thread takes each inode's
800	 * ILOCK, which means that any in-progress inode updates will finish
801	 * before we can scan the inode.
802	 *
803	 * The apply hook (which removes the shadow dquot accounting struct)
804	 * must be installed before the mod hook so that we never fail to catch
805	 * the end of a quota update sequence and leave stale shadow data.
806	 */
807	ASSERT(sc->flags & XCHK_FSGATES_QUOTA);
808	xfs_dqtrx_hook_setup(&xqc->qhook, xqcheck_mod_live_ino_dqtrx,
809			xqcheck_apply_live_dqtrx);
810
811	error = xfs_dqtrx_hook_add(qi, &xqc->qhook);
812	if (error)
813		goto out_teardown;
814
815	/* Use deferred cleanup to pass the quota count data to repair. */
816	sc->buf_cleanup = xqcheck_teardown_scan;
817	return 0;
818
819out_teardown:
820	xqcheck_teardown_scan(xqc);
821	return error;
822}
823
824/* Scrub all counters for a given quota type. */
825int
826xchk_quotacheck(
827	struct xfs_scrub	*sc)
828{
829	struct xqcheck		*xqc = sc->buf;
830	int			error = 0;
831
832	/* Check quota counters on the live filesystem. */
833	error = xqcheck_setup_scan(sc, xqc);
834	if (error)
835		return error;
836
837	/* Walk all inodes, picking up quota information. */
838	error = xqcheck_collect_counts(xqc);
839	if (!xchk_xref_process_error(sc, 0, 0, &error))
840		return error;
841
842	/* Fail fast if we're not playing with a full dataset. */
843	if (xchk_iscan_aborted(&xqc->iscan))
844		xchk_set_incomplete(sc);
845	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
846		return 0;
847
848	/* Compare quota counters. */
849	if (xqc->ucounts) {
850		error = xqcheck_compare_dqtype(xqc, XFS_DQTYPE_USER);
851		if (!xchk_xref_process_error(sc, 0, 0, &error))
852			return error;
853	}
854	if (xqc->gcounts) {
855		error = xqcheck_compare_dqtype(xqc, XFS_DQTYPE_GROUP);
856		if (!xchk_xref_process_error(sc, 0, 0, &error))
857			return error;
858	}
859	if (xqc->pcounts) {
860		error = xqcheck_compare_dqtype(xqc, XFS_DQTYPE_PROJ);
861		if (!xchk_xref_process_error(sc, 0, 0, &error))
862			return error;
863	}
864
865	/* Check one last time for an incomplete dataset. */
866	if (xchk_iscan_aborted(&xqc->iscan))
867		xchk_set_incomplete(sc);
868
869	return 0;
870}