Loading...
1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Copyright (C) 2017 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_trans_resv.h"
11#include "xfs_mount.h"
12#include "xfs_btree.h"
13#include "xfs_log_format.h"
14#include "xfs_inode.h"
15#include "xfs_ialloc.h"
16#include "xfs_da_format.h"
17#include "xfs_reflink.h"
18#include "xfs_rmap.h"
19#include "xfs_bmap_util.h"
20#include "scrub/scrub.h"
21#include "scrub/common.h"
22#include "scrub/btree.h"
23
24/*
25 * Grab total control of the inode metadata. It doesn't matter here if
26 * the file data is still changing; exclusive access to the metadata is
27 * the goal.
28 */
29int
30xchk_setup_inode(
31 struct xfs_scrub *sc,
32 struct xfs_inode *ip)
33{
34 int error;
35
36 /*
37 * Try to get the inode. If the verifiers fail, we try again
38 * in raw mode.
39 */
40 error = xchk_get_inode(sc, ip);
41 switch (error) {
42 case 0:
43 break;
44 case -EFSCORRUPTED:
45 case -EFSBADCRC:
46 return xchk_trans_alloc(sc, 0);
47 default:
48 return error;
49 }
50
51 /* Got the inode, lock it and we're ready to go. */
52 sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
53 xfs_ilock(sc->ip, sc->ilock_flags);
54 error = xchk_trans_alloc(sc, 0);
55 if (error)
56 goto out;
57 sc->ilock_flags |= XFS_ILOCK_EXCL;
58 xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
59
60out:
61 /* scrub teardown will unlock and release the inode for us */
62 return error;
63}
64
65/* Inode core */
66
67/* Validate di_extsize hint. */
68STATIC void
69xchk_inode_extsize(
70 struct xfs_scrub *sc,
71 struct xfs_dinode *dip,
72 xfs_ino_t ino,
73 uint16_t mode,
74 uint16_t flags)
75{
76 xfs_failaddr_t fa;
77
78 fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize),
79 mode, flags);
80 if (fa)
81 xchk_ino_set_corrupt(sc, ino);
82}
83
84/*
85 * Validate di_cowextsize hint.
86 *
87 * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
88 * These functions must be kept in sync with each other.
89 */
90STATIC void
91xchk_inode_cowextsize(
92 struct xfs_scrub *sc,
93 struct xfs_dinode *dip,
94 xfs_ino_t ino,
95 uint16_t mode,
96 uint16_t flags,
97 uint64_t flags2)
98{
99 xfs_failaddr_t fa;
100
101 fa = xfs_inode_validate_cowextsize(sc->mp,
102 be32_to_cpu(dip->di_cowextsize), mode, flags,
103 flags2);
104 if (fa)
105 xchk_ino_set_corrupt(sc, ino);
106}
107
108/* Make sure the di_flags make sense for the inode. */
109STATIC void
110xchk_inode_flags(
111 struct xfs_scrub *sc,
112 struct xfs_dinode *dip,
113 xfs_ino_t ino,
114 uint16_t mode,
115 uint16_t flags)
116{
117 struct xfs_mount *mp = sc->mp;
118
119 /* di_flags are all taken, last bit cannot be used */
120 if (flags & ~XFS_DIFLAG_ANY)
121 goto bad;
122
123 /* rt flags require rt device */
124 if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) &&
125 !mp->m_rtdev_targp)
126 goto bad;
127
128 /* new rt bitmap flag only valid for rbmino */
129 if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino)
130 goto bad;
131
132 /* directory-only flags */
133 if ((flags & (XFS_DIFLAG_RTINHERIT |
134 XFS_DIFLAG_EXTSZINHERIT |
135 XFS_DIFLAG_PROJINHERIT |
136 XFS_DIFLAG_NOSYMLINKS)) &&
137 !S_ISDIR(mode))
138 goto bad;
139
140 /* file-only flags */
141 if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) &&
142 !S_ISREG(mode))
143 goto bad;
144
145 /* filestreams and rt make no sense */
146 if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME))
147 goto bad;
148
149 return;
150bad:
151 xchk_ino_set_corrupt(sc, ino);
152}
153
154/* Make sure the di_flags2 make sense for the inode. */
155STATIC void
156xchk_inode_flags2(
157 struct xfs_scrub *sc,
158 struct xfs_dinode *dip,
159 xfs_ino_t ino,
160 uint16_t mode,
161 uint16_t flags,
162 uint64_t flags2)
163{
164 struct xfs_mount *mp = sc->mp;
165
166 /* Unknown di_flags2 could be from a future kernel */
167 if (flags2 & ~XFS_DIFLAG2_ANY)
168 xchk_ino_set_warning(sc, ino);
169
170 /* reflink flag requires reflink feature */
171 if ((flags2 & XFS_DIFLAG2_REFLINK) &&
172 !xfs_sb_version_hasreflink(&mp->m_sb))
173 goto bad;
174
175 /* cowextsize flag is checked w.r.t. mode separately */
176
177 /* file/dir-only flags */
178 if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode)))
179 goto bad;
180
181 /* file-only flags */
182 if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode))
183 goto bad;
184
185 /* realtime and reflink make no sense, currently */
186 if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK))
187 goto bad;
188
189 /* dax and reflink make no sense, currently */
190 if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK))
191 goto bad;
192
193 return;
194bad:
195 xchk_ino_set_corrupt(sc, ino);
196}
197
198/* Scrub all the ondisk inode fields. */
199STATIC void
200xchk_dinode(
201 struct xfs_scrub *sc,
202 struct xfs_dinode *dip,
203 xfs_ino_t ino)
204{
205 struct xfs_mount *mp = sc->mp;
206 size_t fork_recs;
207 unsigned long long isize;
208 uint64_t flags2;
209 uint32_t nextents;
210 uint16_t flags;
211 uint16_t mode;
212
213 flags = be16_to_cpu(dip->di_flags);
214 if (dip->di_version >= 3)
215 flags2 = be64_to_cpu(dip->di_flags2);
216 else
217 flags2 = 0;
218
219 /* di_mode */
220 mode = be16_to_cpu(dip->di_mode);
221 switch (mode & S_IFMT) {
222 case S_IFLNK:
223 case S_IFREG:
224 case S_IFDIR:
225 case S_IFCHR:
226 case S_IFBLK:
227 case S_IFIFO:
228 case S_IFSOCK:
229 /* mode is recognized */
230 break;
231 default:
232 xchk_ino_set_corrupt(sc, ino);
233 break;
234 }
235
236 /* v1/v2 fields */
237 switch (dip->di_version) {
238 case 1:
239 /*
240 * We autoconvert v1 inodes into v2 inodes on writeout,
241 * so just mark this inode for preening.
242 */
243 xchk_ino_set_preen(sc, ino);
244 break;
245 case 2:
246 case 3:
247 if (dip->di_onlink != 0)
248 xchk_ino_set_corrupt(sc, ino);
249
250 if (dip->di_mode == 0 && sc->ip)
251 xchk_ino_set_corrupt(sc, ino);
252
253 if (dip->di_projid_hi != 0 &&
254 !xfs_sb_version_hasprojid32bit(&mp->m_sb))
255 xchk_ino_set_corrupt(sc, ino);
256 break;
257 default:
258 xchk_ino_set_corrupt(sc, ino);
259 return;
260 }
261
262 /*
263 * di_uid/di_gid -- -1 isn't invalid, but there's no way that
264 * userspace could have created that.
265 */
266 if (dip->di_uid == cpu_to_be32(-1U) ||
267 dip->di_gid == cpu_to_be32(-1U))
268 xchk_ino_set_warning(sc, ino);
269
270 /* di_format */
271 switch (dip->di_format) {
272 case XFS_DINODE_FMT_DEV:
273 if (!S_ISCHR(mode) && !S_ISBLK(mode) &&
274 !S_ISFIFO(mode) && !S_ISSOCK(mode))
275 xchk_ino_set_corrupt(sc, ino);
276 break;
277 case XFS_DINODE_FMT_LOCAL:
278 if (!S_ISDIR(mode) && !S_ISLNK(mode))
279 xchk_ino_set_corrupt(sc, ino);
280 break;
281 case XFS_DINODE_FMT_EXTENTS:
282 if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode))
283 xchk_ino_set_corrupt(sc, ino);
284 break;
285 case XFS_DINODE_FMT_BTREE:
286 if (!S_ISREG(mode) && !S_ISDIR(mode))
287 xchk_ino_set_corrupt(sc, ino);
288 break;
289 case XFS_DINODE_FMT_UUID:
290 default:
291 xchk_ino_set_corrupt(sc, ino);
292 break;
293 }
294
295 /* di_[amc]time.nsec */
296 if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC)
297 xchk_ino_set_corrupt(sc, ino);
298 if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC)
299 xchk_ino_set_corrupt(sc, ino);
300 if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC)
301 xchk_ino_set_corrupt(sc, ino);
302
303 /*
304 * di_size. xfs_dinode_verify checks for things that screw up
305 * the VFS such as the upper bit being set and zero-length
306 * symlinks/directories, but we can do more here.
307 */
308 isize = be64_to_cpu(dip->di_size);
309 if (isize & (1ULL << 63))
310 xchk_ino_set_corrupt(sc, ino);
311
312 /* Devices, fifos, and sockets must have zero size */
313 if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0)
314 xchk_ino_set_corrupt(sc, ino);
315
316 /* Directories can't be larger than the data section size (32G) */
317 if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE))
318 xchk_ino_set_corrupt(sc, ino);
319
320 /* Symlinks can't be larger than SYMLINK_MAXLEN */
321 if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN))
322 xchk_ino_set_corrupt(sc, ino);
323
324 /*
325 * Warn if the running kernel can't handle the kinds of offsets
326 * needed to deal with the file size. In other words, if the
327 * pagecache can't cache all the blocks in this file due to
328 * overly large offsets, flag the inode for admin review.
329 */
330 if (isize >= mp->m_super->s_maxbytes)
331 xchk_ino_set_warning(sc, ino);
332
333 /* di_nblocks */
334 if (flags2 & XFS_DIFLAG2_REFLINK) {
335 ; /* nblocks can exceed dblocks */
336 } else if (flags & XFS_DIFLAG_REALTIME) {
337 /*
338 * nblocks is the sum of data extents (in the rtdev),
339 * attr extents (in the datadev), and both forks' bmbt
340 * blocks (in the datadev). This clumsy check is the
341 * best we can do without cross-referencing with the
342 * inode forks.
343 */
344 if (be64_to_cpu(dip->di_nblocks) >=
345 mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks)
346 xchk_ino_set_corrupt(sc, ino);
347 } else {
348 if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks)
349 xchk_ino_set_corrupt(sc, ino);
350 }
351
352 xchk_inode_flags(sc, dip, ino, mode, flags);
353
354 xchk_inode_extsize(sc, dip, ino, mode, flags);
355
356 /* di_nextents */
357 nextents = be32_to_cpu(dip->di_nextents);
358 fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec);
359 switch (dip->di_format) {
360 case XFS_DINODE_FMT_EXTENTS:
361 if (nextents > fork_recs)
362 xchk_ino_set_corrupt(sc, ino);
363 break;
364 case XFS_DINODE_FMT_BTREE:
365 if (nextents <= fork_recs)
366 xchk_ino_set_corrupt(sc, ino);
367 break;
368 default:
369 if (nextents != 0)
370 xchk_ino_set_corrupt(sc, ino);
371 break;
372 }
373
374 /* di_forkoff */
375 if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize)
376 xchk_ino_set_corrupt(sc, ino);
377 if (dip->di_anextents != 0 && dip->di_forkoff == 0)
378 xchk_ino_set_corrupt(sc, ino);
379 if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS)
380 xchk_ino_set_corrupt(sc, ino);
381
382 /* di_aformat */
383 if (dip->di_aformat != XFS_DINODE_FMT_LOCAL &&
384 dip->di_aformat != XFS_DINODE_FMT_EXTENTS &&
385 dip->di_aformat != XFS_DINODE_FMT_BTREE)
386 xchk_ino_set_corrupt(sc, ino);
387
388 /* di_anextents */
389 nextents = be16_to_cpu(dip->di_anextents);
390 fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec);
391 switch (dip->di_aformat) {
392 case XFS_DINODE_FMT_EXTENTS:
393 if (nextents > fork_recs)
394 xchk_ino_set_corrupt(sc, ino);
395 break;
396 case XFS_DINODE_FMT_BTREE:
397 if (nextents <= fork_recs)
398 xchk_ino_set_corrupt(sc, ino);
399 break;
400 default:
401 if (nextents != 0)
402 xchk_ino_set_corrupt(sc, ino);
403 }
404
405 if (dip->di_version >= 3) {
406 if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC)
407 xchk_ino_set_corrupt(sc, ino);
408 xchk_inode_flags2(sc, dip, ino, mode, flags, flags2);
409 xchk_inode_cowextsize(sc, dip, ino, mode, flags,
410 flags2);
411 }
412}
413
414/*
415 * Make sure the finobt doesn't think this inode is free.
416 * We don't have to check the inobt ourselves because we got the inode via
417 * IGET_UNTRUSTED, which checks the inobt for us.
418 */
419static void
420xchk_inode_xref_finobt(
421 struct xfs_scrub *sc,
422 xfs_ino_t ino)
423{
424 struct xfs_inobt_rec_incore rec;
425 xfs_agino_t agino;
426 int has_record;
427 int error;
428
429 if (!sc->sa.fino_cur || xchk_skip_xref(sc->sm))
430 return;
431
432 agino = XFS_INO_TO_AGINO(sc->mp, ino);
433
434 /*
435 * Try to get the finobt record. If we can't get it, then we're
436 * in good shape.
437 */
438 error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE,
439 &has_record);
440 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
441 !has_record)
442 return;
443
444 error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record);
445 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
446 !has_record)
447 return;
448
449 /*
450 * Otherwise, make sure this record either doesn't cover this inode,
451 * or that it does but it's marked present.
452 */
453 if (rec.ir_startino > agino ||
454 rec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
455 return;
456
457 if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))
458 xchk_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0);
459}
460
461/* Cross reference the inode fields with the forks. */
462STATIC void
463xchk_inode_xref_bmap(
464 struct xfs_scrub *sc,
465 struct xfs_dinode *dip)
466{
467 xfs_extnum_t nextents;
468 xfs_filblks_t count;
469 xfs_filblks_t acount;
470 int error;
471
472 if (xchk_skip_xref(sc->sm))
473 return;
474
475 /* Walk all the extents to check nextents/naextents/nblocks. */
476 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
477 &nextents, &count);
478 if (!xchk_should_check_xref(sc, &error, NULL))
479 return;
480 if (nextents < be32_to_cpu(dip->di_nextents))
481 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino);
482
483 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
484 &nextents, &acount);
485 if (!xchk_should_check_xref(sc, &error, NULL))
486 return;
487 if (nextents != be16_to_cpu(dip->di_anextents))
488 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino);
489
490 /* Check nblocks against the inode. */
491 if (count + acount != be64_to_cpu(dip->di_nblocks))
492 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino);
493}
494
495/* Cross-reference with the other btrees. */
496STATIC void
497xchk_inode_xref(
498 struct xfs_scrub *sc,
499 xfs_ino_t ino,
500 struct xfs_dinode *dip)
501{
502 xfs_agnumber_t agno;
503 xfs_agblock_t agbno;
504 int error;
505
506 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
507 return;
508
509 agno = XFS_INO_TO_AGNO(sc->mp, ino);
510 agbno = XFS_INO_TO_AGBNO(sc->mp, ino);
511
512 error = xchk_ag_init(sc, agno, &sc->sa);
513 if (!xchk_xref_process_error(sc, agno, agbno, &error))
514 return;
515
516 xchk_xref_is_used_space(sc, agbno, 1);
517 xchk_inode_xref_finobt(sc, ino);
518 xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_INODES);
519 xchk_xref_is_not_shared(sc, agbno, 1);
520 xchk_inode_xref_bmap(sc, dip);
521
522 xchk_ag_free(sc, &sc->sa);
523}
524
525/*
526 * If the reflink iflag disagrees with a scan for shared data fork extents,
527 * either flag an error (shared extents w/ no flag) or a preen (flag set w/o
528 * any shared extents). We already checked for reflink iflag set on a non
529 * reflink filesystem.
530 */
531static void
532xchk_inode_check_reflink_iflag(
533 struct xfs_scrub *sc,
534 xfs_ino_t ino)
535{
536 struct xfs_mount *mp = sc->mp;
537 bool has_shared;
538 int error;
539
540 if (!xfs_sb_version_hasreflink(&mp->m_sb))
541 return;
542
543 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
544 &has_shared);
545 if (!xchk_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
546 XFS_INO_TO_AGBNO(mp, ino), &error))
547 return;
548 if (xfs_is_reflink_inode(sc->ip) && !has_shared)
549 xchk_ino_set_preen(sc, ino);
550 else if (!xfs_is_reflink_inode(sc->ip) && has_shared)
551 xchk_ino_set_corrupt(sc, ino);
552}
553
554/* Scrub an inode. */
555int
556xchk_inode(
557 struct xfs_scrub *sc)
558{
559 struct xfs_dinode di;
560 int error = 0;
561
562 /*
563 * If sc->ip is NULL, that means that the setup function called
564 * xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED
565 * and a NULL inode, so flag the corruption error and return.
566 */
567 if (!sc->ip) {
568 xchk_ino_set_corrupt(sc, sc->sm->sm_ino);
569 return 0;
570 }
571
572 /* Scrub the inode core. */
573 xfs_inode_to_disk(sc->ip, &di, 0);
574 xchk_dinode(sc, &di, sc->ip->i_ino);
575 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
576 goto out;
577
578 /*
579 * Look for discrepancies between file's data blocks and the reflink
580 * iflag. We already checked the iflag against the file mode when
581 * we scrubbed the dinode.
582 */
583 if (S_ISREG(VFS_I(sc->ip)->i_mode))
584 xchk_inode_check_reflink_iflag(sc, sc->ip->i_ino);
585
586 xchk_inode_xref(sc, sc->ip->i_ino, &di);
587out:
588 return error;
589}
1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Copyright (C) 2017 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_trans_resv.h"
11#include "xfs_mount.h"
12#include "xfs_btree.h"
13#include "xfs_log_format.h"
14#include "xfs_inode.h"
15#include "xfs_ialloc.h"
16#include "xfs_da_format.h"
17#include "xfs_reflink.h"
18#include "xfs_rmap.h"
19#include "xfs_bmap_util.h"
20#include "scrub/scrub.h"
21#include "scrub/common.h"
22#include "scrub/btree.h"
23
24/*
25 * Grab total control of the inode metadata. It doesn't matter here if
26 * the file data is still changing; exclusive access to the metadata is
27 * the goal.
28 */
29int
30xchk_setup_inode(
31 struct xfs_scrub *sc)
32{
33 int error;
34
35 /*
36 * Try to get the inode. If the verifiers fail, we try again
37 * in raw mode.
38 */
39 error = xchk_get_inode(sc);
40 switch (error) {
41 case 0:
42 break;
43 case -EFSCORRUPTED:
44 case -EFSBADCRC:
45 return xchk_trans_alloc(sc, 0);
46 default:
47 return error;
48 }
49
50 /* Got the inode, lock it and we're ready to go. */
51 sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
52 xfs_ilock(sc->ip, sc->ilock_flags);
53 error = xchk_trans_alloc(sc, 0);
54 if (error)
55 goto out;
56 sc->ilock_flags |= XFS_ILOCK_EXCL;
57 xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
58
59out:
60 /* scrub teardown will unlock and release the inode for us */
61 return error;
62}
63
64/* Inode core */
65
66/* Validate di_extsize hint. */
67STATIC void
68xchk_inode_extsize(
69 struct xfs_scrub *sc,
70 struct xfs_dinode *dip,
71 xfs_ino_t ino,
72 uint16_t mode,
73 uint16_t flags)
74{
75 xfs_failaddr_t fa;
76 uint32_t value = be32_to_cpu(dip->di_extsize);
77
78 fa = xfs_inode_validate_extsize(sc->mp, value, mode, flags);
79 if (fa)
80 xchk_ino_set_corrupt(sc, ino);
81
82 /*
83 * XFS allows a sysadmin to change the rt extent size when adding a rt
84 * section to a filesystem after formatting. If there are any
85 * directories with extszinherit and rtinherit set, the hint could
86 * become misaligned with the new rextsize. The verifier doesn't check
87 * this, because we allow rtinherit directories even without an rt
88 * device. Flag this as an administrative warning since we will clean
89 * this up eventually.
90 */
91 if ((flags & XFS_DIFLAG_RTINHERIT) &&
92 (flags & XFS_DIFLAG_EXTSZINHERIT) &&
93 value % sc->mp->m_sb.sb_rextsize > 0)
94 xchk_ino_set_warning(sc, ino);
95}
96
97/*
98 * Validate di_cowextsize hint.
99 *
100 * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
101 * These functions must be kept in sync with each other.
102 */
103STATIC void
104xchk_inode_cowextsize(
105 struct xfs_scrub *sc,
106 struct xfs_dinode *dip,
107 xfs_ino_t ino,
108 uint16_t mode,
109 uint16_t flags,
110 uint64_t flags2)
111{
112 xfs_failaddr_t fa;
113
114 fa = xfs_inode_validate_cowextsize(sc->mp,
115 be32_to_cpu(dip->di_cowextsize), mode, flags,
116 flags2);
117 if (fa)
118 xchk_ino_set_corrupt(sc, ino);
119}
120
121/* Make sure the di_flags make sense for the inode. */
122STATIC void
123xchk_inode_flags(
124 struct xfs_scrub *sc,
125 struct xfs_dinode *dip,
126 xfs_ino_t ino,
127 uint16_t mode,
128 uint16_t flags)
129{
130 struct xfs_mount *mp = sc->mp;
131
132 /* di_flags are all taken, last bit cannot be used */
133 if (flags & ~XFS_DIFLAG_ANY)
134 goto bad;
135
136 /* rt flags require rt device */
137 if ((flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
138 goto bad;
139
140 /* new rt bitmap flag only valid for rbmino */
141 if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino)
142 goto bad;
143
144 /* directory-only flags */
145 if ((flags & (XFS_DIFLAG_RTINHERIT |
146 XFS_DIFLAG_EXTSZINHERIT |
147 XFS_DIFLAG_PROJINHERIT |
148 XFS_DIFLAG_NOSYMLINKS)) &&
149 !S_ISDIR(mode))
150 goto bad;
151
152 /* file-only flags */
153 if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) &&
154 !S_ISREG(mode))
155 goto bad;
156
157 /* filestreams and rt make no sense */
158 if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME))
159 goto bad;
160
161 return;
162bad:
163 xchk_ino_set_corrupt(sc, ino);
164}
165
166/* Make sure the di_flags2 make sense for the inode. */
167STATIC void
168xchk_inode_flags2(
169 struct xfs_scrub *sc,
170 struct xfs_dinode *dip,
171 xfs_ino_t ino,
172 uint16_t mode,
173 uint16_t flags,
174 uint64_t flags2)
175{
176 struct xfs_mount *mp = sc->mp;
177
178 /* Unknown di_flags2 could be from a future kernel */
179 if (flags2 & ~XFS_DIFLAG2_ANY)
180 xchk_ino_set_warning(sc, ino);
181
182 /* reflink flag requires reflink feature */
183 if ((flags2 & XFS_DIFLAG2_REFLINK) &&
184 !xfs_sb_version_hasreflink(&mp->m_sb))
185 goto bad;
186
187 /* cowextsize flag is checked w.r.t. mode separately */
188
189 /* file/dir-only flags */
190 if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode)))
191 goto bad;
192
193 /* file-only flags */
194 if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode))
195 goto bad;
196
197 /* realtime and reflink make no sense, currently */
198 if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK))
199 goto bad;
200
201 /* no bigtime iflag without the bigtime feature */
202 if (xfs_dinode_has_bigtime(dip) &&
203 !xfs_sb_version_hasbigtime(&mp->m_sb))
204 goto bad;
205
206 return;
207bad:
208 xchk_ino_set_corrupt(sc, ino);
209}
210
211static inline void
212xchk_dinode_nsec(
213 struct xfs_scrub *sc,
214 xfs_ino_t ino,
215 struct xfs_dinode *dip,
216 const xfs_timestamp_t ts)
217{
218 struct timespec64 tv;
219
220 tv = xfs_inode_from_disk_ts(dip, ts);
221 if (tv.tv_nsec < 0 || tv.tv_nsec >= NSEC_PER_SEC)
222 xchk_ino_set_corrupt(sc, ino);
223}
224
225/* Scrub all the ondisk inode fields. */
226STATIC void
227xchk_dinode(
228 struct xfs_scrub *sc,
229 struct xfs_dinode *dip,
230 xfs_ino_t ino)
231{
232 struct xfs_mount *mp = sc->mp;
233 size_t fork_recs;
234 unsigned long long isize;
235 uint64_t flags2;
236 uint32_t nextents;
237 uint16_t flags;
238 uint16_t mode;
239
240 flags = be16_to_cpu(dip->di_flags);
241 if (dip->di_version >= 3)
242 flags2 = be64_to_cpu(dip->di_flags2);
243 else
244 flags2 = 0;
245
246 /* di_mode */
247 mode = be16_to_cpu(dip->di_mode);
248 switch (mode & S_IFMT) {
249 case S_IFLNK:
250 case S_IFREG:
251 case S_IFDIR:
252 case S_IFCHR:
253 case S_IFBLK:
254 case S_IFIFO:
255 case S_IFSOCK:
256 /* mode is recognized */
257 break;
258 default:
259 xchk_ino_set_corrupt(sc, ino);
260 break;
261 }
262
263 /* v1/v2 fields */
264 switch (dip->di_version) {
265 case 1:
266 /*
267 * We autoconvert v1 inodes into v2 inodes on writeout,
268 * so just mark this inode for preening.
269 */
270 xchk_ino_set_preen(sc, ino);
271 break;
272 case 2:
273 case 3:
274 if (dip->di_onlink != 0)
275 xchk_ino_set_corrupt(sc, ino);
276
277 if (dip->di_mode == 0 && sc->ip)
278 xchk_ino_set_corrupt(sc, ino);
279
280 if (dip->di_projid_hi != 0 &&
281 !xfs_sb_version_hasprojid32bit(&mp->m_sb))
282 xchk_ino_set_corrupt(sc, ino);
283 break;
284 default:
285 xchk_ino_set_corrupt(sc, ino);
286 return;
287 }
288
289 /*
290 * di_uid/di_gid -- -1 isn't invalid, but there's no way that
291 * userspace could have created that.
292 */
293 if (dip->di_uid == cpu_to_be32(-1U) ||
294 dip->di_gid == cpu_to_be32(-1U))
295 xchk_ino_set_warning(sc, ino);
296
297 /* di_format */
298 switch (dip->di_format) {
299 case XFS_DINODE_FMT_DEV:
300 if (!S_ISCHR(mode) && !S_ISBLK(mode) &&
301 !S_ISFIFO(mode) && !S_ISSOCK(mode))
302 xchk_ino_set_corrupt(sc, ino);
303 break;
304 case XFS_DINODE_FMT_LOCAL:
305 if (!S_ISDIR(mode) && !S_ISLNK(mode))
306 xchk_ino_set_corrupt(sc, ino);
307 break;
308 case XFS_DINODE_FMT_EXTENTS:
309 if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode))
310 xchk_ino_set_corrupt(sc, ino);
311 break;
312 case XFS_DINODE_FMT_BTREE:
313 if (!S_ISREG(mode) && !S_ISDIR(mode))
314 xchk_ino_set_corrupt(sc, ino);
315 break;
316 case XFS_DINODE_FMT_UUID:
317 default:
318 xchk_ino_set_corrupt(sc, ino);
319 break;
320 }
321
322 /* di_[amc]time.nsec */
323 xchk_dinode_nsec(sc, ino, dip, dip->di_atime);
324 xchk_dinode_nsec(sc, ino, dip, dip->di_mtime);
325 xchk_dinode_nsec(sc, ino, dip, dip->di_ctime);
326
327 /*
328 * di_size. xfs_dinode_verify checks for things that screw up
329 * the VFS such as the upper bit being set and zero-length
330 * symlinks/directories, but we can do more here.
331 */
332 isize = be64_to_cpu(dip->di_size);
333 if (isize & (1ULL << 63))
334 xchk_ino_set_corrupt(sc, ino);
335
336 /* Devices, fifos, and sockets must have zero size */
337 if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0)
338 xchk_ino_set_corrupt(sc, ino);
339
340 /* Directories can't be larger than the data section size (32G) */
341 if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE))
342 xchk_ino_set_corrupt(sc, ino);
343
344 /* Symlinks can't be larger than SYMLINK_MAXLEN */
345 if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN))
346 xchk_ino_set_corrupt(sc, ino);
347
348 /*
349 * Warn if the running kernel can't handle the kinds of offsets
350 * needed to deal with the file size. In other words, if the
351 * pagecache can't cache all the blocks in this file due to
352 * overly large offsets, flag the inode for admin review.
353 */
354 if (isize >= mp->m_super->s_maxbytes)
355 xchk_ino_set_warning(sc, ino);
356
357 /* di_nblocks */
358 if (flags2 & XFS_DIFLAG2_REFLINK) {
359 ; /* nblocks can exceed dblocks */
360 } else if (flags & XFS_DIFLAG_REALTIME) {
361 /*
362 * nblocks is the sum of data extents (in the rtdev),
363 * attr extents (in the datadev), and both forks' bmbt
364 * blocks (in the datadev). This clumsy check is the
365 * best we can do without cross-referencing with the
366 * inode forks.
367 */
368 if (be64_to_cpu(dip->di_nblocks) >=
369 mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks)
370 xchk_ino_set_corrupt(sc, ino);
371 } else {
372 if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks)
373 xchk_ino_set_corrupt(sc, ino);
374 }
375
376 xchk_inode_flags(sc, dip, ino, mode, flags);
377
378 xchk_inode_extsize(sc, dip, ino, mode, flags);
379
380 /* di_nextents */
381 nextents = be32_to_cpu(dip->di_nextents);
382 fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec);
383 switch (dip->di_format) {
384 case XFS_DINODE_FMT_EXTENTS:
385 if (nextents > fork_recs)
386 xchk_ino_set_corrupt(sc, ino);
387 break;
388 case XFS_DINODE_FMT_BTREE:
389 if (nextents <= fork_recs)
390 xchk_ino_set_corrupt(sc, ino);
391 break;
392 default:
393 if (nextents != 0)
394 xchk_ino_set_corrupt(sc, ino);
395 break;
396 }
397
398 /* di_forkoff */
399 if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize)
400 xchk_ino_set_corrupt(sc, ino);
401 if (dip->di_anextents != 0 && dip->di_forkoff == 0)
402 xchk_ino_set_corrupt(sc, ino);
403 if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS)
404 xchk_ino_set_corrupt(sc, ino);
405
406 /* di_aformat */
407 if (dip->di_aformat != XFS_DINODE_FMT_LOCAL &&
408 dip->di_aformat != XFS_DINODE_FMT_EXTENTS &&
409 dip->di_aformat != XFS_DINODE_FMT_BTREE)
410 xchk_ino_set_corrupt(sc, ino);
411
412 /* di_anextents */
413 nextents = be16_to_cpu(dip->di_anextents);
414 fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec);
415 switch (dip->di_aformat) {
416 case XFS_DINODE_FMT_EXTENTS:
417 if (nextents > fork_recs)
418 xchk_ino_set_corrupt(sc, ino);
419 break;
420 case XFS_DINODE_FMT_BTREE:
421 if (nextents <= fork_recs)
422 xchk_ino_set_corrupt(sc, ino);
423 break;
424 default:
425 if (nextents != 0)
426 xchk_ino_set_corrupt(sc, ino);
427 }
428
429 if (dip->di_version >= 3) {
430 xchk_dinode_nsec(sc, ino, dip, dip->di_crtime);
431 xchk_inode_flags2(sc, dip, ino, mode, flags, flags2);
432 xchk_inode_cowextsize(sc, dip, ino, mode, flags,
433 flags2);
434 }
435}
436
437/*
438 * Make sure the finobt doesn't think this inode is free.
439 * We don't have to check the inobt ourselves because we got the inode via
440 * IGET_UNTRUSTED, which checks the inobt for us.
441 */
442static void
443xchk_inode_xref_finobt(
444 struct xfs_scrub *sc,
445 xfs_ino_t ino)
446{
447 struct xfs_inobt_rec_incore rec;
448 xfs_agino_t agino;
449 int has_record;
450 int error;
451
452 if (!sc->sa.fino_cur || xchk_skip_xref(sc->sm))
453 return;
454
455 agino = XFS_INO_TO_AGINO(sc->mp, ino);
456
457 /*
458 * Try to get the finobt record. If we can't get it, then we're
459 * in good shape.
460 */
461 error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE,
462 &has_record);
463 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
464 !has_record)
465 return;
466
467 error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record);
468 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
469 !has_record)
470 return;
471
472 /*
473 * Otherwise, make sure this record either doesn't cover this inode,
474 * or that it does but it's marked present.
475 */
476 if (rec.ir_startino > agino ||
477 rec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
478 return;
479
480 if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))
481 xchk_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0);
482}
483
484/* Cross reference the inode fields with the forks. */
485STATIC void
486xchk_inode_xref_bmap(
487 struct xfs_scrub *sc,
488 struct xfs_dinode *dip)
489{
490 xfs_extnum_t nextents;
491 xfs_filblks_t count;
492 xfs_filblks_t acount;
493 int error;
494
495 if (xchk_skip_xref(sc->sm))
496 return;
497
498 /* Walk all the extents to check nextents/naextents/nblocks. */
499 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
500 &nextents, &count);
501 if (!xchk_should_check_xref(sc, &error, NULL))
502 return;
503 if (nextents < be32_to_cpu(dip->di_nextents))
504 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino);
505
506 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
507 &nextents, &acount);
508 if (!xchk_should_check_xref(sc, &error, NULL))
509 return;
510 if (nextents != be16_to_cpu(dip->di_anextents))
511 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino);
512
513 /* Check nblocks against the inode. */
514 if (count + acount != be64_to_cpu(dip->di_nblocks))
515 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino);
516}
517
518/* Cross-reference with the other btrees. */
519STATIC void
520xchk_inode_xref(
521 struct xfs_scrub *sc,
522 xfs_ino_t ino,
523 struct xfs_dinode *dip)
524{
525 xfs_agnumber_t agno;
526 xfs_agblock_t agbno;
527 int error;
528
529 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
530 return;
531
532 agno = XFS_INO_TO_AGNO(sc->mp, ino);
533 agbno = XFS_INO_TO_AGBNO(sc->mp, ino);
534
535 error = xchk_ag_init(sc, agno, &sc->sa);
536 if (!xchk_xref_process_error(sc, agno, agbno, &error))
537 return;
538
539 xchk_xref_is_used_space(sc, agbno, 1);
540 xchk_inode_xref_finobt(sc, ino);
541 xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_INODES);
542 xchk_xref_is_not_shared(sc, agbno, 1);
543 xchk_inode_xref_bmap(sc, dip);
544
545 xchk_ag_free(sc, &sc->sa);
546}
547
548/*
549 * If the reflink iflag disagrees with a scan for shared data fork extents,
550 * either flag an error (shared extents w/ no flag) or a preen (flag set w/o
551 * any shared extents). We already checked for reflink iflag set on a non
552 * reflink filesystem.
553 */
554static void
555xchk_inode_check_reflink_iflag(
556 struct xfs_scrub *sc,
557 xfs_ino_t ino)
558{
559 struct xfs_mount *mp = sc->mp;
560 bool has_shared;
561 int error;
562
563 if (!xfs_sb_version_hasreflink(&mp->m_sb))
564 return;
565
566 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
567 &has_shared);
568 if (!xchk_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
569 XFS_INO_TO_AGBNO(mp, ino), &error))
570 return;
571 if (xfs_is_reflink_inode(sc->ip) && !has_shared)
572 xchk_ino_set_preen(sc, ino);
573 else if (!xfs_is_reflink_inode(sc->ip) && has_shared)
574 xchk_ino_set_corrupt(sc, ino);
575}
576
577/* Scrub an inode. */
578int
579xchk_inode(
580 struct xfs_scrub *sc)
581{
582 struct xfs_dinode di;
583 int error = 0;
584
585 /*
586 * If sc->ip is NULL, that means that the setup function called
587 * xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED
588 * and a NULL inode, so flag the corruption error and return.
589 */
590 if (!sc->ip) {
591 xchk_ino_set_corrupt(sc, sc->sm->sm_ino);
592 return 0;
593 }
594
595 /* Scrub the inode core. */
596 xfs_inode_to_disk(sc->ip, &di, 0);
597 xchk_dinode(sc, &di, sc->ip->i_ino);
598 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
599 goto out;
600
601 /*
602 * Look for discrepancies between file's data blocks and the reflink
603 * iflag. We already checked the iflag against the file mode when
604 * we scrubbed the dinode.
605 */
606 if (S_ISREG(VFS_I(sc->ip)->i_mode))
607 xchk_inode_check_reflink_iflag(sc, sc->ip->i_ino);
608
609 xchk_inode_xref(sc, sc->ip->i_ino, &di);
610out:
611 return error;
612}