Loading...
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6#ifndef __XFS_MOUNT_H__
7#define __XFS_MOUNT_H__
8
9struct xlog;
10struct xfs_inode;
11struct xfs_mru_cache;
12struct xfs_ail;
13struct xfs_quotainfo;
14struct xfs_da_geometry;
15struct xfs_perag;
16
/*
 * Indices of the dynamic preallocation free space thresholds, one per
 * percentage point from 1% to 5%.  XFS_LOWSP_MAX is the number of thresholds.
 */
enum {
	XFS_LOWSP_1_PCNT	= 0,
	XFS_LOWSP_2_PCNT	= 1,
	XFS_LOWSP_3_PCNT	= 2,
	XFS_LOWSP_4_PCNT	= 3,
	XFS_LOWSP_5_PCNT	= 4,
	XFS_LOWSP_MAX		= 5,
};
26
/*
 * Error Configuration
 *
 * Error classes define the subsystem the configuration belongs to.
 * Error numbers define the errors that are configurable.
 *
 * Both enums end in a *_MAX member that counts the preceding entries.
 */
enum {
	XFS_ERR_METADATA,
	XFS_ERR_CLASS_MAX,
};
enum {
	XFS_ERR_DEFAULT,
	XFS_ERR_EIO,
	XFS_ERR_ENOSPC,
	XFS_ERR_ENODEV,
	XFS_ERR_ERRNO_MAX,
};

/*
 * Special value meaning "retry forever".  Parenthesised so the negative
 * constant stays a single operand wherever the macro is expanded.
 */
#define XFS_ERR_RETRY_FOREVER	(-1)
46
47/*
48 * Although retry_timeout is in jiffies which is normally an unsigned long,
49 * we limit the retry timeout to 86400 seconds, or one day. So even a
50 * signed 32-bit long is sufficient for a HZ value up to 24855. Making it
51 * signed lets us store the special "-1" value, meaning retry forever.
52 */
53struct xfs_error_cfg {
54 struct xfs_kobj kobj;
55 int max_retries;
56 long retry_timeout; /* in jiffies, -1 = infinite */
57};
58
59/*
60 * Per-cpu deferred inode inactivation GC lists.
61 */
62struct xfs_inodegc {
63 struct xfs_mount *mp;
64 struct llist_head list;
65 struct delayed_work work;
66 int error;
67
68 /* approximate count of inodes in the list */
69 unsigned int items;
70 unsigned int shrinker_hits;
71 unsigned int cpu;
72};
73
74/*
75 * The struct xfsmount layout is optimised to separate read-mostly variables
76 * from variables that are frequently modified. We put the read-mostly variables
77 * first, then place all the other variables at the end.
78 *
79 * Typically, read-mostly variables are those that are set at mount time and
80 * never changed again, or only change rarely as a result of things like sysfs
81 * knobs being tweaked.
82 */
83typedef struct xfs_mount {
84 struct xfs_sb m_sb; /* copy of fs superblock */
85 struct super_block *m_super;
86 struct xfs_ail *m_ail; /* fs active log item list */
87 struct xfs_buf *m_sb_bp; /* buffer for superblock */
88 char *m_rtname; /* realtime device name */
89 char *m_logname; /* external log device name */
90 struct xfs_da_geometry *m_dir_geo; /* directory block geometry */
91 struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */
92 struct xlog *m_log; /* log specific stuff */
93 struct xfs_inode *m_rbmip; /* pointer to bitmap inode */
94 struct xfs_inode *m_rsumip; /* pointer to summary inode */
95 struct xfs_inode *m_rootip; /* pointer to root directory */
96 struct xfs_quotainfo *m_quotainfo; /* disk quota information */
97 struct xfs_buftarg *m_ddev_targp; /* data device */
98 struct xfs_buftarg *m_logdev_targp;/* log device */
99 struct xfs_buftarg *m_rtdev_targp; /* rt device */
100 void __percpu *m_inodegc; /* percpu inodegc structures */
101
102 /*
103 * Optional cache of rt summary level per bitmap block with the
104 * invariant that m_rsum_cache[bbno] > the maximum i for which
105 * rsum[i][bbno] != 0, or 0 if rsum[i][bbno] == 0 for all i.
106 * Reads and writes are serialized by the rsumip inode lock.
107 */
108 uint8_t *m_rsum_cache;
109 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
110 struct workqueue_struct *m_buf_workqueue;
111 struct workqueue_struct *m_unwritten_workqueue;
112 struct workqueue_struct *m_reclaim_workqueue;
113 struct workqueue_struct *m_sync_workqueue;
114 struct workqueue_struct *m_blockgc_wq;
115 struct workqueue_struct *m_inodegc_wq;
116
117 int m_bsize; /* fs logical block size */
118 uint8_t m_blkbit_log; /* blocklog + NBBY */
119 uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
120 uint8_t m_agno_log; /* log #ag's */
121 uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
122 int8_t m_rtxblklog; /* log2 of rextsize, if possible */
123 uint m_blockmask; /* sb_blocksize-1 */
124 uint m_blockwsize; /* sb_blocksize in words */
125 uint m_blockwmask; /* blockwsize-1 */
126 uint m_alloc_mxr[2]; /* max alloc btree records */
127 uint m_alloc_mnr[2]; /* min alloc btree records */
128 uint m_bmap_dmxr[2]; /* max bmap btree records */
129 uint m_bmap_dmnr[2]; /* min bmap btree records */
130 uint m_rmap_mxr[2]; /* max rmap btree records */
131 uint m_rmap_mnr[2]; /* min rmap btree records */
132 uint m_refc_mxr[2]; /* max refc btree records */
133 uint m_refc_mnr[2]; /* min refc btree records */
134 uint m_alloc_maxlevels; /* max alloc btree levels */
135 uint m_bm_maxlevels[2]; /* max bmap btree levels */
136 uint m_rmap_maxlevels; /* max rmap btree levels */
137 uint m_refc_maxlevels; /* max refcount btree level */
138 unsigned int m_agbtree_maxlevels; /* max level of all AG btrees */
139 xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */
140 uint m_alloc_set_aside; /* space we can't use */
141 uint m_ag_max_usable; /* max space per AG */
142 int m_dalign; /* stripe unit */
143 int m_swidth; /* stripe width */
144 xfs_agnumber_t m_maxagi; /* highest inode alloc group */
145 uint m_allocsize_log;/* min write size log bytes */
146 uint m_allocsize_blocks; /* min write size blocks */
147 int m_logbufs; /* number of log buffers */
148 int m_logbsize; /* size of each log buffer */
149 uint m_rsumlevels; /* rt summary levels */
150 uint m_rsumsize; /* size of rt summary, bytes */
151 int m_fixedfsid[2]; /* unchanged for life of FS */
152 uint m_qflags; /* quota status flags */
153 uint64_t m_features; /* active filesystem features */
154 uint64_t m_low_space[XFS_LOWSP_MAX];
155 uint64_t m_low_rtexts[XFS_LOWSP_MAX];
156 uint64_t m_rtxblkmask; /* rt extent block mask */
157 struct xfs_ino_geometry m_ino_geo; /* inode geometry */
158 struct xfs_trans_resv m_resv; /* precomputed res values */
159 /* low free space thresholds */
160 unsigned long m_opstate; /* dynamic state flags */
161 bool m_always_cow;
162 bool m_fail_unmount;
163 bool m_finobt_nores; /* no per-AG finobt resv. */
164 bool m_update_sb; /* sb needs update in mount */
165
166 /*
167 * Bitsets of per-fs metadata that have been checked and/or are sick.
168 * Callers must hold m_sb_lock to access these two fields.
169 */
170 uint8_t m_fs_checked;
171 uint8_t m_fs_sick;
172 /*
173 * Bitsets of rt metadata that have been checked and/or are sick.
174 * Callers must hold m_sb_lock to access this field.
175 */
176 uint8_t m_rt_checked;
177 uint8_t m_rt_sick;
178
179 /*
180 * End of read-mostly variables. Frequently written variables and locks
181 * should be placed below this comment from now on. The first variable
182 * here is marked as cacheline aligned so they it is separated from
183 * the read-mostly variables.
184 */
185
186 spinlock_t ____cacheline_aligned m_sb_lock; /* sb counter lock */
187 struct percpu_counter m_icount; /* allocated inodes counter */
188 struct percpu_counter m_ifree; /* free inodes counter */
189 struct percpu_counter m_fdblocks; /* free block counter */
190 struct percpu_counter m_frextents; /* free rt extent counter */
191
192 /*
193 * Count of data device blocks reserved for delayed allocations,
194 * including indlen blocks. Does not include allocated CoW staging
195 * extents or anything related to the rt device.
196 */
197 struct percpu_counter m_delalloc_blks;
198 /*
199 * Global count of allocation btree blocks in use across all AGs. Only
200 * used when perag reservation is enabled. Helps prevent block
201 * reservation from attempting to reserve allocation btree blocks.
202 */
203 atomic64_t m_allocbt_blks;
204
205 struct radix_tree_root m_perag_tree; /* per-ag accounting info */
206 spinlock_t m_perag_lock; /* lock for m_perag_tree */
207 uint64_t m_resblks; /* total reserved blocks */
208 uint64_t m_resblks_avail;/* available reserved blocks */
209 uint64_t m_resblks_save; /* reserved blks @ remount,ro */
210 struct delayed_work m_reclaim_work; /* background inode reclaim */
211 struct dentry *m_debugfs; /* debugfs parent */
212 struct xfs_kobj m_kobj;
213 struct xfs_kobj m_error_kobj;
214 struct xfs_kobj m_error_meta_kobj;
215 struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
216 struct xstats m_stats; /* per-fs stats */
217#ifdef CONFIG_XFS_ONLINE_SCRUB_STATS
218 struct xchk_stats *m_scrub_stats;
219#endif
220 xfs_agnumber_t m_agfrotor; /* last ag where space found */
221 atomic_t m_agirotor; /* last ag dir inode alloced */
222
223 /* Memory shrinker to throttle and reprioritize inodegc */
224 struct shrinker *m_inodegc_shrinker;
225 /*
226 * Workqueue item so that we can coalesce multiple inode flush attempts
227 * into a single flush.
228 */
229 struct work_struct m_flush_inodes_work;
230
231 /*
232 * Generation of the filesysyem layout. This is incremented by each
233 * growfs, and used by the pNFS server to ensure the client updates
234 * its view of the block device once it gets a layout that might
235 * reference the newly added blocks. Does not need to be persistent
236 * as long as we only allow file system size increments, but if we
237 * ever support shrinks it would have to be persisted in addition
238 * to various other kinds of pain inflicted on the pNFS server.
239 */
240 uint32_t m_generation;
241 struct mutex m_growlock; /* growfs mutex */
242
243#ifdef DEBUG
244 /*
245 * Frequency with which errors are injected. Replaces xfs_etest; the
246 * value stored in here is the inverse of the frequency with which the
247 * error triggers. 1 = always, 2 = half the time, etc.
248 */
249 unsigned int *m_errortag;
250 struct xfs_kobj m_errortag_kobj;
251#endif
252
253 /* cpus that have inodes queued for inactivation */
254 struct cpumask m_inodegc_cpumask;
255
256 /* Hook to feed dirent updates to an active online repair. */
257 struct xfs_hooks m_dir_update_hooks;
258} xfs_mount_t;
259
/* Address of the inode geometry embedded in a mount structure. */
#define M_IGEO(mp)		(&((mp)->m_ino_geo))
261
/*
 * Flags for m_features.
 *
 * These describe every feature that is active in the filesystem, regardless
 * of how it is configured.  The low bits are used first.
 */
#define XFS_FEAT_ATTR		(1ULL << 0)	/* xattrs present in fs */
#define XFS_FEAT_NLINK		(1ULL << 1)	/* 32 bit link counts */
#define XFS_FEAT_QUOTA		(1ULL << 2)	/* quota active */
#define XFS_FEAT_ALIGN		(1ULL << 3)	/* inode alignment */
#define XFS_FEAT_DALIGN		(1ULL << 4)	/* data alignment */
#define XFS_FEAT_LOGV2		(1ULL << 5)	/* version 2 logs */
#define XFS_FEAT_SECTOR		(1ULL << 6)	/* sector size > 512 bytes */
#define XFS_FEAT_EXTFLG		(1ULL << 7)	/* unwritten extents */
#define XFS_FEAT_ASCIICI	(1ULL << 8)	/* ASCII only case-insens. */
#define XFS_FEAT_LAZYSBCOUNT	(1ULL << 9)	/* Superblk counters */
#define XFS_FEAT_ATTR2		(1ULL << 10)	/* dynamic attr fork */
#define XFS_FEAT_PARENT		(1ULL << 11)	/* parent pointers */
#define XFS_FEAT_PROJID32	(1ULL << 12)	/* 32 bit project id */
#define XFS_FEAT_CRC		(1ULL << 13)	/* metadata CRCs */
#define XFS_FEAT_V3INODES	(1ULL << 14)	/* Version 3 inodes */
#define XFS_FEAT_PQUOTINO	(1ULL << 15)	/* non-shared proj/grp quotas */
#define XFS_FEAT_FTYPE		(1ULL << 16)	/* inode type in dir */
#define XFS_FEAT_FINOBT		(1ULL << 17)	/* free inode btree */
#define XFS_FEAT_RMAPBT		(1ULL << 18)	/* reverse map btree */
#define XFS_FEAT_REFLINK	(1ULL << 19)	/* reflinked files */
#define XFS_FEAT_SPINODES	(1ULL << 20)	/* sparse inode chunks */
#define XFS_FEAT_META_UUID	(1ULL << 21)	/* metadata UUID */
#define XFS_FEAT_REALTIME	(1ULL << 22)	/* realtime device present */
#define XFS_FEAT_INOBTCNT	(1ULL << 23)	/* inobt block counts */
#define XFS_FEAT_BIGTIME	(1ULL << 24)	/* large timestamps */
#define XFS_FEAT_NEEDSREPAIR	(1ULL << 25)	/* needs xfs_repair */
#define XFS_FEAT_NREXT64	(1ULL << 26)	/* large extent counters */

/* Mount features, allocated from bit 48 upwards. */
#define XFS_FEAT_NOATTR2	(1ULL << 48)	/* disable attr2 creation */
#define XFS_FEAT_NOALIGN	(1ULL << 49)	/* ignore alignment */
#define XFS_FEAT_ALLOCSIZE	(1ULL << 50)	/* user specified allocation size */
#define XFS_FEAT_LARGE_IOSIZE	(1ULL << 51)	/* report large preferred
						 * I/O size in stat() */
#define XFS_FEAT_WSYNC		(1ULL << 52)	/* synchronous metadata ops */
#define XFS_FEAT_DIRSYNC	(1ULL << 53)	/* synchronous directory ops */
#define XFS_FEAT_DISCARD	(1ULL << 54)	/* discard unused blocks */
#define XFS_FEAT_GRPID		(1ULL << 55)	/* group-ID assigned from directory */
#define XFS_FEAT_SMALL_INUMS	(1ULL << 56)	/* user wants 32bit inodes */
#define XFS_FEAT_IKEEP		(1ULL << 57)	/* keep empty inode clusters */
#define XFS_FEAT_SWALLOC	(1ULL << 58)	/* stripe width allocation */
#define XFS_FEAT_FILESTREAMS	(1ULL << 59)	/* use filestreams allocator */
#define XFS_FEAT_DAX_ALWAYS	(1ULL << 60)	/* DAX always enabled */
#define XFS_FEAT_DAX_NEVER	(1ULL << 61)	/* DAX never enabled */
#define XFS_FEAT_NORECOVERY	(1ULL << 62)	/* no recovery - dirty fs */
#define XFS_FEAT_NOUUID		(1ULL << 63)	/* ignore uuid during mount */
314
/*
 * Generate xfs_has_<name>(), which reports whether the XFS_FEAT_<NAME> bit is
 * set in mp->m_features.  The masked 64-bit value is compared against zero
 * explicitly so the result is a proper truth value for every bit position.
 */
#define __XFS_HAS_FEAT(name, NAME) \
static inline bool xfs_has_ ## name (struct xfs_mount *mp) \
{ \
	return (mp->m_features & XFS_FEAT_ ## NAME) != 0; \
}

/*
 * Some features can be added dynamically so they need a set wrapper, too.
 *
 * Generates xfs_has_<name>() plus xfs_add_<name>(), which sets the
 * XFS_FEAT_<NAME> bit in mp->m_features and then calls
 * xfs_sb_version_add<name>() on the incore superblock.  No semicolon follows
 * the nested __XFS_HAS_FEAT() expansion: it ends in a function body, and a
 * trailing ';' would be an empty file-scope declaration.
 */
#define __XFS_ADD_FEAT(name, NAME) \
	__XFS_HAS_FEAT(name, NAME) \
static inline void xfs_add_ ## name (struct xfs_mount *mp) \
{ \
	mp->m_features |= XFS_FEAT_ ## NAME; \
	xfs_sb_version_add ## name(&mp->m_sb); \
}
329
330/* Superblock features */
331__XFS_ADD_FEAT(attr, ATTR)
332__XFS_HAS_FEAT(nlink, NLINK)
333__XFS_ADD_FEAT(quota, QUOTA)
334__XFS_HAS_FEAT(align, ALIGN)
335__XFS_HAS_FEAT(dalign, DALIGN)
336__XFS_HAS_FEAT(logv2, LOGV2)
337__XFS_HAS_FEAT(sector, SECTOR)
338__XFS_HAS_FEAT(extflg, EXTFLG)
339__XFS_HAS_FEAT(asciici, ASCIICI)
340__XFS_HAS_FEAT(lazysbcount, LAZYSBCOUNT)
341__XFS_ADD_FEAT(attr2, ATTR2)
342__XFS_HAS_FEAT(parent, PARENT)
343__XFS_ADD_FEAT(projid32, PROJID32)
344__XFS_HAS_FEAT(crc, CRC)
345__XFS_HAS_FEAT(v3inodes, V3INODES)
346__XFS_HAS_FEAT(pquotino, PQUOTINO)
347__XFS_HAS_FEAT(ftype, FTYPE)
348__XFS_HAS_FEAT(finobt, FINOBT)
349__XFS_HAS_FEAT(rmapbt, RMAPBT)
350__XFS_HAS_FEAT(reflink, REFLINK)
351__XFS_HAS_FEAT(sparseinodes, SPINODES)
352__XFS_HAS_FEAT(metauuid, META_UUID)
353__XFS_HAS_FEAT(realtime, REALTIME)
354__XFS_HAS_FEAT(inobtcounts, INOBTCNT)
355__XFS_HAS_FEAT(bigtime, BIGTIME)
356__XFS_HAS_FEAT(needsrepair, NEEDSREPAIR)
357__XFS_HAS_FEAT(large_extent_counts, NREXT64)
358
359/*
360 * Mount features
361 *
362 * These do not change dynamically - features that can come and go, such as 32
363 * bit inodes and read-only state, are kept as operational state rather than
364 * features.
365 */
366__XFS_HAS_FEAT(noattr2, NOATTR2)
367__XFS_HAS_FEAT(noalign, NOALIGN)
368__XFS_HAS_FEAT(allocsize, ALLOCSIZE)
369__XFS_HAS_FEAT(large_iosize, LARGE_IOSIZE)
370__XFS_HAS_FEAT(wsync, WSYNC)
371__XFS_HAS_FEAT(dirsync, DIRSYNC)
372__XFS_HAS_FEAT(discard, DISCARD)
373__XFS_HAS_FEAT(grpid, GRPID)
374__XFS_HAS_FEAT(small_inums, SMALL_INUMS)
375__XFS_HAS_FEAT(ikeep, IKEEP)
376__XFS_HAS_FEAT(swalloc, SWALLOC)
377__XFS_HAS_FEAT(filestreams, FILESTREAMS)
378__XFS_HAS_FEAT(dax_always, DAX_ALWAYS)
379__XFS_HAS_FEAT(dax_never, DAX_NEVER)
380__XFS_HAS_FEAT(norecovery, NORECOVERY)
381__XFS_HAS_FEAT(nouuid, NOUUID)
382
/*
 * Operational mount state flags
 *
 * These are bit numbers within m_opstate.  Use them with atomic bit ops only!
 */
#define XFS_OPSTATE_UNMOUNTING		0	/* filesystem is unmounting */
#define XFS_OPSTATE_CLEAN		1	/* mount was clean */
#define XFS_OPSTATE_SHUTDOWN		2	/* stop all fs operations */
#define XFS_OPSTATE_INODE32		3	/* inode32 allocator active */
#define XFS_OPSTATE_READONLY		4	/* read-only fs */

/*
 * When set, inactivation worker threads are scheduled to process queued
 * inodegc work.  When clear, queued inodes stay in memory waiting to be
 * processed.
 */
#define XFS_OPSTATE_INODEGC_ENABLED	5
/*
 * When set, background speculative prealloc gc worker threads are scheduled
 * to process queued blockgc work.  When clear, inodes keep their
 * preallocations until explicitly deleted.
 */
#define XFS_OPSTATE_BLOCKGC_ENABLED	6

/* Kernel has logged a warning about online fsck being used on this fs. */
#define XFS_OPSTATE_WARNED_SCRUB	7
/* Kernel has logged a warning about shrink being used on this fs. */
#define XFS_OPSTATE_WARNED_SHRINK	8
/* Kernel has logged a warning about logged xattr updates being used. */
#define XFS_OPSTATE_WARNED_LARP		9
/* Mount time quotacheck is running */
#define XFS_OPSTATE_QUOTACHECK_RUNNING	10
415
/*
 * Generate the three accessors for one operational state bit in m_opstate:
 *
 *   xfs_is_<name>()    - test_bit() on the bit
 *   xfs_set_<name>()   - test_and_set_bit(); returns that call's result
 *   xfs_clear_<name>() - test_and_clear_bit(); returns that call's result
 */
#define __XFS_IS_OPSTATE(name, NAME) \
static inline bool xfs_is_ ## name (struct xfs_mount *mp) \
{ \
	return test_bit(XFS_OPSTATE_ ## NAME, &mp->m_opstate); \
} \
static inline bool xfs_set_ ## name (struct xfs_mount *mp) \
{ \
	return test_and_set_bit(XFS_OPSTATE_ ## NAME, &mp->m_opstate); \
} \
static inline bool xfs_clear_ ## name (struct xfs_mount *mp) \
{ \
	return test_and_clear_bit(XFS_OPSTATE_ ## NAME, &mp->m_opstate); \
}
429
430__XFS_IS_OPSTATE(unmounting, UNMOUNTING)
431__XFS_IS_OPSTATE(clean, CLEAN)
432__XFS_IS_OPSTATE(shutdown, SHUTDOWN)
433__XFS_IS_OPSTATE(inode32, INODE32)
434__XFS_IS_OPSTATE(readonly, READONLY)
435__XFS_IS_OPSTATE(inodegc_enabled, INODEGC_ENABLED)
436__XFS_IS_OPSTATE(blockgc_enabled, BLOCKGC_ENABLED)
437#ifdef CONFIG_XFS_QUOTA
438__XFS_IS_OPSTATE(quotacheck_running, QUOTACHECK_RUNNING)
439#else
440# define xfs_is_quotacheck_running(mp) (false)
441#endif
442
443static inline bool
444xfs_should_warn(struct xfs_mount *mp, long nr)
445{
446 return !test_and_set_bit(nr, &mp->m_opstate);
447}
448
/*
 * Mask/name pairs for the operational state bits, one entry per
 * XFS_OPSTATE_* bit number, in bit order.
 */
#define XFS_OPSTATE_STRINGS \
	{ (1UL << XFS_OPSTATE_UNMOUNTING),		"unmounting" }, \
	{ (1UL << XFS_OPSTATE_CLEAN),			"clean" }, \
	{ (1UL << XFS_OPSTATE_SHUTDOWN),		"shutdown" }, \
	{ (1UL << XFS_OPSTATE_INODE32),			"inode32" }, \
	{ (1UL << XFS_OPSTATE_READONLY),		"read_only" }, \
	{ (1UL << XFS_OPSTATE_INODEGC_ENABLED),		"inodegc" }, \
	{ (1UL << XFS_OPSTATE_BLOCKGC_ENABLED),		"blockgc" }, \
	{ (1UL << XFS_OPSTATE_WARNED_SCRUB),		"wscrub" }, \
	{ (1UL << XFS_OPSTATE_WARNED_SHRINK),		"wshrink" }, \
	{ (1UL << XFS_OPSTATE_WARNED_LARP),		"wlarp" }, \
	{ (1UL << XFS_OPSTATE_QUOTACHECK_RUNNING),	"quotacheck" }
461
/*
 * Upper and lower bounds (as log2 of the size) for the I/O preallocation
 * size that can be set with a mount option.
 */
#define XFS_MAX_IO_LOG		30		/* 1G */
#define XFS_MIN_IO_LOG		PAGE_SHIFT
468
void xfs_do_force_shutdown(struct xfs_mount *mp, uint32_t flags, char *fname,
			   int lnnum);

/*
 * Wrapper around xfs_do_force_shutdown() that passes the caller's __FILE__
 * and __LINE__ as the fname and lnnum arguments.
 */
#define xfs_force_shutdown(mp, flags) \
	xfs_do_force_shutdown(mp, flags, __FILE__, __LINE__)
473
/* Shutdown reason flags; each one is a distinct bit. */
#define SHUTDOWN_META_IO_ERROR	(1u << 0) /* write attempt to metadata failed */
#define SHUTDOWN_LOG_IO_ERROR	(1u << 1) /* write attempt to the log failed */
#define SHUTDOWN_FORCE_UMOUNT	(1u << 2) /* shutdown from a forced unmount */
#define SHUTDOWN_CORRUPT_INCORE	(1u << 3) /* corrupt in-memory structures */
#define SHUTDOWN_CORRUPT_ONDISK	(1u << 4) /* corrupt metadata on device */
#define SHUTDOWN_DEVICE_REMOVED	(1u << 5) /* device removed underneath us */

/*
 * Flag/name pairs for decoding a shutdown flags value.  Every SHUTDOWN_*
 * flag defined above needs an entry here, otherwise that bit cannot be
 * reported by name.
 */
#define XFS_SHUTDOWN_STRINGS \
	{ SHUTDOWN_META_IO_ERROR,	"metadata_io" }, \
	{ SHUTDOWN_LOG_IO_ERROR,	"log_io" }, \
	{ SHUTDOWN_FORCE_UMOUNT,	"force_umount" }, \
	{ SHUTDOWN_CORRUPT_INCORE,	"corruption" }, \
	{ SHUTDOWN_CORRUPT_ONDISK,	"corruption_ondisk" }, \
	{ SHUTDOWN_DEVICE_REMOVED,	"device_removed" }
487
/* Flags for xfs_mountfs */
#define XFS_MFSI_QUIET		0x40	/* Be silent if mount errors found */
492
493static inline xfs_agnumber_t
494xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d)
495{
496 xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d);
497 do_div(ld, mp->m_sb.sb_agblocks);
498 return (xfs_agnumber_t) ld;
499}
500
501static inline xfs_agblock_t
502xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
503{
504 xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d);
505 return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
506}
507
/* Mount/unmount entry points and helpers. */
void		xfs_uuid_table_free(void);
uint64_t	xfs_default_resblks(struct xfs_mount *mp);
int		xfs_mountfs(struct xfs_mount *mp);
void		xfs_unmountfs(struct xfs_mount *mp);
512
/*
 * Block count deltas range from 1 to very large, but lock contention only
 * shows up with frequent small updates, such as the page-at-a-time updates
 * made by the delayed allocation path for buffered writes.  A large batch
 * count (1024) therefore minimises global counter updates, except near
 * ENOSPC where the updates have to be very accurate.
 */
#define XFS_FDBLOCKS_BATCH	1024
521
522/*
523 * Estimate the amount of free space that is not available to userspace and is
524 * not explicitly reserved from the incore fdblocks. This includes:
525 *
526 * - The minimum number of blocks needed to support splitting a bmap btree
527 * - The blocks currently in use by the freespace btrees because they record
528 * the actual blocks that will fill per-AG metadata space reservations
529 */
530static inline uint64_t
531xfs_fdblocks_unavailable(
532 struct xfs_mount *mp)
533{
534 return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
535}
536
/* Common backend for the xfs_mod_fdblocks()/xfs_mod_frextents() wrappers. */
int xfs_mod_freecounter(struct xfs_mount *mp,
			struct percpu_counter *counter,
			int64_t delta,
			bool rsvd);
539
540static inline int
541xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta, bool reserved)
542{
543 return xfs_mod_freecounter(mp, &mp->m_fdblocks, delta, reserved);
544}
545
546static inline int
547xfs_mod_frextents(struct xfs_mount *mp, int64_t delta)
548{
549 return xfs_mod_freecounter(mp, &mp->m_frextents, delta, false);
550}
551
552extern int xfs_readsb(xfs_mount_t *, int);
553extern void xfs_freesb(xfs_mount_t *);
554extern bool xfs_fs_writable(struct xfs_mount *mp, int level);
555extern int xfs_sb_validate_fsb_count(struct xfs_sb *, uint64_t);
556
557extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
558
559extern void xfs_set_low_space_thresholds(struct xfs_mount *);
560
561int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb,
562 xfs_off_t count_fsb);
563
564struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp,
565 int error_class, int error);
566void xfs_force_summary_recalc(struct xfs_mount *mp);
567int xfs_add_incompat_log_feature(struct xfs_mount *mp, uint32_t feature);
568bool xfs_clear_incompat_log_features(struct xfs_mount *mp);
569void xfs_mod_delalloc(struct xfs_mount *mp, int64_t delta);
570
571#endif /* __XFS_MOUNT_H__ */
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6#ifndef __XFS_MOUNT_H__
7#define __XFS_MOUNT_H__
8
9struct xlog;
10struct xfs_inode;
11struct xfs_mru_cache;
12struct xfs_ail;
13struct xfs_quotainfo;
14struct xfs_da_geometry;
15
/*
 * Indices of the dynamic preallocation free space thresholds, one per
 * percentage point from 1% to 5%.  XFS_LOWSP_MAX is the number of thresholds.
 */
enum {
	XFS_LOWSP_1_PCNT	= 0,
	XFS_LOWSP_2_PCNT	= 1,
	XFS_LOWSP_3_PCNT	= 2,
	XFS_LOWSP_4_PCNT	= 3,
	XFS_LOWSP_5_PCNT	= 4,
	XFS_LOWSP_MAX		= 5,
};
25
/*
 * Error Configuration
 *
 * Error classes define the subsystem the configuration belongs to.
 * Error numbers define the errors that are configurable.
 *
 * Both enums end in a *_MAX member that counts the preceding entries.
 */
enum {
	XFS_ERR_METADATA,
	XFS_ERR_CLASS_MAX,
};
enum {
	XFS_ERR_DEFAULT,
	XFS_ERR_EIO,
	XFS_ERR_ENOSPC,
	XFS_ERR_ENODEV,
	XFS_ERR_ERRNO_MAX,
};

/*
 * Special value meaning "retry forever".  Parenthesised so the negative
 * constant stays a single operand wherever the macro is expanded.
 */
#define XFS_ERR_RETRY_FOREVER	(-1)
45
46/*
47 * Although retry_timeout is in jiffies which is normally an unsigned long,
48 * we limit the retry timeout to 86400 seconds, or one day. So even a
49 * signed 32-bit long is sufficient for a HZ value up to 24855. Making it
50 * signed lets us store the special "-1" value, meaning retry forever.
51 */
52struct xfs_error_cfg {
53 struct xfs_kobj kobj;
54 int max_retries;
55 long retry_timeout; /* in jiffies, -1 = infinite */
56};
57
58/*
59 * The struct xfsmount layout is optimised to separate read-mostly variables
60 * from variables that are frequently modified. We put the read-mostly variables
61 * first, then place all the other variables at the end.
62 *
63 * Typically, read-mostly variables are those that are set at mount time and
64 * never changed again, or only change rarely as a result of things like sysfs
65 * knobs being tweaked.
66 */
67typedef struct xfs_mount {
68 struct xfs_sb m_sb; /* copy of fs superblock */
69 struct super_block *m_super;
70 struct xfs_ail *m_ail; /* fs active log item list */
71 struct xfs_buf *m_sb_bp; /* buffer for superblock */
72 char *m_rtname; /* realtime device name */
73 char *m_logname; /* external log device name */
74 struct xfs_da_geometry *m_dir_geo; /* directory block geometry */
75 struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */
76 struct xlog *m_log; /* log specific stuff */
77 struct xfs_inode *m_rbmip; /* pointer to bitmap inode */
78 struct xfs_inode *m_rsumip; /* pointer to summary inode */
79 struct xfs_inode *m_rootip; /* pointer to root directory */
80 struct xfs_quotainfo *m_quotainfo; /* disk quota information */
81 xfs_buftarg_t *m_ddev_targp; /* saves taking the address */
82 xfs_buftarg_t *m_logdev_targp;/* ptr to log device */
83 xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */
84 /*
85 * Optional cache of rt summary level per bitmap block with the
86 * invariant that m_rsum_cache[bbno] <= the minimum i for which
87 * rsum[i][bbno] != 0. Reads and writes are serialized by the rsumip
88 * inode lock.
89 */
90 uint8_t *m_rsum_cache;
91 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
92 struct workqueue_struct *m_buf_workqueue;
93 struct workqueue_struct *m_unwritten_workqueue;
94 struct workqueue_struct *m_cil_workqueue;
95 struct workqueue_struct *m_reclaim_workqueue;
96 struct workqueue_struct *m_eofblocks_workqueue;
97 struct workqueue_struct *m_sync_workqueue;
98
99 int m_bsize; /* fs logical block size */
100 uint8_t m_blkbit_log; /* blocklog + NBBY */
101 uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
102 uint8_t m_agno_log; /* log #ag's */
103 uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
104 uint m_blockmask; /* sb_blocksize-1 */
105 uint m_blockwsize; /* sb_blocksize in words */
106 uint m_blockwmask; /* blockwsize-1 */
107 uint m_alloc_mxr[2]; /* max alloc btree records */
108 uint m_alloc_mnr[2]; /* min alloc btree records */
109 uint m_bmap_dmxr[2]; /* max bmap btree records */
110 uint m_bmap_dmnr[2]; /* min bmap btree records */
111 uint m_rmap_mxr[2]; /* max rmap btree records */
112 uint m_rmap_mnr[2]; /* min rmap btree records */
113 uint m_refc_mxr[2]; /* max refc btree records */
114 uint m_refc_mnr[2]; /* min refc btree records */
115 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
116 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
117 uint m_rmap_maxlevels; /* max rmap btree levels */
118 uint m_refc_maxlevels; /* max refcount btree level */
119 xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */
120 uint m_alloc_set_aside; /* space we can't use */
121 uint m_ag_max_usable; /* max space per AG */
122 int m_dalign; /* stripe unit */
123 int m_swidth; /* stripe width */
124 xfs_agnumber_t m_maxagi; /* highest inode alloc group */
125 uint m_allocsize_log;/* min write size log bytes */
126 uint m_allocsize_blocks; /* min write size blocks */
127 int m_logbufs; /* number of log buffers */
128 int m_logbsize; /* size of each log buffer */
129 uint m_rsumlevels; /* rt summary levels */
130 uint m_rsumsize; /* size of rt summary, bytes */
131 int m_fixedfsid[2]; /* unchanged for life of FS */
132 uint m_qflags; /* quota status flags */
133 uint64_t m_flags; /* global mount flags */
134 int64_t m_low_space[XFS_LOWSP_MAX];
135 struct xfs_ino_geometry m_ino_geo; /* inode geometry */
136 struct xfs_trans_resv m_resv; /* precomputed res values */
137 /* low free space thresholds */
138 bool m_always_cow;
139 bool m_fail_unmount;
140 bool m_finobt_nores; /* no per-AG finobt resv. */
141 bool m_update_sb; /* sb needs update in mount */
142
143 /*
144 * Bitsets of per-fs metadata that have been checked and/or are sick.
145 * Callers must hold m_sb_lock to access these two fields.
146 */
147 uint8_t m_fs_checked;
148 uint8_t m_fs_sick;
149 /*
150 * Bitsets of rt metadata that have been checked and/or are sick.
151 * Callers must hold m_sb_lock to access this field.
152 */
153 uint8_t m_rt_checked;
154 uint8_t m_rt_sick;
155
156 /*
157 * End of read-mostly variables. Frequently written variables and locks
158 * should be placed below this comment from now on. The first variable
159 * here is marked as cacheline aligned so they it is separated from
160 * the read-mostly variables.
161 */
162
163 spinlock_t ____cacheline_aligned m_sb_lock; /* sb counter lock */
164 struct percpu_counter m_icount; /* allocated inodes counter */
165 struct percpu_counter m_ifree; /* free inodes counter */
166 struct percpu_counter m_fdblocks; /* free block counter */
167 /*
168 * Count of data device blocks reserved for delayed allocations,
169 * including indlen blocks. Does not include allocated CoW staging
170 * extents or anything related to the rt device.
171 */
172 struct percpu_counter m_delalloc_blks;
173
174 struct radix_tree_root m_perag_tree; /* per-ag accounting info */
175 spinlock_t m_perag_lock; /* lock for m_perag_tree */
176 uint64_t m_resblks; /* total reserved blocks */
177 uint64_t m_resblks_avail;/* available reserved blocks */
178 uint64_t m_resblks_save; /* reserved blks @ remount,ro */
179 struct delayed_work m_reclaim_work; /* background inode reclaim */
180 struct delayed_work m_eofblocks_work; /* background eof blocks
181 trimming */
182 struct delayed_work m_cowblocks_work; /* background cow blocks
183 trimming */
184 struct xfs_kobj m_kobj;
185 struct xfs_kobj m_error_kobj;
186 struct xfs_kobj m_error_meta_kobj;
187 struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
188 struct xstats m_stats; /* per-fs stats */
189 xfs_agnumber_t m_agfrotor; /* last ag where space found */
190 xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */
191 spinlock_t m_agirotor_lock;/* .. and lock protecting it */
192
193 /*
194 * Workqueue item so that we can coalesce multiple inode flush attempts
195 * into a single flush.
196 */
197 struct work_struct m_flush_inodes_work;
198
199 /*
200 * Generation of the filesysyem layout. This is incremented by each
201 * growfs, and used by the pNFS server to ensure the client updates
202 * its view of the block device once it gets a layout that might
203 * reference the newly added blocks. Does not need to be persistent
204 * as long as we only allow file system size increments, but if we
205 * ever support shrinks it would have to be persisted in addition
206 * to various other kinds of pain inflicted on the pNFS server.
207 */
208 uint32_t m_generation;
209 struct mutex m_growlock; /* growfs mutex */
210
211#ifdef DEBUG
212 /*
213 * Frequency with which errors are injected. Replaces xfs_etest; the
214 * value stored in here is the inverse of the frequency with which the
215 * error triggers. 1 = always, 2 = half the time, etc.
216 */
217 unsigned int *m_errortag;
218 struct xfs_kobj m_errortag_kobj;
219#endif
220} xfs_mount_t;
221
/* Address of the inode geometry embedded in a mount structure. */
#define M_IGEO(mp)		(&((mp)->m_ino_geo))
223
/*
 * Flags for m_flags.  Each flag is a single bit; not every bit position is
 * in use.
 */
#define XFS_MOUNT_WSYNC		(1ULL << 0)	/* for nfs - all metadata ops
						   must be synchronous except
						   for space allocations */
#define XFS_MOUNT_UNMOUNTING	(1ULL << 1)	/* filesystem is unmounting */
#define XFS_MOUNT_WAS_CLEAN	(1ULL << 3)
#define XFS_MOUNT_FS_SHUTDOWN	(1ULL << 4)	/* atomic stop of all filesystem
						   operations, typically for
						   disk errors in metadata */
#define XFS_MOUNT_DISCARD	(1ULL << 5)	/* discard unused blocks */
#define XFS_MOUNT_NOALIGN	(1ULL << 7)	/* turn off stripe alignment
						   allocations */
#define XFS_MOUNT_ATTR2		(1ULL << 8)	/* allow use of attr2 format */
#define XFS_MOUNT_GRPID		(1ULL << 9)	/* group-ID assigned from directory */
#define XFS_MOUNT_NORECOVERY	(1ULL << 10)	/* no recovery - dirty fs */
#define XFS_MOUNT_ALLOCSIZE	(1ULL << 12)	/* specified allocation size */
#define XFS_MOUNT_SMALL_INUMS	(1ULL << 14)	/* user wants 32bit inodes */
#define XFS_MOUNT_32BITINODES	(1ULL << 15)	/* inode32 allocator active */
#define XFS_MOUNT_NOUUID	(1ULL << 16)	/* ignore uuid during mount */
#define XFS_MOUNT_IKEEP		(1ULL << 18)	/* keep empty inode clusters */
#define XFS_MOUNT_SWALLOC	(1ULL << 19)	/* turn on stripe width
						 * allocation */
#define XFS_MOUNT_RDONLY	(1ULL << 20)	/* read-only fs */
#define XFS_MOUNT_DIRSYNC	(1ULL << 21)	/* synchronous directory ops */
#define XFS_MOUNT_LARGEIO	(1ULL << 22)	/* report large preferred
						 * I/O size in stat() */
#define XFS_MOUNT_FILESTREAMS	(1ULL << 24)	/* enable the filestreams
						   allocator */
#define XFS_MOUNT_NOATTR2	(1ULL << 25)	/* disable use of attr2 format */
#define XFS_MOUNT_DAX_ALWAYS	(1ULL << 26)
#define XFS_MOUNT_DAX_NEVER	(1ULL << 27)
257
/*
 * Lower and upper bounds for the I/O preallocation size that can be
 * requested through a mount option.
 */
#define XFS_MIN_IO_LOG		PAGE_SHIFT
#define XFS_MAX_IO_LOG		30	/* 1G */
264
/* Non-zero when XFS_MOUNT_WAS_CLEAN is set in the m_flags of @mp. */
#define XFS_LAST_UNMOUNT_WAS_CLEAN(mp)	((mp)->m_flags & XFS_MOUNT_WAS_CLEAN)

/* Non-zero when XFS_MOUNT_FS_SHUTDOWN is set in the m_flags of @mp. */
#define XFS_FORCED_SHUTDOWN(mp)	\
	((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
void
xfs_do_force_shutdown(
	struct xfs_mount	*mp,
	int			flags,
	char			*fname,
	int			lnnum);

/*
 * Wrapper around xfs_do_force_shutdown() that passes the caller's __FILE__
 * and __LINE__ as the fname and lnnum arguments.
 */
#define xfs_force_shutdown(mp, flags) \
	xfs_do_force_shutdown(mp, flags, __FILE__, __LINE__)
272
/* Shutdown reason flags. */
#define SHUTDOWN_META_IO_ERROR	0x0001	/* metadata write attempt failed */
#define SHUTDOWN_LOG_IO_ERROR	0x0002	/* log write attempt failed */
#define SHUTDOWN_FORCE_UMOUNT	0x0004	/* shutdown came from a forced unmount */
#define SHUTDOWN_CORRUPT_INCORE	0x0008	/* in-memory data structures corrupt */
277
/* Flag for xfs_mountfs: be silent if mount errors are found. */
#define XFS_MFSI_QUIET		0x40
282
283static inline xfs_agnumber_t
284xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d)
285{
286 xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d);
287 do_div(ld, mp->m_sb.sb_agblocks);
288 return (xfs_agnumber_t) ld;
289}
290
291static inline xfs_agblock_t
292xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
293{
294 xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d);
295 return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
296}
297
298/* per-AG block reservation data structures*/
299struct xfs_ag_resv {
300 /* number of blocks originally reserved here */
301 xfs_extlen_t ar_orig_reserved;
302 /* number of blocks reserved here */
303 xfs_extlen_t ar_reserved;
304 /* number of blocks originally asked for */
305 xfs_extlen_t ar_asked;
306};
307
308/*
309 * Per-ag incore structure, copies of information in agf and agi, to improve the
310 * performance of allocation group selection.
311 */
312typedef struct xfs_perag {
313 struct xfs_mount *pag_mount; /* owner filesystem */
314 xfs_agnumber_t pag_agno; /* AG this structure belongs to */
315 atomic_t pag_ref; /* perag reference count */
316 char pagf_init; /* this agf's entry is initialized */
317 char pagi_init; /* this agi's entry is initialized */
318 char pagf_metadata; /* the agf is preferred to be metadata */
319 char pagi_inodeok; /* The agi is ok for inodes */
320 uint8_t pagf_levels[XFS_BTNUM_AGF];
321 /* # of levels in bno & cnt btree */
322 bool pagf_agflreset; /* agfl requires reset before use */
323 uint32_t pagf_flcount; /* count of blocks in freelist */
324 xfs_extlen_t pagf_freeblks; /* total free blocks */
325 xfs_extlen_t pagf_longest; /* longest free space */
326 uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */
327 xfs_agino_t pagi_freecount; /* number of free inodes */
328 xfs_agino_t pagi_count; /* number of allocated inodes */
329
330 /*
331 * Inode allocation search lookup optimisation.
332 * If the pagino matches, the search for new inodes
333 * doesn't need to search the near ones again straight away
334 */
335 xfs_agino_t pagl_pagino;
336 xfs_agino_t pagl_leftrec;
337 xfs_agino_t pagl_rightrec;
338
339 /*
340 * Bitsets of per-ag metadata that have been checked and/or are sick.
341 * Callers should hold pag_state_lock before accessing this field.
342 */
343 uint16_t pag_checked;
344 uint16_t pag_sick;
345 spinlock_t pag_state_lock;
346
347 spinlock_t pagb_lock; /* lock for pagb_tree */
348 struct rb_root pagb_tree; /* ordered tree of busy extents */
349 unsigned int pagb_gen; /* generation count for pagb_tree */
350 wait_queue_head_t pagb_wait; /* woken when pagb_gen changes */
351
352 atomic_t pagf_fstrms; /* # of filestreams active in this AG */
353
354 spinlock_t pag_ici_lock; /* incore inode cache lock */
355 struct radix_tree_root pag_ici_root; /* incore inode cache root */
356 int pag_ici_reclaimable; /* reclaimable inodes */
357 unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */
358
359 /* buffer cache index */
360 spinlock_t pag_buf_lock; /* lock for pag_buf_hash */
361 struct rhashtable pag_buf_hash;
362
363 /* for rcu-safe freeing */
364 struct rcu_head rcu_head;
365 int pagb_count; /* pagb slots in use */
366
367 /* Blocks reserved for all kinds of metadata. */
368 struct xfs_ag_resv pag_meta_resv;
369 /* Blocks reserved for the reverse mapping btree. */
370 struct xfs_ag_resv pag_rmapbt_resv;
371
372 /* reference count */
373 uint8_t pagf_refcount_level;
374
375 /*
376 * Unlinked inode information. This incore information reflects
377 * data stored in the AGI, so callers must hold the AGI buffer lock
378 * or have some other means to control concurrency.
379 */
380 struct rhashtable pagi_unlinked_hash;
381} xfs_perag_t;
382
383static inline struct xfs_ag_resv *
384xfs_perag_resv(
385 struct xfs_perag *pag,
386 enum xfs_ag_resv_type type)
387{
388 switch (type) {
389 case XFS_AG_RESV_METADATA:
390 return &pag->pag_meta_resv;
391 case XFS_AG_RESV_RMAPBT:
392 return &pag->pag_rmapbt_resv;
393 default:
394 return NULL;
395 }
396}
397
398int xfs_buf_hash_init(xfs_perag_t *pag);
399void xfs_buf_hash_destroy(xfs_perag_t *pag);
400
401extern void xfs_uuid_table_free(void);
402extern int xfs_log_sbcount(xfs_mount_t *);
403extern uint64_t xfs_default_resblks(xfs_mount_t *mp);
404extern int xfs_mountfs(xfs_mount_t *mp);
405extern int xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
406 xfs_agnumber_t *maxagi);
407extern void xfs_unmountfs(xfs_mount_t *);
408
409extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
410 bool reserved);
411extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
412
413extern struct xfs_buf *xfs_getsb(xfs_mount_t *);
414extern int xfs_readsb(xfs_mount_t *, int);
415extern void xfs_freesb(xfs_mount_t *);
416extern bool xfs_fs_writable(struct xfs_mount *mp, int level);
417extern int xfs_sb_validate_fsb_count(struct xfs_sb *, uint64_t);
418
419extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
420
421extern void xfs_set_low_space_thresholds(struct xfs_mount *);
422
423int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb,
424 xfs_off_t count_fsb);
425
426struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp,
427 int error_class, int error);
428void xfs_force_summary_recalc(struct xfs_mount *mp);
429void xfs_mod_delalloc(struct xfs_mount *mp, int64_t delta);
430
431#endif /* __XFS_MOUNT_H__ */