Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.15.
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  4 * All Rights Reserved.
  5 */
  6#include <linux/iversion.h>
  7#include "xfs.h"
  8#include "xfs_fs.h"
  9#include "xfs_shared.h"
 10#include "xfs_format.h"
 11#include "xfs_log_format.h"
 12#include "xfs_trans_resv.h"
 13#include "xfs_sb.h"
 14#include "xfs_mount.h"
 15#include "xfs_inode.h"
 16#include "xfs_inode_util.h"
 17#include "xfs_trans.h"
 18#include "xfs_ialloc.h"
 19#include "xfs_health.h"
 20#include "xfs_bmap.h"
 21#include "xfs_error.h"
 22#include "xfs_trace.h"
 23#include "xfs_ag.h"
 24#include "xfs_iunlink_item.h"
 25#include "xfs_inode_item.h"
 26
 27uint16_t
 28xfs_flags2diflags(
 29	struct xfs_inode	*ip,
 30	unsigned int		xflags)
 31{
 32	/* can't set PREALLOC this way, just preserve it */
 33	uint16_t		di_flags =
 34		(ip->i_diflags & XFS_DIFLAG_PREALLOC);
 35
 36	if (xflags & FS_XFLAG_IMMUTABLE)
 37		di_flags |= XFS_DIFLAG_IMMUTABLE;
 38	if (xflags & FS_XFLAG_APPEND)
 39		di_flags |= XFS_DIFLAG_APPEND;
 40	if (xflags & FS_XFLAG_SYNC)
 41		di_flags |= XFS_DIFLAG_SYNC;
 42	if (xflags & FS_XFLAG_NOATIME)
 43		di_flags |= XFS_DIFLAG_NOATIME;
 44	if (xflags & FS_XFLAG_NODUMP)
 45		di_flags |= XFS_DIFLAG_NODUMP;
 46	if (xflags & FS_XFLAG_NODEFRAG)
 47		di_flags |= XFS_DIFLAG_NODEFRAG;
 48	if (xflags & FS_XFLAG_FILESTREAM)
 49		di_flags |= XFS_DIFLAG_FILESTREAM;
 50	if (S_ISDIR(VFS_I(ip)->i_mode)) {
 51		if (xflags & FS_XFLAG_RTINHERIT)
 52			di_flags |= XFS_DIFLAG_RTINHERIT;
 53		if (xflags & FS_XFLAG_NOSYMLINKS)
 54			di_flags |= XFS_DIFLAG_NOSYMLINKS;
 55		if (xflags & FS_XFLAG_EXTSZINHERIT)
 56			di_flags |= XFS_DIFLAG_EXTSZINHERIT;
 57		if (xflags & FS_XFLAG_PROJINHERIT)
 58			di_flags |= XFS_DIFLAG_PROJINHERIT;
 59	} else if (S_ISREG(VFS_I(ip)->i_mode)) {
 60		if (xflags & FS_XFLAG_REALTIME)
 61			di_flags |= XFS_DIFLAG_REALTIME;
 62		if (xflags & FS_XFLAG_EXTSIZE)
 63			di_flags |= XFS_DIFLAG_EXTSIZE;
 64	}
 65
 66	return di_flags;
 67}
 68
 69uint64_t
 70xfs_flags2diflags2(
 71	struct xfs_inode	*ip,
 72	unsigned int		xflags)
 73{
 74	uint64_t		di_flags2 =
 75		(ip->i_diflags2 & (XFS_DIFLAG2_REFLINK |
 76				   XFS_DIFLAG2_BIGTIME |
 77				   XFS_DIFLAG2_NREXT64));
 78
 79	if (xflags & FS_XFLAG_DAX)
 80		di_flags2 |= XFS_DIFLAG2_DAX;
 81	if (xflags & FS_XFLAG_COWEXTSIZE)
 82		di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
 83
 84	return di_flags2;
 85}
 86
 87uint32_t
 88xfs_ip2xflags(
 89	struct xfs_inode	*ip)
 90{
 91	uint32_t		flags = 0;
 92
 93	if (ip->i_diflags & XFS_DIFLAG_ANY) {
 94		if (ip->i_diflags & XFS_DIFLAG_REALTIME)
 95			flags |= FS_XFLAG_REALTIME;
 96		if (ip->i_diflags & XFS_DIFLAG_PREALLOC)
 97			flags |= FS_XFLAG_PREALLOC;
 98		if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE)
 99			flags |= FS_XFLAG_IMMUTABLE;
100		if (ip->i_diflags & XFS_DIFLAG_APPEND)
101			flags |= FS_XFLAG_APPEND;
102		if (ip->i_diflags & XFS_DIFLAG_SYNC)
103			flags |= FS_XFLAG_SYNC;
104		if (ip->i_diflags & XFS_DIFLAG_NOATIME)
105			flags |= FS_XFLAG_NOATIME;
106		if (ip->i_diflags & XFS_DIFLAG_NODUMP)
107			flags |= FS_XFLAG_NODUMP;
108		if (ip->i_diflags & XFS_DIFLAG_RTINHERIT)
109			flags |= FS_XFLAG_RTINHERIT;
110		if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT)
111			flags |= FS_XFLAG_PROJINHERIT;
112		if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS)
113			flags |= FS_XFLAG_NOSYMLINKS;
114		if (ip->i_diflags & XFS_DIFLAG_EXTSIZE)
115			flags |= FS_XFLAG_EXTSIZE;
116		if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT)
117			flags |= FS_XFLAG_EXTSZINHERIT;
118		if (ip->i_diflags & XFS_DIFLAG_NODEFRAG)
119			flags |= FS_XFLAG_NODEFRAG;
120		if (ip->i_diflags & XFS_DIFLAG_FILESTREAM)
121			flags |= FS_XFLAG_FILESTREAM;
122	}
123
124	if (ip->i_diflags2 & XFS_DIFLAG2_ANY) {
125		if (ip->i_diflags2 & XFS_DIFLAG2_DAX)
126			flags |= FS_XFLAG_DAX;
127		if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
128			flags |= FS_XFLAG_COWEXTSIZE;
129	}
130
131	if (xfs_inode_has_attr_fork(ip))
132		flags |= FS_XFLAG_HASATTR;
133	return flags;
134}
135
136prid_t
137xfs_get_initial_prid(struct xfs_inode *dp)
138{
139	if (dp->i_diflags & XFS_DIFLAG_PROJINHERIT)
140		return dp->i_projid;
141
142	/* Assign to the root project by default. */
143	return 0;
144}
145
146/* Propagate di_flags from a parent inode to a child inode. */
147static inline void
148xfs_inode_inherit_flags(
149	struct xfs_inode	*ip,
150	const struct xfs_inode	*pip)
151{
152	unsigned int		di_flags = 0;
153	xfs_failaddr_t		failaddr;
154	umode_t			mode = VFS_I(ip)->i_mode;
155
156	if (S_ISDIR(mode)) {
157		if (pip->i_diflags & XFS_DIFLAG_RTINHERIT)
158			di_flags |= XFS_DIFLAG_RTINHERIT;
159		if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
160			di_flags |= XFS_DIFLAG_EXTSZINHERIT;
161			ip->i_extsize = pip->i_extsize;
162		}
163		if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT)
164			di_flags |= XFS_DIFLAG_PROJINHERIT;
165	} else if (S_ISREG(mode)) {
166		if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
167		    xfs_has_realtime(ip->i_mount))
168			di_flags |= XFS_DIFLAG_REALTIME;
169		if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
170			di_flags |= XFS_DIFLAG_EXTSIZE;
171			ip->i_extsize = pip->i_extsize;
172		}
173	}
174	if ((pip->i_diflags & XFS_DIFLAG_NOATIME) &&
175	    xfs_inherit_noatime)
176		di_flags |= XFS_DIFLAG_NOATIME;
177	if ((pip->i_diflags & XFS_DIFLAG_NODUMP) &&
178	    xfs_inherit_nodump)
179		di_flags |= XFS_DIFLAG_NODUMP;
180	if ((pip->i_diflags & XFS_DIFLAG_SYNC) &&
181	    xfs_inherit_sync)
182		di_flags |= XFS_DIFLAG_SYNC;
183	if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) &&
184	    xfs_inherit_nosymlinks)
185		di_flags |= XFS_DIFLAG_NOSYMLINKS;
186	if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) &&
187	    xfs_inherit_nodefrag)
188		di_flags |= XFS_DIFLAG_NODEFRAG;
189	if (pip->i_diflags & XFS_DIFLAG_FILESTREAM)
190		di_flags |= XFS_DIFLAG_FILESTREAM;
191
192	ip->i_diflags |= di_flags;
193
194	/*
195	 * Inode verifiers on older kernels only check that the extent size
196	 * hint is an integer multiple of the rt extent size on realtime files.
197	 * They did not check the hint alignment on a directory with both
198	 * rtinherit and extszinherit flags set.  If the misaligned hint is
199	 * propagated from a directory into a new realtime file, new file
200	 * allocations will fail due to math errors in the rt allocator and/or
201	 * trip the verifiers.  Validate the hint settings in the new file so
202	 * that we don't let broken hints propagate.
203	 */
204	failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize,
205			VFS_I(ip)->i_mode, ip->i_diflags);
206	if (failaddr) {
207		ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
208				   XFS_DIFLAG_EXTSZINHERIT);
209		ip->i_extsize = 0;
210	}
211}
212
213/* Propagate di_flags2 from a parent inode to a child inode. */
214static inline void
215xfs_inode_inherit_flags2(
216	struct xfs_inode	*ip,
217	const struct xfs_inode	*pip)
218{
219	xfs_failaddr_t		failaddr;
220
221	if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) {
222		ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE;
223		ip->i_cowextsize = pip->i_cowextsize;
224	}
225	if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
226		ip->i_diflags2 |= XFS_DIFLAG2_DAX;
227	if (xfs_is_metadir_inode(pip))
228		ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
229
230	/* Don't let invalid cowextsize hints propagate. */
231	failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
232			VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2);
233	if (failaddr) {
234		ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
235		ip->i_cowextsize = 0;
236	}
237}
238
239/*
240 * If we need to create attributes immediately after allocating the inode,
241 * initialise an empty attribute fork right now. We use the default fork offset
242 * for attributes here as we don't know exactly what size or how many
243 * attributes we might be adding. We can do this safely here because we know
244 * the data fork is completely empty and this saves us from needing to run a
245 * separate transaction to set the fork offset in the immediate future.
246 *
247 * If we have parent pointers and the caller hasn't told us that the file will
248 * never be linked into a directory tree, we /must/ create the attr fork.
249 */
250static inline bool
251xfs_icreate_want_attrfork(
252	struct xfs_mount		*mp,
253	const struct xfs_icreate_args	*args)
254{
255	if (args->flags & XFS_ICREATE_INIT_XATTRS)
256		return true;
257
258	if (!(args->flags & XFS_ICREATE_UNLINKABLE) && xfs_has_parent(mp))
259		return true;
260
261	return false;
262}
263
264/* Initialise an inode's attributes. */
265void
266xfs_inode_init(
267	struct xfs_trans	*tp,
268	const struct xfs_icreate_args *args,
269	struct xfs_inode	*ip)
270{
271	struct xfs_inode	*pip = args->pip;
272	struct inode		*dir = pip ? VFS_I(pip) : NULL;
273	struct xfs_mount	*mp = tp->t_mountp;
274	struct inode		*inode = VFS_I(ip);
275	unsigned int		flags;
276	int			times = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG |
277					XFS_ICHGTIME_ACCESS;
278
279	if (args->flags & XFS_ICREATE_TMPFILE)
280		set_nlink(inode, 0);
281	else if (S_ISDIR(args->mode))
282		set_nlink(inode, 2);
283	else
284		set_nlink(inode, 1);
285	inode->i_rdev = args->rdev;
286
287	if (!args->idmap || pip == NULL) {
288		/* creating a tree root, sb rooted, or detached file */
289		inode->i_uid = GLOBAL_ROOT_UID;
290		inode->i_gid = GLOBAL_ROOT_GID;
291		ip->i_projid = 0;
292		inode->i_mode = args->mode;
293	} else {
294		/* creating a child in the directory tree */
295		if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) {
296			inode_fsuid_set(inode, args->idmap);
297			inode->i_gid = dir->i_gid;
298			inode->i_mode = args->mode;
299		} else {
300			inode_init_owner(args->idmap, inode, dir, args->mode);
301		}
302
303		/*
304		 * If the group ID of the new file does not match the effective
305		 * group ID or one of the supplementary group IDs, the S_ISGID
306		 * bit is cleared (and only if the irix_sgid_inherit
307		 * compatibility variable is set).
308		 */
309		if (irix_sgid_inherit && (inode->i_mode & S_ISGID) &&
310		    !vfsgid_in_group_p(i_gid_into_vfsgid(args->idmap, inode)))
311			inode->i_mode &= ~S_ISGID;
312
313		ip->i_projid = xfs_get_initial_prid(pip);
314	}
315
316	ip->i_disk_size = 0;
317	ip->i_df.if_nextents = 0;
318	ASSERT(ip->i_nblocks == 0);
319
320	ip->i_extsize = 0;
321	ip->i_diflags = 0;
322
323	if (xfs_has_v3inodes(mp)) {
324		inode_set_iversion(inode, 1);
325		ip->i_cowextsize = 0;
326		times |= XFS_ICHGTIME_CREATE;
327	}
328
329	xfs_trans_ichgtime(tp, ip, times);
330
331	flags = XFS_ILOG_CORE;
332	switch (args->mode & S_IFMT) {
333	case S_IFIFO:
334	case S_IFCHR:
335	case S_IFBLK:
336	case S_IFSOCK:
337		ip->i_df.if_format = XFS_DINODE_FMT_DEV;
338		flags |= XFS_ILOG_DEV;
339		break;
340	case S_IFREG:
341	case S_IFDIR:
342		if (pip && (pip->i_diflags & XFS_DIFLAG_ANY))
343			xfs_inode_inherit_flags(ip, pip);
344		if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY))
345			xfs_inode_inherit_flags2(ip, pip);
346		fallthrough;
347	case S_IFLNK:
348		ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
349		ip->i_df.if_bytes = 0;
350		ip->i_df.if_data = NULL;
351		break;
352	default:
353		ASSERT(0);
354	}
355
356	if (xfs_icreate_want_attrfork(mp, args)) {
357		ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
358		xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
359
360		if (!xfs_has_attr(mp)) {
361			spin_lock(&mp->m_sb_lock);
362			xfs_add_attr(mp);
363			spin_unlock(&mp->m_sb_lock);
364			xfs_log_sb(tp);
365		}
366	}
367
368	xfs_trans_log_inode(tp, ip, flags);
369}
370
/*
 * In-Core Unlinked List Lookups
 * =============================
 *
 * Every inode is supposed to be reachable from some other piece of metadata
 * with the exception of the root directory.  Inodes with a connection to a
 * file descriptor but not linked from anywhere in the on-disk directory tree
 * are collectively known as unlinked inodes, though the filesystem itself
 * maintains links to these inodes so that on-disk metadata are consistent.
 *
 * XFS implements a per-AG on-disk hash table of unlinked inodes.  The AGI
 * header contains a number of buckets that point to an inode, and each inode
 * record has a pointer to the next inode in the hash chain.  This
 * singly-linked list causes scaling problems in the iunlink remove function
 * because we must walk that list to find the inode that points to the inode
 * being removed from the unlinked hash bucket list.
 *
 * Hence we keep an in-memory doubly linked list to link each inode on an
 * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer
 * based lists would require having 64 list heads in the perag, one for each
 * list. This is expensive in terms of memory (think millions of AGs) and cache
 * misses on lookups. Instead, use the fact that inodes on the unlinked list
 * must be referenced at the VFS level to keep them on the list and hence we
 * have an existence guarantee for inodes on the unlinked list.
 *
 * Given we have an existence guarantee, we can use lockless inode cache lookups
 * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode
 * for the doubly linked unlinked list, and we don't need any extra locking to
 * keep the list safe as all manipulations are done under the AGI buffer lock.
 * Keeping the list up to date does not require memory allocation, just finding
 * the XFS inode and updating the next/prev unlinked list aginos.
 */
403
404/*
405 * Update the prev pointer of the next agino.  Returns -ENOLINK if the inode
406 * is not in cache.
407 */
408static int
409xfs_iunlink_update_backref(
410	struct xfs_perag	*pag,
411	xfs_agino_t		prev_agino,
412	xfs_agino_t		next_agino)
413{
414	struct xfs_inode	*ip;
415
416	/* No update necessary if we are at the end of the list. */
417	if (next_agino == NULLAGINO)
418		return 0;
419
420	ip = xfs_iunlink_lookup(pag, next_agino);
421	if (!ip)
422		return -ENOLINK;
423
424	ip->i_prev_unlinked = prev_agino;
425	return 0;
426}
427
428/*
429 * Point the AGI unlinked bucket at an inode and log the results.  The caller
430 * is responsible for validating the old value.
431 */
432STATIC int
433xfs_iunlink_update_bucket(
434	struct xfs_trans	*tp,
435	struct xfs_perag	*pag,
436	struct xfs_buf		*agibp,
437	unsigned int		bucket_index,
438	xfs_agino_t		new_agino)
439{
440	struct xfs_agi		*agi = agibp->b_addr;
441	xfs_agino_t		old_value;
442	int			offset;
443
444	ASSERT(xfs_verify_agino_or_null(pag, new_agino));
445
446	old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
447	trace_xfs_iunlink_update_bucket(pag, bucket_index, old_value,
448			new_agino);
449
450	/*
451	 * We should never find the head of the list already set to the value
452	 * passed in because either we're adding or removing ourselves from the
453	 * head of the list.
454	 */
455	if (old_value == new_agino) {
456		xfs_buf_mark_corrupt(agibp);
457		xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
458		return -EFSCORRUPTED;
459	}
460
461	agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino);
462	offset = offsetof(struct xfs_agi, agi_unlinked) +
463			(sizeof(xfs_agino_t) * bucket_index);
464	xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1);
465	return 0;
466}
467
468static int
469xfs_iunlink_insert_inode(
470	struct xfs_trans	*tp,
471	struct xfs_perag	*pag,
472	struct xfs_buf		*agibp,
473	struct xfs_inode	*ip)
474{
475	struct xfs_mount	*mp = tp->t_mountp;
476	struct xfs_agi		*agi = agibp->b_addr;
477	xfs_agino_t		next_agino;
478	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
479	short			bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
480	int			error;
481
482	/*
483	 * Get the index into the agi hash table for the list this inode will
484	 * go on.  Make sure the pointer isn't garbage and that this inode
485	 * isn't already on the list.
486	 */
487	next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
488	if (next_agino == agino ||
489	    !xfs_verify_agino_or_null(pag, next_agino)) {
490		xfs_buf_mark_corrupt(agibp);
491		xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
492		return -EFSCORRUPTED;
493	}
494
495	/*
496	 * Update the prev pointer in the next inode to point back to this
497	 * inode.
498	 */
499	error = xfs_iunlink_update_backref(pag, agino, next_agino);
500	if (error == -ENOLINK)
501		error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino);
502	if (error)
503		return error;
504
505	if (next_agino != NULLAGINO) {
506		/*
507		 * There is already another inode in the bucket, so point this
508		 * inode to the current head of the list.
509		 */
510		error = xfs_iunlink_log_inode(tp, ip, pag, next_agino);
511		if (error)
512			return error;
513		ip->i_next_unlinked = next_agino;
514	}
515
516	/* Point the head of the list to point to this inode. */
517	ip->i_prev_unlinked = NULLAGINO;
518	return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);
519}
520
521/*
522 * This is called when the inode's link count has gone to 0 or we are creating
523 * a tmpfile via O_TMPFILE.  The inode @ip must have nlink == 0.
524 *
525 * We place the on-disk inode on a list in the AGI.  It will be pulled from this
526 * list when the inode is freed.
527 */
528int
529xfs_iunlink(
530	struct xfs_trans	*tp,
531	struct xfs_inode	*ip)
532{
533	struct xfs_mount	*mp = tp->t_mountp;
534	struct xfs_perag	*pag;
535	struct xfs_buf		*agibp;
536	int			error;
537
538	ASSERT(VFS_I(ip)->i_nlink == 0);
539	ASSERT(VFS_I(ip)->i_mode != 0);
540	trace_xfs_iunlink(ip);
541
542	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
543
544	/* Get the agi buffer first.  It ensures lock ordering on the list. */
545	error = xfs_read_agi(pag, tp, 0, &agibp);
546	if (error)
547		goto out;
548
549	error = xfs_iunlink_insert_inode(tp, pag, agibp, ip);
550out:
551	xfs_perag_put(pag);
552	return error;
553}
554
555static int
556xfs_iunlink_remove_inode(
557	struct xfs_trans	*tp,
558	struct xfs_perag	*pag,
559	struct xfs_buf		*agibp,
560	struct xfs_inode	*ip)
561{
562	struct xfs_mount	*mp = tp->t_mountp;
563	struct xfs_agi		*agi = agibp->b_addr;
564	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
565	xfs_agino_t		head_agino;
566	short			bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
567	int			error;
568
569	trace_xfs_iunlink_remove(ip);
570
571	/*
572	 * Get the index into the agi hash table for the list this inode will
573	 * go on.  Make sure the head pointer isn't garbage.
574	 */
575	head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
576	if (!xfs_verify_agino(pag, head_agino)) {
577		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
578				agi, sizeof(*agi));
579		xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
580		return -EFSCORRUPTED;
581	}
582
583	/*
584	 * Set our inode's next_unlinked pointer to NULL and then return
585	 * the old pointer value so that we can update whatever was previous
586	 * to us in the list to point to whatever was next in the list.
587	 */
588	error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO);
589	if (error)
590		return error;
591
592	/*
593	 * Update the prev pointer in the next inode to point back to previous
594	 * inode in the chain.
595	 */
596	error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked,
597			ip->i_next_unlinked);
598	if (error == -ENOLINK)
599		error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked,
600				ip->i_next_unlinked);
601	if (error)
602		return error;
603
604	if (head_agino != agino) {
605		struct xfs_inode	*prev_ip;
606
607		prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked);
608		if (!prev_ip) {
609			xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
610			return -EFSCORRUPTED;
611		}
612
613		error = xfs_iunlink_log_inode(tp, prev_ip, pag,
614				ip->i_next_unlinked);
615		prev_ip->i_next_unlinked = ip->i_next_unlinked;
616	} else {
617		/* Point the head of the list to the next unlinked inode. */
618		error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index,
619				ip->i_next_unlinked);
620	}
621
622	ip->i_next_unlinked = NULLAGINO;
623	ip->i_prev_unlinked = 0;
624	return error;
625}
626
/*
 * Remove the on-disk inode @ip from the AGI unlinked list of @pag.
 *
 * Returns 0 on success or a negative errno from reading the AGI or from the
 * list removal itself.
 */
int
xfs_iunlink_remove(
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	struct xfs_inode	*ip)
{
	struct xfs_buf		*agibp;
	int			error;

	trace_xfs_iunlink_remove(ip);

	/* Get the agi buffer first.  It ensures lock ordering on the list. */
	error = xfs_read_agi(pag, tp, 0, &agibp);
	if (!error)
		error = xfs_iunlink_remove_inode(tp, pag, agibp, ip);

	return error;
}
648
649/*
650 * Decrement the link count on an inode & log the change.  If this causes the
651 * link count to go to zero, move the inode to AGI unlinked list so that it can
652 * be freed when the last active reference goes away via xfs_inactive().
653 */
654int
655xfs_droplink(
656	struct xfs_trans	*tp,
657	struct xfs_inode	*ip)
658{
659	struct inode		*inode = VFS_I(ip);
660
661	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
662
663	if (inode->i_nlink == 0) {
664		xfs_info_ratelimited(tp->t_mountp,
665 "Inode 0x%llx link count dropped below zero.  Pinning link count.",
666				ip->i_ino);
667		set_nlink(inode, XFS_NLINK_PINNED);
668	}
669	if (inode->i_nlink != XFS_NLINK_PINNED)
670		drop_nlink(inode);
671
672	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
673
674	if (inode->i_nlink)
675		return 0;
676
677	return xfs_iunlink(tp, ip);
678}
679
680/*
681 * Increment the link count on an inode & log the change.
682 */
683void
684xfs_bumplink(
685	struct xfs_trans	*tp,
686	struct xfs_inode	*ip)
687{
688	struct inode		*inode = VFS_I(ip);
689
690	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
691
692	if (inode->i_nlink == XFS_NLINK_PINNED - 1)
693		xfs_info_ratelimited(tp->t_mountp,
694 "Inode 0x%llx link count exceeded maximum.  Pinning link count.",
695				ip->i_ino);
696	if (inode->i_nlink != XFS_NLINK_PINNED)
697		inc_nlink(inode);
698
699	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
700}
701
702/* Free an inode in the ondisk index and zero it out. */
703int
704xfs_inode_uninit(
705	struct xfs_trans	*tp,
706	struct xfs_perag	*pag,
707	struct xfs_inode	*ip,
708	struct xfs_icluster	*xic)
709{
710	struct xfs_mount	*mp = ip->i_mount;
711	int			error;
712
713	/*
714	 * Free the inode first so that we guarantee that the AGI lock is going
715	 * to be taken before we remove the inode from the unlinked list. This
716	 * makes the AGI lock -> unlinked list modification order the same as
717	 * used in O_TMPFILE creation.
718	 */
719	error = xfs_difree(tp, pag, ip->i_ino, xic);
720	if (error)
721		return error;
722
723	error = xfs_iunlink_remove(tp, pag, ip);
724	if (error)
725		return error;
726
727	/*
728	 * Free any local-format data sitting around before we reset the
729	 * data fork to extents format.  Note that the attr fork data has
730	 * already been freed by xfs_attr_inactive.
731	 */
732	if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
733		kfree(ip->i_df.if_data);
734		ip->i_df.if_data = NULL;
735		ip->i_df.if_bytes = 0;
736	}
737
738	VFS_I(ip)->i_mode = 0;		/* mark incore inode as free */
739	ip->i_diflags = 0;
740	ip->i_diflags2 = mp->m_ino_geo.new_diflags2;
741	ip->i_forkoff = 0;		/* mark the attr fork not in use */
742	ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
743
744	/*
745	 * Bump the generation count so no one will be confused
746	 * by reincarnations of this inode.
747	 */
748	VFS_I(ip)->i_generation++;
749	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
750	return 0;
751}