/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_acl.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_utils.h"
#include "xfs_trans_priv.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_trace.h"


/*
 * Define xfs inode iolock lockdep classes. We need to ensure that all active
 * inodes are considered the same for lockdep purposes, including inodes that
 * are recycled through the XFS_IRECLAIMABLE state. This is the only way to
 * guarantee the locks are considered the same when there are multiple lock
 * initialisation sites. Also, define a reclaimable inode class so it is
 * obvious in lockdep reports which class the report is against.
 */
static struct lock_class_key xfs_iolock_active;
struct lock_class_key xfs_iolock_reclaimable;

/*
 * Allocate and initialise an xfs_inode.
 */
STATIC struct xfs_inode *
xfs_inode_alloc(
	struct xfs_mount	*mp,
	xfs_ino_t		ino)
{
	struct xfs_inode	*ip;

	/*
	 * if this didn't occur in transactions, we could use
	 * KM_MAYFAIL and return NULL here on ENOMEM. Set the
	 * code up to do this anyway.
	 */
	ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
	if (!ip)
		return NULL;
	if (inode_init_always(mp->m_super, VFS_I(ip))) {
		kmem_zone_free(xfs_inode_zone, ip);
		return NULL;
	}

	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!spin_is_locked(&ip->i_flags_lock));
	ASSERT(!xfs_isiflocked(ip));
	ASSERT(ip->i_ino == 0);

	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
	lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
			&xfs_iolock_active, "xfs_iolock_active");

	/* initialise the xfs inode */
	ip->i_ino = ino;
	ip->i_mount = mp;
	memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
	ip->i_afp = NULL;
	memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
	ip->i_flags = 0;
	ip->i_delayed_blks = 0;
	memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));

	return ip;
}

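/*
 * RCU callback that performs the final freeing of an inode torn down by
 * xfs_inode_free(), once the RCU grace period has expired.
 */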
STATIC void
xfs_inode_free_callback(
	struct rcu_head		*head)
{
	struct inode		*inode = container_of(head, struct inode, i_rcu);
	struct xfs_inode	*ip = XFS_I(inode);

	kmem_zone_free(xfs_inode_zone, ip);
}

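/*
 * Free an inode that is no longer referenced: destroy its forks and log
 * item, mark it reclaimed with an invalid (zero) inode number, and hand the
 * actual freeing of the memory to an RCU callback so that racing RCU
 * lookups remain safe.
 */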
void
xfs_inode_free(
	struct xfs_inode	*ip)
{
	switch (ip->i_d.di_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		xfs_idestroy_fork(ip, XFS_DATA_FORK);
		break;
	}

	if (ip->i_afp)
		xfs_idestroy_fork(ip, XFS_ATTR_FORK);

	if (ip->i_itemp) {
		ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL));
		xfs_inode_item_destroy(ip);
		ip->i_itemp = NULL;
	}

	/* asserts to verify all state is correct here */
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!spin_is_locked(&ip->i_flags_lock));
	ASSERT(!xfs_isiflocked(ip));

	/*
	 * Because we use RCU freeing we need to ensure the inode always
	 * appears to be reclaimed with an invalid inode number when in the
	 * free state. The ip->i_flags_lock provides the barrier against lookup
	 * races.
	 */
	spin_lock(&ip->i_flags_lock);
	ip->i_flags = XFS_IRECLAIM;
	ip->i_ino = 0;
	spin_unlock(&ip->i_flags_lock);

	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
}

/*
 * Check the validity of the inode we just found in the cache
 */
static int
xfs_iget_cache_hit(
	struct xfs_perag	*pag,
	struct xfs_inode	*ip,
	xfs_ino_t		ino,
	int			flags,
	int			lock_flags) __releases(RCU)
{
	struct inode		*inode = VFS_I(ip);
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	/*
	 * check for re-use of an inode within an RCU grace period due to the
	 * radix tree nodes not being updated yet. We monitor for this by
	 * setting the inode number to zero before freeing the inode structure.
	 * If the inode has been reallocated and set up, then the inode number
	 * will not match, so check for that, too.
	 */
	spin_lock(&ip->i_flags_lock);
	if (ip->i_ino != ino) {
		trace_xfs_iget_skip(ip);
		XFS_STATS_INC(xs_ig_frecycle);
		error = EAGAIN;
		goto out_error;
	}


	/*
	 * If we are racing with another cache hit that is currently
	 * instantiating this inode or currently recycling it out of
	 * reclaimable state, wait for the initialisation to complete
	 * before continuing.
	 *
	 * XXX(hch): eventually we should do something equivalent to
	 *	     wait_on_inode to wait for these flags to be cleared
	 *	     instead of polling for it.
	 */
	if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
		trace_xfs_iget_skip(ip);
		XFS_STATS_INC(xs_ig_frecycle);
		error = EAGAIN;
		goto out_error;
	}

	/*
	 * If lookup is racing with unlink return an error immediately.
	 */
	if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
		error = ENOENT;
		goto out_error;
	}

	/*
	 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
	 * Need to carefully get it back into usable state.
	 */
	if (ip->i_flags & XFS_IRECLAIMABLE) {
		trace_xfs_iget_reclaim(ip);

		/*
		 * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
		 * from stomping over us while we recycle the inode.  We can't
		 * clear the radix tree reclaimable tag yet as it requires
		 * pag_ici_lock to be held exclusive.
		 */
		ip->i_flags |= XFS_IRECLAIM;

		spin_unlock(&ip->i_flags_lock);
		rcu_read_unlock();

		error = -inode_init_always(mp->m_super, inode);
		if (error) {
			/*
			 * Re-initializing the inode failed, and we are in deep
			 * trouble.  Try to re-add it to the reclaim list.
			 */
			rcu_read_lock();
			spin_lock(&ip->i_flags_lock);

			ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
			ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
			trace_xfs_iget_reclaim_fail(ip);
			goto out_error;
		}

		spin_lock(&pag->pag_ici_lock);
		spin_lock(&ip->i_flags_lock);

		/*
		 * Clear the per-lifetime state in the inode as we are now
		 * effectively a new inode and need to return to the initial
		 * state before reuse occurs.
		 */
		ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
		ip->i_flags |= XFS_INEW;
		__xfs_inode_clear_reclaim_tag(mp, pag, ip);
		inode->i_state = I_NEW;

		ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
		mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
		lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
				&xfs_iolock_active, "xfs_iolock_active");

		spin_unlock(&ip->i_flags_lock);
		spin_unlock(&pag->pag_ici_lock);
	} else {
		/* If the VFS inode is being torn down, pause and try again. */
		if (!igrab(inode)) {
			trace_xfs_iget_skip(ip);
			error = EAGAIN;
			goto out_error;
		}

		/* We've got a live one. */
		spin_unlock(&ip->i_flags_lock);
		rcu_read_unlock();
		trace_xfs_iget_hit(ip);
	}

	if (lock_flags != 0)
		xfs_ilock(ip, lock_flags);

	xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
	XFS_STATS_INC(xs_ig_found);

	return 0;

out_error:
	spin_unlock(&ip->i_flags_lock);
	rcu_read_unlock();
	return error;
}


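/*
 * Handle a cache miss: allocate a new xfs_inode, read it in from disk, and
 * insert it into the per-AG radix tree with XFS_INEW set so that concurrent
 * RCU lookups see it as still under construction.
 */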
static int
xfs_iget_cache_miss(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	xfs_trans_t		*tp,
	xfs_ino_t		ino,
	struct xfs_inode	**ipp,
	int			flags,
	int			lock_flags)
{
	struct xfs_inode	*ip;
	int			error;
	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ino);
	int			iflags;

	ip = xfs_inode_alloc(mp, ino);
	if (!ip)
		return ENOMEM;

	error = xfs_iread(mp, tp, ip, flags);
	if (error)
		goto out_destroy;

	trace_xfs_iget_miss(ip);

	if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
		error = ENOENT;
		goto out_destroy;
	}

	/*
	 * Preload the radix tree so we can insert safely under the
	 * write spinlock. Note that we cannot sleep inside the preload
	 * region. Since we can be called from transaction context, don't
	 * recurse into the file system.
	 */
	if (radix_tree_preload(GFP_NOFS)) {
		error = EAGAIN;
		goto out_destroy;
	}

	/*
	 * Because the inode hasn't been added to the radix-tree yet it can't
	 * be found by another thread, so we can do the non-sleeping lock here.
	 */
	if (lock_flags) {
		if (!xfs_ilock_nowait(ip, lock_flags))
			BUG();
	}

	/*
	 * These values must be set before inserting the inode into the radix
	 * tree as the moment it is inserted a concurrent lookup (allowed by the
	 * RCU locking mechanism) can find it and that lookup must see that this
	 * is an inode currently under construction (i.e. that XFS_INEW is set).
	 * The ip->i_flags_lock that protects the XFS_INEW flag forms the
	 * memory barrier that ensures this detection works correctly at lookup
	 * time.
	 */
	iflags = XFS_INEW;
	if (flags & XFS_IGET_DONTCACHE)
		iflags |= XFS_IDONTCACHE;
	ip->i_udquot = ip->i_gdquot = NULL;
	xfs_iflags_set(ip, iflags);

	/* insert the new inode */
	spin_lock(&pag->pag_ici_lock);
	error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
	if (unlikely(error)) {
		WARN_ON(error != -EEXIST);
		XFS_STATS_INC(xs_ig_dup);
		error = EAGAIN;
		goto out_preload_end;
	}
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();

	*ipp = ip;
	return 0;

out_preload_end:
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();
	if (lock_flags)
		xfs_iunlock(ip, lock_flags);
out_destroy:
	__destroy_inode(VFS_I(ip));
	xfs_inode_free(ip);
	return error;
}

/*
 * Look up an inode by number in the given file system.
 * The inode is looked up in the cache held in each AG.
 * If the inode is found in the cache, initialise the vfs inode
 * if necessary.
 *
 * If it is not in core, read it in from the file system's device,
 * add it to the cache and initialise the vfs inode.
 *
 * The inode is locked according to the value of the lock_flags parameter.
 * This flag parameter indicates how and if the inode's IO lock and inode lock
 * should be taken.
 *
 * mp -- the mount point structure for the current file system.  It points
 *       to the per-AG inode caches.
 * tp -- a pointer to the current transaction if there is one.  This is
 *       simply passed through to the xfs_iread() call.
 * ino -- the number of the inode desired.  This is the unique identifier
 *        within the file system for the inode being requested.
 * lock_flags -- flags indicating how to lock the inode.  See the comment
 *		 for xfs_ilock() for a list of valid values.
 */
int
xfs_iget(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_ino_t	ino,
	uint		flags,
	uint		lock_flags,
	xfs_inode_t	**ipp)
{
	xfs_inode_t	*ip;
	int		error;
	xfs_perag_t	*pag;
	xfs_agino_t	agino;

	/*
	 * xfs_reclaim_inode() uses the ILOCK to ensure an inode
	 * doesn't get freed while it's being referenced during a
	 * radix tree traversal here.  It assumes this function
	 * acquires only the ILOCK (and therefore it has no need to
	 * involve the IOLOCK in this synchronization).
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0);

	/* reject inode numbers outside existing AGs */
	if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
		return EINVAL;

	/* get the perag structure and ensure that it's inode capable */
	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
	agino = XFS_INO_TO_AGINO(mp, ino);

again:
	error = 0;
	rcu_read_lock();
	ip = radix_tree_lookup(&pag->pag_ici_root, agino);

	if (ip) {
		error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
		if (error)
			goto out_error_or_again;
	} else {
		rcu_read_unlock();
		XFS_STATS_INC(xs_ig_missed);

		error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
							flags, lock_flags);
		if (error)
			goto out_error_or_again;
	}
	xfs_perag_put(pag);

	*ipp = ip;

	/*
	 * If we have a real type for an on-disk inode, we can set ops(&unlock)
	 * now.	 If it's a new inode being created, xfs_ialloc will handle it.
	 */
	if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
		xfs_setup_inode(ip);
	return 0;

out_error_or_again:
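	/*
	 * EAGAIN from the cache hit/miss paths means we raced with inode
	 * reclaim or a concurrent lookup; back off briefly and retry.
	 */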
	if (error == EAGAIN) {
		delay(1);
		goto again;
	}
	xfs_perag_put(pag);
	return error;
}

/*
 * This is a wrapper routine around the xfs_ilock() routine
 * used to centralize some grungy code.  It is used in places
 * that wish to lock the inode solely for reading the extents.
 * The reason these places can't just call xfs_ilock(SHARED)
 * is that the inode lock also guards the bringing in of the
 * extents from disk for a file in b-tree format.  If the inode
 * is in b-tree format, then we need to lock the inode exclusively
 * until the extents are read in.  Locking it exclusively all
 * the time would limit our parallelism unnecessarily, though.
 * What we do instead is check to see if the extents have been
 * read in yet, and only lock the inode exclusively if they
 * have not.
 *
 * The function returns a value which should be given to the
 * corresponding xfs_iunlock_map_shared().  This value is
 * the mode in which the lock was actually taken.
 */
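/*
 * Typical usage (illustrative): take the lock in whichever mode is needed,
 * read the extent list, then drop the lock with the returned mode:
 *
 *	lock_mode = xfs_ilock_map_shared(ip);
 *	... read the extent list ...
 *	xfs_iunlock_map_shared(ip, lock_mode);
 */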
uint
xfs_ilock_map_shared(
	xfs_inode_t	*ip)
{
	uint	lock_mode;

	if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) &&
	    ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) {
		lock_mode = XFS_ILOCK_EXCL;
	} else {
		lock_mode = XFS_ILOCK_SHARED;
	}

	xfs_ilock(ip, lock_mode);

	return lock_mode;
}

/*
 * This is simply the unlock routine to go with xfs_ilock_map_shared().
 * All it does is call xfs_iunlock() with the given lock_mode.
 */
void
xfs_iunlock_map_shared(
	xfs_inode_t	*ip,
	unsigned int	lock_mode)
{
	xfs_iunlock(ip, lock_mode);
}

/*
 * The xfs inode contains 2 locks: a multi-reader lock called the
 * i_iolock and a multi-reader lock called the i_lock.  This routine
 * allows either or both of the locks to be obtained.
 *
 * The 2 locks should always be ordered so that the IO lock is
 * obtained first in order to prevent deadlock.
 *
 * ip -- the inode being locked
 * lock_flags -- this parameter indicates the inode's locks
 *       to be locked.  It can be:
 *		XFS_IOLOCK_SHARED,
 *		XFS_IOLOCK_EXCL,
 *		XFS_ILOCK_SHARED,
 *		XFS_ILOCK_EXCL,
 *		XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
 *		XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
 *		XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
 *		XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
 */
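/*
 * Example (illustrative): take both locks in the required order with a
 * single call, then release them together:
 *
 *	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 *	... modify the inode ...
 *	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 */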
void
xfs_ilock(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);

	if (lock_flags & XFS_IOLOCK_EXCL)
		mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
	else if (lock_flags & XFS_IOLOCK_SHARED)
		mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));

	if (lock_flags & XFS_ILOCK_EXCL)
		mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
	else if (lock_flags & XFS_ILOCK_SHARED)
		mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));

	trace_xfs_ilock(ip, lock_flags, _RET_IP_);
}

/*
 * This is just like xfs_ilock(), except that the caller
 * is guaranteed not to sleep.  It returns 1 if it gets
 * the requested locks and 0 otherwise.  If the IO lock is
 * obtained but the inode lock cannot be, then the IO lock
 * is dropped before returning.
 *
 * ip -- the inode being locked
 * lock_flags -- this parameter indicates the inode's locks to be
 *       locked.  See the comment for xfs_ilock() for a list
 *	 of valid values.
 */
int
xfs_ilock_nowait(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);

	if (lock_flags & XFS_IOLOCK_EXCL) {
		if (!mrtryupdate(&ip->i_iolock))
			goto out;
	} else if (lock_flags & XFS_IOLOCK_SHARED) {
		if (!mrtryaccess(&ip->i_iolock))
			goto out;
	}
	if (lock_flags & XFS_ILOCK_EXCL) {
		if (!mrtryupdate(&ip->i_lock))
			goto out_undo_iolock;
	} else if (lock_flags & XFS_ILOCK_SHARED) {
		if (!mrtryaccess(&ip->i_lock))
			goto out_undo_iolock;
	}
	trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
	return 1;

 out_undo_iolock:
	if (lock_flags & XFS_IOLOCK_EXCL)
		mrunlock_excl(&ip->i_iolock);
	else if (lock_flags & XFS_IOLOCK_SHARED)
		mrunlock_shared(&ip->i_iolock);
 out:
	return 0;
}

/*
 * xfs_iunlock() is used to drop the inode locks acquired with
 * xfs_ilock() and xfs_ilock_nowait().  The caller must pass
 * in the flags given to xfs_ilock() or xfs_ilock_nowait() so
 * that we know which locks to drop.
 *
 * ip -- the inode being unlocked
 * lock_flags -- this parameter indicates the inode's locks to be
 *       unlocked.  See the comment for xfs_ilock() for a list
 *	 of valid values for this parameter.
 *
 */
void
xfs_iunlock(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
	ASSERT(lock_flags != 0);

	if (lock_flags & XFS_IOLOCK_EXCL)
		mrunlock_excl(&ip->i_iolock);
	else if (lock_flags & XFS_IOLOCK_SHARED)
		mrunlock_shared(&ip->i_iolock);

	if (lock_flags & XFS_ILOCK_EXCL)
		mrunlock_excl(&ip->i_lock);
	else if (lock_flags & XFS_ILOCK_SHARED)
		mrunlock_shared(&ip->i_lock);

	trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
}

/*
 * give up write locks.  the i/o lock cannot be held nested
 * if it is being demoted.
 */
void
xfs_ilock_demote(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);

	if (lock_flags & XFS_ILOCK_EXCL)
		mrdemote(&ip->i_lock);
	if (lock_flags & XFS_IOLOCK_EXCL)
		mrdemote(&ip->i_iolock);

	trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
}

#ifdef DEBUG
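/*
 * Debug-only helper that reports whether the inode locks named in lock_flags
 * are currently held, exclusively if the corresponding SHARED flag is not
 * set.  Intended for use in assertions.
 */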
int
xfs_isilocked(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
		if (!(lock_flags & XFS_ILOCK_SHARED))
			return !!ip->i_lock.mr_writer;
		return rwsem_is_locked(&ip->i_lock.mr_lock);
	}

	if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
		if (!(lock_flags & XFS_IOLOCK_SHARED))
			return !!ip->i_iolock.mr_writer;
		return rwsem_is_locked(&ip->i_iolock.mr_lock);
	}

	ASSERT(0);
	return 0;
}
#endif

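/*
 * Take the inode flush lock, sleeping until it can be acquired: wait on the
 * __XFS_IFLOCK_BIT waitqueue while the flag is set, then retry the
 * non-blocking xfs_iflock_nowait() until it succeeds.
 */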
void
__xfs_iflock(
	struct xfs_inode	*ip)
{
	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);

	do {
		prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
		if (xfs_isiflocked(ip))
			io_schedule();
	} while (!xfs_iflock_nowait(ip));

	finish_wait(wq, &wait.wait);
}