   1/*
   2 *   Copyright (C) International Business Machines Corp., 2000-2005
   3 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   4 *
   5 *   This program is free software;  you can redistribute it and/or modify
   6 *   it under the terms of the GNU General Public License as published by
   7 *   the Free Software Foundation; either version 2 of the License, or
   8 *   (at your option) any later version.
   9 *
  10 *   This program is distributed in the hope that it will be useful,
  11 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  13 *   the GNU General Public License for more details.
  14 *
  15 *   You should have received a copy of the GNU General Public License
  16 *   along with this program;  if not, write to the Free Software
  17 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18 */
  19
  20/*
  21 *	jfs_txnmgr.c: transaction manager
  22 *
  23 * notes:
  24 * transaction starts with txBegin() and ends with txCommit()
  25 * or txAbort().
  26 *
  27 * tlock is acquired at the time of update;
  28 * (obviate scan at commit time for xtree and dtree)
  29 * tlock and mp point to each other;
  30 * (no hashlist for mp -> tlock).
  31 *
  32 * special cases:
  33 * tlock on in-memory inode:
  34 * in-place tlock in the in-memory inode itself;
  35 * converted to page lock by iWrite() at commit time.
  36 *
  37 * tlock during write()/mmap() under anonymous transaction (tid = 0):
  38 * transferred (?) to transaction at commit time.
  39 *
  40 * use the page itself to update allocation maps
  41 * (obviate intermediate replication of allocation/deallocation data)
  42 * hold on to mp+lock thru update of maps
  43 */
  44
  45#include <linux/fs.h>
  46#include <linux/vmalloc.h>
  47#include <linux/completion.h>
  48#include <linux/freezer.h>
  49#include <linux/module.h>
  50#include <linux/moduleparam.h>
  51#include <linux/kthread.h>
  52#include <linux/seq_file.h>
  53#include "jfs_incore.h"
  54#include "jfs_inode.h"
  55#include "jfs_filsys.h"
  56#include "jfs_metapage.h"
  57#include "jfs_dinode.h"
  58#include "jfs_imap.h"
  59#include "jfs_dmap.h"
  60#include "jfs_superblock.h"
  61#include "jfs_debug.h"
  62
  63/*
  64 *	transaction management structures
  65 */
  66static struct {
  67	int freetid;		/* index of a free tid structure */
  68	int freelock;		/* index first free lock word */
  69	wait_queue_head_t freewait;	/* eventlist of free tblock */
  70	wait_queue_head_t freelockwait;	/* eventlist of free tlock */
  71	wait_queue_head_t lowlockwait;	/* eventlist of ample tlocks */
  72	int tlocksInUse;	/* Number of tlocks in use */
  73	spinlock_t LazyLock;	/* synchronize sync_queue & unlock_queue */
  74/*	struct tblock *sync_queue; * Transactions waiting for data sync */
  75	struct list_head unlock_queue;	/* Txns waiting to be released */
  76	struct list_head anon_list;	/* inodes having anonymous txns */
  77	struct list_head anon_list2;	/* inodes having anonymous txns
  78					   that couldn't be sync'ed */
  79} TxAnchor;
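
/*
 * Relationship sketch (illustrative, not code): an inode holding
 * anonymous tlocks is linked onto TxAnchor.anon_list through
 * jfs_ip->anon_inode_list, and its tlocks are chained from
 * jfs_ip->atlhead via tlock->next down to jfs_ip->atltail;
 * txCommit() later splices that chain onto the committing
 * transaction's tlock list (see the atlhead handling below).
 */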
  80
  81int jfs_tlocks_low;		/* Indicates low number of available tlocks */
  82
  83#ifdef CONFIG_JFS_STATISTICS
  84static struct {
  85	uint txBegin;
  86	uint txBegin_barrier;
  87	uint txBegin_lockslow;
  88	uint txBegin_freetid;
  89	uint txBeginAnon;
  90	uint txBeginAnon_barrier;
  91	uint txBeginAnon_lockslow;
  92	uint txLockAlloc;
  93	uint txLockAlloc_freelock;
  94} TxStat;
  95#endif
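
/*
 * Usage sketch: with CONFIG_JFS_STATISTICS set, these counters are
 * assumed to be exported read-only through procfs (the proc plumbing
 * lives in the JFS debug code), e.g.:
 *
 *	# cat /proc/fs/jfs/TxAnchor
 *	# cat /proc/fs/jfs/TxStats
 */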
  96
  97static int nTxBlock = -1;	/* number of transaction blocks */
  98module_param(nTxBlock, int, 0);
  99MODULE_PARM_DESC(nTxBlock,
 100		 "Number of transaction blocks (max:65536)");
 101
 102static int nTxLock = -1;	/* number of transaction locks */
 103module_param(nTxLock, int, 0);
 104MODULE_PARM_DESC(nTxLock,
 105		 "Number of transaction locks (max:65536)");
 106
 107struct tblock *TxBlock;	/* transaction block table */
 108static int TxLockLWM;	/* Low water mark for number of txLocks used */
 109static int TxLockHWM;	/* High water mark for number of txLocks used */
 110static int TxLockVHWM;	/* Very High water mark */
 111struct tlock *TxLock;	/* transaction lock table */
 112
 113/*
 114 *	transaction management lock
 115 */
 116static DEFINE_SPINLOCK(jfsTxnLock);
 117
 118#define TXN_LOCK()		spin_lock(&jfsTxnLock)
 119#define TXN_UNLOCK()		spin_unlock(&jfsTxnLock)
 120
 121#define LAZY_LOCK_INIT()	spin_lock_init(&TxAnchor.LazyLock);
 122#define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
 123#define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)
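
/*
 * Usage sketch (after the lazy-commit paths later in this file; the
 * irqsave flags word belongs to the caller):
 *
 *	unsigned long flags;
 *
 *	LAZY_LOCK(flags);
 *	list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue);
 *	LAZY_UNLOCK(flags);
 */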
 124
 125static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
 126static int jfs_commit_thread_waking;
 127
 128/*
  129 * Retry logic exists outside these macros to protect from spurious wakeups.
 130 */
 131static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
 132{
 133	DECLARE_WAITQUEUE(wait, current);
 134
 135	add_wait_queue(event, &wait);
 136	set_current_state(TASK_UNINTERRUPTIBLE);
 137	TXN_UNLOCK();
  138	io_schedule();
  139	remove_wait_queue(event, &wait);
 140}
 141
 142#define TXN_SLEEP(event)\
 143{\
 144	TXN_SLEEP_DROP_LOCK(event);\
 145	TXN_LOCK();\
 146}
 147
 148#define TXN_WAKEUP(event) wake_up_all(event)
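
/*
 * Caller-side sketch: since wakeups may be spurious, TXN_SLEEP() is
 * always wrapped in a condition-recheck loop, as txLockAlloc() does
 * below:
 *
 *	TXN_LOCK();
 *	while (!(lid = TxAnchor.freelock))
 *		TXN_SLEEP(&TxAnchor.freelockwait);
 */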
 149
 150/*
 151 *	statistics
 152 */
 153static struct {
 154	tid_t maxtid;		/* 4: biggest tid ever used */
 155	lid_t maxlid;		/* 4: biggest lid ever used */
 156	int ntid;		/* 4: # of transactions performed */
 157	int nlid;		/* 4: # of tlocks acquired */
 158	int waitlock;		/* 4: # of tlock wait */
 159} stattx;
 160
 161/*
 162 * forward references
 163 */
 164static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 165		struct tlock * tlck, struct commit * cd);
 166static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 167		struct tlock * tlck);
 168static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 169		struct tlock * tlck);
 170static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 171		struct tlock * tlck);
 172static void txAllocPMap(struct inode *ip, struct maplock * maplock,
 173		struct tblock * tblk);
 174static void txForce(struct tblock * tblk);
 175static int txLog(struct jfs_log * log, struct tblock * tblk,
 176		struct commit * cd);
 177static void txUpdateMap(struct tblock * tblk);
 178static void txRelease(struct tblock * tblk);
 179static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 180	   struct tlock * tlck);
 181static void LogSyncRelease(struct metapage * mp);
 182
 183/*
 184 *		transaction block/lock management
 185 *		---------------------------------
 186 */
 187
 188/*
 189 * Get a transaction lock from the free list.  If the number in use is
 190 * greater than the high water mark, wake up the sync daemon.  This should
 191 * free some anonymous transaction locks.  (TXN_LOCK must be held.)
 192 */
 193static lid_t txLockAlloc(void)
 194{
 195	lid_t lid;
 196
 197	INCREMENT(TxStat.txLockAlloc);
 198	if (!TxAnchor.freelock) {
 199		INCREMENT(TxStat.txLockAlloc_freelock);
 200	}
 201
 202	while (!(lid = TxAnchor.freelock))
 203		TXN_SLEEP(&TxAnchor.freelockwait);
 204	TxAnchor.freelock = TxLock[lid].next;
 205	HIGHWATERMARK(stattx.maxlid, lid);
 206	if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) {
 207		jfs_info("txLockAlloc tlocks low");
 208		jfs_tlocks_low = 1;
 209		wake_up_process(jfsSyncThread);
 210	}
 211
 212	return lid;
 213}
 214
 215static void txLockFree(lid_t lid)
 216{
 217	TxLock[lid].tid = 0;
 218	TxLock[lid].next = TxAnchor.freelock;
 219	TxAnchor.freelock = lid;
 220	TxAnchor.tlocksInUse--;
 221	if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) {
 222		jfs_info("txLockFree jfs_tlocks_low no more");
 223		jfs_tlocks_low = 0;
 224		TXN_WAKEUP(&TxAnchor.lowlockwait);
 225	}
 226	TXN_WAKEUP(&TxAnchor.freelockwait);
 227}
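
/*
 * Free-list sketch (illustrative; imagine a freshly initialized table
 * with only four tlocks, lid 0 being the reserved terminator):
 *
 *	TxAnchor.freelock = 1;
 *	TxLock[1].next = 2;  TxLock[2].next = 3;  TxLock[3].next = 0;
 *
 * txLockAlloc() pops from the head; txLockFree() pushes back onto it.
 */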
 228
 229/*
 230 * NAME:	txInit()
 231 *
 232 * FUNCTION:	initialize transaction management structures
 233 *
 234 * RETURN:
 235 *
 236 * serialization: single thread at jfs_init()
 237 */
 238int txInit(void)
 239{
 240	int k, size;
 241	struct sysinfo si;
 242
 243	/* Set defaults for nTxLock and nTxBlock if unset */
 244
 245	if (nTxLock == -1) {
 246		if (nTxBlock == -1) {
 247			/* Base default on memory size */
 248			si_meminfo(&si);
 249			if (si.totalram > (256 * 1024)) /* 1 GB */
 250				nTxLock = 64 * 1024;
 251			else
 252				nTxLock = si.totalram >> 2;
 253		} else if (nTxBlock > (8 * 1024))
 254			nTxLock = 64 * 1024;
 255		else
 256			nTxLock = nTxBlock << 3;
 257	}
 258	if (nTxBlock == -1)
 259		nTxBlock = nTxLock >> 3;
 260
 261	/* Verify tunable parameters */
 262	if (nTxBlock < 16)
 263		nTxBlock = 16;	/* No one should set it this low */
 264	if (nTxBlock > 65536)
 265		nTxBlock = 65536;
 266	if (nTxLock < 256)
 267		nTxLock = 256;	/* No one should set it this low */
 268	if (nTxLock > 65536)
 269		nTxLock = 65536;
 270
 271	printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n",
 272	       nTxBlock, nTxLock);
 273	/*
 274	 * initialize transaction block (tblock) table
 275	 *
 276	 * transaction id (tid) = tblock index
 277	 * tid = 0 is reserved.
 278	 */
 279	TxLockLWM = (nTxLock * 4) / 10;
 280	TxLockHWM = (nTxLock * 7) / 10;
 281	TxLockVHWM = (nTxLock * 8) / 10;
 282
 283	size = sizeof(struct tblock) * nTxBlock;
 284	TxBlock = vmalloc(size);
 285	if (TxBlock == NULL)
 286		return -ENOMEM;
 287
 288	for (k = 1; k < nTxBlock - 1; k++) {
 289		TxBlock[k].next = k + 1;
 290		init_waitqueue_head(&TxBlock[k].gcwait);
 291		init_waitqueue_head(&TxBlock[k].waitor);
 292	}
 293	TxBlock[k].next = 0;
 294	init_waitqueue_head(&TxBlock[k].gcwait);
 295	init_waitqueue_head(&TxBlock[k].waitor);
 296
 297	TxAnchor.freetid = 1;
 298	init_waitqueue_head(&TxAnchor.freewait);
 299
 300	stattx.maxtid = 1;	/* statistics */
 301
 302	/*
 303	 * initialize transaction lock (tlock) table
 304	 *
 305	 * transaction lock id = tlock index
 306	 * tlock id = 0 is reserved.
 307	 */
 308	size = sizeof(struct tlock) * nTxLock;
 309	TxLock = vmalloc(size);
 310	if (TxLock == NULL) {
 311		vfree(TxBlock);
 312		return -ENOMEM;
 313	}
 314
 315	/* initialize tlock table */
 316	for (k = 1; k < nTxLock - 1; k++)
 317		TxLock[k].next = k + 1;
 318	TxLock[k].next = 0;
 319	init_waitqueue_head(&TxAnchor.freelockwait);
 320	init_waitqueue_head(&TxAnchor.lowlockwait);
 321
 322	TxAnchor.freelock = 1;
 323	TxAnchor.tlocksInUse = 0;
 324	INIT_LIST_HEAD(&TxAnchor.anon_list);
 325	INIT_LIST_HEAD(&TxAnchor.anon_list2);
 326
 327	LAZY_LOCK_INIT();
 328	INIT_LIST_HEAD(&TxAnchor.unlock_queue);
 329
 330	stattx.maxlid = 1;	/* statistics */
 331
 332	return 0;
 333}
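
/*
 * Tuning sketch (assuming a modular build; txInit() clamps the values
 * to 16..65536 blocks and 256..65536 locks as above):
 *
 *	# modprobe jfs nTxBlock=8192 nTxLock=65536
 */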
 334
 335/*
 336 * NAME:	txExit()
 337 *
 338 * FUNCTION:	clean up when module is unloaded
 339 */
 340void txExit(void)
 341{
 342	vfree(TxLock);
 343	TxLock = NULL;
 344	vfree(TxBlock);
 345	TxBlock = NULL;
 346}
 347
 348/*
 349 * NAME:	txBegin()
 350 *
 351 * FUNCTION:	start a transaction.
 352 *
 353 * PARAMETER:	sb	- superblock
 354 *		flag	- force for nested tx;
 355 *
 356 * RETURN:	tid	- transaction id
 357 *
  358 * note: the force flag allows a tx to be started for a nested tx
 359 * to prevent deadlock on logsync barrier;
 360 */
 361tid_t txBegin(struct super_block *sb, int flag)
 362{
 363	tid_t t;
 364	struct tblock *tblk;
 365	struct jfs_log *log;
 366
 367	jfs_info("txBegin: flag = 0x%x", flag);
 368	log = JFS_SBI(sb)->log;
 369
 370	TXN_LOCK();
 371
 372	INCREMENT(TxStat.txBegin);
 373
 374      retry:
 375	if (!(flag & COMMIT_FORCE)) {
 376		/*
 377		 * synchronize with logsync barrier
 378		 */
 379		if (test_bit(log_SYNCBARRIER, &log->flag) ||
 380		    test_bit(log_QUIESCE, &log->flag)) {
 381			INCREMENT(TxStat.txBegin_barrier);
 382			TXN_SLEEP(&log->syncwait);
 383			goto retry;
 384		}
 385	}
 386	if (flag == 0) {
 387		/*
 388		 * Don't begin transaction if we're getting starved for tlocks
 389		 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
 390		 * free tlocks)
 391		 */
 392		if (TxAnchor.tlocksInUse > TxLockVHWM) {
 393			INCREMENT(TxStat.txBegin_lockslow);
 394			TXN_SLEEP(&TxAnchor.lowlockwait);
 395			goto retry;
 396		}
 397	}
 398
 399	/*
 400	 * allocate transaction id/block
 401	 */
 402	if ((t = TxAnchor.freetid) == 0) {
 403		jfs_info("txBegin: waiting for free tid");
 404		INCREMENT(TxStat.txBegin_freetid);
 405		TXN_SLEEP(&TxAnchor.freewait);
 406		goto retry;
 407	}
 408
 409	tblk = tid_to_tblock(t);
 410
 411	if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) {
 412		/* Don't let a non-forced transaction take the last tblk */
 413		jfs_info("txBegin: waiting for free tid");
 414		INCREMENT(TxStat.txBegin_freetid);
 415		TXN_SLEEP(&TxAnchor.freewait);
 416		goto retry;
 417	}
 418
 419	TxAnchor.freetid = tblk->next;
 420
 421	/*
 422	 * initialize transaction
 423	 */
 424
 425	/*
 426	 * We can't zero the whole thing or we screw up another thread being
 427	 * awakened after sleeping on tblk->waitor
 428	 *
 429	 * memset(tblk, 0, sizeof(struct tblock));
 430	 */
 431	tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0;
 432
 433	tblk->sb = sb;
 434	++log->logtid;
 435	tblk->logtid = log->logtid;
 436
 437	++log->active;
 438
 439	HIGHWATERMARK(stattx.maxtid, t);	/* statistics */
 440	INCREMENT(stattx.ntid);	/* statistics */
 441
 442	TXN_UNLOCK();
 443
 444	jfs_info("txBegin: returning tid = %d", t);
 445
 446	return t;
 447}
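
/*
 * Lifecycle sketch (patterned on the jfs_*() inode operations;
 * illustrative only, error handling omitted):
 *
 *	tid = txBegin(dip->i_sb, 0);
 *	... modify metadata, taking tlocks via txLock()/txMaplock() ...
 *	rc = txCommit(tid, 1, &ip, 0);
 *	txEnd(tid);
 */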
 448
 449/*
 450 * NAME:	txBeginAnon()
 451 *
 452 * FUNCTION:	start an anonymous transaction.
  453 *		Blocks if a logsync barrier is in effect or available tlocks
  454 *		are low, to prevent anonymous tlocks from depleting the supply.
 455 *
 456 * PARAMETER:	sb	- superblock
 457 *
 458 * RETURN:	none
 459 */
 460void txBeginAnon(struct super_block *sb)
 461{
 462	struct jfs_log *log;
 463
 464	log = JFS_SBI(sb)->log;
 465
 466	TXN_LOCK();
 467	INCREMENT(TxStat.txBeginAnon);
 468
 469      retry:
 470	/*
 471	 * synchronize with logsync barrier
 472	 */
 473	if (test_bit(log_SYNCBARRIER, &log->flag) ||
 474	    test_bit(log_QUIESCE, &log->flag)) {
 475		INCREMENT(TxStat.txBeginAnon_barrier);
 476		TXN_SLEEP(&log->syncwait);
 477		goto retry;
 478	}
 479
 480	/*
 481	 * Don't begin transaction if we're getting starved for tlocks
 482	 */
 483	if (TxAnchor.tlocksInUse > TxLockVHWM) {
 484		INCREMENT(TxStat.txBeginAnon_lockslow);
 485		TXN_SLEEP(&TxAnchor.lowlockwait);
 486		goto retry;
 487	}
 488	TXN_UNLOCK();
 489}
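
/*
 * Sketch: a write path calls this to throttle itself before dirtying
 * pages under the anonymous transaction (tid = 0), e.g.:
 *
 *	txBeginAnon(ip->i_sb);
 *	tlck = txLock(0, ip, mp, tlckXTREE | tlckGROW);
 */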
 490
 491/*
 492 *	txEnd()
 493 *
 494 * function: free specified transaction block.
 495 *
 496 *	logsync barrier processing:
 497 *
 498 * serialization:
 499 */
 500void txEnd(tid_t tid)
 501{
 502	struct tblock *tblk = tid_to_tblock(tid);
 503	struct jfs_log *log;
 504
 505	jfs_info("txEnd: tid = %d", tid);
 506	TXN_LOCK();
 507
 508	/*
 509	 * wakeup transactions waiting on the page locked
 510	 * by the current transaction
 511	 */
 512	TXN_WAKEUP(&tblk->waitor);
 513
 514	log = JFS_SBI(tblk->sb)->log;
 515
 516	/*
 517	 * Lazy commit thread can't free this guy until we mark it UNLOCKED,
 518	 * otherwise, we would be left with a transaction that may have been
 519	 * reused.
 520	 *
 521	 * Lazy commit thread will turn off tblkGC_LAZY before calling this
 522	 * routine.
 523	 */
 524	if (tblk->flag & tblkGC_LAZY) {
 525		jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk);
 526		TXN_UNLOCK();
 527
 528		spin_lock_irq(&log->gclock);	// LOGGC_LOCK
 529		tblk->flag |= tblkGC_UNLOCKED;
 530		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
 531		return;
 532	}
 533
 534	jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk);
 535
 536	assert(tblk->next == 0);
 537
 538	/*
 539	 * insert tblock back on freelist
 540	 */
 541	tblk->next = TxAnchor.freetid;
 542	TxAnchor.freetid = tid;
 543
 544	/*
 545	 * mark the tblock not active
 546	 */
 547	if (--log->active == 0) {
 548		clear_bit(log_FLUSH, &log->flag);
 549
 550		/*
 551		 * synchronize with logsync barrier
 552		 */
 553		if (test_bit(log_SYNCBARRIER, &log->flag)) {
 554			TXN_UNLOCK();
 555
 556			/* write dirty metadata & forward log syncpt */
 557			jfs_syncpt(log, 1);
 558
 559			jfs_info("log barrier off: 0x%x", log->lsn);
 560
 561			/* enable new transactions start */
 562			clear_bit(log_SYNCBARRIER, &log->flag);
 563
 564			/* wakeup all waitors for logsync barrier */
 565			TXN_WAKEUP(&log->syncwait);
 566
 567			goto wakeup;
 568		}
 569	}
 570
 571	TXN_UNLOCK();
 572wakeup:
 573	/*
 574	 * wakeup all waitors for a free tblock
 575	 */
 576	TXN_WAKEUP(&TxAnchor.freewait);
 577}
 578
 579/*
 580 *	txLock()
 581 *
 582 * function: acquire a transaction lock on the specified <mp>
 583 *
 584 * parameter:
 585 *
 586 * return:	transaction lock id
 587 *
 588 * serialization:
 589 */
 590struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 591		     int type)
 592{
 593	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 594	int dir_xtree = 0;
 595	lid_t lid;
 596	tid_t xtid;
 597	struct tlock *tlck;
 598	struct xtlock *xtlck;
 599	struct linelock *linelock;
 600	xtpage_t *p;
 601	struct tblock *tblk;
 602
 603	TXN_LOCK();
 604
 605	if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) &&
 606	    !(mp->xflag & COMMIT_PAGE)) {
 607		/*
 608		 * Directory inode is special.  It can have both an xtree tlock
 609		 * and a dtree tlock associated with it.
 610		 */
 611		dir_xtree = 1;
 612		lid = jfs_ip->xtlid;
 613	} else
 614		lid = mp->lid;
 615
 616	/* is page not locked by a transaction ? */
 617	if (lid == 0)
 618		goto allocateLock;
 619
 620	jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid);
 621
 622	/* is page locked by the requester transaction ? */
 623	tlck = lid_to_tlock(lid);
 624	if ((xtid = tlck->tid) == tid) {
 625		TXN_UNLOCK();
 626		goto grantLock;
 627	}
 628
 629	/*
 630	 * is page locked by anonymous transaction/lock ?
 631	 *
 632	 * (page update without transaction (i.e., file write) is
 633	 * locked under anonymous transaction tid = 0:
 634	 * anonymous tlocks maintained on anonymous tlock list of
 635	 * the inode of the page and available to all anonymous
 636	 * transactions until txCommit() time at which point
 637	 * they are transferred to the transaction tlock list of
 638	 * the committing transaction of the inode)
 639	 */
 640	if (xtid == 0) {
 641		tlck->tid = tid;
 642		TXN_UNLOCK();
 643		tblk = tid_to_tblock(tid);
 644		/*
 645		 * The order of the tlocks in the transaction is important
 646		 * (during truncate, child xtree pages must be freed before
 647		 * parent's tlocks change the working map).
 648		 * Take tlock off anonymous list and add to tail of
 649		 * transaction list
 650		 *
 651		 * Note:  We really need to get rid of the tid & lid and
 652		 * use list_head's.  This code is getting UGLY!
 653		 */
 654		if (jfs_ip->atlhead == lid) {
 655			if (jfs_ip->atltail == lid) {
 656				/* only anonymous txn.
 657				 * Remove from anon_list
 658				 */
 659				TXN_LOCK();
 660				list_del_init(&jfs_ip->anon_inode_list);
 661				TXN_UNLOCK();
 662			}
 663			jfs_ip->atlhead = tlck->next;
 664		} else {
 665			lid_t last;
 666			for (last = jfs_ip->atlhead;
 667			     lid_to_tlock(last)->next != lid;
 668			     last = lid_to_tlock(last)->next) {
 669				assert(last);
 670			}
 671			lid_to_tlock(last)->next = tlck->next;
 672			if (jfs_ip->atltail == lid)
 673				jfs_ip->atltail = last;
 674		}
 675
 676		/* insert the tlock at tail of transaction tlock list */
 677
 678		if (tblk->next)
 679			lid_to_tlock(tblk->last)->next = lid;
 680		else
 681			tblk->next = lid;
 682		tlck->next = 0;
 683		tblk->last = lid;
 684
 685		goto grantLock;
 686	}
 687
 688	goto waitLock;
 689
 690	/*
 691	 * allocate a tlock
 692	 */
 693      allocateLock:
 694	lid = txLockAlloc();
 695	tlck = lid_to_tlock(lid);
 696
 697	/*
 698	 * initialize tlock
 699	 */
 700	tlck->tid = tid;
 701
 702	TXN_UNLOCK();
 703
 704	/* mark tlock for meta-data page */
 705	if (mp->xflag & COMMIT_PAGE) {
 706
 707		tlck->flag = tlckPAGELOCK;
 708
 709		/* mark the page dirty and nohomeok */
 710		metapage_nohomeok(mp);
 711
 712		jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
 713			 mp, mp->nohomeok, tid, tlck);
 714
 715		/* if anonymous transaction, and buffer is on the group
 716		 * commit synclist, mark inode to show this.  This will
 717		 * prevent the buffer from being marked nohomeok for too
 718		 * long a time.
 719		 */
 720		if ((tid == 0) && mp->lsn)
 721			set_cflag(COMMIT_Synclist, ip);
 722	}
 723	/* mark tlock for in-memory inode */
 724	else
 725		tlck->flag = tlckINODELOCK;
 726
 727	if (S_ISDIR(ip->i_mode))
 728		tlck->flag |= tlckDIRECTORY;
 729
 730	tlck->type = 0;
 731
 732	/* bind the tlock and the page */
 733	tlck->ip = ip;
 734	tlck->mp = mp;
 735	if (dir_xtree)
 736		jfs_ip->xtlid = lid;
 737	else
 738		mp->lid = lid;
 739
 740	/*
 741	 * enqueue transaction lock to transaction/inode
 742	 */
 743	/* insert the tlock at tail of transaction tlock list */
 744	if (tid) {
 745		tblk = tid_to_tblock(tid);
 746		if (tblk->next)
 747			lid_to_tlock(tblk->last)->next = lid;
 748		else
 749			tblk->next = lid;
 750		tlck->next = 0;
 751		tblk->last = lid;
 752	}
 753	/* anonymous transaction:
 754	 * insert the tlock at head of inode anonymous tlock list
 755	 */
 756	else {
 757		tlck->next = jfs_ip->atlhead;
 758		jfs_ip->atlhead = lid;
 759		if (tlck->next == 0) {
 760			/* This inode's first anonymous transaction */
 761			jfs_ip->atltail = lid;
 762			TXN_LOCK();
 763			list_add_tail(&jfs_ip->anon_inode_list,
 764				      &TxAnchor.anon_list);
 765			TXN_UNLOCK();
 766		}
 767	}
 768
 769	/* initialize type dependent area for linelock */
 770	linelock = (struct linelock *) & tlck->lock;
 771	linelock->next = 0;
 772	linelock->flag = tlckLINELOCK;
 773	linelock->maxcnt = TLOCKSHORT;
 774	linelock->index = 0;
 775
 776	switch (type & tlckTYPE) {
 777	case tlckDTREE:
 778		linelock->l2linesize = L2DTSLOTSIZE;
 779		break;
 780
 781	case tlckXTREE:
 782		linelock->l2linesize = L2XTSLOTSIZE;
 783
 784		xtlck = (struct xtlock *) linelock;
 785		xtlck->header.offset = 0;
 786		xtlck->header.length = 2;
 787
 788		if (type & tlckNEW) {
 789			xtlck->lwm.offset = XTENTRYSTART;
 790		} else {
 791			if (mp->xflag & COMMIT_PAGE)
 792				p = (xtpage_t *) mp->data;
 793			else
 794				p = &jfs_ip->i_xtroot;
 795			xtlck->lwm.offset =
 796			    le16_to_cpu(p->header.nextindex);
 797		}
 798		xtlck->lwm.length = 0;	/* ! */
 799		xtlck->twm.offset = 0;
 800		xtlck->hwm.offset = 0;
 801
 802		xtlck->index = 2;
 803		break;
 804
 805	case tlckINODE:
 806		linelock->l2linesize = L2INODESLOTSIZE;
 807		break;
 808
 809	case tlckDATA:
 810		linelock->l2linesize = L2DATASLOTSIZE;
 811		break;
 812
 813	default:
 814		jfs_err("UFO tlock:0x%p", tlck);
 815	}
 816
 817	/*
 818	 * update tlock vector
 819	 */
 820      grantLock:
 821	tlck->type |= type;
 822
 823	return tlck;
 824
 825	/*
 826	 * page is being locked by another transaction:
 827	 */
 828      waitLock:
 829	/* Only locks on ipimap or ipaimap should reach here */
 830	/* assert(jfs_ip->fileset == AGGREGATE_I); */
 831	if (jfs_ip->fileset != AGGREGATE_I) {
 832		printk(KERN_ERR "txLock: trying to lock locked page!");
 833		print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
 834			       ip, sizeof(*ip), 0);
 835		print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
 836			       mp, sizeof(*mp), 0);
 837		print_hex_dump(KERN_ERR, "Locker's tblock: ",
 838			       DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
 839			       sizeof(struct tblock), 0);
 840		print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
 841			       tlck, sizeof(*tlck), 0);
 842		BUG();
 843	}
 844	INCREMENT(stattx.waitlock);	/* statistics */
 845	TXN_UNLOCK();
 846	release_metapage(mp);
 847	TXN_LOCK();
 848	xtid = tlck->tid;	/* reacquire after dropping TXN_LOCK */
 849
 850	jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
 851		 tid, xtid, lid);
 852
 853	/* Recheck everything since dropping TXN_LOCK */
 854	if (xtid && (tlck->mp == mp) && (mp->lid == lid))
 855		TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
 856	else
 857		TXN_UNLOCK();
 858	jfs_info("txLock: awakened     tid = %d, lid = %d", tid, lid);
 859
 860	return NULL;
 861}
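
/*
 * Caller sketch (patterned on the xtree code; `index' here is the
 * caller's modified slot): take the tlock, then widen the embedded
 * linelock's low-water mark to cover the update:
 *
 *	tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
 *	xtlck = (struct xtlock *) &tlck->lock;
 *	xtlck->lwm.offset = xtlck->lwm.offset ?
 *	    min(index, (int) xtlck->lwm.offset) : index;
 */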
 862
 863/*
 864 * NAME:	txRelease()
 865 *
 866 * FUNCTION:	Release buffers associated with transaction locks, but don't
  867 *		mark homeok yet.  This allows other transactions to modify
  868 *		buffers, but won't let them go to disk until the commit record
  869 *		is actually written.
 870 *
 871 * PARAMETER:
 872 *		tblk	-
 873 *
 874 * RETURN:	Errors from subroutines.
 875 */
 876static void txRelease(struct tblock * tblk)
 877{
 878	struct metapage *mp;
 879	lid_t lid;
 880	struct tlock *tlck;
 881
 882	TXN_LOCK();
 883
 884	for (lid = tblk->next; lid; lid = tlck->next) {
 885		tlck = lid_to_tlock(lid);
 886		if ((mp = tlck->mp) != NULL &&
 887		    (tlck->type & tlckBTROOT) == 0) {
 888			assert(mp->xflag & COMMIT_PAGE);
 889			mp->lid = 0;
 890		}
 891	}
 892
 893	/*
 894	 * wakeup transactions waiting on a page locked
 895	 * by the current transaction
 896	 */
 897	TXN_WAKEUP(&tblk->waitor);
 898
 899	TXN_UNLOCK();
 900}
 901
 902/*
 903 * NAME:	txUnlock()
 904 *
 905 * FUNCTION:	Initiates pageout of pages modified by tid in journalled
 906 *		objects and frees their lockwords.
 907 */
 908static void txUnlock(struct tblock * tblk)
 909{
 910	struct tlock *tlck;
 911	struct linelock *linelock;
 912	lid_t lid, next, llid, k;
 913	struct metapage *mp;
 914	struct jfs_log *log;
 915	int difft, diffp;
 916	unsigned long flags;
 917
 918	jfs_info("txUnlock: tblk = 0x%p", tblk);
 919	log = JFS_SBI(tblk->sb)->log;
 920
 921	/*
 922	 * mark page under tlock homeok (its log has been written):
 923	 */
 924	for (lid = tblk->next; lid; lid = next) {
 925		tlck = lid_to_tlock(lid);
 926		next = tlck->next;
 927
 928		jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck);
 929
 930		/* unbind page from tlock */
 931		if ((mp = tlck->mp) != NULL &&
 932		    (tlck->type & tlckBTROOT) == 0) {
 933			assert(mp->xflag & COMMIT_PAGE);
 934
 935			/* hold buffer
 936			 */
 937			hold_metapage(mp);
 938
 939			assert(mp->nohomeok > 0);
 940			_metapage_homeok(mp);
 941
 942			/* inherit younger/larger clsn */
 943			LOGSYNC_LOCK(log, flags);
 944			if (mp->clsn) {
 945				logdiff(difft, tblk->clsn, log);
 946				logdiff(diffp, mp->clsn, log);
 947				if (difft > diffp)
 948					mp->clsn = tblk->clsn;
 949			} else
 950				mp->clsn = tblk->clsn;
 951			LOGSYNC_UNLOCK(log, flags);
 952
 953			assert(!(tlck->flag & tlckFREEPAGE));
 954
 955			put_metapage(mp);
 956		}
 957
 958		/* insert tlock, and linelock(s) of the tlock if any,
 959		 * at head of freelist
 960		 */
 961		TXN_LOCK();
 962
 963		llid = ((struct linelock *) & tlck->lock)->next;
 964		while (llid) {
 965			linelock = (struct linelock *) lid_to_tlock(llid);
 966			k = linelock->next;
 967			txLockFree(llid);
 968			llid = k;
 969		}
 970		txLockFree(lid);
 971
 972		TXN_UNLOCK();
 973	}
 974	tblk->next = tblk->last = 0;
 975
 976	/*
 977	 * remove tblock from logsynclist
  978 * (allocation map pages inherited the lsn of tblk and
  979 * have been inserted into the logsync list at txUpdateMap())
 980	 */
 981	if (tblk->lsn) {
 982		LOGSYNC_LOCK(log, flags);
 983		log->count--;
 984		list_del(&tblk->synclist);
 985		LOGSYNC_UNLOCK(log, flags);
 986	}
 987}
 988
 989/*
 990 *	txMaplock()
 991 *
 992 * function: allocate a transaction lock for freed page/entry;
 993 *	for freed page, maplock is used as xtlock/dtlock type;
 994 */
 995struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 996{
 997	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 998	lid_t lid;
 999	struct tblock *tblk;
1000	struct tlock *tlck;
1001	struct maplock *maplock;
1002
1003	TXN_LOCK();
1004
1005	/*
1006	 * allocate a tlock
1007	 */
1008	lid = txLockAlloc();
1009	tlck = lid_to_tlock(lid);
1010
1011	/*
1012	 * initialize tlock
1013	 */
1014	tlck->tid = tid;
1015
1016	/* bind the tlock and the object */
1017	tlck->flag = tlckINODELOCK;
1018	if (S_ISDIR(ip->i_mode))
1019		tlck->flag |= tlckDIRECTORY;
1020	tlck->ip = ip;
1021	tlck->mp = NULL;
1022
1023	tlck->type = type;
1024
1025	/*
1026	 * enqueue transaction lock to transaction/inode
1027	 */
1028	/* insert the tlock at tail of transaction tlock list */
1029	if (tid) {
1030		tblk = tid_to_tblock(tid);
1031		if (tblk->next)
1032			lid_to_tlock(tblk->last)->next = lid;
1033		else
1034			tblk->next = lid;
1035		tlck->next = 0;
1036		tblk->last = lid;
1037	}
1038	/* anonymous transaction:
1039	 * insert the tlock at head of inode anonymous tlock list
1040	 */
1041	else {
1042		tlck->next = jfs_ip->atlhead;
1043		jfs_ip->atlhead = lid;
1044		if (tlck->next == 0) {
1045			/* This inode's first anonymous transaction */
1046			jfs_ip->atltail = lid;
1047			list_add_tail(&jfs_ip->anon_inode_list,
1048				      &TxAnchor.anon_list);
1049		}
1050	}
1051
1052	TXN_UNLOCK();
1053
1054	/* initialize type dependent area for maplock */
1055	maplock = (struct maplock *) & tlck->lock;
1056	maplock->next = 0;
1057	maplock->maxcnt = 0;
1058	maplock->index = 0;
1059
1060	return tlck;
1061}
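
/*
 * Caller sketch (patterned on the truncate/free paths; xaddr/xlen are
 * the caller's extent): describe a freed extent by formatting the
 * maplock area as a pxd_lock:
 *
 *	tlck = txMaplock(tid, ip, tlckMAP);
 *	pxdlock = (struct pxd_lock *) &tlck->lock;
 *	pxdlock->flag = mlckFREEPXD;
 *	PXDaddress(&pxdlock->pxd, xaddr);
 *	PXDlength(&pxdlock->pxd, xlen);
 *	pxdlock->index = 1;
 */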
1062
1063/*
1064 *	txLinelock()
1065 *
1066 * function: allocate a transaction lock for log vector list
1067 */
1068struct linelock *txLinelock(struct linelock * tlock)
1069{
1070	lid_t lid;
1071	struct tlock *tlck;
1072	struct linelock *linelock;
1073
1074	TXN_LOCK();
1075
1076	/* allocate a TxLock structure */
1077	lid = txLockAlloc();
1078	tlck = lid_to_tlock(lid);
1079
1080	TXN_UNLOCK();
1081
1082	/* initialize linelock */
1083	linelock = (struct linelock *) tlck;
1084	linelock->next = 0;
1085	linelock->flag = tlckLINELOCK;
1086	linelock->maxcnt = TLOCKLONG;
1087	linelock->index = 0;
1088	if (tlck->flag & tlckDIRECTORY)
1089		linelock->flag |= tlckDIRECTORY;
1090
1091	/* append linelock after tlock */
1092	linelock->next = tlock->next;
1093	tlock->next = lid;
1094
1095	return linelock;
1096}
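
/*
 * Overflow sketch (patterned on the dtree callers): when a tlock's
 * in-line linelock fills, chain an overflow linelock and keep going:
 *
 *	if (dtlck->index >= dtlck->maxcnt)
 *		dtlck = (struct dtlock *) txLinelock((struct linelock *) dtlck);
 *	lv = &dtlck->lv[dtlck->index];
 */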
1097
1098/*
1099 *		transaction commit management
1100 *		-----------------------------
1101 */
1102
1103/*
1104 * NAME:	txCommit()
1105 *
1106 * FUNCTION:	commit the changes to the objects specified in
1107 *		clist.  For journalled segments only the
 1108 *		changes of the caller are committed, i.e. by tid.
1109 *		for non-journalled segments the data are flushed to
1110 *		disk and then the change to the disk inode and indirect
1111 *		blocks committed (so blocks newly allocated to the
1112 *		segment will be made a part of the segment atomically).
1113 *
1114 *		all of the segments specified in clist must be in
1115 *		one file system. no more than 6 segments are needed
1116 *		to handle all unix svcs.
1117 *
1118 *		if the i_nlink field (i.e. disk inode link count)
1119 *		is zero, and the type of inode is a regular file or
 1120 *		directory, or symbolic link, the inode is truncated
1121 *		to zero length. the truncation is committed but the
1122 *		VM resources are unaffected until it is closed (see
1123 *		iput and iclose).
1124 *
1125 * PARAMETER:
1126 *
1127 * RETURN:
1128 *
1129 * serialization:
1130 *		on entry the inode lock on each segment is assumed
1131 *		to be held.
1132 *
1133 * i/o error:
1134 */
1135int txCommit(tid_t tid,		/* transaction identifier */
1136	     int nip,		/* number of inodes to commit */
1137	     struct inode **iplist,	/* list of inode to commit */
1138	     int flag)
1139{
1140	int rc = 0;
1141	struct commit cd;
1142	struct jfs_log *log;
1143	struct tblock *tblk;
1144	struct lrd *lrd;
1145	struct inode *ip;
1146	struct jfs_inode_info *jfs_ip;
1147	int k, n;
1148	ino_t top;
1149	struct super_block *sb;
1150
1151	jfs_info("txCommit, tid = %d, flag = %d", tid, flag);
1152	/* is read-only file system ? */
1153	if (isReadOnly(iplist[0])) {
1154		rc = -EROFS;
1155		goto TheEnd;
1156	}
1157
1158	sb = cd.sb = iplist[0]->i_sb;
1159	cd.tid = tid;
1160
1161	if (tid == 0)
1162		tid = txBegin(sb, 0);
1163	tblk = tid_to_tblock(tid);
1164
1165	/*
1166	 * initialize commit structure
1167	 */
1168	log = JFS_SBI(sb)->log;
1169	cd.log = log;
1170
1171	/* initialize log record descriptor in commit */
1172	lrd = &cd.lrd;
1173	lrd->logtid = cpu_to_le32(tblk->logtid);
1174	lrd->backchain = 0;
1175
1176	tblk->xflag |= flag;
1177
1178	if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
1179		tblk->xflag |= COMMIT_LAZY;
1180	/*
1181	 *	prepare non-journaled objects for commit
1182	 *
1183	 * flush data pages of non-journaled file
1184	 * to prevent the file getting non-initialized disk blocks
1185	 * in case of crash.
1186	 * (new blocks - )
1187	 */
1188	cd.iplist = iplist;
1189	cd.nip = nip;
1190
1191	/*
1192	 *	acquire transaction lock on (on-disk) inodes
1193	 *
1194	 * update on-disk inode from in-memory inode
1195	 * acquiring transaction locks for AFTER records
1196	 * on the on-disk inode of file object
1197	 *
1198	 * sort the inodes array by inode number in descending order
1199	 * to prevent deadlock when acquiring transaction lock
1200	 * of on-disk inodes on multiple on-disk inode pages by
1201	 * multiple concurrent transactions
1202	 */
1203	for (k = 0; k < cd.nip; k++) {
1204		top = (cd.iplist[k])->i_ino;
1205		for (n = k + 1; n < cd.nip; n++) {
1206			ip = cd.iplist[n];
1207			if (ip->i_ino > top) {
1208				top = ip->i_ino;
1209				cd.iplist[n] = cd.iplist[k];
1210				cd.iplist[k] = ip;
1211			}
1212		}
1213
1214		ip = cd.iplist[k];
1215		jfs_ip = JFS_IP(ip);
1216
1217		/*
1218		 * BUGBUG - This code has temporarily been removed.  The
1219		 * intent is to ensure that any file data is written before
1220		 * the metadata is committed to the journal.  This prevents
1221		 * uninitialized data from appearing in a file after the
1222		 * journal has been replayed.  (The uninitialized data
1223		 * could be sensitive data removed by another user.)
1224		 *
1225		 * The problem now is that we are holding the IWRITELOCK
1226		 * on the inode, and calling filemap_fdatawrite on an
1227		 * unmapped page will cause a deadlock in jfs_get_block.
1228		 *
1229		 * The long term solution is to pare down the use of
1230		 * IWRITELOCK.  We are currently holding it too long.
1231		 * We could also be smarter about which data pages need
1232		 * to be written before the transaction is committed and
1233		 * when we don't need to worry about it at all.
1234		 *
1235		 * if ((!S_ISDIR(ip->i_mode))
1236		 *    && (tblk->flag & COMMIT_DELETE) == 0)
1237		 *	filemap_write_and_wait(ip->i_mapping);
1238		 */
1239
1240		/*
1241		 * Mark inode as not dirty.  It will still be on the dirty
1242		 * inode list, but we'll know not to commit it again unless
1243		 * it gets marked dirty again
1244		 */
1245		clear_cflag(COMMIT_Dirty, ip);
1246
1247		/* inherit anonymous tlock(s) of inode */
1248		if (jfs_ip->atlhead) {
1249			lid_to_tlock(jfs_ip->atltail)->next = tblk->next;
1250			tblk->next = jfs_ip->atlhead;
1251			if (!tblk->last)
1252				tblk->last = jfs_ip->atltail;
1253			jfs_ip->atlhead = jfs_ip->atltail = 0;
1254			TXN_LOCK();
1255			list_del_init(&jfs_ip->anon_inode_list);
1256			TXN_UNLOCK();
1257		}
1258
1259		/*
1260		 * acquire transaction lock on on-disk inode page
1261		 * (become first tlock of the tblk's tlock list)
1262		 */
1263		if (((rc = diWrite(tid, ip))))
1264			goto out;
1265	}
1266
1267	/*
1268	 *	write log records from transaction locks
1269	 *
1270	 * txUpdateMap() resets XAD_NEW in XAD.
1271	 */
1272	if ((rc = txLog(log, tblk, &cd)))
1273		goto TheEnd;
1274
1275	/*
1276	 * Ensure that inode isn't reused before
1277	 * lazy commit thread finishes processing
1278	 */
1279	if (tblk->xflag & COMMIT_DELETE) {
1280		ihold(tblk->u.ip);
1281		/*
1282		 * Avoid a rare deadlock
1283		 *
1284		 * If the inode is locked, we may be blocked in
1285		 * jfs_commit_inode.  If so, we don't want the
1286		 * lazy_commit thread doing the last iput() on the inode
1287		 * since that may block on the locked inode.  Instead,
1288		 * commit the transaction synchronously, so the last iput
1289		 * will be done by the calling thread (or later)
1290		 */
1291		/*
1292		 * I believe this code is no longer needed.  Splitting I_LOCK
1293		 * into two bits, I_NEW and I_SYNC should prevent this
1294		 * deadlock as well.  But since I don't have a JFS testload
1295		 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
1296		 * Joern
1297		 */
1298		if (tblk->u.ip->i_state & I_SYNC)
1299			tblk->xflag &= ~COMMIT_LAZY;
1300	}
1301
1302	ASSERT((!(tblk->xflag & COMMIT_DELETE)) ||
1303	       ((tblk->u.ip->i_nlink == 0) &&
1304		!test_cflag(COMMIT_Nolink, tblk->u.ip)));
1305
1306	/*
1307	 *	write COMMIT log record
1308	 */
1309	lrd->type = cpu_to_le16(LOG_COMMIT);
1310	lrd->length = 0;
1311	lmLog(log, tblk, lrd, NULL);
1312
1313	lmGroupCommit(log, tblk);
1314
1315	/*
1316	 *	- transaction is now committed -
1317	 */
1318
1319	/*
1320	 * force pages in careful update
1321	 * (imap addressing structure update)
1322	 */
1323	if (flag & COMMIT_FORCE)
1324		txForce(tblk);
1325
1326	/*
1327	 *	update allocation map.
1328	 *
1329	 * update inode allocation map and inode:
1330	 * free pager lock on memory object of inode if any.
1331	 * update block allocation map.
1332	 *
1333	 * txUpdateMap() resets XAD_NEW in XAD.
1334	 */
1335	if (tblk->xflag & COMMIT_FORCE)
1336		txUpdateMap(tblk);
1337
1338	/*
1339	 *	free transaction locks and pageout/free pages
1340	 */
1341	txRelease(tblk);
1342
1343	if ((tblk->flag & tblkGC_LAZY) == 0)
1344		txUnlock(tblk);
1345
1346
1347	/*
1348	 *	reset in-memory object state
1349	 */
1350	for (k = 0; k < cd.nip; k++) {
1351		ip = cd.iplist[k];
1352		jfs_ip = JFS_IP(ip);
1353
1354		/*
1355		 * reset in-memory inode state
1356		 */
1357		jfs_ip->bxflag = 0;
1358		jfs_ip->blid = 0;
1359	}
1360
1361      out:
1362	if (rc != 0)
1363		txAbort(tid, 1);
1364
1365      TheEnd:
1366	jfs_info("txCommit: tid = %d, returning %d", tid, rc);
1367	return rc;
1368}
1369
1370/*
1371 * NAME:	txLog()
1372 *
1373 * FUNCTION:	Writes AFTER log records for all lines modified
1374 *		by tid for segments specified by inodes in comdata.
1375 *		Code assumes only WRITELOCKS are recorded in lockwords.
1376 *
1377 * PARAMETERS:
1378 *
1379 * RETURN :
1380 */
1381static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
1382{
1383	int rc = 0;
1384	struct inode *ip;
1385	lid_t lid;
1386	struct tlock *tlck;
1387	struct lrd *lrd = &cd->lrd;
1388
1389	/*
1390	 * write log record(s) for each tlock of transaction,
1391	 */
1392	for (lid = tblk->next; lid; lid = tlck->next) {
1393		tlck = lid_to_tlock(lid);
1394
1395		tlck->flag |= tlckLOG;
1396
1397		/* initialize lrd common */
1398		ip = tlck->ip;
1399		lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate);
1400		lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset);
1401		lrd->log.redopage.inode = cpu_to_le32(ip->i_ino);
1402
1403		/* write log record of page from the tlock */
1404		switch (tlck->type & tlckTYPE) {
1405		case tlckXTREE:
1406			xtLog(log, tblk, lrd, tlck);
1407			break;
1408
1409		case tlckDTREE:
1410			dtLog(log, tblk, lrd, tlck);
1411			break;
1412
1413		case tlckINODE:
1414			diLog(log, tblk, lrd, tlck, cd);
1415			break;
1416
1417		case tlckMAP:
1418			mapLog(log, tblk, lrd, tlck);
1419			break;
1420
1421		case tlckDATA:
1422			dataLog(log, tblk, lrd, tlck);
1423			break;
1424
1425		default:
1426			jfs_err("UFO tlock:0x%p", tlck);
1427		}
1428	}
1429
1430	return rc;
1431}
1432
1433/*
1434 *	diLog()
1435 *
1436 * function:	log inode tlock and format maplock to update bmap;
1437 */
1438static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1439		 struct tlock * tlck, struct commit * cd)
1440{
1441	int rc = 0;
1442	struct metapage *mp;
1443	pxd_t *pxd;
1444	struct pxd_lock *pxdlock;
1445
1446	mp = tlck->mp;
1447
1448	/* initialize as REDOPAGE record format */
1449	lrd->log.redopage.type = cpu_to_le16(LOG_INODE);
1450	lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE);
1451
1452	pxd = &lrd->log.redopage.pxd;
1453
1454	/*
1455	 *	inode after image
1456	 */
1457	if (tlck->type & tlckENTRY) {
1458		/* log after-image for logredo(): */
1459		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1460		PXDaddress(pxd, mp->index);
1461		PXDlength(pxd,
1462			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1463		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1464
1465		/* mark page as homeward bound */
1466		tlck->flag |= tlckWRITEPAGE;
1467	} else if (tlck->type & tlckFREE) {
1468		/*
1469		 *	free inode extent
1470		 *
1471		 * (pages of the freed inode extent have been invalidated and
1472		 * a maplock for free of the extent has been formatted at
1473		 * txLock() time);
1474		 *
1475		 * the tlock had been acquired on the inode allocation map page
1476		 * (iag) that specifies the freed extent, even though the map
1477		 * page is not itself logged, to prevent pageout of the map
1478		 * page before the log;
1479		 */
1480
1481		/* log LOG_NOREDOINOEXT of the freed inode extent for
1482		 * logredo() to start NoRedoPage filters, and to update
1483		 * imap and bmap for free of the extent;
1484		 */
1485		lrd->type = cpu_to_le16(LOG_NOREDOINOEXT);
1486		/*
1487		 * For the LOG_NOREDOINOEXT record, we need
1488		 * to pass the IAG number and inode extent
1489		 * index (within that IAG) from which the
 1490 * extent is being released.  These have been
1491		 * passed to us in the iplist[1] and iplist[2].
1492		 */
1493		lrd->log.noredoinoext.iagnum =
1494		    cpu_to_le32((u32) (size_t) cd->iplist[1]);
1495		lrd->log.noredoinoext.inoext_idx =
1496		    cpu_to_le32((u32) (size_t) cd->iplist[2]);
1497
1498		pxdlock = (struct pxd_lock *) & tlck->lock;
1499		*pxd = pxdlock->pxd;
1500		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1501
1502		/* update bmap */
1503		tlck->flag |= tlckUPDATEMAP;
1504
1505		/* mark page as homeward bound */
1506		tlck->flag |= tlckWRITEPAGE;
1507	} else
1508		jfs_err("diLog: UFO type tlck:0x%p", tlck);
1509#ifdef  _JFS_WIP
1510	/*
1511	 *	alloc/free external EA extent
1512	 *
1513	 * a maplock for txUpdateMap() to update bPWMAP for alloc/free
1514	 * of the extent has been formatted at txLock() time;
1515	 */
1516	else {
1517		assert(tlck->type & tlckEA);
1518
1519		/* log LOG_UPDATEMAP for logredo() to update bmap for
1520		 * alloc of new (and free of old) external EA extent;
1521		 */
1522		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1523		pxdlock = (struct pxd_lock *) & tlck->lock;
1524		nlock = pxdlock->index;
1525		for (i = 0; i < nlock; i++, pxdlock++) {
1526			if (pxdlock->flag & mlckALLOCPXD)
1527				lrd->log.updatemap.type =
1528				    cpu_to_le16(LOG_ALLOCPXD);
1529			else
1530				lrd->log.updatemap.type =
1531				    cpu_to_le16(LOG_FREEPXD);
1532			lrd->log.updatemap.nxd = cpu_to_le16(1);
1533			lrd->log.updatemap.pxd = pxdlock->pxd;
1534			lrd->backchain =
1535			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1536		}
1537
1538		/* update bmap */
1539		tlck->flag |= tlckUPDATEMAP;
1540	}
1541#endif				/* _JFS_WIP */
1542
1543	return rc;
1544}
1545
1546/*
1547 *	dataLog()
1548 *
1549 * function:	log data tlock
1550 */
1551static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1552	    struct tlock * tlck)
1553{
1554	struct metapage *mp;
1555	pxd_t *pxd;
1556
1557	mp = tlck->mp;
1558
1559	/* initialize as REDOPAGE record format */
1560	lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
1561	lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);
1562
1563	pxd = &lrd->log.redopage.pxd;
1564
1565	/* log after-image for logredo(): */
1566	lrd->type = cpu_to_le16(LOG_REDOPAGE);
1567
1568	if (jfs_dirtable_inline(tlck->ip)) {
1569		/*
 1570		 * The table has been truncated; we must have deleted
1571		 * the last entry, so don't bother logging this
1572		 */
1573		mp->lid = 0;
1574		grab_metapage(mp);
1575		metapage_homeok(mp);
1576		discard_metapage(mp);
1577		tlck->mp = NULL;
1578		return 0;
1579	}
1580
1581	PXDaddress(pxd, mp->index);
1582	PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);
1583
1584	lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1585
1586	/* mark page as homeward bound */
1587	tlck->flag |= tlckWRITEPAGE;
1588
1589	return 0;
1590}
1591
1592/*
1593 *	dtLog()
1594 *
1595 * function:	log dtree tlock and format maplock to update bmap;
1596 */
1597static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1598	   struct tlock * tlck)
1599{
1600	struct metapage *mp;
1601	struct pxd_lock *pxdlock;
1602	pxd_t *pxd;
1603
1604	mp = tlck->mp;
1605
1606	/* initialize as REDOPAGE/NOREDOPAGE record format */
1607	lrd->log.redopage.type = cpu_to_le16(LOG_DTREE);
1608	lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE);
1609
1610	pxd = &lrd->log.redopage.pxd;
1611
1612	if (tlck->type & tlckBTROOT)
1613		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1614
1615	/*
1616	 *	page extension via relocation: entry insertion;
1617	 *	page extension in-place: entry insertion;
1618	 *	new right page from page split, reinitialized in-line
1619	 *	root from root page split: entry insertion;
1620	 */
1621	if (tlck->type & (tlckNEW | tlckEXTEND)) {
1622		/* log after-image of the new page for logredo():
1623		 * mark log (LOG_NEW) for logredo() to initialize
1624		 * freelist and update bmap for alloc of the new page;
1625		 */
1626		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1627		if (tlck->type & tlckEXTEND)
1628			lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
1629		else
1630			lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
1631		PXDaddress(pxd, mp->index);
1632		PXDlength(pxd,
1633			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1634		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1635
1636		/* format a maplock for txUpdateMap() to update bPMAP for
1637		 * alloc of the new page;
1638		 */
1639		if (tlck->type & tlckBTROOT)
1640			return;
1641		tlck->flag |= tlckUPDATEMAP;
1642		pxdlock = (struct pxd_lock *) & tlck->lock;
1643		pxdlock->flag = mlckALLOCPXD;
1644		pxdlock->pxd = *pxd;
1645
1646		pxdlock->index = 1;
1647
1648		/* mark page as homeward bound */
1649		tlck->flag |= tlckWRITEPAGE;
1650		return;
1651	}
1652
1653	/*
1654	 *	entry insertion/deletion,
1655	 *	sibling page link update (old right page before split);
1656	 */
1657	if (tlck->type & (tlckENTRY | tlckRELINK)) {
1658		/* log after-image for logredo(): */
1659		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1660		PXDaddress(pxd, mp->index);
1661		PXDlength(pxd,
1662			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1663		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1664
1665		/* mark page as homeward bound */
1666		tlck->flag |= tlckWRITEPAGE;
1667		return;
1668	}
1669
1670	/*
1671	 *	page deletion: page has been invalidated
1672	 *	page relocation: source extent
1673	 *
1674	 *	a maplock for free of the page has been formatted
1675	 *	at txLock() time);
1676	 */
1677	if (tlck->type & (tlckFREE | tlckRELOCATE)) {
1678		/* log LOG_NOREDOPAGE of the deleted page for logredo()
1679		 * to start NoRedoPage filter and to update bmap for free
 1680		 * of the deleted page
1681		 */
1682		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1683		pxdlock = (struct pxd_lock *) & tlck->lock;
1684		*pxd = pxdlock->pxd;
1685		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1686
1687		/* a maplock for txUpdateMap() for free of the page
1688		 * has been formatted at txLock() time;
1689		 */
1690		tlck->flag |= tlckUPDATEMAP;
1691	}
1692	return;
1693}
1694
1695/*
1696 *	xtLog()
1697 *
1698 * function:	log xtree tlock and format maplock to update bmap;
1699 */
1700static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1701	   struct tlock * tlck)
1702{
1703	struct inode *ip;
1704	struct metapage *mp;
1705	xtpage_t *p;
1706	struct xtlock *xtlck;
1707	struct maplock *maplock;
1708	struct xdlistlock *xadlock;
1709	struct pxd_lock *pxdlock;
1710	pxd_t *page_pxd;
1711	int next, lwm, hwm;
1712
1713	ip = tlck->ip;
1714	mp = tlck->mp;
1715
1716	/* initialize as REDOPAGE/NOREDOPAGE record format */
1717	lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
1718	lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);
1719
1720	page_pxd = &lrd->log.redopage.pxd;
1721
1722	if (tlck->type & tlckBTROOT) {
1723		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1724		p = &JFS_IP(ip)->i_xtroot;
1725		if (S_ISDIR(ip->i_mode))
1726			lrd->log.redopage.type |=
1727			    cpu_to_le16(LOG_DIR_XTREE);
1728	} else
1729		p = (xtpage_t *) mp->data;
1730	next = le16_to_cpu(p->header.nextindex);
1731
1732	xtlck = (struct xtlock *) & tlck->lock;
1733
1734	maplock = (struct maplock *) & tlck->lock;
1735	xadlock = (struct xdlistlock *) maplock;
1736
1737	/*
1738	 *	entry insertion/extension;
1739	 *	sibling page link update (old right page before split);
1740	 */
1741	if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
1742		/* log after-image for logredo():
1743		 * logredo() will update bmap for alloc of new/extended
1744		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1745		 * after-image of XADlist;
1746		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1747		 * applying the after-image to the meta-data page.
1748		 */
1749		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1750		PXDaddress(page_pxd, mp->index);
1751		PXDlength(page_pxd,
1752			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1753		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1754
1755		/* format a maplock for txUpdateMap() to update bPMAP
1756		 * for alloc of new/extended extents of XAD[lwm:next)
1757		 * from the page itself;
1758		 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
1759		 */
1760		lwm = xtlck->lwm.offset;
1761		if (lwm == 0)
1762			lwm = XTPAGEMAXSLOT;
1763
1764		if (lwm == next)
1765			goto out;
1766		if (lwm > next) {
1767			jfs_err("xtLog: lwm > next");
1768			goto out;
1769		}
1770		tlck->flag |= tlckUPDATEMAP;
1771		xadlock->flag = mlckALLOCXADLIST;
1772		xadlock->count = next - lwm;
1773		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1774			int i;
1775			pxd_t *pxd;
1776			/*
1777			 * Lazy commit may allow xtree to be modified before
1778			 * txUpdateMap runs.  Copy xad into linelock to
1779			 * preserve correct data.
1780			 *
 1781			 * We can fit twice as many pxd's as xads in the lock
1782			 */
1783			xadlock->flag = mlckALLOCPXDLIST;
1784			pxd = xadlock->xdlist = &xtlck->pxdlock;
1785			for (i = 0; i < xadlock->count; i++) {
1786				PXDaddress(pxd, addressXAD(&p->xad[lwm + i]));
1787				PXDlength(pxd, lengthXAD(&p->xad[lwm + i]));
1788				p->xad[lwm + i].flag &=
1789				    ~(XAD_NEW | XAD_EXTENDED);
1790				pxd++;
1791			}
1792		} else {
1793			/*
 1794			 * xdlist will point into the inode's xtree; ensure
 1795			 * that the transaction is not committed lazily.
1796			 */
1797			xadlock->flag = mlckALLOCXADLIST;
1798			xadlock->xdlist = &p->xad[lwm];
1799			tblk->xflag &= ~COMMIT_LAZY;
1800		}
1801		jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d count:%d",
1802			 tlck->ip, mp, tlck, lwm, xadlock->count);
1803
1804		maplock->index = 1;
1805
1806	      out:
1807		/* mark page as homeward bound */
1808		tlck->flag |= tlckWRITEPAGE;
1809
1810		return;
1811	}
1812
1813	/*
1814	 *	page deletion: file deletion/truncation (ref. xtTruncate())
1815	 *
1816	 * (page will be invalidated after log is written and bmap
1817	 * is updated from the page);
1818	 */
1819	if (tlck->type & tlckFREE) {
1820		/* LOG_NOREDOPAGE log for NoRedoPage filter:
1821		 * if page free from file delete, NoRedoFile filter from
1822		 * inode image of zero link count will subsume NoRedoPage
1823		 * filters for each page;
 1824		 * if page free from file truncation, write NoRedoPage
 1825		 * filter;
 1826		 *
 1827		 * update of block allocation map for the page itself:
1828		 * if page free from deletion and truncation, LOG_UPDATEMAP
1829		 * log for the page itself is generated from processing
1830		 * its parent page xad entries;
1831		 */
1832		/* if page free from file truncation, log LOG_NOREDOPAGE
1833		 * of the deleted page for logredo() to start NoRedoPage
1834		 * filter for the page;
1835		 */
1836		if (tblk->xflag & COMMIT_TRUNCATE) {
1837			/* write NOREDOPAGE for the page */
1838			lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1839			PXDaddress(page_pxd, mp->index);
1840			PXDlength(page_pxd,
1841				  mp->logical_size >> tblk->sb->
1842				  s_blocksize_bits);
1843			lrd->backchain =
1844			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1845
1846			if (tlck->type & tlckBTROOT) {
1847				/* Empty xtree must be logged */
1848				lrd->type = cpu_to_le16(LOG_REDOPAGE);
1849				lrd->backchain =
1850				    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1851			}
1852		}
1853
1854		/* init LOG_UPDATEMAP of the freed extents
1855		 * XAD[XTENTRYSTART:hwm) from the deleted page itself
1856		 * for logredo() to update bmap;
1857		 */
1858		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1859		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST);
1860		xtlck = (struct xtlock *) & tlck->lock;
1861		hwm = xtlck->hwm.offset;
1862		lrd->log.updatemap.nxd =
1863		    cpu_to_le16(hwm - XTENTRYSTART + 1);
1864		/* reformat linelock for lmLog() */
1865		xtlck->header.offset = XTENTRYSTART;
1866		xtlck->header.length = hwm - XTENTRYSTART + 1;
1867		xtlck->index = 1;
1868		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1869
1870		/* format a maplock for txUpdateMap() to update bmap
1871		 * to free extents of XAD[XTENTRYSTART:hwm) from the
1872		 * deleted page itself;
1873		 */
1874		tlck->flag |= tlckUPDATEMAP;
1875		xadlock->count = hwm - XTENTRYSTART + 1;
1876		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1877			int i;
1878			pxd_t *pxd;
1879			/*
1880			 * Lazy commit may allow xtree to be modified before
1881			 * txUpdateMap runs.  Copy xad into linelock to
1882			 * preserve correct data.
1883			 *
 1884			 * We can fit twice as many pxd's as xads in the lock
1885			 */
1886			xadlock->flag = mlckFREEPXDLIST;
1887			pxd = xadlock->xdlist = &xtlck->pxdlock;
1888			for (i = 0; i < xadlock->count; i++) {
1889				PXDaddress(pxd,
1890					addressXAD(&p->xad[XTENTRYSTART + i]));
1891				PXDlength(pxd,
1892					lengthXAD(&p->xad[XTENTRYSTART + i]));
1893				pxd++;
1894			}
1895		} else {
1896			/*
 1897			 * xdlist will point into the inode's xtree; ensure
 1898			 * that the transaction is not committed lazily.
1899			 */
1900			xadlock->flag = mlckFREEXADLIST;
1901			xadlock->xdlist = &p->xad[XTENTRYSTART];
1902			tblk->xflag &= ~COMMIT_LAZY;
1903		}
1904		jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2",
1905			 tlck->ip, mp, xadlock->count);
1906
1907		maplock->index = 1;
1908
1909		/* mark page as invalid */
1910		if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode))
1911		    && !(tlck->type & tlckBTROOT))
1912			tlck->flag |= tlckFREEPAGE;
1913		/*
1914		   else (tblk->xflag & COMMIT_PMAP)
1915		   ? release the page;
1916		 */
1917		return;
1918	}
1919
1920	/*
1921	 *	page/entry truncation: file truncation (ref. xtTruncate())
1922	 *
1923	 *	|----------+------+------+---------------|
1924	 *		   |      |      |
1925	 *		   |      |     hwm - hwm before truncation
1926	 *		   |     next - truncation point
1927	 *		  lwm - lwm before truncation
1928	 * header ?
1929	 */
1930	if (tlck->type & tlckTRUNCATE) {
1931		/* This odd declaration suppresses a bogus gcc warning */
1932		pxd_t pxd = pxd;	/* truncated extent of xad */
1933		int twm;
1934
1935		/*
1936		 * For truncation the entire linelock may be used, so it would
1937		 * be difficult to store xad list in linelock itself.
1938		 * Therefore, we'll just force transaction to be committed
1939		 * synchronously, so that xtree pages won't be changed before
1940		 * txUpdateMap runs.
1941		 */
1942		tblk->xflag &= ~COMMIT_LAZY;
1943		lwm = xtlck->lwm.offset;
1944		if (lwm == 0)
1945			lwm = XTPAGEMAXSLOT;
1946		hwm = xtlck->hwm.offset;
1947		twm = xtlck->twm.offset;
1948
1949		/*
1950		 *	write log records
1951		 */
1952		/* log after-image for logredo():
1953		 *
1954		 * logredo() will update bmap for alloc of new/extended
1955		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1956		 * after-image of XADlist;
1957		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1958		 * applying the after-image to the meta-data page.
1959		 */
1960		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1961		PXDaddress(page_pxd, mp->index);
1962		PXDlength(page_pxd,
1963			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1964		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1965
1966		/*
1967		 * truncate entry XAD[twm == next - 1]:
1968		 */
1969		if (twm == next - 1) {
1970			/* init LOG_UPDATEMAP for logredo() to update bmap for
1971			 * free of truncated delta extent of the truncated
1972			 * entry XAD[next - 1]:
1973			 * (xtlck->pxdlock = truncated delta extent);
1974			 */
1975			pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
1976			/* assert(pxdlock->type & tlckTRUNCATE); */
1977			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1978			lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
1979			lrd->log.updatemap.nxd = cpu_to_le16(1);
1980			lrd->log.updatemap.pxd = pxdlock->pxd;
1981			pxd = pxdlock->pxd;	/* save to format maplock */
1982			lrd->backchain =
1983			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1984		}
1985
1986		/*
1987		 * free entries XAD[next:hwm]:
1988		 */
1989		if (hwm >= next) {
1990			/* init LOG_UPDATEMAP of the freed extents
1991			 * XAD[next:hwm] from the deleted page itself
1992			 * for logredo() to update bmap;
1993			 */
1994			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1995			lrd->log.updatemap.type =
1996			    cpu_to_le16(LOG_FREEXADLIST);
1997			xtlck = (struct xtlock *) & tlck->lock;
1998			hwm = xtlck->hwm.offset;
1999			lrd->log.updatemap.nxd =
2000			    cpu_to_le16(hwm - next + 1);
2001			/* reformat linelock for lmLog() */
2002			xtlck->header.offset = next;
2003			xtlck->header.length = hwm - next + 1;
2004			xtlck->index = 1;
2005			lrd->backchain =
2006			    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
2007		}
2008
2009		/*
2010		 *	format maplock(s) for txUpdateMap() to update bmap
2011		 */
2012		maplock->index = 0;
2013
2014		/*
2015		 * allocate entries XAD[lwm:next):
2016		 */
2017		if (lwm < next) {
2018			/* format a maplock for txUpdateMap() to update bPMAP
2019			 * for alloc of new/extended extents of XAD[lwm:next)
2020			 * from the page itself;
2021			 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
2022			 */
2023			tlck->flag |= tlckUPDATEMAP;
2024			xadlock->flag = mlckALLOCXADLIST;
2025			xadlock->count = next - lwm;
2026			xadlock->xdlist = &p->xad[lwm];
2027
2028			jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d lwm:%d next:%d",
2029				 tlck->ip, mp, xadlock->count, lwm, next);
2030			maplock->index++;
2031			xadlock++;
2032		}
2033
2034		/*
2035		 * truncate entry XAD[twm == next - 1]:
2036		 */
2037		if (twm == next - 1) {
2038			/* format a maplock for txUpdateMap() to update bmap
2039			 * to free truncated delta extent of the truncated
2040			 * entry XAD[next - 1];
2041			 * (xtlck->pxdlock = truncated delta extent);
2042			 */
2043			tlck->flag |= tlckUPDATEMAP;
2044			pxdlock = (struct pxd_lock *) xadlock;
2045			pxdlock->flag = mlckFREEPXD;
2046			pxdlock->count = 1;
2047			pxdlock->pxd = pxd;
2048
2049			jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d hwm:%d",
2050				 ip, mp, pxdlock->count, hwm);
2051			maplock->index++;
2052			xadlock++;
2053		}
2054
2055		/*
2056		 * free entries XAD[next:hwm]:
2057		 */
2058		if (hwm >= next) {
2059			/* format a maplock for txUpdateMap() to update bmap
2060		 * to free extents of XAD[next:hwm] from the deleted
2061			 * page itself;
2062			 */
2063			tlck->flag |= tlckUPDATEMAP;
2064			xadlock->flag = mlckFREEXADLIST;
2065			xadlock->count = hwm - next + 1;
2066			xadlock->xdlist = &p->xad[next];
2067
2068			jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d next:%d hwm:%d",
2069				 tlck->ip, mp, xadlock->count, next, hwm);
2070			maplock->index++;
2071		}
2072
2073		/* mark page as homeward bound */
2074		tlck->flag |= tlckWRITEPAGE;
2075	}
2076	return;
2077}
2078
2079/*
2080 *	mapLog()
2081 *
2082 * function:	log from maplock of freed data extents;
2083 */
2084static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2085		   struct tlock * tlck)
2086{
2087	struct pxd_lock *pxdlock;
2088	int i, nlock;
2089	pxd_t *pxd;
2090
2091	/*
2092	 *	page relocation: free the source page extent
2093	 *
2094	 * a maplock for txUpdateMap() for free of the page
2095	 * has been formatted at txLock() time saving the src
2096	 * relocated page address;
2097	 */
2098	if (tlck->type & tlckRELOCATE) {
2099		/* log LOG_NOREDOPAGE of the old relocated page
2100		 * for logredo() to start NoRedoPage filter;
2101		 */
2102		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
2103		pxdlock = (struct pxd_lock *) & tlck->lock;
2104		pxd = &lrd->log.redopage.pxd;
2105		*pxd = pxdlock->pxd;
2106		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2107
2108		/* (N.B. currently, logredo() does NOT update bmap
2109		 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE);
2110		 * if page free from relocation, LOG_UPDATEMAP log is
2111		 * specifically generated now for logredo()
2112		 * to update bmap for free of src relocated page;
2113		 * (new flag LOG_RELOCATE may be introduced which will
2114		 * inform logredo() to start NORedoPage filter and also
2115		 * update block allocation map at the same time, thus
2116		 * avoiding an extra log write);
2117		 */
2118		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2119		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
2120		lrd->log.updatemap.nxd = cpu_to_le16(1);
2121		lrd->log.updatemap.pxd = pxdlock->pxd;
2122		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2123
2124		/* a maplock for txUpdateMap() for free of the page
2125		 * has been formatted at txLock() time;
2126		 */
2127		tlck->flag |= tlckUPDATEMAP;
2128		return;
2129	}
2130	/*
2131	 *
2132	 * Otherwise it's not a relocate request
2133	 *
2134	 */
2135	else {
2136		/* log LOG_UPDATEMAP for logredo() to update bmap for
2137		 * free of truncated/relocated delta extent of the data;
2138		 * e.g.: external EA extent, relocated/truncated extent
2139		 * from xtTailgate();
2140		 */
2141		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2142		pxdlock = (struct pxd_lock *) & tlck->lock;
2143		nlock = pxdlock->index;
2144		for (i = 0; i < nlock; i++, pxdlock++) {
2145			if (pxdlock->flag & mlckALLOCPXD)
2146				lrd->log.updatemap.type =
2147				    cpu_to_le16(LOG_ALLOCPXD);
2148			else
2149				lrd->log.updatemap.type =
2150				    cpu_to_le16(LOG_FREEPXD);
2151			lrd->log.updatemap.nxd = cpu_to_le16(1);
2152			lrd->log.updatemap.pxd = pxdlock->pxd;
2153			lrd->backchain =
2154			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2155			jfs_info("mapLog: xaddr:0x%lx xlen:0x%x",
2156				 (ulong) addressPXD(&pxdlock->pxd),
2157				 lengthPXD(&pxdlock->pxd));
2158		}
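		/* (one LOG_UPDATEMAP record is written per pxd lock,
		 * each chained to its predecessor via lrd->backchain)
		 */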
2159
2160		/* update bmap */
2161		tlck->flag |= tlckUPDATEMAP;
2162	}
2163}
2164
2165/*
2166 *	txEA()
2167 *
2168 * function:	acquire maplock for EA/ACL extents or
2169 *		set COMMIT_INLINE flag;
2170 */
2171void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
2172{
2173	struct tlock *tlck = NULL;
2174	struct pxd_lock *maplock = NULL, *pxdlock = NULL;
2175
2176	/*
2177	 * format maplock for alloc of new EA extent
2178	 */
2179	if (newea) {
2180		/* Since the newea could be a completely zeroed entry we need to
2181		 * check for the two flags which indicate we should actually
2182		 * commit new EA data
2183		 */
2184		if (newea->flag & DXD_EXTENT) {
2185			tlck = txMaplock(tid, ip, tlckMAP);
2186			maplock = (struct pxd_lock *) & tlck->lock;
2187			pxdlock = (struct pxd_lock *) maplock;
2188			pxdlock->flag = mlckALLOCPXD;
2189			PXDaddress(&pxdlock->pxd, addressDXD(newea));
2190			PXDlength(&pxdlock->pxd, lengthDXD(newea));
2191			pxdlock++;
2192			maplock->index = 1;
2193		} else if (newea->flag & DXD_INLINE) {
2194			tlck = NULL;
2195
2196			set_cflag(COMMIT_Inlineea, ip);
2197		}
2198	}
2199
2200	/*
2201	 * format maplock for free of old EA extent
2202	 */
2203	if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) {
2204		if (tlck == NULL) {
2205			tlck = txMaplock(tid, ip, tlckMAP);
2206			maplock = (struct pxd_lock *) & tlck->lock;
2207			pxdlock = (struct pxd_lock *) maplock;
2208			maplock->index = 0;
2209		}
2210		pxdlock->flag = mlckFREEPXD;
2211		PXDaddress(&pxdlock->pxd, addressDXD(oldea));
2212		PXDlength(&pxdlock->pxd, lengthDXD(oldea));
2213		maplock->index++;
2214	}
2215}
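
/*
 * Usage sketch (illustrative only, not lifted from a caller; the
 * real callers live in the extended-attribute code):
 *
 *	tid = txBegin(inode->i_sb, 0);
 *	...allocate/build the new EA extent, filling a dxd_t newea...
 *	txEA(tid, inode, &JFS_IP(inode)->ea, &newea);
 *	JFS_IP(inode)->ea = newea;
 *	txCommit(tid, 1, &inode, 0);
 *	txEnd(tid);
 */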
2216
2217/*
2218 *	txForce()
2219 *
2220 * function: synchronously write pages locked by transaction
2221 *	     after txLog() but before txUpdateMap();
2222 */
2223static void txForce(struct tblock * tblk)
2224{
2225	struct tlock *tlck;
2226	lid_t lid, next;
2227	struct metapage *mp;
2228
2229	/*
2230	 * reverse the order of transaction tlocks in
2231	 * careful update order of address index pages
2232	 * (right to left, bottom up)
2233	 */
2234	tlck = lid_to_tlock(tblk->next);
2235	lid = tlck->next;
2236	tlck->next = 0;
2237	while (lid) {
2238		tlck = lid_to_tlock(lid);
2239		next = tlck->next;
2240		tlck->next = tblk->next;
2241		tblk->next = lid;
2242		lid = next;
2243	}
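	/* (the while loop above is a standard in-place reversal of
	 * the singly-linked lid chain, so tblk->next now refers to
	 * what was the last tlock)
	 */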
2244
2245	/*
2246	 * synchronously write the page, and
2247	 * hold the page for txUpdateMap();
2248	 */
2249	for (lid = tblk->next; lid; lid = next) {
2250		tlck = lid_to_tlock(lid);
2251		next = tlck->next;
2252
2253		if ((mp = tlck->mp) != NULL &&
2254		    (tlck->type & tlckBTROOT) == 0) {
2255			assert(mp->xflag & COMMIT_PAGE);
2256
2257			if (tlck->flag & tlckWRITEPAGE) {
2258				tlck->flag &= ~tlckWRITEPAGE;
2259
2260				/* do not release page to freelist */
2261				force_metapage(mp);
2262#if 0
2263				/*
2264				 * The "right" thing to do here is to
2265				 * synchronously write the metadata.
2266				 * With the current implementation this
2267				 * is hard since write_metapage requires
2268				 * us to kunmap & remap the page.  If we
2269				 * have tlocks pointing into the metadata
2270				 * pages, we don't want to do this.  I think
2271				 * we can get by with synchronously writing
2272				 * the pages when they are released.
2273				 */
2274				assert(mp->nohomeok);
2275				set_bit(META_dirty, &mp->flag);
2276				set_bit(META_sync, &mp->flag);
2277#endif
2278			}
2279		}
2280	}
2281}
2282
2283/*
2284 *	txUpdateMap()
2285 *
2286 * function:	update persistent allocation map (and working map
2287 *		if appropriate);
2288 *
2289 * parameter:	tblk	- committed transaction block;
2290 */
2291static void txUpdateMap(struct tblock * tblk)
2292{
2293	struct inode *ip;
2294	struct inode *ipimap;
2295	lid_t lid;
2296	struct tlock *tlck;
2297	struct maplock *maplock;
2298	struct pxd_lock pxdlock;
2299	int maptype;
2300	int k, nlock;
2301	struct metapage *mp = NULL;
2302
2303	ipimap = JFS_SBI(tblk->sb)->ipimap;
2304
2305	maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP;
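	/* COMMIT_PMAP: update only the persistent map (the working
	 * map is updated later, e.g. when the last reference to the
	 * object is released); COMMIT_PWMAP: update both maps now
	 */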
2306
2307
2308	/*
2309	 *	update block allocation map
2310	 *
2311	 * update allocation state in pmap (and wmap) and
2312	 * update lsn of the pmap page;
2313	 */
2314	/*
2315	 * scan each tlock/page of transaction for block allocation/free:
2316	 *
2317	 * for each tlock/page of transaction, update map.
2318	 *  ? are there tlock for pmap and pwmap at the same time ?
2319	 */
2320	for (lid = tblk->next; lid; lid = tlck->next) {
2321		tlck = lid_to_tlock(lid);
2322
2323		if ((tlck->flag & tlckUPDATEMAP) == 0)
2324			continue;
2325
2326		if (tlck->flag & tlckFREEPAGE) {
2327			/*
2328			 * Another thread may attempt to reuse freed space
2329			 * immediately, so we want to get rid of the metapage
2330			 * before anyone else has a chance to get it.
2331			 * Lock metapage, update maps, then invalidate
2332			 * the metapage.
2333			 */
2334			mp = tlck->mp;
2335			ASSERT(mp->xflag & COMMIT_PAGE);
2336			grab_metapage(mp);
2337		}
2338
2339		/*
2340		 * extent list:
2341		 * . in-line PXD list:
2342		 * . out-of-line XAD list:
2343		 */
2344		maplock = (struct maplock *) & tlck->lock;
2345		nlock = maplock->index;
2346
2347		for (k = 0; k < nlock; k++, maplock++) {
2348			/*
2349			 * allocate blocks in persistent map:
2350			 *
2351			 * blocks have been allocated from wmap at alloc time;
2352			 */
2353			if (maplock->flag & mlckALLOC) {
2354				txAllocPMap(ipimap, maplock, tblk);
2355			}
2356			/*
2357			 * free blocks in persistent and working map:
2358			 * blocks will be freed in pmap and then in wmap;
2359			 *
2360			 * ? tblock specifies the PMAP/PWMAP based upon
2361			 * transaction
2362			 *
2363			 * free blocks in persistent map:
2364			 * blocks will be freed from wmap at last reference
2365			 * release of the object for regular files;
2366			 *
2367			 * Always free blocks from both persistent & working
2368			 * maps for directories
2369			 */
2370			else {	/* (maplock->flag & mlckFREE) */
2371
2372				if (tlck->flag & tlckDIRECTORY)
2373					txFreeMap(ipimap, maplock,
2374						  tblk, COMMIT_PWMAP);
2375				else
2376					txFreeMap(ipimap, maplock,
2377						  tblk, maptype);
2378			}
2379		}
2380		if (tlck->flag & tlckFREEPAGE) {
2381			if (!(tblk->flag & tblkGC_LAZY)) {
2382				/* This is equivalent to txRelease */
2383				ASSERT(mp->lid == lid);
2384				tlck->mp->lid = 0;
2385			}
2386			assert(mp->nohomeok == 1);
2387			metapage_homeok(mp);
2388			discard_metapage(mp);
2389			tlck->mp = NULL;
2390		}
2391	}
2392	/*
2393	 *	update inode allocation map
2394	 *
2395	 * update allocation state in pmap and
2396	 * update lsn of the pmap page;
2397	 * update in-memory inode flag/state
2398	 *
2399	 * unlock mapper/write lock
2400	 */
2401	if (tblk->xflag & COMMIT_CREATE) {
2402		diUpdatePMap(ipimap, tblk->ino, false, tblk);
2403		/* update persistent block allocation map
2404		 * for the allocation of inode extent;
2405		 */
2406		pxdlock.flag = mlckALLOCPXD;
2407		pxdlock.pxd = tblk->u.ixpxd;
2408		pxdlock.index = 1;
2409		txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk);
2410	} else if (tblk->xflag & COMMIT_DELETE) {
2411		ip = tblk->u.ip;
2412		diUpdatePMap(ipimap, ip->i_ino, true, tblk);
2413		iput(ip);
2414	}
2415}
2416
2417/*
2418 *	txAllocPMap()
2419 *
2420 * function: allocate from persistent map;
2421 *
2422 * parameter:
2423 *	ip	- inode (the aggregate block map ipbmap is
2424 *		  derived from ip->i_sb);
2425 *	maplock	- maplock describing the extents to allocate:
2426 *		  xad list (mlckALLOCXADLIST),
2427 *		  single pxd (mlckALLOCPXD), or
2428 *		  pxd list (mlckALLOCPXDLIST);
2429 *	tblk	- transaction block;
2430 *
2431 *	note: blocks were allocated from the working map at alloc
2432 *	time; here they are marked allocated in the persistent map
2433 *	(a tmp file, by contrast, is freed from the working map
2434 *	only at release of its last reference);
2435 *
2436 */
2437static void txAllocPMap(struct inode *ip, struct maplock * maplock,
2438			struct tblock * tblk)
2439{
2440	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2441	struct xdlistlock *xadlistlock;
2442	xad_t *xad;
2443	s64 xaddr;
2444	int xlen;
2445	struct pxd_lock *pxdlock;
2446	struct xdlistlock *pxdlistlock;
2447	pxd_t *pxd;
2448	int n;
2449
2450	/*
2451	 * allocate from persistent map;
2452	 */
2453	if (maplock->flag & mlckALLOCXADLIST) {
2454		xadlistlock = (struct xdlistlock *) maplock;
2455		xad = xadlistlock->xdlist;
2456		for (n = 0; n < xadlistlock->count; n++, xad++) {
2457			if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
2458				xaddr = addressXAD(xad);
2459				xlen = lengthXAD(xad);
2460				dbUpdatePMap(ipbmap, false, xaddr,
2461					     (s64) xlen, tblk);
2462				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
2463				jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2464					 (ulong) xaddr, xlen);
2465			}
2466		}
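		/* (entries without XAD_NEW | XAD_EXTENDED were already
		 * recorded in the persistent map by an earlier commit,
		 * so they are skipped above)
		 */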
2467	} else if (maplock->flag & mlckALLOCPXD) {
2468		pxdlock = (struct pxd_lock *) maplock;
2469		xaddr = addressPXD(&pxdlock->pxd);
2470		xlen = lengthPXD(&pxdlock->pxd);
2471		dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk);
2472		jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen);
2473	} else {		/* (maplock->flag & mlckALLOCPXDLIST) */
2474
2475		pxdlistlock = (struct xdlistlock *) maplock;
2476		pxd = pxdlistlock->xdlist;
2477		for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2478			xaddr = addressPXD(pxd);
2479			xlen = lengthPXD(pxd);
2480			dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen,
2481				     tblk);
2482			jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2483				 (ulong) xaddr, xlen);
2484		}
2485	}
2486}
2487
2488/*
2489 *	txFreeMap()
2490 *
2491 * function:	free from persistent and/or working map;
2492 *
2493 * todo: optimization
2494 */
2495void txFreeMap(struct inode *ip,
2496	       struct maplock * maplock, struct tblock * tblk, int maptype)
2497{
2498	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2499	struct xdlistlock *xadlistlock;
2500	xad_t *xad;
2501	s64 xaddr;
2502	int xlen;
2503	struct pxd_lock *pxdlock;
2504	struct xdlistlock *pxdlistlock;
2505	pxd_t *pxd;
2506	int n;
2507
2508	jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x",
2509		 tblk, maplock, maptype);
2510
2511	/*
2512	 * free from persistent map;
2513	 */
2514	if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) {
2515		if (maplock->flag & mlckFREEXADLIST) {
2516			xadlistlock = (struct xdlistlock *) maplock;
2517			xad = xadlistlock->xdlist;
2518			for (n = 0; n < xadlistlock->count; n++, xad++) {
2519				if (!(xad->flag & XAD_NEW)) {
2520					xaddr = addressXAD(xad);
2521					xlen = lengthXAD(xad);
2522					dbUpdatePMap(ipbmap, true, xaddr,
2523						     (s64) xlen, tblk);
2524					jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2525						 (ulong) xaddr, xlen);
2526				}
2527			}
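			/* (XAD_NEW extents were never committed to the
			 * persistent map, so only the working-map free
			 * below applies to them)
			 */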
2528		} else if (maplock->flag & mlckFREEPXD) {
2529			pxdlock = (struct pxd_lock *) maplock;
2530			xaddr = addressPXD(&pxdlock->pxd);
2531			xlen = lengthPXD(&pxdlock->pxd);
2532			dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen,
2533				     tblk);
2534			jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2535				 (ulong) xaddr, xlen);
2536		} else {	/* (maplock->flag & mlckALLOCPXDLIST) */
2537
2538			pxdlistlock = (struct xdlistlock *) maplock;
2539			pxd = pxdlistlock->xdlist;
2540			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2541				xaddr = addressPXD(pxd);
2542				xlen = lengthPXD(pxd);
2543				dbUpdatePMap(ipbmap, true, xaddr,
2544					     (s64) xlen, tblk);
2545				jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2546					 (ulong) xaddr, xlen);
2547			}
2548		}
2549	}
2550
2551	/*
2552	 * free from working map;
2553	 */
2554	if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) {
2555		if (maplock->flag & mlckFREEXADLIST) {
2556			xadlistlock = (struct xdlistlock *) maplock;
2557			xad = xadlistlock->xdlist;
2558			for (n = 0; n < xadlistlock->count; n++, xad++) {
2559				xaddr = addressXAD(xad);
2560				xlen = lengthXAD(xad);
2561				dbFree(ip, xaddr, (s64) xlen);
2562				xad->flag = 0;
2563				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2564					 (ulong) xaddr, xlen);
2565			}
2566		} else if (maplock->flag & mlckFREEPXD) {
2567			pxdlock = (struct pxd_lock *) maplock;
2568			xaddr = addressPXD(&pxdlock->pxd);
2569			xlen = lengthPXD(&pxdlock->pxd);
2570			dbFree(ip, xaddr, (s64) xlen);
2571			jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2572				 (ulong) xaddr, xlen);
2573		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
2574
2575			pxdlistlock = (struct xdlistlock *) maplock;
2576			pxd = pxdlistlock->xdlist;
2577			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2578				xaddr = addressPXD(pxd);
2579				xlen = lengthPXD(pxd);
2580				dbFree(ip, xaddr, (s64) xlen);
2581				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2582					 (ulong) xaddr, xlen);
2583			}
2584		}
2585	}
2586}
2587
2588/*
2589 *	txFreelock()
2590 *
2591 * function:	remove tlock from inode anonymous locklist
2592 */
2593void txFreelock(struct inode *ip)
2594{
2595	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2596	struct tlock *xtlck, *tlck;
2597	lid_t xlid = 0, lid;
2598
2599	if (!jfs_ip->atlhead)
2600		return;
2601
2602	TXN_LOCK();
2603	xtlck = (struct tlock *) &jfs_ip->atlhead;
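	/* treat the list head as a dummy tlock so that xtlck->next
	 * aliases jfs_ip->atlhead; this relies on 'next' being the
	 * first field of struct tlock and spares the unlink loop a
	 * special case for the head of the list
	 */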
2604
2605	while ((lid = xtlck->next) != 0) {
2606		tlck = lid_to_tlock(lid);
2607		if (tlck->flag & tlckFREELOCK) {
2608			xtlck->next = tlck->next;
2609			txLockFree(lid);
2610		} else {
2611			xtlck = tlck;
2612			xlid = lid;
2613		}
2614	}
2615
2616	if (jfs_ip->atlhead)
2617		jfs_ip->atltail = xlid;
2618	else {
2619		jfs_ip->atltail = 0;
2620		/*
2621		 * If inode was on anon_list, remove it
2622		 */
2623		list_del_init(&jfs_ip->anon_inode_list);
2624	}
2625	TXN_UNLOCK();
2626}
2627
2628/*
2629 *	txAbort()
2630 *
2631 * function: abort tx before commit;
2632 *
2633 * frees line-locks and segment locks for all
2634 * segments in comdata structure.
2635 * Optionally sets state of file-system to FM_DIRTY in super-block.
2636 * The log age of in-memory page-frames held by the caller is
2637 * reset to 0 (to avoid logwrap).
2638 */
2639void txAbort(tid_t tid, int dirty)
2640{
2641	lid_t lid, next;
2642	struct metapage *mp;
2643	struct tblock *tblk = tid_to_tblock(tid);
2644	struct tlock *tlck;
2645
2646	/*
2647	 * free tlocks of the transaction
2648	 */
2649	for (lid = tblk->next; lid; lid = next) {
2650		tlck = lid_to_tlock(lid);
2651		next = tlck->next;
2652		mp = tlck->mp;
2653		JFS_IP(tlck->ip)->xtlid = 0;
2654
2655		if (mp) {
2656			mp->lid = 0;
2657
2658			/*
2659			 * reset lsn of page to avoid logwarap:
2660			 *
2661			 * (page may have been previously committed by another
2662			 * transaction(s) but has not been paged, i.e.,
2663			 * it may be on logsync list even though it has not
2664			 * been logged for the current tx.)
2665			 */
2666			if (mp->xflag & COMMIT_PAGE && mp->lsn)
2667				LogSyncRelease(mp);
2668		}
2669		/* insert tlock at head of freelist */
2670		TXN_LOCK();
2671		txLockFree(lid);
2672		TXN_UNLOCK();
2673	}
2674
2675	/* caller will free the transaction block */
2676
2677	tblk->next = tblk->last = 0;
2678
2679	/*
2680	 * mark filesystem dirty
2681	 */
2682	if (dirty)
2683		jfs_error(tblk->sb, "\n");
2684
2685	return;
2686}
2687
2688/*
2689 *	txLazyCommit(void)
2690 *
2691 *	All transactions except those changing ipimap (COMMIT_FORCE) are
2692 *	processed by this routine.  This ensures that the inode and block
2693 *	allocation maps are updated in order.  For synchronous transactions,
2694 *	let the user thread finish processing after txUpdateMap() is called.
2695 */
2696static void txLazyCommit(struct tblock * tblk)
2697{
2698	struct jfs_log *log;
2699
2700	while (((tblk->flag & tblkGC_READY) == 0) &&
2701	       ((tblk->flag & tblkGC_UNLOCKED) == 0)) {
2702		/* We must have gotten ahead of the user thread
2703		 */
2704		jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk);
2705		yield();
2706	}
2707
2708	jfs_info("txLazyCommit: processing tblk 0x%p", tblk);
2709
2710	txUpdateMap(tblk);
2711
2712	log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
2713
2714	spin_lock_irq(&log->gclock);	// LOGGC_LOCK
2715
2716	tblk->flag |= tblkGC_COMMITTED;
2717
2718	if (tblk->flag & tblkGC_READY)
2719		log->gcrtc--;
2720
2721	wake_up_all(&tblk->gcwait);	// LOGGC_WAKEUP
2722
2723	/*
2724	 * Can't release log->gclock until we've tested tblk->flag
2725	 */
2726	if (tblk->flag & tblkGC_LAZY) {
2727		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
2728		txUnlock(tblk);
2729		tblk->flag &= ~tblkGC_LAZY;
2730		txEnd(tblk - TxBlock);	/* Convert back to tid */
2731	} else
2732		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
2733
2734	jfs_info("txLazyCommit: done: tblk = 0x%p", tblk);
2735}
2736
2737/*
2738 *	jfs_lazycommit(void)
2739 *
2740 *	To be run as a kernel daemon.  If lbmIODone is called in an interrupt
2741 *	context, or where blocking is not wanted, this routine will process
2742 *	committed transactions from the unlock queue.
2743 */
2744int jfs_lazycommit(void *arg)
2745{
2746	int WorkDone;
2747	struct tblock *tblk;
2748	unsigned long flags;
2749	struct jfs_sb_info *sbi;
2750
2751	do {
2752		LAZY_LOCK(flags);
2753		jfs_commit_thread_waking = 0;	/* OK to wake another thread */
2754		while (!list_empty(&TxAnchor.unlock_queue)) {
2755			WorkDone = 0;
2756			list_for_each_entry(tblk, &TxAnchor.unlock_queue,
2757					    cqueue) {
2758
2759				sbi = JFS_SBI(tblk->sb);
2760				/*
2761				 * For each volume, the transactions must be
2762				 * handled in order.  If another commit thread
2763				 * is handling a tblk for this superblock,
2764				 * skip it
2765				 */
2766				if (sbi->commit_state & IN_LAZYCOMMIT)
2767					continue;
2768
2769				sbi->commit_state |= IN_LAZYCOMMIT;
2770				WorkDone = 1;
2771
2772				/*
2773				 * Remove transaction from queue
2774				 */
2775				list_del(&tblk->cqueue);
2776
2777				LAZY_UNLOCK(flags);
2778				txLazyCommit(tblk);
2779				LAZY_LOCK(flags);
2780
2781				sbi->commit_state &= ~IN_LAZYCOMMIT;
2782				/*
2783				 * Don't continue in the for loop.  (We can't
2784				 * anyway, it's unsafe!)  We want to go back to
2785				 * the beginning of the list.
2786				 */
2787				break;
2788			}
2789
2790			/* If there was nothing to do, don't continue */
2791			if (!WorkDone)
2792				break;
2793		}
2794		/* In case a wakeup came while all threads were active */
2795		jfs_commit_thread_waking = 0;
2796
2797		if (freezing(current)) {
2798			LAZY_UNLOCK(flags);
2799			try_to_freeze();
2800		} else {
2801			DECLARE_WAITQUEUE(wq, current);
2802
2803			add_wait_queue(&jfs_commit_thread_wait, &wq);
2804			set_current_state(TASK_INTERRUPTIBLE);
2805			LAZY_UNLOCK(flags);
2806			schedule();
2807			remove_wait_queue(&jfs_commit_thread_wait, &wq);
2808		}
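		/* (we are on the wait queue and TASK_INTERRUPTIBLE
		 * before LAZY_UNLOCK, so a wakeup arriving between
		 * the unlock and schedule() is not lost)
		 */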
2809	} while (!kthread_should_stop());
2810
2811	if (!list_empty(&TxAnchor.unlock_queue))
2812		jfs_err("jfs_lazycommit being killed w/pending transactions!");
2813	else
2814		jfs_info("jfs_lazycommit being killed");
2815	return 0;
2816}
2817
2818void txLazyUnlock(struct tblock * tblk)
2819{
2820	unsigned long flags;
2821
2822	LAZY_LOCK(flags);
2823
2824	list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue);
2825	/*
2826	 * Don't wake up a commit thread if there is already one servicing
2827	 * this superblock, or if the last one we woke up hasn't started yet.
2828	 */
2829	if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) &&
2830	    !jfs_commit_thread_waking) {
2831		jfs_commit_thread_waking = 1;
2832		wake_up(&jfs_commit_thread_wait);
2833	}
2834	LAZY_UNLOCK(flags);
2835}
2836
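/*
 *	LogSyncRelease()
 *
 * function: drop the metapage's nohomeok reference now that its log
 *	     records are on disk; at nohomeok == 0 the page may again
 *	     be written home.
 */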
2837static void LogSyncRelease(struct metapage * mp)
2838{
2839	struct jfs_log *log = mp->log;
2840
2841	assert(mp->nohomeok);
2842	assert(log);
2843	metapage_homeok(mp);
2844}
2845
2846/*
2847 *	txQuiesce
2848 *
2849 *	Block all new transactions and push anonymous transactions to
2850 *	completion
2851 *
2852 *	This does almost the same thing as jfs_sync below.  We don't
2853 *	worry about deadlocking when jfs_tlocks_low is set, since we would
2854 *	expect jfs_sync to get us out of that jam.
2855 */
2856void txQuiesce(struct super_block *sb)
2857{
2858	struct inode *ip;
2859	struct jfs_inode_info *jfs_ip;
2860	struct jfs_log *log = JFS_SBI(sb)->log;
2861	tid_t tid;
2862
2863	set_bit(log_QUIESCE, &log->flag);
2864
2865	TXN_LOCK();
2866restart:
2867	while (!list_empty(&TxAnchor.anon_list)) {
2868		jfs_ip = list_entry(TxAnchor.anon_list.next,
2869				    struct jfs_inode_info,
2870				    anon_inode_list);
2871		ip = &jfs_ip->vfs_inode;
2872
2873		/*
2874		 * inode will be removed from anonymous list
2875		 * when it is committed
2876		 */
2877		TXN_UNLOCK();
2878		tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
2879		mutex_lock(&jfs_ip->commit_mutex);
2880		txCommit(tid, 1, &ip, 0);
2881		txEnd(tid);
2882		mutex_unlock(&jfs_ip->commit_mutex);
2883		/*
2884		 * Just to be safe.  I don't know how
2885		 * long we can run without blocking
2886		 */
2887		cond_resched();
2888		TXN_LOCK();
2889	}
2890
2891	/*
2892	 * If jfs_sync is running in parallel, there could be some inodes
2893	 * on anon_list2.  Let's check.
2894	 */
2895	if (!list_empty(&TxAnchor.anon_list2)) {
2896		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2897		goto restart;
2898	}
2899	TXN_UNLOCK();
2900
2901	/*
2902	 * We may need to kick off the group commit
2903	 */
2904	jfs_flush_journal(log, 0);
2905}
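
/*
 * Illustrative pairing (a sketch; the freeze/thaw path in the JFS
 * superblock code brackets a stable filesystem this way):
 *
 *	txQuiesce(sb);
 *	...no new transactions can start; anonymous txns pushed out...
 *	txResume(sb);
 */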
2906
2907/*
2908 * txResume()
2909 *
2910 * Allows transactions to start again following txQuiesce
2911 */
2912void txResume(struct super_block *sb)
2913{
2914	struct jfs_log *log = JFS_SBI(sb)->log;
2915
2916	clear_bit(log_QUIESCE, &log->flag);
2917	TXN_WAKEUP(&log->syncwait);
2918}
2919
2920/*
2921 *	jfs_sync(void)
2922 *
2923 *	To be run as a kernel daemon.  This is awakened when tlocks run low.
2924 *	We write any inodes that have anonymous tlocks so they will become
2925 *	available.
2926 */
2927int jfs_sync(void *arg)
2928{
2929	struct inode *ip;
2930	struct jfs_inode_info *jfs_ip;
2931	tid_t tid;
2932
2933	do {
2934		/*
2935		 * write each inode on the anonymous inode list
2936		 */
2937		TXN_LOCK();
2938		while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) {
2939			jfs_ip = list_entry(TxAnchor.anon_list.next,
2940					    struct jfs_inode_info,
2941					    anon_inode_list);
2942			ip = &jfs_ip->vfs_inode;
2943
2944			if (! igrab(ip)) {
2945				/*
2946				 * Inode is being freed
2947				 */
2948				list_del_init(&jfs_ip->anon_inode_list);
2949			} else if (mutex_trylock(&jfs_ip->commit_mutex)) {
2950				/*
2951				 * inode will be removed from anonymous list
2952				 * when it is committed
2953				 */
2954				TXN_UNLOCK();
2955				tid = txBegin(ip->i_sb, COMMIT_INODE);
2956				txCommit(tid, 1, &ip, 0);
2957				txEnd(tid);
2958				mutex_unlock(&jfs_ip->commit_mutex);
2959
2960				iput(ip);
2961				/*
2962				 * Just to be safe.  I don't know how
2963				 * long we can run without blocking
2964				 */
2965				cond_resched();
2966				TXN_LOCK();
2967			} else {
2968				/* We can't get the commit mutex.  It may
2969				 * be held by a thread waiting for tlock's
2970				 * so let's not block here.  Save it to
2971				 * put back on the anon_list.
2972				 */
2973
2974				/* Move from anon_list to anon_list2 */
2975				list_move(&jfs_ip->anon_inode_list,
2976					  &TxAnchor.anon_list2);
2977
2978				TXN_UNLOCK();
2979				iput(ip);
2980				TXN_LOCK();
2981			}
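			/* (TXN_LOCK is dropped around iput above since
			 * a final iput may block or do I/O, which is
			 * not allowed under a spinlock)
			 */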
2982		}
2983		/* Add anon_list2 back to anon_list */
2984		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2985
2986		if (freezing(current)) {
2987			TXN_UNLOCK();
2988			try_to_freeze();
2989		} else {
2990			set_current_state(TASK_INTERRUPTIBLE);
2991			TXN_UNLOCK();
2992			schedule();
2993		}
2994	} while (!kthread_should_stop());
2995
2996	jfs_info("jfs_sync being killed");
2997	return 0;
2998}
2999
3000#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
3001static int jfs_txanchor_proc_show(struct seq_file *m, void *v)
3002{
3003	char *freewait;
3004	char *freelockwait;
3005	char *lowlockwait;
3006
3007	freewait =
3008	    waitqueue_active(&TxAnchor.freewait) ? "active" : "empty";
3009	freelockwait =
3010	    waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty";
3011	lowlockwait =
3012	    waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty";
3013
3014	seq_printf(m,
3015		       "JFS TxAnchor\n"
3016		       "============\n"
3017		       "freetid = %d\n"
3018		       "freewait = %s\n"
3019		       "freelock = %d\n"
3020		       "freelockwait = %s\n"
3021		       "lowlockwait = %s\n"
3022		       "tlocksInUse = %d\n"
3023		       "jfs_tlocks_low = %d\n"
3024		       "unlock_queue is %sempty\n",
3025		       TxAnchor.freetid,
3026		       freewait,
3027		       TxAnchor.freelock,
3028		       freelockwait,
3029		       lowlockwait,
3030		       TxAnchor.tlocksInUse,
3031		       jfs_tlocks_low,
3032		       list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
3033	return 0;
3034}
3035
3036static int jfs_txanchor_proc_open(struct inode *inode, struct file *file)
3037{
3038	return single_open(file, jfs_txanchor_proc_show, NULL);
3039}
3040
3041const struct file_operations jfs_txanchor_proc_fops = {
3042	.open		= jfs_txanchor_proc_open,
3043	.read		= seq_read,
3044	.llseek		= seq_lseek,
3045	.release	= single_release,
3046};
3047#endif
3048
3049#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
3050static int jfs_txstats_proc_show(struct seq_file *m, void *v)
3051{
3052	seq_printf(m,
3053		       "JFS TxStats\n"
3054		       "===========\n"
3055		       "calls to txBegin = %d\n"
3056		       "txBegin blocked by sync barrier = %d\n"
3057		       "txBegin blocked by tlocks low = %d\n"
3058		       "txBegin blocked by no free tid = %d\n"
3059		       "calls to txBeginAnon = %d\n"
3060		       "txBeginAnon blocked by sync barrier = %d\n"
3061		       "txBeginAnon blocked by tlocks low = %d\n"
3062		       "calls to txLockAlloc = %d\n"
3063		       "tLockAlloc blocked by no free lock = %d\n",
3064		       TxStat.txBegin,
3065		       TxStat.txBegin_barrier,
3066		       TxStat.txBegin_lockslow,
3067		       TxStat.txBegin_freetid,
3068		       TxStat.txBeginAnon,
3069		       TxStat.txBeginAnon_barrier,
3070		       TxStat.txBeginAnon_lockslow,
3071		       TxStat.txLockAlloc,
3072		       TxStat.txLockAlloc_freelock);
3073	return 0;
3074}
3075
3076static int jfs_txstats_proc_open(struct inode *inode, struct file *file)
3077{
3078	return single_open(file, jfs_txstats_proc_show, NULL);
3079}
3080
3081const struct file_operations jfs_txstats_proc_fops = {
3082	.open		= jfs_txstats_proc_open,
3083	.read		= seq_read,
3084	.llseek		= seq_lseek,
3085	.release	= single_release,
3086};
3087#endif
  94} TxStat;
  95#endif
  96
  97static int nTxBlock = -1;	/* number of transaction blocks */
  98module_param(nTxBlock, int, 0);
  99MODULE_PARM_DESC(nTxBlock,
 100		 "Number of transaction blocks (max:65536)");
 101
 102static int nTxLock = -1;	/* number of transaction locks */
 103module_param(nTxLock, int, 0);
 104MODULE_PARM_DESC(nTxLock,
 105		 "Number of transaction locks (max:65536)");
 106
 107struct tblock *TxBlock;	/* transaction block table */
 108static int TxLockLWM;	/* Low water mark for number of txLocks used */
 109static int TxLockHWM;	/* High water mark for number of txLocks used */
 110static int TxLockVHWM;	/* Very High water mark */
 111struct tlock *TxLock;	/* transaction lock table */
 112
 113/*
 114 *	transaction management lock
 115 */
 116static DEFINE_SPINLOCK(jfsTxnLock);
 117
 118#define TXN_LOCK()		spin_lock(&jfsTxnLock)
 119#define TXN_UNLOCK()		spin_unlock(&jfsTxnLock)
 120
 121#define LAZY_LOCK_INIT()	spin_lock_init(&TxAnchor.LazyLock);
 122#define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
 123#define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)
 124
 125static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
 126static int jfs_commit_thread_waking;
 127
 128/*
 129 * Retry logic exist outside these macros to protect from spurrious wakeups.
 130 */
 131static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
 132{
 133	DECLARE_WAITQUEUE(wait, current);
 134
 135	add_wait_queue(event, &wait);
 136	set_current_state(TASK_UNINTERRUPTIBLE);
 137	TXN_UNLOCK();
 138	io_schedule();
 139	__set_current_state(TASK_RUNNING);
 140	remove_wait_queue(event, &wait);
 141}
 142
 143#define TXN_SLEEP(event)\
 144{\
 145	TXN_SLEEP_DROP_LOCK(event);\
 146	TXN_LOCK();\
 147}
 148
 149#define TXN_WAKEUP(event) wake_up_all(event)
 150
 151/*
 152 *	statistics
 153 */
 154static struct {
 155	tid_t maxtid;		/* 4: biggest tid ever used */
 156	lid_t maxlid;		/* 4: biggest lid ever used */
 157	int ntid;		/* 4: # of transactions performed */
 158	int nlid;		/* 4: # of tlocks acquired */
 159	int waitlock;		/* 4: # of tlock wait */
 160} stattx;
 161
 162/*
 163 * forward references
 164 */
 165static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 166		struct tlock * tlck, struct commit * cd);
 167static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 168		struct tlock * tlck);
 169static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 170		struct tlock * tlck);
 171static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 172		struct tlock * tlck);
 173static void txAllocPMap(struct inode *ip, struct maplock * maplock,
 174		struct tblock * tblk);
 175static void txForce(struct tblock * tblk);
 176static int txLog(struct jfs_log * log, struct tblock * tblk,
 177		struct commit * cd);
 178static void txUpdateMap(struct tblock * tblk);
 179static void txRelease(struct tblock * tblk);
 180static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 181	   struct tlock * tlck);
 182static void LogSyncRelease(struct metapage * mp);
 183
 184/*
 185 *		transaction block/lock management
 186 *		---------------------------------
 187 */
 188
 189/*
 190 * Get a transaction lock from the free list.  If the number in use is
 191 * greater than the high water mark, wake up the sync daemon.  This should
 192 * free some anonymous transaction locks.  (TXN_LOCK must be held.)
 193 */
 194static lid_t txLockAlloc(void)
 195{
 196	lid_t lid;
 197
 198	INCREMENT(TxStat.txLockAlloc);
 199	if (!TxAnchor.freelock) {
 200		INCREMENT(TxStat.txLockAlloc_freelock);
 201	}
 202
 203	while (!(lid = TxAnchor.freelock))
 204		TXN_SLEEP(&TxAnchor.freelockwait);
 205	TxAnchor.freelock = TxLock[lid].next;
 206	HIGHWATERMARK(stattx.maxlid, lid);
 207	if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) {
 208		jfs_info("txLockAlloc tlocks low");
 209		jfs_tlocks_low = 1;
 210		wake_up_process(jfsSyncThread);
 211	}
 212
 213	return lid;
 214}
 215
 216static void txLockFree(lid_t lid)
 217{
 218	TxLock[lid].tid = 0;
 219	TxLock[lid].next = TxAnchor.freelock;
 220	TxAnchor.freelock = lid;
 221	TxAnchor.tlocksInUse--;
 222	if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) {
 223		jfs_info("txLockFree jfs_tlocks_low no more");
 224		jfs_tlocks_low = 0;
 225		TXN_WAKEUP(&TxAnchor.lowlockwait);
 226	}
 227	TXN_WAKEUP(&TxAnchor.freelockwait);
 228}
 229
 230/*
 231 * NAME:	txInit()
 232 *
 233 * FUNCTION:	initialize transaction management structures
 234 *
 235 * RETURN:
 236 *
 237 * serialization: single thread at jfs_init()
 238 */
 239int txInit(void)
 240{
 241	int k, size;
 242	struct sysinfo si;
 243
 244	/* Set defaults for nTxLock and nTxBlock if unset */
 245
 246	if (nTxLock == -1) {
 247		if (nTxBlock == -1) {
 248			/* Base default on memory size */
 249			si_meminfo(&si);
 250			if (si.totalram > (256 * 1024)) /* 1 GB */
 251				nTxLock = 64 * 1024;
 252			else
 253				nTxLock = si.totalram >> 2;
 254		} else if (nTxBlock > (8 * 1024))
 255			nTxLock = 64 * 1024;
 256		else
 257			nTxLock = nTxBlock << 3;
 258	}
 259	if (nTxBlock == -1)
 260		nTxBlock = nTxLock >> 3;
 261
 262	/* Verify tunable parameters */
 263	if (nTxBlock < 16)
 264		nTxBlock = 16;	/* No one should set it this low */
 265	if (nTxBlock > 65536)
 266		nTxBlock = 65536;
 267	if (nTxLock < 256)
 268		nTxLock = 256;	/* No one should set it this low */
 269	if (nTxLock > 65536)
 270		nTxLock = 65536;
 271
 272	printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n",
 273	       nTxBlock, nTxLock);
 274	/*
 275	 * initialize transaction block (tblock) table
 276	 *
 277	 * transaction id (tid) = tblock index
 278	 * tid = 0 is reserved.
 279	 */
 280	TxLockLWM = (nTxLock * 4) / 10;
 281	TxLockHWM = (nTxLock * 7) / 10;
 282	TxLockVHWM = (nTxLock * 8) / 10;
 283
 284	size = sizeof(struct tblock) * nTxBlock;
 285	TxBlock = vmalloc(size);
 286	if (TxBlock == NULL)
 287		return -ENOMEM;
 288
 289	for (k = 1; k < nTxBlock - 1; k++) {
 290		TxBlock[k].next = k + 1;
 291		init_waitqueue_head(&TxBlock[k].gcwait);
 292		init_waitqueue_head(&TxBlock[k].waitor);
 293	}
 294	TxBlock[k].next = 0;
 295	init_waitqueue_head(&TxBlock[k].gcwait);
 296	init_waitqueue_head(&TxBlock[k].waitor);
 297
 298	TxAnchor.freetid = 1;
 299	init_waitqueue_head(&TxAnchor.freewait);
 300
 301	stattx.maxtid = 1;	/* statistics */
 302
 303	/*
 304	 * initialize transaction lock (tlock) table
 305	 *
 306	 * transaction lock id = tlock index
 307	 * tlock id = 0 is reserved.
 308	 */
 309	size = sizeof(struct tlock) * nTxLock;
 310	TxLock = vmalloc(size);
 311	if (TxLock == NULL) {
 312		vfree(TxBlock);
 313		return -ENOMEM;
 314	}
 315
 316	/* initialize tlock table */
 317	for (k = 1; k < nTxLock - 1; k++)
 318		TxLock[k].next = k + 1;
 319	TxLock[k].next = 0;
 320	init_waitqueue_head(&TxAnchor.freelockwait);
 321	init_waitqueue_head(&TxAnchor.lowlockwait);
 322
 323	TxAnchor.freelock = 1;
 324	TxAnchor.tlocksInUse = 0;
 325	INIT_LIST_HEAD(&TxAnchor.anon_list);
 326	INIT_LIST_HEAD(&TxAnchor.anon_list2);
 327
 328	LAZY_LOCK_INIT();
 329	INIT_LIST_HEAD(&TxAnchor.unlock_queue);
 330
 331	stattx.maxlid = 1;	/* statistics */
 332
 333	return 0;
 334}
 335
 336/*
 337 * NAME:	txExit()
 338 *
 339 * FUNCTION:	clean up when module is unloaded
 340 */
 341void txExit(void)
 342{
 343	vfree(TxLock);
 344	TxLock = NULL;
 345	vfree(TxBlock);
 346	TxBlock = NULL;
 347}
 348
 349/*
 350 * NAME:	txBegin()
 351 *
 352 * FUNCTION:	start a transaction.
 353 *
 354 * PARAMETER:	sb	- superblock
 355 *		flag	- force for nested tx;
 356 *
 357 * RETURN:	tid	- transaction id
 358 *
 359 * note: flag force allows to start tx for nested tx
 360 * to prevent deadlock on logsync barrier;
 361 */
 362tid_t txBegin(struct super_block *sb, int flag)
 363{
 364	tid_t t;
 365	struct tblock *tblk;
 366	struct jfs_log *log;
 367
 368	jfs_info("txBegin: flag = 0x%x", flag);
 369	log = JFS_SBI(sb)->log;
 370
 371	TXN_LOCK();
 372
 373	INCREMENT(TxStat.txBegin);
 374
 375      retry:
 376	if (!(flag & COMMIT_FORCE)) {
 377		/*
 378		 * synchronize with logsync barrier
 379		 */
 380		if (test_bit(log_SYNCBARRIER, &log->flag) ||
 381		    test_bit(log_QUIESCE, &log->flag)) {
 382			INCREMENT(TxStat.txBegin_barrier);
 383			TXN_SLEEP(&log->syncwait);
 384			goto retry;
 385		}
 386	}
 387	if (flag == 0) {
 388		/*
 389		 * Don't begin transaction if we're getting starved for tlocks
 390		 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
 391		 * free tlocks)
 392		 */
 393		if (TxAnchor.tlocksInUse > TxLockVHWM) {
 394			INCREMENT(TxStat.txBegin_lockslow);
 395			TXN_SLEEP(&TxAnchor.lowlockwait);
 396			goto retry;
 397		}
 398	}
 399
 400	/*
 401	 * allocate transaction id/block
 402	 */
 403	if ((t = TxAnchor.freetid) == 0) {
 404		jfs_info("txBegin: waiting for free tid");
 405		INCREMENT(TxStat.txBegin_freetid);
 406		TXN_SLEEP(&TxAnchor.freewait);
 407		goto retry;
 408	}
 409
 410	tblk = tid_to_tblock(t);
 411
 412	if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) {
 413		/* Don't let a non-forced transaction take the last tblk */
 414		jfs_info("txBegin: waiting for free tid");
 415		INCREMENT(TxStat.txBegin_freetid);
 416		TXN_SLEEP(&TxAnchor.freewait);
 417		goto retry;
 418	}
 419
 420	TxAnchor.freetid = tblk->next;
 421
 422	/*
 423	 * initialize transaction
 424	 */
 425
 426	/*
 427	 * We can't zero the whole thing or we screw up another thread being
 428	 * awakened after sleeping on tblk->waitor
 429	 *
 430	 * memset(tblk, 0, sizeof(struct tblock));
 431	 */
 432	tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0;
 433
 434	tblk->sb = sb;
 435	++log->logtid;
 436	tblk->logtid = log->logtid;
 437
 438	++log->active;
 439
 440	HIGHWATERMARK(stattx.maxtid, t);	/* statistics */
 441	INCREMENT(stattx.ntid);	/* statistics */
 442
 443	TXN_UNLOCK();
 444
 445	jfs_info("txBegin: returning tid = %d", t);
 446
 447	return t;
 448}
 449
 450/*
 451 * NAME:	txBeginAnon()
 452 *
 453 * FUNCTION:	start an anonymous transaction.
 454 *		Blocks if logsync or available tlocks are low to prevent
 455 *		anonymous tlocks from depleting supply.
 456 *
 457 * PARAMETER:	sb	- superblock
 458 *
 459 * RETURN:	none
 460 */
 461void txBeginAnon(struct super_block *sb)
 462{
 463	struct jfs_log *log;
 464
 465	log = JFS_SBI(sb)->log;
 466
 467	TXN_LOCK();
 468	INCREMENT(TxStat.txBeginAnon);
 469
 470      retry:
 471	/*
 472	 * synchronize with logsync barrier
 473	 */
 474	if (test_bit(log_SYNCBARRIER, &log->flag) ||
 475	    test_bit(log_QUIESCE, &log->flag)) {
 476		INCREMENT(TxStat.txBeginAnon_barrier);
 477		TXN_SLEEP(&log->syncwait);
 478		goto retry;
 479	}
 480
 481	/*
 482	 * Don't begin transaction if we're getting starved for tlocks
 483	 */
 484	if (TxAnchor.tlocksInUse > TxLockVHWM) {
 485		INCREMENT(TxStat.txBeginAnon_lockslow);
 486		TXN_SLEEP(&TxAnchor.lowlockwait);
 487		goto retry;
 488	}
 489	TXN_UNLOCK();
 490}
 491
 492/*
 493 *	txEnd()
 494 *
 495 * function: free specified transaction block.
 496 *
 497 *	logsync barrier processing:
 498 *
 499 * serialization:
 500 */
 501void txEnd(tid_t tid)
 502{
 503	struct tblock *tblk = tid_to_tblock(tid);
 504	struct jfs_log *log;
 505
 506	jfs_info("txEnd: tid = %d", tid);
 507	TXN_LOCK();
 508
 509	/*
 510	 * wakeup transactions waiting on the page locked
 511	 * by the current transaction
 512	 */
 513	TXN_WAKEUP(&tblk->waitor);
 514
 515	log = JFS_SBI(tblk->sb)->log;
 516
 517	/*
 518	 * Lazy commit thread can't free this guy until we mark it UNLOCKED,
 519	 * otherwise, we would be left with a transaction that may have been
 520	 * reused.
 521	 *
 522	 * Lazy commit thread will turn off tblkGC_LAZY before calling this
 523	 * routine.
 524	 */
 525	if (tblk->flag & tblkGC_LAZY) {
 526		jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk);
 527		TXN_UNLOCK();
 528
 529		spin_lock_irq(&log->gclock);	// LOGGC_LOCK
 530		tblk->flag |= tblkGC_UNLOCKED;
 531		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
 532		return;
 533	}
 534
 535	jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk);
 536
 537	assert(tblk->next == 0);
 538
 539	/*
 540	 * insert tblock back on freelist
 541	 */
 542	tblk->next = TxAnchor.freetid;
 543	TxAnchor.freetid = tid;
 544
 545	/*
 546	 * mark the tblock not active
 547	 */
 548	if (--log->active == 0) {
 549		clear_bit(log_FLUSH, &log->flag);
 550
 551		/*
 552		 * synchronize with logsync barrier
 553		 */
 554		if (test_bit(log_SYNCBARRIER, &log->flag)) {
 555			TXN_UNLOCK();
 556
 557			/* write dirty metadata & forward log syncpt */
 558			jfs_syncpt(log, 1);
 559
 560			jfs_info("log barrier off: 0x%x", log->lsn);
 561
 562			/* enable new transactions start */
 563			clear_bit(log_SYNCBARRIER, &log->flag);
 564
 565			/* wakeup all waitors for logsync barrier */
 566			TXN_WAKEUP(&log->syncwait);
 567
 568			goto wakeup;
 569		}
 570	}
 571
 572	TXN_UNLOCK();
 573wakeup:
 574	/*
 575	 * wakeup all waitors for a free tblock
 576	 */
 577	TXN_WAKEUP(&TxAnchor.freewait);
 578}
 579
 580/*
 581 *	txLock()
 582 *
 583 * function: acquire a transaction lock on the specified <mp>
 584 *
 585 * parameter:
 586 *
 587 * return:	transaction lock id
 588 *
 589 * serialization:
 590 */
 591struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 592		     int type)
 593{
 594	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 595	int dir_xtree = 0;
 596	lid_t lid;
 597	tid_t xtid;
 598	struct tlock *tlck;
 599	struct xtlock *xtlck;
 600	struct linelock *linelock;
 601	xtpage_t *p;
 602	struct tblock *tblk;
 603
 604	TXN_LOCK();
 605
 606	if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) &&
 607	    !(mp->xflag & COMMIT_PAGE)) {
 608		/*
 609		 * Directory inode is special.  It can have both an xtree tlock
 610		 * and a dtree tlock associated with it.
 611		 */
 612		dir_xtree = 1;
 613		lid = jfs_ip->xtlid;
 614	} else
 615		lid = mp->lid;
 616
 617	/* is page not locked by a transaction ? */
 618	if (lid == 0)
 619		goto allocateLock;
 620
 621	jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid);
 622
 623	/* is page locked by the requester transaction ? */
 624	tlck = lid_to_tlock(lid);
 625	if ((xtid = tlck->tid) == tid) {
 626		TXN_UNLOCK();
 627		goto grantLock;
 628	}
 629
 630	/*
 631	 * is page locked by anonymous transaction/lock ?
 632	 *
 633	 * (page update without transaction (i.e., file write) is
 634	 * locked under anonymous transaction tid = 0:
 635	 * anonymous tlocks maintained on anonymous tlock list of
 636	 * the inode of the page and available to all anonymous
 637	 * transactions until txCommit() time at which point
 638	 * they are transferred to the transaction tlock list of
 639	 * the committing transaction of the inode)
 640	 */
 641	if (xtid == 0) {
 642		tlck->tid = tid;
 643		TXN_UNLOCK();
 644		tblk = tid_to_tblock(tid);
 645		/*
 646		 * The order of the tlocks in the transaction is important
 647		 * (during truncate, child xtree pages must be freed before
 648		 * parent's tlocks change the working map).
 649		 * Take tlock off anonymous list and add to tail of
 650		 * transaction list
 651		 *
 652		 * Note:  We really need to get rid of the tid & lid and
 653		 * use list_head's.  This code is getting UGLY!
 654		 */
 655		if (jfs_ip->atlhead == lid) {
 656			if (jfs_ip->atltail == lid) {
 657				/* only anonymous txn.
 658				 * Remove from anon_list
 659				 */
 660				TXN_LOCK();
 661				list_del_init(&jfs_ip->anon_inode_list);
 662				TXN_UNLOCK();
 663			}
 664			jfs_ip->atlhead = tlck->next;
 665		} else {
 666			lid_t last;
 667			for (last = jfs_ip->atlhead;
 668			     lid_to_tlock(last)->next != lid;
 669			     last = lid_to_tlock(last)->next) {
 670				assert(last);
 671			}
 672			lid_to_tlock(last)->next = tlck->next;
 673			if (jfs_ip->atltail == lid)
 674				jfs_ip->atltail = last;
 675		}
 676
 677		/* insert the tlock at tail of transaction tlock list */
 678
 679		if (tblk->next)
 680			lid_to_tlock(tblk->last)->next = lid;
 681		else
 682			tblk->next = lid;
 683		tlck->next = 0;
 684		tblk->last = lid;
 685
 686		goto grantLock;
 687	}
 688
 689	goto waitLock;
 690
 691	/*
 692	 * allocate a tlock
 693	 */
 694      allocateLock:
 695	lid = txLockAlloc();
 696	tlck = lid_to_tlock(lid);
 697
 698	/*
 699	 * initialize tlock
 700	 */
 701	tlck->tid = tid;
 702
 703	TXN_UNLOCK();
 704
 705	/* mark tlock for meta-data page */
 706	if (mp->xflag & COMMIT_PAGE) {
 707
 708		tlck->flag = tlckPAGELOCK;
 709
 710		/* mark the page dirty and nohomeok */
 711		metapage_nohomeok(mp);
 712
 713		jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
 714			 mp, mp->nohomeok, tid, tlck);
 715
 716		/* if anonymous transaction, and buffer is on the group
 717		 * commit synclist, mark inode to show this.  This will
 718		 * prevent the buffer from being marked nohomeok for too
 719		 * long a time.
 720		 */
 721		if ((tid == 0) && mp->lsn)
 722			set_cflag(COMMIT_Synclist, ip);
 723	}
 724	/* mark tlock for in-memory inode */
 725	else
 726		tlck->flag = tlckINODELOCK;
 727
 728	if (S_ISDIR(ip->i_mode))
 729		tlck->flag |= tlckDIRECTORY;
 730
 731	tlck->type = 0;
 732
 733	/* bind the tlock and the page */
 734	tlck->ip = ip;
 735	tlck->mp = mp;
 736	if (dir_xtree)
 737		jfs_ip->xtlid = lid;
 738	else
 739		mp->lid = lid;
 740
 741	/*
 742	 * enqueue transaction lock to transaction/inode
 743	 */
 744	/* insert the tlock at tail of transaction tlock list */
 745	if (tid) {
 746		tblk = tid_to_tblock(tid);
 747		if (tblk->next)
 748			lid_to_tlock(tblk->last)->next = lid;
 749		else
 750			tblk->next = lid;
 751		tlck->next = 0;
 752		tblk->last = lid;
 753	}
 754	/* anonymous transaction:
 755	 * insert the tlock at head of inode anonymous tlock list
 756	 */
 757	else {
 758		tlck->next = jfs_ip->atlhead;
 759		jfs_ip->atlhead = lid;
 760		if (tlck->next == 0) {
 761			/* This inode's first anonymous transaction */
 762			jfs_ip->atltail = lid;
 763			TXN_LOCK();
 764			list_add_tail(&jfs_ip->anon_inode_list,
 765				      &TxAnchor.anon_list);
 766			TXN_UNLOCK();
 767		}
 768	}
 769
 770	/* initialize type dependent area for linelock */
 771	linelock = (struct linelock *) & tlck->lock;
 772	linelock->next = 0;
 773	linelock->flag = tlckLINELOCK;
 774	linelock->maxcnt = TLOCKSHORT;
 775	linelock->index = 0;
 776
 777	switch (type & tlckTYPE) {
 778	case tlckDTREE:
 779		linelock->l2linesize = L2DTSLOTSIZE;
 780		break;
 781
 782	case tlckXTREE:
 783		linelock->l2linesize = L2XTSLOTSIZE;
 784
 785		xtlck = (struct xtlock *) linelock;
 786		xtlck->header.offset = 0;
 787		xtlck->header.length = 2;
 788
 789		if (type & tlckNEW) {
 790			xtlck->lwm.offset = XTENTRYSTART;
 791		} else {
 792			if (mp->xflag & COMMIT_PAGE)
 793				p = (xtpage_t *) mp->data;
 794			else
 795				p = &jfs_ip->i_xtroot;
 796			xtlck->lwm.offset =
 797			    le16_to_cpu(p->header.nextindex);
 798		}
 799		xtlck->lwm.length = 0;	/* ! */
 800		xtlck->twm.offset = 0;
 801		xtlck->hwm.offset = 0;
 802
 803		xtlck->index = 2;
 804		break;
 805
 806	case tlckINODE:
 807		linelock->l2linesize = L2INODESLOTSIZE;
 808		break;
 809
 810	case tlckDATA:
 811		linelock->l2linesize = L2DATASLOTSIZE;
 812		break;
 813
 814	default:
 815		jfs_err("UFO tlock:0x%p", tlck);
 816	}
 817
 818	/*
 819	 * update tlock vector
 820	 */
 821      grantLock:
 822	tlck->type |= type;
 823
 824	return tlck;
 825
 826	/*
 827	 * page is being locked by another transaction:
 828	 */
 829      waitLock:
 830	/* Only locks on ipimap or ipaimap should reach here */
 831	/* assert(jfs_ip->fileset == AGGREGATE_I); */
 832	if (jfs_ip->fileset != AGGREGATE_I) {
 833		printk(KERN_ERR "txLock: trying to lock locked page!");
 834		print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
 835			       ip, sizeof(*ip), 0);
 836		print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
 837			       mp, sizeof(*mp), 0);
 838		print_hex_dump(KERN_ERR, "Locker's tblock: ",
 839			       DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
 840			       sizeof(struct tblock), 0);
 841		print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
 842			       tlck, sizeof(*tlck), 0);
 843		BUG();
 844	}
 845	INCREMENT(stattx.waitlock);	/* statistics */
 846	TXN_UNLOCK();
 847	release_metapage(mp);
 848	TXN_LOCK();
 849	xtid = tlck->tid;	/* reacquire after dropping TXN_LOCK */
 850
 851	jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
 852		 tid, xtid, lid);
 853
 854	/* Recheck everything since dropping TXN_LOCK */
 855	if (xtid && (tlck->mp == mp) && (mp->lid == lid))
 856		TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
 857	else
 858		TXN_UNLOCK();
 859	jfs_info("txLock: awakened     tid = %d, lid = %d", tid, lid);
 860
 861	return NULL;
 862}
 863
 864/*
 865 * NAME:	txRelease()
 866 *
 867 * FUNCTION:	Release buffers associated with transaction locks, but don't
 868 *		mark homeok yet.  This allows other transactions to modify
 869 *		buffers, but won't let them go to disk until the commit
 870 *		record is actually written.
 871 *
 872 * PARAMETER:
 873 *		tblk	- transaction block
 874 *
 875 * RETURN:	none
 876 */
 877static void txRelease(struct tblock * tblk)
 878{
 879	struct metapage *mp;
 880	lid_t lid;
 881	struct tlock *tlck;
 882
 883	TXN_LOCK();
 884
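	/* walk the transaction's tlock list; lids index the global
	 * TxLock table and are resolved via lid_to_tlock()
	 */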
 885	for (lid = tblk->next; lid; lid = tlck->next) {
 886		tlck = lid_to_tlock(lid);
 887		if ((mp = tlck->mp) != NULL &&
 888		    (tlck->type & tlckBTROOT) == 0) {
 889			assert(mp->xflag & COMMIT_PAGE);
 890			mp->lid = 0;
 891		}
 892	}
 893
 894	/*
 895	 * wakeup transactions waiting on a page locked
 896	 * by the current transaction
 897	 */
 898	TXN_WAKEUP(&tblk->waitor);
 899
 900	TXN_UNLOCK();
 901}
 902
 903/*
 904 * NAME:	txUnlock()
 905 *
 906 * FUNCTION:	Initiates pageout of pages modified by tid in journalled
 907 *		objects and frees their lockwords.
 908 */
 909static void txUnlock(struct tblock * tblk)
 910{
 911	struct tlock *tlck;
 912	struct linelock *linelock;
 913	lid_t lid, next, llid, k;
 914	struct metapage *mp;
 915	struct jfs_log *log;
 916	int difft, diffp;
 917	unsigned long flags;
 918
 919	jfs_info("txUnlock: tblk = 0x%p", tblk);
 920	log = JFS_SBI(tblk->sb)->log;
 921
 922	/*
 923	 * mark page under tlock homeok (its log has been written):
 924	 */
 925	for (lid = tblk->next; lid; lid = next) {
 926		tlck = lid_to_tlock(lid);
 927		next = tlck->next;
 928
 929		jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck);
 930
 931		/* unbind page from tlock */
 932		if ((mp = tlck->mp) != NULL &&
 933		    (tlck->type & tlckBTROOT) == 0) {
 934			assert(mp->xflag & COMMIT_PAGE);
 935
 936			/* hold buffer
 937			 */
 938			hold_metapage(mp);
 939
 940			assert(mp->nohomeok > 0);
 941			_metapage_homeok(mp);
 942
 943			/* inherit younger/larger clsn */
 944			LOGSYNC_LOCK(log, flags);
 945			if (mp->clsn) {
 946				logdiff(difft, tblk->clsn, log);
 947				logdiff(diffp, mp->clsn, log);
 948				if (difft > diffp)
 949					mp->clsn = tblk->clsn;
 950			} else
 951				mp->clsn = tblk->clsn;
 952			LOGSYNC_UNLOCK(log, flags);
 953
 954			assert(!(tlck->flag & tlckFREEPAGE));
 955
 956			put_metapage(mp);
 957		}
 958
 959		/* insert tlock, and linelock(s) of the tlock if any,
 960		 * at head of freelist
 961		 */
 962		TXN_LOCK();
 963
 964		llid = ((struct linelock *) & tlck->lock)->next;
 965		while (llid) {
 966			linelock = (struct linelock *) lid_to_tlock(llid);
 967			k = linelock->next;
 968			txLockFree(llid);
 969			llid = k;
 970		}
 971		txLockFree(lid);
 972
 973		TXN_UNLOCK();
 974	}
 975	tblk->next = tblk->last = 0;
 976
 977	/*
 978	 * remove tblock from logsynclist
 979	 * (allocation map pages inherited the lsn of tblk and
 980	 * have been inserted in the logsync list at txUpdateMap())
 981	 */
 982	if (tblk->lsn) {
 983		LOGSYNC_LOCK(log, flags);
 984		log->count--;
 985		list_del(&tblk->synclist);
 986		LOGSYNC_UNLOCK(log, flags);
 987	}
 988}
 989
 990/*
 991 *	txMaplock()
 992 *
 993 * function: allocate a transaction lock for freed page/entry;
 994 *	for freed page, maplock is used as xtlock/dtlock type;
 995 */
 996struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 997{
 998	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 999	lid_t lid;
1000	struct tblock *tblk;
1001	struct tlock *tlck;
1002	struct maplock *maplock;
1003
1004	TXN_LOCK();
1005
1006	/*
1007	 * allocate a tlock
1008	 */
1009	lid = txLockAlloc();
1010	tlck = lid_to_tlock(lid);
1011
1012	/*
1013	 * initialize tlock
1014	 */
1015	tlck->tid = tid;
1016
1017	/* bind the tlock and the object */
1018	tlck->flag = tlckINODELOCK;
1019	if (S_ISDIR(ip->i_mode))
1020		tlck->flag |= tlckDIRECTORY;
1021	tlck->ip = ip;
1022	tlck->mp = NULL;
1023
1024	tlck->type = type;
1025
1026	/*
1027	 * enqueue transaction lock to transaction/inode
1028	 */
1029	/* insert the tlock at tail of transaction tlock list */
1030	if (tid) {
1031		tblk = tid_to_tblock(tid);
1032		if (tblk->next)
1033			lid_to_tlock(tblk->last)->next = lid;
1034		else
1035			tblk->next = lid;
1036		tlck->next = 0;
1037		tblk->last = lid;
1038	}
1039	/* anonymous transaction:
1040	 * insert the tlock at head of inode anonymous tlock list
1041	 */
1042	else {
1043		tlck->next = jfs_ip->atlhead;
1044		jfs_ip->atlhead = lid;
1045		if (tlck->next == 0) {
1046			/* This inode's first anonymous transaction */
1047			jfs_ip->atltail = lid;
1048			list_add_tail(&jfs_ip->anon_inode_list,
1049				      &TxAnchor.anon_list);
1050		}
1051	}
1052
1053	TXN_UNLOCK();
1054
1055	/* initialize type dependent area for maplock */
1056	maplock = (struct maplock *) & tlck->lock;
1057	maplock->next = 0;
1058	maplock->maxcnt = 0;
1059	maplock->index = 0;
1060
1061	return tlck;
1062}
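
/*
 * Illustrative use (a sketch, not a verbatim caller; xaddr/xlen are
 * stand-in names): code freeing an extent typically formats the returned
 * tlock's lock area as a pxd_lock, which txUpdateMap() later consumes;
 * cf. dtLog() and txEA() below:
 *
 *	tlck = txMaplock(tid, ip, tlckMAP);
 *	pxdlock = (struct pxd_lock *) & tlck->lock;
 *	pxdlock->flag = mlckFREEPXD;
 *	PXDaddress(&pxdlock->pxd, xaddr);
 *	PXDlength(&pxdlock->pxd, xlen);
 *	pxdlock->index = 1;
 */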
1063
1064/*
1065 *	txLinelock()
1066 *
1067 * function: allocate a transaction lock for log vector list
1068 */
1069struct linelock *txLinelock(struct linelock * tlock)
1070{
1071	lid_t lid;
1072	struct tlock *tlck;
1073	struct linelock *linelock;
1074
1075	TXN_LOCK();
1076
1077	/* allocate a TxLock structure */
1078	lid = txLockAlloc();
1079	tlck = lid_to_tlock(lid);
1080
1081	TXN_UNLOCK();
1082
1083	/* initialize linelock */
1084	linelock = (struct linelock *) tlck;
1085	linelock->next = 0;
1086	linelock->flag = tlckLINELOCK;
1087	linelock->maxcnt = TLOCKLONG;
1088	linelock->index = 0;
1089	if (tlck->flag & tlckDIRECTORY)
1090		linelock->flag |= tlckDIRECTORY;
1091
1092	/* append linelock after tlock */
1093	linelock->next = tlock->next;
1094	tlock->next = lid;
1095
1096	return linelock;
1097}
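
/*
 * Illustrative use (a sketch): when a linelock fills up, callers chain
 * an overflow linelock onto it before recording the next vector, e.g.:
 *
 *	if (linelock->index >= linelock->maxcnt)
 *		linelock = txLinelock(linelock);
 */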
1098
1099/*
1100 *		transaction commit management
1101 *		-----------------------------
1102 */
1103
1104/*
1105 * NAME:	txCommit()
1106 *
1107 * FUNCTION:	commit the changes to the objects specified in
1108 *		clist.  For journalled segments only the
 1109 *		changes of the caller are committed, i.e. by tid.
1110 *		for non-journalled segments the data are flushed to
1111 *		disk and then the change to the disk inode and indirect
1112 *		blocks committed (so blocks newly allocated to the
1113 *		segment will be made a part of the segment atomically).
1114 *
1115 *		all of the segments specified in clist must be in
1116 *		one file system. no more than 6 segments are needed
1117 *		to handle all unix svcs.
1118 *
1119 *		if the i_nlink field (i.e. disk inode link count)
1120 *		is zero, and the type of inode is a regular file or
 1121 *		directory, or symbolic link, the inode is truncated
1122 *		to zero length. the truncation is committed but the
1123 *		VM resources are unaffected until it is closed (see
1124 *		iput and iclose).
1125 *
 1126 * PARAMETER:	tid, nip, iplist, flag	- see the definition below;
 1127 *
 1128 * RETURN:	0 on success; negative errno on failure;
1129 *
1130 * serialization:
1131 *		on entry the inode lock on each segment is assumed
1132 *		to be held.
1133 *
1134 * i/o error:
1135 */
1136int txCommit(tid_t tid,		/* transaction identifier */
1137	     int nip,		/* number of inodes to commit */
1138	     struct inode **iplist,	/* list of inode to commit */
1139	     int flag)
1140{
1141	int rc = 0;
1142	struct commit cd;
1143	struct jfs_log *log;
1144	struct tblock *tblk;
1145	struct lrd *lrd;
1146	struct inode *ip;
1147	struct jfs_inode_info *jfs_ip;
1148	int k, n;
1149	ino_t top;
1150	struct super_block *sb;
1151
1152	jfs_info("txCommit, tid = %d, flag = %d", tid, flag);
1153	/* is read-only file system ? */
1154	if (isReadOnly(iplist[0])) {
1155		rc = -EROFS;
1156		goto TheEnd;
1157	}
1158
1159	sb = cd.sb = iplist[0]->i_sb;
1160	cd.tid = tid;
1161
1162	if (tid == 0)
1163		tid = txBegin(sb, 0);
1164	tblk = tid_to_tblock(tid);
1165
1166	/*
1167	 * initialize commit structure
1168	 */
1169	log = JFS_SBI(sb)->log;
1170	cd.log = log;
1171
1172	/* initialize log record descriptor in commit */
1173	lrd = &cd.lrd;
1174	lrd->logtid = cpu_to_le32(tblk->logtid);
1175	lrd->backchain = 0;
1176
1177	tblk->xflag |= flag;
1178
1179	if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
1180		tblk->xflag |= COMMIT_LAZY;
1181	/*
1182	 *	prepare non-journaled objects for commit
1183	 *
1184	 * flush data pages of non-journaled file
 1185	 * to prevent the file from getting uninitialized disk blocks
 1186	 * in case of a crash.
1187	 * (new blocks - )
1188	 */
1189	cd.iplist = iplist;
1190	cd.nip = nip;
1191
1192	/*
1193	 *	acquire transaction lock on (on-disk) inodes
1194	 *
1195	 * update on-disk inode from in-memory inode
1196	 * acquiring transaction locks for AFTER records
1197	 * on the on-disk inode of file object
1198	 *
1199	 * sort the inodes array by inode number in descending order
1200	 * to prevent deadlock when acquiring transaction lock
1201	 * of on-disk inodes on multiple on-disk inode pages by
1202	 * multiple concurrent transactions
1203	 */
1204	for (k = 0; k < cd.nip; k++) {
1205		top = (cd.iplist[k])->i_ino;
1206		for (n = k + 1; n < cd.nip; n++) {
1207			ip = cd.iplist[n];
1208			if (ip->i_ino > top) {
1209				top = ip->i_ino;
1210				cd.iplist[n] = cd.iplist[k];
1211				cd.iplist[k] = ip;
1212			}
1213		}
1214
1215		ip = cd.iplist[k];
1216		jfs_ip = JFS_IP(ip);
1217
1218		/*
1219		 * BUGBUG - This code has temporarily been removed.  The
1220		 * intent is to ensure that any file data is written before
1221		 * the metadata is committed to the journal.  This prevents
1222		 * uninitialized data from appearing in a file after the
1223		 * journal has been replayed.  (The uninitialized data
1224		 * could be sensitive data removed by another user.)
1225		 *
1226		 * The problem now is that we are holding the IWRITELOCK
1227		 * on the inode, and calling filemap_fdatawrite on an
1228		 * unmapped page will cause a deadlock in jfs_get_block.
1229		 *
1230		 * The long term solution is to pare down the use of
1231		 * IWRITELOCK.  We are currently holding it too long.
1232		 * We could also be smarter about which data pages need
1233		 * to be written before the transaction is committed and
1234		 * when we don't need to worry about it at all.
1235		 *
1236		 * if ((!S_ISDIR(ip->i_mode))
1237		 *    && (tblk->flag & COMMIT_DELETE) == 0)
1238		 *	filemap_write_and_wait(ip->i_mapping);
1239		 */
1240
1241		/*
1242		 * Mark inode as not dirty.  It will still be on the dirty
1243		 * inode list, but we'll know not to commit it again unless
1244		 * it gets marked dirty again
1245		 */
1246		clear_cflag(COMMIT_Dirty, ip);
1247
1248		/* inherit anonymous tlock(s) of inode */
1249		if (jfs_ip->atlhead) {
1250			lid_to_tlock(jfs_ip->atltail)->next = tblk->next;
1251			tblk->next = jfs_ip->atlhead;
1252			if (!tblk->last)
1253				tblk->last = jfs_ip->atltail;
1254			jfs_ip->atlhead = jfs_ip->atltail = 0;
1255			TXN_LOCK();
1256			list_del_init(&jfs_ip->anon_inode_list);
1257			TXN_UNLOCK();
1258		}
1259
1260		/*
1261		 * acquire transaction lock on on-disk inode page
1262		 * (become first tlock of the tblk's tlock list)
1263		 */
1264		if (((rc = diWrite(tid, ip))))
1265			goto out;
1266	}
1267
1268	/*
1269	 *	write log records from transaction locks
1270	 *
1271	 * txUpdateMap() resets XAD_NEW in XAD.
1272	 */
1273	if ((rc = txLog(log, tblk, &cd)))
1274		goto TheEnd;
1275
1276	/*
1277	 * Ensure that inode isn't reused before
1278	 * lazy commit thread finishes processing
1279	 */
1280	if (tblk->xflag & COMMIT_DELETE) {
1281		ihold(tblk->u.ip);
1282		/*
1283		 * Avoid a rare deadlock
1284		 *
1285		 * If the inode is locked, we may be blocked in
1286		 * jfs_commit_inode.  If so, we don't want the
1287		 * lazy_commit thread doing the last iput() on the inode
1288		 * since that may block on the locked inode.  Instead,
1289		 * commit the transaction synchronously, so the last iput
1290		 * will be done by the calling thread (or later)
1291		 */
1292		/*
1293		 * I believe this code is no longer needed.  Splitting I_LOCK
1294		 * into two bits, I_NEW and I_SYNC should prevent this
1295		 * deadlock as well.  But since I don't have a JFS testload
1296		 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
1297		 * Joern
1298		 */
1299		if (tblk->u.ip->i_state & I_SYNC)
1300			tblk->xflag &= ~COMMIT_LAZY;
1301	}
1302
1303	ASSERT((!(tblk->xflag & COMMIT_DELETE)) ||
1304	       ((tblk->u.ip->i_nlink == 0) &&
1305		!test_cflag(COMMIT_Nolink, tblk->u.ip)));
1306
1307	/*
1308	 *	write COMMIT log record
1309	 */
1310	lrd->type = cpu_to_le16(LOG_COMMIT);
1311	lrd->length = 0;
1312	lmLog(log, tblk, lrd, NULL);
1313
1314	lmGroupCommit(log, tblk);
1315
1316	/*
1317	 *	- transaction is now committed -
1318	 */
1319
1320	/*
1321	 * force pages in careful update
1322	 * (imap addressing structure update)
1323	 */
1324	if (flag & COMMIT_FORCE)
1325		txForce(tblk);
1326
1327	/*
1328	 *	update allocation map.
1329	 *
1330	 * update inode allocation map and inode:
1331	 * free pager lock on memory object of inode if any.
1332	 * update block allocation map.
1333	 *
1334	 * txUpdateMap() resets XAD_NEW in XAD.
1335	 */
1336	if (tblk->xflag & COMMIT_FORCE)
1337		txUpdateMap(tblk);
1338
1339	/*
1340	 *	free transaction locks and pageout/free pages
1341	 */
1342	txRelease(tblk);
1343
1344	if ((tblk->flag & tblkGC_LAZY) == 0)
1345		txUnlock(tblk);
1346
1347
1348	/*
1349	 *	reset in-memory object state
1350	 */
1351	for (k = 0; k < cd.nip; k++) {
1352		ip = cd.iplist[k];
1353		jfs_ip = JFS_IP(ip);
1354
1355		/*
1356		 * reset in-memory inode state
1357		 */
1358		jfs_ip->bxflag = 0;
1359		jfs_ip->blid = 0;
1360	}
1361
1362      out:
1363	if (rc != 0)
1364		txAbort(tid, 1);
1365
1366      TheEnd:
1367	jfs_info("txCommit: tid = %d, returning %d", tid, rc);
1368	return rc;
1369}
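
/*
 * Canonical caller sequence (illustrative; txQuiesce() below uses the
 * same pattern):
 *
 *	tid = txBegin(ip->i_sb, 0);
 *	mutex_lock(&JFS_IP(ip)->commit_mutex);
 *	rc = txCommit(tid, 1, &ip, 0);
 *	txEnd(tid);
 *	mutex_unlock(&JFS_IP(ip)->commit_mutex);
 */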
1370
1371/*
1372 * NAME:	txLog()
1373 *
1374 * FUNCTION:	Writes AFTER log records for all lines modified
1375 *		by tid for segments specified by inodes in comdata.
1376 *		Code assumes only WRITELOCKS are recorded in lockwords.
1377 *
1378 * PARAMETERS:
1379 *
 1380 * RETURN:	always 0;
1381 */
1382static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
1383{
1384	int rc = 0;
1385	struct inode *ip;
1386	lid_t lid;
1387	struct tlock *tlck;
1388	struct lrd *lrd = &cd->lrd;
1389
1390	/*
1391	 * write log record(s) for each tlock of transaction,
1392	 */
1393	for (lid = tblk->next; lid; lid = tlck->next) {
1394		tlck = lid_to_tlock(lid);
1395
1396		tlck->flag |= tlckLOG;
1397
1398		/* initialize lrd common */
1399		ip = tlck->ip;
1400		lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate);
1401		lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset);
1402		lrd->log.redopage.inode = cpu_to_le32(ip->i_ino);
1403
1404		/* write log record of page from the tlock */
1405		switch (tlck->type & tlckTYPE) {
1406		case tlckXTREE:
1407			xtLog(log, tblk, lrd, tlck);
1408			break;
1409
1410		case tlckDTREE:
1411			dtLog(log, tblk, lrd, tlck);
1412			break;
1413
1414		case tlckINODE:
1415			diLog(log, tblk, lrd, tlck, cd);
1416			break;
1417
1418		case tlckMAP:
1419			mapLog(log, tblk, lrd, tlck);
1420			break;
1421
1422		case tlckDATA:
1423			dataLog(log, tblk, lrd, tlck);
1424			break;
1425
1426		default:
1427			jfs_err("UFO tlock:0x%p", tlck);
1428		}
1429	}
1430
1431	return rc;
1432}
1433
1434/*
1435 *	diLog()
1436 *
1437 * function:	log inode tlock and format maplock to update bmap;
1438 */
1439static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1440		 struct tlock * tlck, struct commit * cd)
1441{
1442	int rc = 0;
1443	struct metapage *mp;
1444	pxd_t *pxd;
1445	struct pxd_lock *pxdlock;
1446
1447	mp = tlck->mp;
1448
1449	/* initialize as REDOPAGE record format */
1450	lrd->log.redopage.type = cpu_to_le16(LOG_INODE);
1451	lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE);
1452
1453	pxd = &lrd->log.redopage.pxd;
1454
1455	/*
1456	 *	inode after image
1457	 */
1458	if (tlck->type & tlckENTRY) {
1459		/* log after-image for logredo(): */
1460		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1461		PXDaddress(pxd, mp->index);
1462		PXDlength(pxd,
1463			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1464		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1465
1466		/* mark page as homeward bound */
1467		tlck->flag |= tlckWRITEPAGE;
1468	} else if (tlck->type & tlckFREE) {
1469		/*
1470		 *	free inode extent
1471		 *
1472		 * (pages of the freed inode extent have been invalidated and
1473		 * a maplock for free of the extent has been formatted at
1474		 * txLock() time);
1475		 *
1476		 * the tlock had been acquired on the inode allocation map page
1477		 * (iag) that specifies the freed extent, even though the map
1478		 * page is not itself logged, to prevent pageout of the map
1479		 * page before the log;
1480		 */
1481
1482		/* log LOG_NOREDOINOEXT of the freed inode extent for
1483		 * logredo() to start NoRedoPage filters, and to update
1484		 * imap and bmap for free of the extent;
1485		 */
1486		lrd->type = cpu_to_le16(LOG_NOREDOINOEXT);
1487		/*
1488		 * For the LOG_NOREDOINOEXT record, we need
1489		 * to pass the IAG number and inode extent
1490		 * index (within that IAG) from which the
 1491		 * extent is being released.  These have been
1492		 * passed to us in the iplist[1] and iplist[2].
1493		 */
1494		lrd->log.noredoinoext.iagnum =
1495		    cpu_to_le32((u32) (size_t) cd->iplist[1]);
1496		lrd->log.noredoinoext.inoext_idx =
1497		    cpu_to_le32((u32) (size_t) cd->iplist[2]);
1498
1499		pxdlock = (struct pxd_lock *) & tlck->lock;
1500		*pxd = pxdlock->pxd;
1501		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1502
1503		/* update bmap */
1504		tlck->flag |= tlckUPDATEMAP;
1505
1506		/* mark page as homeward bound */
1507		tlck->flag |= tlckWRITEPAGE;
1508	} else
1509		jfs_err("diLog: UFO type tlck:0x%p", tlck);
1510#ifdef  _JFS_WIP
1511	/*
1512	 *	alloc/free external EA extent
1513	 *
1514	 * a maplock for txUpdateMap() to update bPWMAP for alloc/free
1515	 * of the extent has been formatted at txLock() time;
1516	 */
1517	else {
1518		assert(tlck->type & tlckEA);
1519
1520		/* log LOG_UPDATEMAP for logredo() to update bmap for
1521		 * alloc of new (and free of old) external EA extent;
1522		 */
1523		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1524		pxdlock = (struct pxd_lock *) & tlck->lock;
1525		nlock = pxdlock->index;
1526		for (i = 0; i < nlock; i++, pxdlock++) {
1527			if (pxdlock->flag & mlckALLOCPXD)
1528				lrd->log.updatemap.type =
1529				    cpu_to_le16(LOG_ALLOCPXD);
1530			else
1531				lrd->log.updatemap.type =
1532				    cpu_to_le16(LOG_FREEPXD);
1533			lrd->log.updatemap.nxd = cpu_to_le16(1);
1534			lrd->log.updatemap.pxd = pxdlock->pxd;
1535			lrd->backchain =
1536			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1537		}
1538
1539		/* update bmap */
1540		tlck->flag |= tlckUPDATEMAP;
1541	}
1542#endif				/* _JFS_WIP */
1543
1544	return rc;
1545}
1546
1547/*
1548 *	dataLog()
1549 *
1550 * function:	log data tlock
1551 */
1552static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1553	    struct tlock * tlck)
1554{
1555	struct metapage *mp;
1556	pxd_t *pxd;
1557
1558	mp = tlck->mp;
1559
1560	/* initialize as REDOPAGE record format */
1561	lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
1562	lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);
1563
1564	pxd = &lrd->log.redopage.pxd;
1565
1566	/* log after-image for logredo(): */
1567	lrd->type = cpu_to_le16(LOG_REDOPAGE);
1568
1569	if (jfs_dirtable_inline(tlck->ip)) {
1570		/*
 1571		 * The table has been truncated; we must have deleted
 1572		 * the last entry, so don't bother logging this
1573		 */
1574		mp->lid = 0;
1575		grab_metapage(mp);
1576		metapage_homeok(mp);
1577		discard_metapage(mp);
1578		tlck->mp = NULL;
1579		return 0;
1580	}
1581
1582	PXDaddress(pxd, mp->index);
1583	PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);
1584
1585	lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1586
1587	/* mark page as homeward bound */
1588	tlck->flag |= tlckWRITEPAGE;
1589
1590	return 0;
1591}
1592
1593/*
1594 *	dtLog()
1595 *
1596 * function:	log dtree tlock and format maplock to update bmap;
1597 */
1598static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1599	   struct tlock * tlck)
1600{
1601	struct metapage *mp;
1602	struct pxd_lock *pxdlock;
1603	pxd_t *pxd;
1604
1605	mp = tlck->mp;
1606
1607	/* initialize as REDOPAGE/NOREDOPAGE record format */
1608	lrd->log.redopage.type = cpu_to_le16(LOG_DTREE);
1609	lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE);
1610
1611	pxd = &lrd->log.redopage.pxd;
1612
1613	if (tlck->type & tlckBTROOT)
1614		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1615
1616	/*
1617	 *	page extension via relocation: entry insertion;
1618	 *	page extension in-place: entry insertion;
1619	 *	new right page from page split, reinitialized in-line
1620	 *	root from root page split: entry insertion;
1621	 */
1622	if (tlck->type & (tlckNEW | tlckEXTEND)) {
1623		/* log after-image of the new page for logredo():
1624		 * mark log (LOG_NEW) for logredo() to initialize
1625		 * freelist and update bmap for alloc of the new page;
1626		 */
1627		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1628		if (tlck->type & tlckEXTEND)
1629			lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
1630		else
1631			lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
1632		PXDaddress(pxd, mp->index);
1633		PXDlength(pxd,
1634			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1635		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1636
1637		/* format a maplock for txUpdateMap() to update bPMAP for
1638		 * alloc of the new page;
1639		 */
1640		if (tlck->type & tlckBTROOT)
1641			return;
1642		tlck->flag |= tlckUPDATEMAP;
1643		pxdlock = (struct pxd_lock *) & tlck->lock;
1644		pxdlock->flag = mlckALLOCPXD;
1645		pxdlock->pxd = *pxd;
1646
1647		pxdlock->index = 1;
1648
1649		/* mark page as homeward bound */
1650		tlck->flag |= tlckWRITEPAGE;
1651		return;
1652	}
1653
1654	/*
1655	 *	entry insertion/deletion,
1656	 *	sibling page link update (old right page before split);
1657	 */
1658	if (tlck->type & (tlckENTRY | tlckRELINK)) {
1659		/* log after-image for logredo(): */
1660		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1661		PXDaddress(pxd, mp->index);
1662		PXDlength(pxd,
1663			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1664		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1665
1666		/* mark page as homeward bound */
1667		tlck->flag |= tlckWRITEPAGE;
1668		return;
1669	}
1670
1671	/*
1672	 *	page deletion: page has been invalidated
1673	 *	page relocation: source extent
1674	 *
1675	 *	a maplock for free of the page has been formatted
1676	 *	at txLock() time);
1677	 */
1678	if (tlck->type & (tlckFREE | tlckRELOCATE)) {
1679		/* log LOG_NOREDOPAGE of the deleted page for logredo()
1680		 * to start NoRedoPage filter and to update bmap for free
 1681		 * of the deleted page
1682		 */
1683		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1684		pxdlock = (struct pxd_lock *) & tlck->lock;
1685		*pxd = pxdlock->pxd;
1686		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1687
1688		/* a maplock for txUpdateMap() for free of the page
1689		 * has been formatted at txLock() time;
1690		 */
1691		tlck->flag |= tlckUPDATEMAP;
1692	}
1693	return;
1694}
1695
1696/*
1697 *	xtLog()
1698 *
1699 * function:	log xtree tlock and format maplock to update bmap;
1700 */
1701static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1702	   struct tlock * tlck)
1703{
1704	struct inode *ip;
1705	struct metapage *mp;
1706	xtpage_t *p;
1707	struct xtlock *xtlck;
1708	struct maplock *maplock;
1709	struct xdlistlock *xadlock;
1710	struct pxd_lock *pxdlock;
1711	pxd_t *page_pxd;
1712	int next, lwm, hwm;
1713
1714	ip = tlck->ip;
1715	mp = tlck->mp;
1716
1717	/* initialize as REDOPAGE/NOREDOPAGE record format */
1718	lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
1719	lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);
1720
1721	page_pxd = &lrd->log.redopage.pxd;
1722
1723	if (tlck->type & tlckBTROOT) {
1724		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1725		p = &JFS_IP(ip)->i_xtroot;
1726		if (S_ISDIR(ip->i_mode))
1727			lrd->log.redopage.type |=
1728			    cpu_to_le16(LOG_DIR_XTREE);
1729	} else
1730		p = (xtpage_t *) mp->data;
1731	next = le16_to_cpu(p->header.nextindex);
1732
1733	xtlck = (struct xtlock *) & tlck->lock;
1734
1735	maplock = (struct maplock *) & tlck->lock;
1736	xadlock = (struct xdlistlock *) maplock;
1737
1738	/*
1739	 *	entry insertion/extension;
1740	 *	sibling page link update (old right page before split);
1741	 */
1742	if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
1743		/* log after-image for logredo():
1744		 * logredo() will update bmap for alloc of new/extended
1745		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1746		 * after-image of XADlist;
1747		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1748		 * applying the after-image to the meta-data page.
1749		 */
1750		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1751		PXDaddress(page_pxd, mp->index);
1752		PXDlength(page_pxd,
1753			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1754		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1755
1756		/* format a maplock for txUpdateMap() to update bPMAP
1757		 * for alloc of new/extended extents of XAD[lwm:next)
1758		 * from the page itself;
1759		 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
1760		 */
1761		lwm = xtlck->lwm.offset;
1762		if (lwm == 0)
1763			lwm = XTPAGEMAXSLOT;
1764
1765		if (lwm == next)
1766			goto out;
1767		if (lwm > next) {
 1768			jfs_err("xtLog: lwm > next");
1769			goto out;
1770		}
1771		tlck->flag |= tlckUPDATEMAP;
1772		xadlock->flag = mlckALLOCXADLIST;
1773		xadlock->count = next - lwm;
1774		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1775			int i;
1776			pxd_t *pxd;
1777			/*
1778			 * Lazy commit may allow xtree to be modified before
1779			 * txUpdateMap runs.  Copy xad into linelock to
1780			 * preserve correct data.
1781			 *
 1782			 * We can fit twice as many pxd's as xads in the lock
1783			 */
1784			xadlock->flag = mlckALLOCPXDLIST;
1785			pxd = xadlock->xdlist = &xtlck->pxdlock;
1786			for (i = 0; i < xadlock->count; i++) {
1787				PXDaddress(pxd, addressXAD(&p->xad[lwm + i]));
1788				PXDlength(pxd, lengthXAD(&p->xad[lwm + i]));
1789				p->xad[lwm + i].flag &=
1790				    ~(XAD_NEW | XAD_EXTENDED);
1791				pxd++;
1792			}
1793		} else {
1794			/*
 1795			 * xdlist will point into the inode's xtree; ensure
 1796			 * that the transaction is not committed lazily.
1797			 */
1798			xadlock->flag = mlckALLOCXADLIST;
1799			xadlock->xdlist = &p->xad[lwm];
1800			tblk->xflag &= ~COMMIT_LAZY;
1801		}
1802		jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d "
1803			 "count:%d", tlck->ip, mp, tlck, lwm, xadlock->count);
1804
1805		maplock->index = 1;
1806
1807	      out:
1808		/* mark page as homeward bound */
1809		tlck->flag |= tlckWRITEPAGE;
1810
1811		return;
1812	}
1813
1814	/*
1815	 *	page deletion: file deletion/truncation (ref. xtTruncate())
1816	 *
1817	 * (page will be invalidated after log is written and bmap
1818	 * is updated from the page);
1819	 */
1820	if (tlck->type & tlckFREE) {
1821		/* LOG_NOREDOPAGE log for NoRedoPage filter:
1822		 * if page free from file delete, NoRedoFile filter from
1823		 * inode image of zero link count will subsume NoRedoPage
1824		 * filters for each page;
 1825		 * if page free from file truncation, write NoRedoPage
1826		 * filter;
1827		 *
 1828		 * update of block allocation map for the page itself:
1829		 * if page free from deletion and truncation, LOG_UPDATEMAP
1830		 * log for the page itself is generated from processing
1831		 * its parent page xad entries;
1832		 */
1833		/* if page free from file truncation, log LOG_NOREDOPAGE
1834		 * of the deleted page for logredo() to start NoRedoPage
1835		 * filter for the page;
1836		 */
1837		if (tblk->xflag & COMMIT_TRUNCATE) {
1838			/* write NOREDOPAGE for the page */
1839			lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1840			PXDaddress(page_pxd, mp->index);
1841			PXDlength(page_pxd,
1842				  mp->logical_size >> tblk->sb->
1843				  s_blocksize_bits);
1844			lrd->backchain =
1845			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1846
1847			if (tlck->type & tlckBTROOT) {
1848				/* Empty xtree must be logged */
1849				lrd->type = cpu_to_le16(LOG_REDOPAGE);
1850				lrd->backchain =
1851				    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1852			}
1853		}
1854
1855		/* init LOG_UPDATEMAP of the freed extents
1856		 * XAD[XTENTRYSTART:hwm) from the deleted page itself
1857		 * for logredo() to update bmap;
1858		 */
1859		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1860		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST);
1861		xtlck = (struct xtlock *) & tlck->lock;
1862		hwm = xtlck->hwm.offset;
1863		lrd->log.updatemap.nxd =
1864		    cpu_to_le16(hwm - XTENTRYSTART + 1);
1865		/* reformat linelock for lmLog() */
1866		xtlck->header.offset = XTENTRYSTART;
1867		xtlck->header.length = hwm - XTENTRYSTART + 1;
1868		xtlck->index = 1;
1869		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1870
1871		/* format a maplock for txUpdateMap() to update bmap
1872		 * to free extents of XAD[XTENTRYSTART:hwm) from the
1873		 * deleted page itself;
1874		 */
1875		tlck->flag |= tlckUPDATEMAP;
1876		xadlock->count = hwm - XTENTRYSTART + 1;
1877		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1878			int i;
1879			pxd_t *pxd;
1880			/*
1881			 * Lazy commit may allow xtree to be modified before
1882			 * txUpdateMap runs.  Copy xad into linelock to
1883			 * preserve correct data.
1884			 *
 1885			 * We can fit twice as many pxd's as xads in the lock
1886			 */
1887			xadlock->flag = mlckFREEPXDLIST;
1888			pxd = xadlock->xdlist = &xtlck->pxdlock;
1889			for (i = 0; i < xadlock->count; i++) {
1890				PXDaddress(pxd,
1891					addressXAD(&p->xad[XTENTRYSTART + i]));
1892				PXDlength(pxd,
1893					lengthXAD(&p->xad[XTENTRYSTART + i]));
1894				pxd++;
1895			}
1896		} else {
1897			/*
 1898			 * xdlist will point into the inode's xtree; ensure
 1899			 * that the transaction is not committed lazily.
1900			 */
1901			xadlock->flag = mlckFREEXADLIST;
1902			xadlock->xdlist = &p->xad[XTENTRYSTART];
1903			tblk->xflag &= ~COMMIT_LAZY;
1904		}
1905		jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2",
1906			 tlck->ip, mp, xadlock->count);
1907
1908		maplock->index = 1;
1909
1910		/* mark page as invalid */
1911		if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode))
1912		    && !(tlck->type & tlckBTROOT))
1913			tlck->flag |= tlckFREEPAGE;
1914		/*
1915		   else (tblk->xflag & COMMIT_PMAP)
1916		   ? release the page;
1917		 */
1918		return;
1919	}
1920
1921	/*
1922	 *	page/entry truncation: file truncation (ref. xtTruncate())
1923	 *
1924	 *	|----------+------+------+---------------|
1925	 *		   |      |      |
1926	 *		   |      |     hwm - hwm before truncation
1927	 *		   |     next - truncation point
1928	 *		  lwm - lwm before truncation
1929	 * header ?
1930	 */
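	/* worked example (illustrative, not from the source): with
	 * lwm = 3, twm = 4, next = 5, hwm = 8, the code below logs
	 * XAD[3:5) as newly allocated/extended, XAD[4] (== next - 1)
	 * as a truncated delta extent, and XAD[5:8] as freed.
	 */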
1931	if (tlck->type & tlckTRUNCATE) {
1932		/* This odd declaration suppresses a bogus gcc warning */
1933		pxd_t pxd = pxd;	/* truncated extent of xad */
1934		int twm;
1935
1936		/*
1937		 * For truncation the entire linelock may be used, so it would
1938		 * be difficult to store xad list in linelock itself.
1939		 * Therefore, we'll just force transaction to be committed
1940		 * synchronously, so that xtree pages won't be changed before
1941		 * txUpdateMap runs.
1942		 */
1943		tblk->xflag &= ~COMMIT_LAZY;
1944		lwm = xtlck->lwm.offset;
1945		if (lwm == 0)
1946			lwm = XTPAGEMAXSLOT;
1947		hwm = xtlck->hwm.offset;
1948		twm = xtlck->twm.offset;
1949
1950		/*
1951		 *	write log records
1952		 */
1953		/* log after-image for logredo():
1954		 *
1955		 * logredo() will update bmap for alloc of new/extended
1956		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1957		 * after-image of XADlist;
1958		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1959		 * applying the after-image to the meta-data page.
1960		 */
1961		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1962		PXDaddress(page_pxd, mp->index);
1963		PXDlength(page_pxd,
1964			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1965		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1966
1967		/*
1968		 * truncate entry XAD[twm == next - 1]:
1969		 */
1970		if (twm == next - 1) {
1971			/* init LOG_UPDATEMAP for logredo() to update bmap for
1972			 * free of truncated delta extent of the truncated
1973			 * entry XAD[next - 1]:
1974			 * (xtlck->pxdlock = truncated delta extent);
1975			 */
1976			pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
1977			/* assert(pxdlock->type & tlckTRUNCATE); */
1978			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1979			lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
1980			lrd->log.updatemap.nxd = cpu_to_le16(1);
1981			lrd->log.updatemap.pxd = pxdlock->pxd;
1982			pxd = pxdlock->pxd;	/* save to format maplock */
1983			lrd->backchain =
1984			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1985		}
1986
1987		/*
1988		 * free entries XAD[next:hwm]:
1989		 */
1990		if (hwm >= next) {
1991			/* init LOG_UPDATEMAP of the freed extents
1992			 * XAD[next:hwm] from the deleted page itself
1993			 * for logredo() to update bmap;
1994			 */
1995			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1996			lrd->log.updatemap.type =
1997			    cpu_to_le16(LOG_FREEXADLIST);
1998			xtlck = (struct xtlock *) & tlck->lock;
1999			hwm = xtlck->hwm.offset;
2000			lrd->log.updatemap.nxd =
2001			    cpu_to_le16(hwm - next + 1);
2002			/* reformat linelock for lmLog() */
2003			xtlck->header.offset = next;
2004			xtlck->header.length = hwm - next + 1;
2005			xtlck->index = 1;
2006			lrd->backchain =
2007			    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
2008		}
2009
2010		/*
2011		 *	format maplock(s) for txUpdateMap() to update bmap
2012		 */
2013		maplock->index = 0;
2014
2015		/*
2016		 * allocate entries XAD[lwm:next):
2017		 */
2018		if (lwm < next) {
2019			/* format a maplock for txUpdateMap() to update bPMAP
2020			 * for alloc of new/extended extents of XAD[lwm:next)
2021			 * from the page itself;
2022			 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
2023			 */
2024			tlck->flag |= tlckUPDATEMAP;
2025			xadlock->flag = mlckALLOCXADLIST;
2026			xadlock->count = next - lwm;
2027			xadlock->xdlist = &p->xad[lwm];
2028
2029			jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d "
2030				 "lwm:%d next:%d",
2031				 tlck->ip, mp, xadlock->count, lwm, next);
2032			maplock->index++;
2033			xadlock++;
2034		}
2035
2036		/*
2037		 * truncate entry XAD[twm == next - 1]:
2038		 */
2039		if (twm == next - 1) {
2040			/* format a maplock for txUpdateMap() to update bmap
2041			 * to free truncated delta extent of the truncated
2042			 * entry XAD[next - 1];
2043			 * (xtlck->pxdlock = truncated delta extent);
2044			 */
2045			tlck->flag |= tlckUPDATEMAP;
2046			pxdlock = (struct pxd_lock *) xadlock;
2047			pxdlock->flag = mlckFREEPXD;
2048			pxdlock->count = 1;
2049			pxdlock->pxd = pxd;
2050
2051			jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d "
2052				 "hwm:%d", ip, mp, pxdlock->count, hwm);
2053			maplock->index++;
2054			xadlock++;
2055		}
2056
2057		/*
2058		 * free entries XAD[next:hwm]:
2059		 */
2060		if (hwm >= next) {
2061			/* format a maplock for txUpdateMap() to update bmap
 2062			 * to free extents of XAD[next:hwm] from the deleted
2063			 * page itself;
2064			 */
2065			tlck->flag |= tlckUPDATEMAP;
2066			xadlock->flag = mlckFREEXADLIST;
2067			xadlock->count = hwm - next + 1;
2068			xadlock->xdlist = &p->xad[next];
2069
2070			jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d "
2071				 "next:%d hwm:%d",
2072				 tlck->ip, mp, xadlock->count, next, hwm);
2073			maplock->index++;
2074		}
2075
2076		/* mark page as homeward bound */
2077		tlck->flag |= tlckWRITEPAGE;
2078	}
2079	return;
2080}
2081
2082/*
2083 *	mapLog()
2084 *
2085 * function:	log from maplock of freed data extents;
2086 */
2087static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2088		   struct tlock * tlck)
2089{
2090	struct pxd_lock *pxdlock;
2091	int i, nlock;
2092	pxd_t *pxd;
2093
2094	/*
2095	 *	page relocation: free the source page extent
2096	 *
2097	 * a maplock for txUpdateMap() for free of the page
2098	 * has been formatted at txLock() time saving the src
2099	 * relocated page address;
2100	 */
2101	if (tlck->type & tlckRELOCATE) {
2102		/* log LOG_NOREDOPAGE of the old relocated page
2103		 * for logredo() to start NoRedoPage filter;
2104		 */
2105		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
2106		pxdlock = (struct pxd_lock *) & tlck->lock;
2107		pxd = &lrd->log.redopage.pxd;
2108		*pxd = pxdlock->pxd;
2109		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2110
2111		/* (N.B. currently, logredo() does NOT update bmap
2112		 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE);
2113		 * if page free from relocation, LOG_UPDATEMAP log is
2114		 * specifically generated now for logredo()
2115		 * to update bmap for free of src relocated page;
2116		 * (new flag LOG_RELOCATE may be introduced which will
2117		 * inform logredo() to start NORedoPage filter and also
2118		 * update block allocation map at the same time, thus
2119		 * avoiding an extra log write);
2120		 */
2121		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2122		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
2123		lrd->log.updatemap.nxd = cpu_to_le16(1);
2124		lrd->log.updatemap.pxd = pxdlock->pxd;
2125		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2126
2127		/* a maplock for txUpdateMap() for free of the page
2128		 * has been formatted at txLock() time;
2129		 */
2130		tlck->flag |= tlckUPDATEMAP;
2131		return;
2132	}
2133	/*
 2134	 *
2135	 * Otherwise it's not a relocate request
2136	 *
2137	 */
2138	else {
2139		/* log LOG_UPDATEMAP for logredo() to update bmap for
2140		 * free of truncated/relocated delta extent of the data;
2141		 * e.g.: external EA extent, relocated/truncated extent
2142		 * from xtTailgate();
2143		 */
2144		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2145		pxdlock = (struct pxd_lock *) & tlck->lock;
2146		nlock = pxdlock->index;
2147		for (i = 0; i < nlock; i++, pxdlock++) {
2148			if (pxdlock->flag & mlckALLOCPXD)
2149				lrd->log.updatemap.type =
2150				    cpu_to_le16(LOG_ALLOCPXD);
2151			else
2152				lrd->log.updatemap.type =
2153				    cpu_to_le16(LOG_FREEPXD);
2154			lrd->log.updatemap.nxd = cpu_to_le16(1);
2155			lrd->log.updatemap.pxd = pxdlock->pxd;
2156			lrd->backchain =
2157			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2158			jfs_info("mapLog: xaddr:0x%lx xlen:0x%x",
2159				 (ulong) addressPXD(&pxdlock->pxd),
2160				 lengthPXD(&pxdlock->pxd));
2161		}
2162
2163		/* update bmap */
2164		tlck->flag |= tlckUPDATEMAP;
2165	}
2166}
2167
2168/*
2169 *	txEA()
2170 *
2171 * function:	acquire maplock for EA/ACL extents or
2172 *		set COMMIT_INLINE flag;
2173 */
2174void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
2175{
2176	struct tlock *tlck = NULL;
2177	struct pxd_lock *maplock = NULL, *pxdlock = NULL;
2178
2179	/*
2180	 * format maplock for alloc of new EA extent
2181	 */
2182	if (newea) {
2183		/* Since the newea could be a completely zeroed entry we need to
2184		 * check for the two flags which indicate we should actually
2185		 * commit new EA data
2186		 */
2187		if (newea->flag & DXD_EXTENT) {
2188			tlck = txMaplock(tid, ip, tlckMAP);
2189			maplock = (struct pxd_lock *) & tlck->lock;
2190			pxdlock = (struct pxd_lock *) maplock;
2191			pxdlock->flag = mlckALLOCPXD;
2192			PXDaddress(&pxdlock->pxd, addressDXD(newea));
2193			PXDlength(&pxdlock->pxd, lengthDXD(newea));
2194			pxdlock++;
2195			maplock->index = 1;
2196		} else if (newea->flag & DXD_INLINE) {
2197			tlck = NULL;
2198
2199			set_cflag(COMMIT_Inlineea, ip);
2200		}
2201	}
2202
2203	/*
2204	 * format maplock for free of old EA extent
2205	 */
2206	if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) {
2207		if (tlck == NULL) {
2208			tlck = txMaplock(tid, ip, tlckMAP);
2209			maplock = (struct pxd_lock *) & tlck->lock;
2210			pxdlock = (struct pxd_lock *) maplock;
2211			maplock->index = 0;
2212		}
2213		pxdlock->flag = mlckFREEPXD;
2214		PXDaddress(&pxdlock->pxd, addressDXD(oldea));
2215		PXDlength(&pxdlock->pxd, lengthDXD(oldea));
2216		maplock->index++;
2217	}
2218}
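
/*
 * Illustrative caller (a sketch; new_dxd is a stand-in name): the EA
 * code passes the inode's old descriptor and the new one, roughly:
 *
 *	txEA(tid, ip, &JFS_IP(ip)->ea, &new_dxd);
 */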
2219
2220/*
2221 *	txForce()
2222 *
2223 * function: synchronously write pages locked by transaction
2224 *	     after txLog() but before txUpdateMap();
2225 */
2226static void txForce(struct tblock * tblk)
2227{
2228	struct tlock *tlck;
2229	lid_t lid, next;
2230	struct metapage *mp;
2231
2232	/*
2233	 * reverse the order of transaction tlocks in
2234	 * careful update order of address index pages
2235	 * (right to left, bottom up)
2236	 */
2237	tlck = lid_to_tlock(tblk->next);
2238	lid = tlck->next;
2239	tlck->next = 0;
2240	while (lid) {
2241		tlck = lid_to_tlock(lid);
2242		next = tlck->next;
2243		tlck->next = tblk->next;
2244		tblk->next = lid;
2245		lid = next;
2246	}
2247
2248	/*
2249	 * synchronously write the page, and
2250	 * hold the page for txUpdateMap();
2251	 */
2252	for (lid = tblk->next; lid; lid = next) {
2253		tlck = lid_to_tlock(lid);
2254		next = tlck->next;
2255
2256		if ((mp = tlck->mp) != NULL &&
2257		    (tlck->type & tlckBTROOT) == 0) {
2258			assert(mp->xflag & COMMIT_PAGE);
2259
2260			if (tlck->flag & tlckWRITEPAGE) {
2261				tlck->flag &= ~tlckWRITEPAGE;
2262
2263				/* do not release page to freelist */
2264				force_metapage(mp);
2265#if 0
2266				/*
2267				 * The "right" thing to do here is to
2268				 * synchronously write the metadata.
2269				 * With the current implementation this
2270				 * is hard since write_metapage requires
2271				 * us to kunmap & remap the page.  If we
2272				 * have tlocks pointing into the metadata
2273				 * pages, we don't want to do this.  I think
2274				 * we can get by with synchronously writing
2275				 * the pages when they are released.
2276				 */
2277				assert(mp->nohomeok);
2278				set_bit(META_dirty, &mp->flag);
2279				set_bit(META_sync, &mp->flag);
2280#endif
2281			}
2282		}
2283	}
2284}
2285
2286/*
2287 *	txUpdateMap()
2288 *
2289 * function:	update persistent allocation map (and working map
2290 *		if appropriate);
2291 *
 2292 * parameter:	tblk	- committed transaction block;
2293 */
2294static void txUpdateMap(struct tblock * tblk)
2295{
2296	struct inode *ip;
2297	struct inode *ipimap;
2298	lid_t lid;
2299	struct tlock *tlck;
2300	struct maplock *maplock;
2301	struct pxd_lock pxdlock;
2302	int maptype;
2303	int k, nlock;
2304	struct metapage *mp = NULL;
2305
2306	ipimap = JFS_SBI(tblk->sb)->ipimap;
2307
2308	maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP;
2309
2310
2311	/*
2312	 *	update block allocation map
2313	 *
2314	 * update allocation state in pmap (and wmap) and
2315	 * update lsn of the pmap page;
2316	 */
2317	/*
2318	 * scan each tlock/page of transaction for block allocation/free:
2319	 *
2320	 * for each tlock/page of transaction, update map.
2321	 *  ? are there tlock for pmap and pwmap at the same time ?
2322	 */
2323	for (lid = tblk->next; lid; lid = tlck->next) {
2324		tlck = lid_to_tlock(lid);
2325
2326		if ((tlck->flag & tlckUPDATEMAP) == 0)
2327			continue;
2328
2329		if (tlck->flag & tlckFREEPAGE) {
2330			/*
2331			 * Another thread may attempt to reuse freed space
2332			 * immediately, so we want to get rid of the metapage
2333			 * before anyone else has a chance to get it.
2334			 * Lock metapage, update maps, then invalidate
2335			 * the metapage.
2336			 */
2337			mp = tlck->mp;
2338			ASSERT(mp->xflag & COMMIT_PAGE);
2339			grab_metapage(mp);
2340		}
2341
2342		/*
2343		 * extent list:
2344		 * . in-line PXD list:
2345		 * . out-of-line XAD list:
2346		 */
2347		maplock = (struct maplock *) & tlck->lock;
2348		nlock = maplock->index;
2349
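		/* maplock entries were formatted back-to-back in the
		 * tlock's lock area; 'index' counts them (see txEA()
		 * for an example that formats two entries)
		 */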
2350		for (k = 0; k < nlock; k++, maplock++) {
2351			/*
2352			 * allocate blocks in persistent map:
2353			 *
2354			 * blocks have been allocated from wmap at alloc time;
2355			 */
2356			if (maplock->flag & mlckALLOC) {
2357				txAllocPMap(ipimap, maplock, tblk);
2358			}
2359			/*
2360			 * free blocks in persistent and working map:
2361			 * blocks will be freed in pmap and then in wmap;
2362			 *
2363			 * ? tblock specifies the PMAP/PWMAP based upon
2364			 * transaction
2365			 *
2366			 * free blocks in persistent map:
2367			 * blocks will be freed from wmap at last reference
2368			 * release of the object for regular files;
2369			 *
 2370			 * Always free blocks from both persistent & working
2371			 * maps for directories
2372			 */
2373			else {	/* (maplock->flag & mlckFREE) */
2374
2375				if (tlck->flag & tlckDIRECTORY)
2376					txFreeMap(ipimap, maplock,
2377						  tblk, COMMIT_PWMAP);
2378				else
2379					txFreeMap(ipimap, maplock,
2380						  tblk, maptype);
2381			}
2382		}
2383		if (tlck->flag & tlckFREEPAGE) {
2384			if (!(tblk->flag & tblkGC_LAZY)) {
2385				/* This is equivalent to txRelease */
2386				ASSERT(mp->lid == lid);
2387				tlck->mp->lid = 0;
2388			}
2389			assert(mp->nohomeok == 1);
2390			metapage_homeok(mp);
2391			discard_metapage(mp);
2392			tlck->mp = NULL;
2393		}
2394	}
2395	/*
2396	 *	update inode allocation map
2397	 *
2398	 * update allocation state in pmap and
2399	 * update lsn of the pmap page;
2400	 * update in-memory inode flag/state
2401	 *
2402	 * unlock mapper/write lock
2403	 */
2404	if (tblk->xflag & COMMIT_CREATE) {
2405		diUpdatePMap(ipimap, tblk->ino, false, tblk);
2406		/* update persistent block allocation map
2407		 * for the allocation of inode extent;
2408		 */
2409		pxdlock.flag = mlckALLOCPXD;
2410		pxdlock.pxd = tblk->u.ixpxd;
2411		pxdlock.index = 1;
2412		txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk);
2413	} else if (tblk->xflag & COMMIT_DELETE) {
2414		ip = tblk->u.ip;
2415		diUpdatePMap(ipimap, ip->i_ino, true, tblk);
2416		iput(ip);
2417	}
2418}
2419
2420/*
2421 *	txAllocPMap()
2422 *
2423 * function: allocate from persistent map;
2424 *
2425 * parameter:
2426 *	ipbmap	-
 2427 *	maplock	-
2428 *		xad list:
2429 *		pxd:
2430 *
2431 *	maptype -
2432 *		allocate from persistent map;
2433 *		free from persistent map;
 2434 *		(e.g., tmp file - free from working map at release
2435 *		 of last reference);
2436 *		free from persistent and working map;
2437 *
2438 *	lsn	- log sequence number;
2439 */
2440static void txAllocPMap(struct inode *ip, struct maplock * maplock,
2441			struct tblock * tblk)
2442{
2443	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2444	struct xdlistlock *xadlistlock;
2445	xad_t *xad;
2446	s64 xaddr;
2447	int xlen;
2448	struct pxd_lock *pxdlock;
2449	struct xdlistlock *pxdlistlock;
2450	pxd_t *pxd;
2451	int n;
2452
2453	/*
2454	 * allocate from persistent map;
2455	 */
2456	if (maplock->flag & mlckALLOCXADLIST) {
2457		xadlistlock = (struct xdlistlock *) maplock;
2458		xad = xadlistlock->xdlist;
2459		for (n = 0; n < xadlistlock->count; n++, xad++) {
2460			if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
2461				xaddr = addressXAD(xad);
2462				xlen = lengthXAD(xad);
2463				dbUpdatePMap(ipbmap, false, xaddr,
2464					     (s64) xlen, tblk);
2465				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
2466				jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2467					 (ulong) xaddr, xlen);
2468			}
2469		}
2470	} else if (maplock->flag & mlckALLOCPXD) {
2471		pxdlock = (struct pxd_lock *) maplock;
2472		xaddr = addressPXD(&pxdlock->pxd);
2473		xlen = lengthPXD(&pxdlock->pxd);
2474		dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk);
2475		jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen);
2476	} else {		/* (maplock->flag & mlckALLOCPXDLIST) */
2477
2478		pxdlistlock = (struct xdlistlock *) maplock;
2479		pxd = pxdlistlock->xdlist;
2480		for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2481			xaddr = addressPXD(pxd);
2482			xlen = lengthPXD(pxd);
2483			dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen,
2484				     tblk);
2485			jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2486				 (ulong) xaddr, xlen);
2487		}
2488	}
2489}
2490
2491/*
2492 *	txFreeMap()
2493 *
2494 * function:	free from persistent and/or working map;
2495 *
2496 * todo: optimization
2497 */
2498void txFreeMap(struct inode *ip,
2499	       struct maplock * maplock, struct tblock * tblk, int maptype)
2500{
2501	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2502	struct xdlistlock *xadlistlock;
2503	xad_t *xad;
2504	s64 xaddr;
2505	int xlen;
2506	struct pxd_lock *pxdlock;
2507	struct xdlistlock *pxdlistlock;
2508	pxd_t *pxd;
2509	int n;
2510
2511	jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x",
2512		 tblk, maplock, maptype);
2513
2514	/*
2515	 * free from persistent map;
2516	 */
2517	if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) {
2518		if (maplock->flag & mlckFREEXADLIST) {
2519			xadlistlock = (struct xdlistlock *) maplock;
2520			xad = xadlistlock->xdlist;
2521			for (n = 0; n < xadlistlock->count; n++, xad++) {
2522				if (!(xad->flag & XAD_NEW)) {
2523					xaddr = addressXAD(xad);
2524					xlen = lengthXAD(xad);
2525					dbUpdatePMap(ipbmap, true, xaddr,
2526						     (s64) xlen, tblk);
2527					jfs_info("freePMap: xaddr:0x%lx "
2528						 "xlen:%d",
2529						 (ulong) xaddr, xlen);
2530				}
2531			}
2532		} else if (maplock->flag & mlckFREEPXD) {
2533			pxdlock = (struct pxd_lock *) maplock;
2534			xaddr = addressPXD(&pxdlock->pxd);
2535			xlen = lengthPXD(&pxdlock->pxd);
2536			dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen,
2537				     tblk);
2538			jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2539				 (ulong) xaddr, xlen);
 2540		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
2541
2542			pxdlistlock = (struct xdlistlock *) maplock;
2543			pxd = pxdlistlock->xdlist;
2544			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2545				xaddr = addressPXD(pxd);
2546				xlen = lengthPXD(pxd);
2547				dbUpdatePMap(ipbmap, true, xaddr,
2548					     (s64) xlen, tblk);
2549				jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2550					 (ulong) xaddr, xlen);
2551			}
2552		}
2553	}
2554
2555	/*
2556	 * free from working map;
2557	 */
2558	if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) {
2559		if (maplock->flag & mlckFREEXADLIST) {
2560			xadlistlock = (struct xdlistlock *) maplock;
2561			xad = xadlistlock->xdlist;
2562			for (n = 0; n < xadlistlock->count; n++, xad++) {
2563				xaddr = addressXAD(xad);
2564				xlen = lengthXAD(xad);
2565				dbFree(ip, xaddr, (s64) xlen);
2566				xad->flag = 0;
2567				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2568					 (ulong) xaddr, xlen);
2569			}
2570		} else if (maplock->flag & mlckFREEPXD) {
2571			pxdlock = (struct pxd_lock *) maplock;
2572			xaddr = addressPXD(&pxdlock->pxd);
2573			xlen = lengthPXD(&pxdlock->pxd);
2574			dbFree(ip, xaddr, (s64) xlen);
2575			jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2576				 (ulong) xaddr, xlen);
2577		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
2578
2579			pxdlistlock = (struct xdlistlock *) maplock;
2580			pxd = pxdlistlock->xdlist;
2581			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2582				xaddr = addressPXD(pxd);
2583				xlen = lengthPXD(pxd);
2584				dbFree(ip, xaddr, (s64) xlen);
2585				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2586					 (ulong) xaddr, xlen);
2587			}
2588		}
2589	}
2590}
2591
2592/*
2593 *	txFreelock()
2594 *
2595 * function:	remove tlock from inode anonymous locklist
2596 */
2597void txFreelock(struct inode *ip)
2598{
2599	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2600	struct tlock *xtlck, *tlck;
2601	lid_t xlid = 0, lid;
2602
2603	if (!jfs_ip->atlhead)
2604		return;
2605
2606	TXN_LOCK();
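	/*
	 * Treat &atlhead as a dummy tlock so the list head can be updated
	 * through xtlck->next like any real element (this works because
	 * 'next' is the first field of struct tlock).
	 */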
2607	xtlck = (struct tlock *) &jfs_ip->atlhead;
2608
2609	while ((lid = xtlck->next) != 0) {
2610		tlck = lid_to_tlock(lid);
2611		if (tlck->flag & tlckFREELOCK) {
2612			xtlck->next = tlck->next;
2613			txLockFree(lid);
2614		} else {
2615			xtlck = tlck;
2616			xlid = lid;
2617		}
2618	}
2619
2620	if (jfs_ip->atlhead)
2621		jfs_ip->atltail = xlid;
2622	else {
2623		jfs_ip->atltail = 0;
2624		/*
2625		 * If inode was on anon_list, remove it
2626		 */
2627		list_del_init(&jfs_ip->anon_inode_list);
2628	}
2629	TXN_UNLOCK();
2630}
2631
2632/*
2633 *	txAbort()
2634 *
2635 * function: abort tx before commit;
2636 *
2637 * frees line-locks and segment locks for all
2638 * segments in comdata structure.
2639 * Optionally sets state of file-system to FM_DIRTY in super-block.
 2640 * The log age of any page-frame in memory for which the caller
 2641 * holds a tlock is reset to 0 (to avoid logwrap).
2642 */
2643void txAbort(tid_t tid, int dirty)
2644{
2645	lid_t lid, next;
2646	struct metapage *mp;
2647	struct tblock *tblk = tid_to_tblock(tid);
2648	struct tlock *tlck;
2649
2650	/*
2651	 * free tlocks of the transaction
2652	 */
2653	for (lid = tblk->next; lid; lid = next) {
2654		tlck = lid_to_tlock(lid);
2655		next = tlck->next;
2656		mp = tlck->mp;
2657		JFS_IP(tlck->ip)->xtlid = 0;
2658
2659		if (mp) {
2660			mp->lid = 0;
2661
2662			/*
 2663			 * reset lsn of page to avoid logwrap:
2664			 *
2665			 * (page may have been previously committed by another
2666			 * transaction(s) but has not been paged, i.e.,
2667			 * it may be on logsync list even though it has not
2668			 * been logged for the current tx.)
2669			 */
2670			if (mp->xflag & COMMIT_PAGE && mp->lsn)
2671				LogSyncRelease(mp);
2672		}
2673		/* insert tlock at head of freelist */
2674		TXN_LOCK();
2675		txLockFree(lid);
2676		TXN_UNLOCK();
2677	}
2678
2679	/* caller will free the transaction block */
2680
2681	tblk->next = tblk->last = 0;
2682
2683	/*
2684	 * mark filesystem dirty
2685	 */
2686	if (dirty)
2687		jfs_error(tblk->sb, "txAbort");
2688
2689	return;
2690}
2691
2692/*
2693 *	txLazyCommit(void)
2694 *
2695 *	All transactions except those changing ipimap (COMMIT_FORCE) are
 2696 *	processed by this routine.  This ensures that the inode and block
2697 *	allocation maps are updated in order.  For synchronous transactions,
2698 *	let the user thread finish processing after txUpdateMap() is called.
2699 */
2700static void txLazyCommit(struct tblock * tblk)
2701{
2702	struct jfs_log *log;
2703
2704	while (((tblk->flag & tblkGC_READY) == 0) &&
2705	       ((tblk->flag & tblkGC_UNLOCKED) == 0)) {
2706		/* We must have gotten ahead of the user thread
2707		 */
2708		jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk);
2709		yield();
2710	}
2711
2712	jfs_info("txLazyCommit: processing tblk 0x%p", tblk);
2713
2714	txUpdateMap(tblk);
2715
2716	log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
2717
2718	spin_lock_irq(&log->gclock);	// LOGGC_LOCK
2719
2720	tblk->flag |= tblkGC_COMMITTED;
2721
2722	if (tblk->flag & tblkGC_READY)
2723		log->gcrtc--;
2724
2725	wake_up_all(&tblk->gcwait);	// LOGGC_WAKEUP
2726
2727	/*
2728	 * Can't release log->gclock until we've tested tblk->flag
2729	 */
2730	if (tblk->flag & tblkGC_LAZY) {
2731		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
2732		txUnlock(tblk);
2733		tblk->flag &= ~tblkGC_LAZY;
2734		txEnd(tblk - TxBlock);	/* Convert back to tid */
2735	} else
2736		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
2737
2738	jfs_info("txLazyCommit: done: tblk = 0x%p", tblk);
2739}
2740
2741/*
2742 *	jfs_lazycommit(void)
2743 *
2744 *	To be run as a kernel daemon.  If lbmIODone is called in an interrupt
2745 *	context, or where blocking is not wanted, this routine will process
2746 *	committed transactions from the unlock queue.
2747 */
2748int jfs_lazycommit(void *arg)
2749{
2750	int WorkDone;
2751	struct tblock *tblk;
2752	unsigned long flags;
2753	struct jfs_sb_info *sbi;
2754
2755	do {
2756		LAZY_LOCK(flags);
2757		jfs_commit_thread_waking = 0;	/* OK to wake another thread */
2758		while (!list_empty(&TxAnchor.unlock_queue)) {
2759			WorkDone = 0;
2760			list_for_each_entry(tblk, &TxAnchor.unlock_queue,
2761					    cqueue) {
2762
2763				sbi = JFS_SBI(tblk->sb);
2764				/*
2765				 * For each volume, the transactions must be
2766				 * handled in order.  If another commit thread
2767				 * is handling a tblk for this superblock,
2768				 * skip it
2769				 */
2770				if (sbi->commit_state & IN_LAZYCOMMIT)
2771					continue;
2772
2773				sbi->commit_state |= IN_LAZYCOMMIT;
2774				WorkDone = 1;
2775
2776				/*
2777				 * Remove transaction from queue
2778				 */
2779				list_del(&tblk->cqueue);
2780
2781				LAZY_UNLOCK(flags);
2782				txLazyCommit(tblk);
2783				LAZY_LOCK(flags);
2784
2785				sbi->commit_state &= ~IN_LAZYCOMMIT;
2786				/*
2787				 * Don't continue in the for loop.  (We can't
2788				 * anyway, it's unsafe!)  We want to go back to
2789				 * the beginning of the list.
2790				 */
2791				break;
2792			}
2793
2794			/* If there was nothing to do, don't continue */
2795			if (!WorkDone)
2796				break;
2797		}
2798		/* In case a wakeup came while all threads were active */
2799		jfs_commit_thread_waking = 0;
2800
2801		if (freezing(current)) {
2802			LAZY_UNLOCK(flags);
2803			refrigerator();
2804		} else {
2805			DECLARE_WAITQUEUE(wq, current);
2806
2807			add_wait_queue(&jfs_commit_thread_wait, &wq);
2808			set_current_state(TASK_INTERRUPTIBLE);
2809			LAZY_UNLOCK(flags);
2810			schedule();
2811			__set_current_state(TASK_RUNNING);
2812			remove_wait_queue(&jfs_commit_thread_wait, &wq);
2813		}
2814	} while (!kthread_should_stop());
2815
2816	if (!list_empty(&TxAnchor.unlock_queue))
2817		jfs_err("jfs_lazycommit being killed with pending transactions!");
2818	else
2819		jfs_info("jfs_lazycommit being killed");
2820	return 0;
2821}
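
/*
 * A sketch of how a daemon like this is driven with the kthread API;
 * the variable and function names below are illustrative (the module
 * init code keeps its own task_struct pointers), but kthread_run() and
 * kthread_stop() are the standard pairing with the
 * kthread_should_stop() loop above:
 */
#if 0
static struct task_struct *commit_task;

static int start_lazycommit(void)
{
	commit_task = kthread_run(jfs_lazycommit, NULL, "jfs_commit");
	return IS_ERR(commit_task) ? PTR_ERR(commit_task) : 0;
}

static void stop_lazycommit(void)
{
	/* makes kthread_should_stop() return true, then waits for exit */
	kthread_stop(commit_task);
}
#endif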
2822
2823void txLazyUnlock(struct tblock * tblk)
2824{
2825	unsigned long flags;
2826
2827	LAZY_LOCK(flags);
2828
2829	list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue);
2830	/*
2831	 * Don't wake up a commit thread if there is already one servicing
2832	 * this superblock, or if the last one we woke up hasn't started yet.
2833	 */
2834	if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) &&
2835	    !jfs_commit_thread_waking) {
2836		jfs_commit_thread_waking = 1;
2837		wake_up(&jfs_commit_thread_wait);
2838	}
2839	LAZY_UNLOCK(flags);
2840}
2841
2842static void LogSyncRelease(struct metapage * mp)
2843{
2844	struct jfs_log *log = mp->log;
2845
2846	assert(mp->nohomeok);
2847	assert(log);
2848	metapage_homeok(mp);
2849}
2850
2851/*
2852 *	txQuiesce
2853 *
2854 *	Block all new transactions and push anonymous transactions to
2855 *	completion
2856 *
2857 *	This does almost the same thing as jfs_sync below.  We don't
2858 *	worry about deadlocking when jfs_tlocks_low is set, since we would
2859 *	expect jfs_sync to get us out of that jam.
2860 */
2861void txQuiesce(struct super_block *sb)
2862{
2863	struct inode *ip;
2864	struct jfs_inode_info *jfs_ip;
2865	struct jfs_log *log = JFS_SBI(sb)->log;
2866	tid_t tid;
2867
2868	set_bit(log_QUIESCE, &log->flag);
2869
2870	TXN_LOCK();
2871restart:
2872	while (!list_empty(&TxAnchor.anon_list)) {
2873		jfs_ip = list_entry(TxAnchor.anon_list.next,
2874				    struct jfs_inode_info,
2875				    anon_inode_list);
2876		ip = &jfs_ip->vfs_inode;
2877
2878		/*
2879		 * inode will be removed from anonymous list
2880		 * when it is committed
2881		 */
2882		TXN_UNLOCK();
2883		tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
2884		mutex_lock(&jfs_ip->commit_mutex);
2885		txCommit(tid, 1, &ip, 0);
2886		txEnd(tid);
2887		mutex_unlock(&jfs_ip->commit_mutex);
2888		/*
2889		 * Reschedule to be safe; it is not clear how
2890		 * long we can run without blocking
2891		 */
2892		cond_resched();
2893		TXN_LOCK();
2894	}
2895
2896	/*
2897	 * If jfs_sync is running in parallel, there could be some inodes
2898	 * on anon_list2.  Let's check.
2899	 */
2900	if (!list_empty(&TxAnchor.anon_list2)) {
2901		list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2902		INIT_LIST_HEAD(&TxAnchor.anon_list2);
2903		goto restart;
2904	}
2905	TXN_UNLOCK();
2906
2907	/*
2908	 * We may need to kick off the group commit
2909	 */
2910	jfs_flush_journal(log, 0);
2911}
2912
2913/*
2914 * txResume()
2915 *
2916 * Allows transactions to start again following txQuiesce
2917 */
2918void txResume(struct super_block *sb)
2919{
2920	struct jfs_log *log = JFS_SBI(sb)->log;
2921
2922	clear_bit(log_QUIESCE, &log->flag);
2923	TXN_WAKEUP(&log->syncwait);
2924}
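
/*
 * txQuiesce()/txResume() always come in pairs around an operation that
 * needs a stable filesystem image.  A hedged sketch of a caller, modeled
 * on a freeze-style path (the function name is illustrative):
 */
#if 0
static int example_freeze(struct super_block *sb)
{
	txQuiesce(sb);		/* block new txns, push anonymous ones */
	/* ... operate on the quiesced filesystem ... */
	txResume(sb);		/* clear log_QUIESCE, wake syncwait */
	return 0;
}
#endif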
2925
2926/*
2927 *	jfs_sync(void)
2928 *
2929 *	To be run as a kernel daemon.  This is awakened when tlocks run low.
2930 *	We write out any inodes holding anonymous tlocks so that their
2931 *	tlocks become available again.
2932 */
2933int jfs_sync(void *arg)
2934{
2935	struct inode *ip;
2936	struct jfs_inode_info *jfs_ip;
2937	tid_t tid;
2938
2939	do {
2940		/*
2941		 * write each inode on the anonymous inode list
2942		 */
2943		TXN_LOCK();
2944		while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) {
2945			jfs_ip = list_entry(TxAnchor.anon_list.next,
2946					    struct jfs_inode_info,
2947					    anon_inode_list);
2948			ip = &jfs_ip->vfs_inode;
2949
2950			if (!igrab(ip)) {
2951				/*
2952				 * Inode is being freed
2953				 */
2954				list_del_init(&jfs_ip->anon_inode_list);
2955			} else if (mutex_trylock(&jfs_ip->commit_mutex)) {
2956				/*
2957				 * inode will be removed from anonymous list
2958				 * when it is committed
2959				 */
2960				TXN_UNLOCK();
2961				tid = txBegin(ip->i_sb, COMMIT_INODE);
2962				txCommit(tid, 1, &ip, 0);
2963				txEnd(tid);
2964				mutex_unlock(&jfs_ip->commit_mutex);
2965
2966				iput(ip);
2967				/*
2968				 * Reschedule to be safe; it is not clear how
2969				 * long we can run without blocking
2970				 */
2971				cond_resched();
2972				TXN_LOCK();
2973			} else {
2974				/* We can't get the commit mutex.  It may
2975				 * be held by a thread waiting for tlocks,
2976				 * so let's not block here.  Save the inode
2977				 * so it can go back on the anon_list.
2978				 */
2979
2980				/* Take off anon_list */
2981				list_del(&jfs_ip->anon_inode_list);
2982
2983				/* Put on anon_list2 */
2984				list_add(&jfs_ip->anon_inode_list,
2985					 &TxAnchor.anon_list2);
2986
2987				TXN_UNLOCK();
2988				iput(ip);
2989				TXN_LOCK();
2990			}
2991		}
2992		/* Add anon_list2 back to anon_list */
2993		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2994
2995		if (freezing(current)) {
2996			TXN_UNLOCK();
2997			refrigerator();
2998		} else {
2999			set_current_state(TASK_INTERRUPTIBLE);
3000			TXN_UNLOCK();
3001			schedule();
3002			__set_current_state(TASK_RUNNING);
3003		}
3004	} while (!kthread_should_stop());
3005
3006	jfs_info("jfs_sync being killed");
3007	return 0;
3008}
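
/*
 * Sketch of the wakeup side: when tlock allocation crosses its high-water
 * mark, the allocator sets jfs_tlocks_low and wakes this thread.  The
 * names TxLockHWM and jfsSyncThread follow the in-tree allocator, but
 * treat this fragment as an assumption, not a verbatim copy:
 */
#if 0
	if (++TxAnchor.tlocksInUse > TxLockHWM && !jfs_tlocks_low) {
		jfs_tlocks_low = 1;
		wake_up_process(jfsSyncThread);
	}
#endif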
3009
3010#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
3011static int jfs_txanchor_proc_show(struct seq_file *m, void *v)
3012{
3013	char *freewait;
3014	char *freelockwait;
3015	char *lowlockwait;
3016
3017	freewait =
3018	    waitqueue_active(&TxAnchor.freewait) ? "active" : "empty";
3019	freelockwait =
3020	    waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty";
3021	lowlockwait =
3022	    waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty";
3023
3024	seq_printf(m,
3025		       "JFS TxAnchor\n"
3026		       "============\n"
3027		       "freetid = %d\n"
3028		       "freewait = %s\n"
3029		       "freelock = %d\n"
3030		       "freelockwait = %s\n"
3031		       "lowlockwait = %s\n"
3032		       "tlocksInUse = %d\n"
3033		       "jfs_tlocks_low = %d\n"
3034		       "unlock_queue is %sempty\n",
3035		       TxAnchor.freetid,
3036		       freewait,
3037		       TxAnchor.freelock,
3038		       freelockwait,
3039		       lowlockwait,
3040		       TxAnchor.tlocksInUse,
3041		       jfs_tlocks_low,
3042		       list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
3043	return 0;
3044}
3045
3046static int jfs_txanchor_proc_open(struct inode *inode, struct file *file)
3047{
3048	return single_open(file, jfs_txanchor_proc_show, NULL);
3049}
3050
3051const struct file_operations jfs_txanchor_proc_fops = {
3052	.owner		= THIS_MODULE,
3053	.open		= jfs_txanchor_proc_open,
3054	.read		= seq_read,
3055	.llseek		= seq_lseek,
3056	.release	= single_release,
3057};
3058#endif
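
/*
 * With CONFIG_JFS_DEBUG set, the snapshot above is exposed through procfs
 * (typically as /proc/fs/jfs/TxAnchor).  Output takes the form below; the
 * values shown are illustrative:
 *
 *	$ cat /proc/fs/jfs/TxAnchor
 *	JFS TxAnchor
 *	============
 *	freetid = 12
 *	freewait = empty
 *	freelock = 409
 *	freelockwait = empty
 *	lowlockwait = empty
 *	tlocksInUse = 23
 *	jfs_tlocks_low = 0
 *	unlock_queue is empty
 */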
3059
3060#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
3061static int jfs_txstats_proc_show(struct seq_file *m, void *v)
3062{
3063	seq_printf(m,
3064		       "JFS TxStats\n"
3065		       "===========\n"
3066		       "calls to txBegin = %d\n"
3067		       "txBegin blocked by sync barrier = %d\n"
3068		       "txBegin blocked by tlocks low = %d\n"
3069		       "txBegin blocked by no free tid = %d\n"
3070		       "calls to txBeginAnon = %d\n"
3071		       "txBeginAnon blocked by sync barrier = %d\n"
3072		       "txBeginAnon blocked by tlocks low = %d\n"
3073		       "calls to txLockAlloc = %d\n"
3074		       "txLockAlloc blocked by no free lock = %d\n",
3075		       TxStat.txBegin,
3076		       TxStat.txBegin_barrier,
3077		       TxStat.txBegin_lockslow,
3078		       TxStat.txBegin_freetid,
3079		       TxStat.txBeginAnon,
3080		       TxStat.txBeginAnon_barrier,
3081		       TxStat.txBeginAnon_lockslow,
3082		       TxStat.txLockAlloc,
3083		       TxStat.txLockAlloc_freelock);
3084	return 0;
3085}
3086
3087static int jfs_txstats_proc_open(struct inode *inode, struct file *file)
3088{
3089	return single_open(file, jfs_txstats_proc_show, NULL);
3090}
3091
3092const struct file_operations jfs_txstats_proc_fops = {
3093	.owner		= THIS_MODULE,
3094	.open		= jfs_txstats_proc_open,
3095	.read		= seq_read,
3096	.llseek		= seq_lseek,
3097	.release	= single_release,
3098};
3099#endif
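
/*
 * The TxStat counters printed above are bumped at the call sites they
 * name, guarded by CONFIG_JFS_STATISTICS.  A sketch of the pattern
 * (INCREMENT is assumed to be the usual statistics helper; the locking
 * and the barrier test mirror txBegin()):
 */
#if 0
	TXN_LOCK();
	INCREMENT(TxStat.txBegin);	/* on entry to txBegin() */
	if (test_bit(log_SYNCBARRIER, &log->flag))
		INCREMENT(TxStat.txBegin_barrier);
#endif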