   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *   Copyright (C) International Business Machines Corp., 2000-2005
   4 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   5 */
   6
   7/*
   8 *	jfs_txnmgr.c: transaction manager
   9 *
  10 * notes:
  11 * transaction starts with txBegin() and ends with txCommit()
  12 * or txAbort().
  13 *
  14 * tlock is acquired at the time of update;
  15 * (obviate scan at commit time for xtree and dtree)
   16 * tlock and mp point to each other;
  17 * (no hashlist for mp -> tlock).
  18 *
  19 * special cases:
  20 * tlock on in-memory inode:
  21 * in-place tlock in the in-memory inode itself;
  22 * converted to page lock by iWrite() at commit time.
  23 *
  24 * tlock during write()/mmap() under anonymous transaction (tid = 0):
  25 * transferred (?) to transaction at commit time.
  26 *
  27 * use the page itself to update allocation maps
  28 * (obviate intermediate replication of allocation/deallocation data)
  29 * hold on to mp+lock thru update of maps
  30 */
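/*
 * Illustrative lifecycle sketch (not part of the original file): a
 * typical metadata update in the jfs namei/inode code brackets its
 * work roughly as below, with txLock() reached indirectly through the
 * xtree/dtree routines:
 *
 *	tid = txBegin(dip->i_sb, 0);
 *	... modify btrees/inodes, acquiring tlocks along the way ...
 *	rc = txCommit(tid, 2, &iplist[0], 0);
 *	txEnd(tid);
 */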
  31
  32#include <linux/fs.h>
  33#include <linux/vmalloc.h>
  34#include <linux/completion.h>
  35#include <linux/freezer.h>
  36#include <linux/module.h>
  37#include <linux/moduleparam.h>
  38#include <linux/kthread.h>
  39#include <linux/seq_file.h>
  40#include "jfs_incore.h"
  41#include "jfs_inode.h"
  42#include "jfs_filsys.h"
  43#include "jfs_metapage.h"
  44#include "jfs_dinode.h"
  45#include "jfs_imap.h"
  46#include "jfs_dmap.h"
  47#include "jfs_superblock.h"
  48#include "jfs_debug.h"
  49
  50/*
  51 *	transaction management structures
  52 */
  53static struct {
  54	int freetid;		/* index of a free tid structure */
   55	int freelock;		/* index of first free lock word */
  56	wait_queue_head_t freewait;	/* eventlist of free tblock */
  57	wait_queue_head_t freelockwait;	/* eventlist of free tlock */
  58	wait_queue_head_t lowlockwait;	/* eventlist of ample tlocks */
  59	int tlocksInUse;	/* Number of tlocks in use */
  60	spinlock_t LazyLock;	/* synchronize sync_queue & unlock_queue */
  61/*	struct tblock *sync_queue; * Transactions waiting for data sync */
  62	struct list_head unlock_queue;	/* Txns waiting to be released */
  63	struct list_head anon_list;	/* inodes having anonymous txns */
  64	struct list_head anon_list2;	/* inodes having anonymous txns
  65					   that couldn't be sync'ed */
  66} TxAnchor;
  67
  68int jfs_tlocks_low;		/* Indicates low number of available tlocks */
  69
  70#ifdef CONFIG_JFS_STATISTICS
  71static struct {
  72	uint txBegin;
  73	uint txBegin_barrier;
  74	uint txBegin_lockslow;
  75	uint txBegin_freetid;
  76	uint txBeginAnon;
  77	uint txBeginAnon_barrier;
  78	uint txBeginAnon_lockslow;
  79	uint txLockAlloc;
  80	uint txLockAlloc_freelock;
  81} TxStat;
  82#endif
  83
  84static int nTxBlock = -1;	/* number of transaction blocks */
  85module_param(nTxBlock, int, 0);
  86MODULE_PARM_DESC(nTxBlock,
  87		 "Number of transaction blocks (max:65536)");
  88
  89static int nTxLock = -1;	/* number of transaction locks */
  90module_param(nTxLock, int, 0);
  91MODULE_PARM_DESC(nTxLock,
  92		 "Number of transaction locks (max:65536)");
  93
  94struct tblock *TxBlock;	/* transaction block table */
  95static int TxLockLWM;	/* Low water mark for number of txLocks used */
  96static int TxLockHWM;	/* High water mark for number of txLocks used */
  97static int TxLockVHWM;	/* Very High water mark */
  98struct tlock *TxLock;	/* transaction lock table */
  99
 100/*
 101 *	transaction management lock
 102 */
 103static DEFINE_SPINLOCK(jfsTxnLock);
 104
 105#define TXN_LOCK()		spin_lock(&jfsTxnLock)
 106#define TXN_UNLOCK()		spin_unlock(&jfsTxnLock)
 107
 108#define LAZY_LOCK_INIT()	spin_lock_init(&TxAnchor.LazyLock);
 109#define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
 110#define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)
 111
 112static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
 113static int jfs_commit_thread_waking;
 114
 115/*
  116 * Retry logic exists outside these macros to protect against spurious wakeups.
 117 */
 118static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
 119{
 120	DECLARE_WAITQUEUE(wait, current);
 121
 122	add_wait_queue(event, &wait);
 123	set_current_state(TASK_UNINTERRUPTIBLE);
 124	TXN_UNLOCK();
 125	io_schedule();
 126	remove_wait_queue(event, &wait);
 127}
 128
 129#define TXN_SLEEP(event)\
 130{\
 131	TXN_SLEEP_DROP_LOCK(event);\
 132	TXN_LOCK();\
 133}
 134
 135#define TXN_WAKEUP(event) wake_up_all(event)
 136
 137/*
 138 *	statistics
 139 */
 140static struct {
 141	tid_t maxtid;		/* 4: biggest tid ever used */
 142	lid_t maxlid;		/* 4: biggest lid ever used */
 143	int ntid;		/* 4: # of transactions performed */
 144	int nlid;		/* 4: # of tlocks acquired */
 145	int waitlock;		/* 4: # of tlock wait */
 146} stattx;
 147
 148/*
 149 * forward references
 150 */
 151static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 152		struct tlock * tlck, struct commit * cd);
 153static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 154		struct tlock * tlck);
 155static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 156		struct tlock * tlck);
 157static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 158		struct tlock * tlck);
 159static void txAllocPMap(struct inode *ip, struct maplock * maplock,
 160		struct tblock * tblk);
 161static void txForce(struct tblock * tblk);
 162static int txLog(struct jfs_log * log, struct tblock * tblk,
 163		struct commit * cd);
 164static void txUpdateMap(struct tblock * tblk);
 165static void txRelease(struct tblock * tblk);
 166static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 167	   struct tlock * tlck);
 168static void LogSyncRelease(struct metapage * mp);
 169
 170/*
 171 *		transaction block/lock management
 172 *		---------------------------------
 173 */
 174
 175/*
 176 * Get a transaction lock from the free list.  If the number in use is
 177 * greater than the high water mark, wake up the sync daemon.  This should
 178 * free some anonymous transaction locks.  (TXN_LOCK must be held.)
 179 */
 180static lid_t txLockAlloc(void)
 181{
 182	lid_t lid;
 183
 184	INCREMENT(TxStat.txLockAlloc);
 185	if (!TxAnchor.freelock) {
 186		INCREMENT(TxStat.txLockAlloc_freelock);
 187	}
 188
 189	while (!(lid = TxAnchor.freelock))
 190		TXN_SLEEP(&TxAnchor.freelockwait);
 191	TxAnchor.freelock = TxLock[lid].next;
 192	HIGHWATERMARK(stattx.maxlid, lid);
 193	if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) {
 194		jfs_info("txLockAlloc tlocks low");
 195		jfs_tlocks_low = 1;
 196		wake_up_process(jfsSyncThread);
 197	}
 198
 199	return lid;
 200}
 201
 202static void txLockFree(lid_t lid)
 203{
 204	TxLock[lid].tid = 0;
 205	TxLock[lid].next = TxAnchor.freelock;
 206	TxAnchor.freelock = lid;
 207	TxAnchor.tlocksInUse--;
 208	if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) {
 209		jfs_info("txLockFree jfs_tlocks_low no more");
 210		jfs_tlocks_low = 0;
 211		TXN_WAKEUP(&TxAnchor.lowlockwait);
 212	}
 213	TXN_WAKEUP(&TxAnchor.freelockwait);
 214}
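/*
 * Worked freelist example (behavior read off txInit() below; ignore the
 * 256-entry minimum it enforces): with a toy table of nTxLock == 4,
 * initialization leaves TxAnchor.freelock == 1 with TxLock[1].next == 2,
 * TxLock[2].next == 3 and TxLock[3].next == 0.  txLockAlloc() pops lids
 * from the head and txLockFree() pushes them back, so lid 0 is never
 * handed out and doubles as the end-of-list marker.
 */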
 215
 216/*
 217 * NAME:	txInit()
 218 *
 219 * FUNCTION:	initialize transaction management structures
 220 *
 221 * RETURN:
 222 *
 223 * serialization: single thread at jfs_init()
 224 */
 225int txInit(void)
 226{
 227	int k, size;
 228	struct sysinfo si;
 229
 230	/* Set defaults for nTxLock and nTxBlock if unset */
 231
 232	if (nTxLock == -1) {
 233		if (nTxBlock == -1) {
 234			/* Base default on memory size */
 235			si_meminfo(&si);
 236			if (si.totalram > (256 * 1024)) /* 1 GB */
 237				nTxLock = 64 * 1024;
 238			else
 239				nTxLock = si.totalram >> 2;
 240		} else if (nTxBlock > (8 * 1024))
 241			nTxLock = 64 * 1024;
 242		else
 243			nTxLock = nTxBlock << 3;
 244	}
 245	if (nTxBlock == -1)
 246		nTxBlock = nTxLock >> 3;
 247
 248	/* Verify tunable parameters */
 249	if (nTxBlock < 16)
 250		nTxBlock = 16;	/* No one should set it this low */
 251	if (nTxBlock > 65536)
 252		nTxBlock = 65536;
 253	if (nTxLock < 256)
 254		nTxLock = 256;	/* No one should set it this low */
 255	if (nTxLock > 65536)
 256		nTxLock = 65536;
 257
 258	printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n",
 259	       nTxBlock, nTxLock);
 260	/*
 261	 * initialize transaction block (tblock) table
 262	 *
 263	 * transaction id (tid) = tblock index
 264	 * tid = 0 is reserved.
 265	 */
 266	TxLockLWM = (nTxLock * 4) / 10;
 267	TxLockHWM = (nTxLock * 7) / 10;
 268	TxLockVHWM = (nTxLock * 8) / 10;
 269
 270	size = sizeof(struct tblock) * nTxBlock;
 271	TxBlock = vmalloc(size);
 272	if (TxBlock == NULL)
 273		return -ENOMEM;
 274
 275	for (k = 1; k < nTxBlock - 1; k++) {
 276		TxBlock[k].next = k + 1;
 277		init_waitqueue_head(&TxBlock[k].gcwait);
 278		init_waitqueue_head(&TxBlock[k].waitor);
 279	}
 280	TxBlock[k].next = 0;
 281	init_waitqueue_head(&TxBlock[k].gcwait);
 282	init_waitqueue_head(&TxBlock[k].waitor);
 283
 284	TxAnchor.freetid = 1;
 285	init_waitqueue_head(&TxAnchor.freewait);
 286
 287	stattx.maxtid = 1;	/* statistics */
 288
 289	/*
 290	 * initialize transaction lock (tlock) table
 291	 *
 292	 * transaction lock id = tlock index
 293	 * tlock id = 0 is reserved.
 294	 */
 295	size = sizeof(struct tlock) * nTxLock;
 296	TxLock = vmalloc(size);
 297	if (TxLock == NULL) {
 298		vfree(TxBlock);
 299		return -ENOMEM;
 300	}
 301
 302	/* initialize tlock table */
 303	for (k = 1; k < nTxLock - 1; k++)
 304		TxLock[k].next = k + 1;
 305	TxLock[k].next = 0;
 306	init_waitqueue_head(&TxAnchor.freelockwait);
 307	init_waitqueue_head(&TxAnchor.lowlockwait);
 308
 309	TxAnchor.freelock = 1;
 310	TxAnchor.tlocksInUse = 0;
 311	INIT_LIST_HEAD(&TxAnchor.anon_list);
 312	INIT_LIST_HEAD(&TxAnchor.anon_list2);
 313
 314	LAZY_LOCK_INIT();
 315	INIT_LIST_HEAD(&TxAnchor.unlock_queue);
 316
 317	stattx.maxlid = 1;	/* statistics */
 318
 319	return 0;
 320}
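/*
 * Worked sizing example (assuming 4 KiB pages): on a 512 MB machine
 * si.totalram == 131072 pages, below the 256 * 1024 (1 GB) threshold,
 * so nTxLock = 131072 >> 2 = 32768 and nTxBlock = 32768 >> 3 = 4096.
 * The watermarks then come out to TxLockLWM = 13107, TxLockHWM = 22937
 * and TxLockVHWM = 26214.
 */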
 321
 322/*
 323 * NAME:	txExit()
 324 *
 325 * FUNCTION:	clean up when module is unloaded
 326 */
 327void txExit(void)
 328{
 329	vfree(TxLock);
 330	TxLock = NULL;
 331	vfree(TxBlock);
 332	TxBlock = NULL;
 333}
 334
 335/*
 336 * NAME:	txBegin()
 337 *
 338 * FUNCTION:	start a transaction.
 339 *
 340 * PARAMETER:	sb	- superblock
 341 *		flag	- force for nested tx;
 342 *
 343 * RETURN:	tid	- transaction id
 344 *
  345 * note: the force flag allows starting a tx for a nested tx
  346 * to prevent deadlock on the logsync barrier;
 347 */
 348tid_t txBegin(struct super_block *sb, int flag)
 349{
 350	tid_t t;
 351	struct tblock *tblk;
 352	struct jfs_log *log;
 353
 354	jfs_info("txBegin: flag = 0x%x", flag);
 355	log = JFS_SBI(sb)->log;
 356
 357	TXN_LOCK();
 358
 359	INCREMENT(TxStat.txBegin);
 360
 361      retry:
 362	if (!(flag & COMMIT_FORCE)) {
 363		/*
 364		 * synchronize with logsync barrier
 365		 */
 366		if (test_bit(log_SYNCBARRIER, &log->flag) ||
 367		    test_bit(log_QUIESCE, &log->flag)) {
 368			INCREMENT(TxStat.txBegin_barrier);
 369			TXN_SLEEP(&log->syncwait);
 370			goto retry;
 371		}
 372	}
 373	if (flag == 0) {
 374		/*
 375		 * Don't begin transaction if we're getting starved for tlocks
 376		 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
 377		 * free tlocks)
 378		 */
 379		if (TxAnchor.tlocksInUse > TxLockVHWM) {
 380			INCREMENT(TxStat.txBegin_lockslow);
 381			TXN_SLEEP(&TxAnchor.lowlockwait);
 382			goto retry;
 383		}
 384	}
 385
 386	/*
 387	 * allocate transaction id/block
 388	 */
 389	if ((t = TxAnchor.freetid) == 0) {
 390		jfs_info("txBegin: waiting for free tid");
 391		INCREMENT(TxStat.txBegin_freetid);
 392		TXN_SLEEP(&TxAnchor.freewait);
 393		goto retry;
 394	}
 395
 396	tblk = tid_to_tblock(t);
 397
 398	if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) {
 399		/* Don't let a non-forced transaction take the last tblk */
 400		jfs_info("txBegin: waiting for free tid");
 401		INCREMENT(TxStat.txBegin_freetid);
 402		TXN_SLEEP(&TxAnchor.freewait);
 403		goto retry;
 404	}
 405
 406	TxAnchor.freetid = tblk->next;
 407
 408	/*
 409	 * initialize transaction
 410	 */
 411
 412	/*
 413	 * We can't zero the whole thing or we screw up another thread being
 414	 * awakened after sleeping on tblk->waitor
 415	 *
 416	 * memset(tblk, 0, sizeof(struct tblock));
 417	 */
 418	tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0;
 419
 420	tblk->sb = sb;
 421	++log->logtid;
 422	tblk->logtid = log->logtid;
 423
 424	++log->active;
 425
 426	HIGHWATERMARK(stattx.maxtid, t);	/* statistics */
 427	INCREMENT(stattx.ntid);	/* statistics */
 428
 429	TXN_UNLOCK();
 430
 431	jfs_info("txBegin: returning tid = %d", t);
 432
 433	return t;
 434}
 435
 436/*
 437 * NAME:	txBeginAnon()
 438 *
 439 * FUNCTION:	start an anonymous transaction.
 440 *		Blocks if logsync or available tlocks are low to prevent
 441 *		anonymous tlocks from depleting supply.
 442 *
 443 * PARAMETER:	sb	- superblock
 444 *
 445 * RETURN:	none
 446 */
 447void txBeginAnon(struct super_block *sb)
 448{
 449	struct jfs_log *log;
 450
 451	log = JFS_SBI(sb)->log;
 452
 453	TXN_LOCK();
 454	INCREMENT(TxStat.txBeginAnon);
 455
 456      retry:
 457	/*
 458	 * synchronize with logsync barrier
 459	 */
 460	if (test_bit(log_SYNCBARRIER, &log->flag) ||
 461	    test_bit(log_QUIESCE, &log->flag)) {
 462		INCREMENT(TxStat.txBeginAnon_barrier);
 463		TXN_SLEEP(&log->syncwait);
 464		goto retry;
 465	}
 466
 467	/*
 468	 * Don't begin transaction if we're getting starved for tlocks
 469	 */
 470	if (TxAnchor.tlocksInUse > TxLockVHWM) {
 471		INCREMENT(TxStat.txBeginAnon_lockslow);
 472		TXN_SLEEP(&TxAnchor.lowlockwait);
 473		goto retry;
 474	}
 475	TXN_UNLOCK();
 476}
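/*
 * Usage sketch (paraphrasing the extent-allocation path): the buffered
 * write path throttles itself here before taking anonymous tlocks, then
 * passes tid 0 to the btree code so the tlocks land on the inode's
 * anonymous tlock list:
 *
 *	txBeginAnon(ip->i_sb);
 *	...
 *	rc = xtInsert(0, ip, xflag, xoff, xlen, &xaddr, 0);
 */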
 477
 478/*
 479 *	txEnd()
 480 *
 481 * function: free specified transaction block.
 482 *
 483 *	logsync barrier processing:
 484 *
 485 * serialization:
 486 */
 487void txEnd(tid_t tid)
 488{
 489	struct tblock *tblk = tid_to_tblock(tid);
 490	struct jfs_log *log;
 491
 492	jfs_info("txEnd: tid = %d", tid);
 493	TXN_LOCK();
 494
 495	/*
 496	 * wakeup transactions waiting on the page locked
 497	 * by the current transaction
 498	 */
 499	TXN_WAKEUP(&tblk->waitor);
 500
 501	log = JFS_SBI(tblk->sb)->log;
 502
 503	/*
 504	 * Lazy commit thread can't free this guy until we mark it UNLOCKED,
 505	 * otherwise, we would be left with a transaction that may have been
 506	 * reused.
 507	 *
 508	 * Lazy commit thread will turn off tblkGC_LAZY before calling this
 509	 * routine.
 510	 */
 511	if (tblk->flag & tblkGC_LAZY) {
 512		jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk);
 513		TXN_UNLOCK();
 514
 515		spin_lock_irq(&log->gclock);	// LOGGC_LOCK
 516		tblk->flag |= tblkGC_UNLOCKED;
 517		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
 518		return;
 519	}
 520
 521	jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk);
 522
 523	assert(tblk->next == 0);
 524
 525	/*
 526	 * insert tblock back on freelist
 527	 */
 528	tblk->next = TxAnchor.freetid;
 529	TxAnchor.freetid = tid;
 530
 531	/*
 532	 * mark the tblock not active
 533	 */
 534	if (--log->active == 0) {
 535		clear_bit(log_FLUSH, &log->flag);
 536
 537		/*
 538		 * synchronize with logsync barrier
 539		 */
 540		if (test_bit(log_SYNCBARRIER, &log->flag)) {
 541			TXN_UNLOCK();
 542
 543			/* write dirty metadata & forward log syncpt */
 544			jfs_syncpt(log, 1);
 545
 546			jfs_info("log barrier off: 0x%x", log->lsn);
 547
 548			/* enable new transactions start */
 549			clear_bit(log_SYNCBARRIER, &log->flag);
 550
 551			/* wakeup all waitors for logsync barrier */
 552			TXN_WAKEUP(&log->syncwait);
 553
 554			goto wakeup;
 555		}
 556	}
 557
 558	TXN_UNLOCK();
 559wakeup:
 560	/*
 561	 * wakeup all waitors for a free tblock
 562	 */
 563	TXN_WAKEUP(&TxAnchor.freewait);
 564}
 565
 566/*
 567 *	txLock()
 568 *
 569 * function: acquire a transaction lock on the specified <mp>
 570 *
 571 * parameter:
 572 *
 573 * return:	transaction lock id
 574 *
 575 * serialization:
 576 */
 577struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 578		     int type)
 579{
 580	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 581	int dir_xtree = 0;
 582	lid_t lid;
 583	tid_t xtid;
 584	struct tlock *tlck;
 585	struct xtlock *xtlck;
 586	struct linelock *linelock;
 587	xtpage_t *p;
 588	struct tblock *tblk;
 589
 590	TXN_LOCK();
 591
 592	if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) &&
 593	    !(mp->xflag & COMMIT_PAGE)) {
 594		/*
 595		 * Directory inode is special.  It can have both an xtree tlock
 596		 * and a dtree tlock associated with it.
 597		 */
 598		dir_xtree = 1;
 599		lid = jfs_ip->xtlid;
 600	} else
 601		lid = mp->lid;
 602
 603	/* is page not locked by a transaction ? */
 604	if (lid == 0)
 605		goto allocateLock;
 606
 607	jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid);
 608
 609	/* is page locked by the requester transaction ? */
 610	tlck = lid_to_tlock(lid);
 611	if ((xtid = tlck->tid) == tid) {
 612		TXN_UNLOCK();
 613		goto grantLock;
 614	}
 615
 616	/*
 617	 * is page locked by anonymous transaction/lock ?
 618	 *
 619	 * (page update without transaction (i.e., file write) is
 620	 * locked under anonymous transaction tid = 0:
 621	 * anonymous tlocks maintained on anonymous tlock list of
 622	 * the inode of the page and available to all anonymous
 623	 * transactions until txCommit() time at which point
 624	 * they are transferred to the transaction tlock list of
 625	 * the committing transaction of the inode)
 626	 */
 627	if (xtid == 0) {
 628		tlck->tid = tid;
 629		TXN_UNLOCK();
 630		tblk = tid_to_tblock(tid);
 631		/*
 632		 * The order of the tlocks in the transaction is important
 633		 * (during truncate, child xtree pages must be freed before
 634		 * parent's tlocks change the working map).
 635		 * Take tlock off anonymous list and add to tail of
 636		 * transaction list
 637		 *
 638		 * Note:  We really need to get rid of the tid & lid and
 639		 * use list_head's.  This code is getting UGLY!
 640		 */
 641		if (jfs_ip->atlhead == lid) {
 642			if (jfs_ip->atltail == lid) {
 643				/* only anonymous txn.
 644				 * Remove from anon_list
 645				 */
 646				TXN_LOCK();
 647				list_del_init(&jfs_ip->anon_inode_list);
 648				TXN_UNLOCK();
 649			}
 650			jfs_ip->atlhead = tlck->next;
 651		} else {
 652			lid_t last;
 653			for (last = jfs_ip->atlhead;
 654			     lid_to_tlock(last)->next != lid;
 655			     last = lid_to_tlock(last)->next) {
 656				assert(last);
 657			}
 658			lid_to_tlock(last)->next = tlck->next;
 659			if (jfs_ip->atltail == lid)
 660				jfs_ip->atltail = last;
 661		}
 662
 663		/* insert the tlock at tail of transaction tlock list */
 664
 665		if (tblk->next)
 666			lid_to_tlock(tblk->last)->next = lid;
 667		else
 668			tblk->next = lid;
 669		tlck->next = 0;
 670		tblk->last = lid;
 671
 672		goto grantLock;
 673	}
 674
 675	goto waitLock;
 676
 677	/*
 678	 * allocate a tlock
 679	 */
 680      allocateLock:
 681	lid = txLockAlloc();
 682	tlck = lid_to_tlock(lid);
 683
 684	/*
 685	 * initialize tlock
 686	 */
 687	tlck->tid = tid;
 688
 689	TXN_UNLOCK();
 690
 691	/* mark tlock for meta-data page */
 692	if (mp->xflag & COMMIT_PAGE) {
 693
 694		tlck->flag = tlckPAGELOCK;
 695
 696		/* mark the page dirty and nohomeok */
 697		metapage_nohomeok(mp);
 698
 699		jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
 700			 mp, mp->nohomeok, tid, tlck);
 701
 702		/* if anonymous transaction, and buffer is on the group
 703		 * commit synclist, mark inode to show this.  This will
 704		 * prevent the buffer from being marked nohomeok for too
 705		 * long a time.
 706		 */
 707		if ((tid == 0) && mp->lsn)
 708			set_cflag(COMMIT_Synclist, ip);
 709	}
 710	/* mark tlock for in-memory inode */
 711	else
 712		tlck->flag = tlckINODELOCK;
 713
 714	if (S_ISDIR(ip->i_mode))
 715		tlck->flag |= tlckDIRECTORY;
 716
 717	tlck->type = 0;
 718
 719	/* bind the tlock and the page */
 720	tlck->ip = ip;
 721	tlck->mp = mp;
 722	if (dir_xtree)
 723		jfs_ip->xtlid = lid;
 724	else
 725		mp->lid = lid;
 726
 727	/*
 728	 * enqueue transaction lock to transaction/inode
 729	 */
 730	/* insert the tlock at tail of transaction tlock list */
 731	if (tid) {
 732		tblk = tid_to_tblock(tid);
 733		if (tblk->next)
 734			lid_to_tlock(tblk->last)->next = lid;
 735		else
 736			tblk->next = lid;
 737		tlck->next = 0;
 738		tblk->last = lid;
 739	}
 740	/* anonymous transaction:
 741	 * insert the tlock at head of inode anonymous tlock list
 742	 */
 743	else {
 744		tlck->next = jfs_ip->atlhead;
 745		jfs_ip->atlhead = lid;
 746		if (tlck->next == 0) {
 747			/* This inode's first anonymous transaction */
 748			jfs_ip->atltail = lid;
 749			TXN_LOCK();
 750			list_add_tail(&jfs_ip->anon_inode_list,
 751				      &TxAnchor.anon_list);
 752			TXN_UNLOCK();
 753		}
 754	}
 755
 756	/* initialize type dependent area for linelock */
 757	linelock = (struct linelock *) & tlck->lock;
 758	linelock->next = 0;
 759	linelock->flag = tlckLINELOCK;
 760	linelock->maxcnt = TLOCKSHORT;
 761	linelock->index = 0;
 762
 763	switch (type & tlckTYPE) {
 764	case tlckDTREE:
 765		linelock->l2linesize = L2DTSLOTSIZE;
 766		break;
 767
 768	case tlckXTREE:
 769		linelock->l2linesize = L2XTSLOTSIZE;
 770
 771		xtlck = (struct xtlock *) linelock;
 772		xtlck->header.offset = 0;
 773		xtlck->header.length = 2;
 774
 775		if (type & tlckNEW) {
 776			xtlck->lwm.offset = XTENTRYSTART;
 777		} else {
 778			if (mp->xflag & COMMIT_PAGE)
 779				p = (xtpage_t *) mp->data;
 780			else
 781				p = &jfs_ip->i_xtroot;
 782			xtlck->lwm.offset =
 783			    le16_to_cpu(p->header.nextindex);
 784		}
 785		xtlck->lwm.length = 0;	/* ! */
 786		xtlck->twm.offset = 0;
 787		xtlck->hwm.offset = 0;
 788
 789		xtlck->index = 2;
 790		break;
 791
 792	case tlckINODE:
 793		linelock->l2linesize = L2INODESLOTSIZE;
 794		break;
 795
 796	case tlckDATA:
 797		linelock->l2linesize = L2DATASLOTSIZE;
 798		break;
 799
 800	default:
 801		jfs_err("UFO tlock:0x%p", tlck);
 802	}
 803
 804	/*
 805	 * update tlock vector
 806	 */
 807      grantLock:
 808	tlck->type |= type;
 809
 810	return tlck;
 811
 812	/*
 813	 * page is being locked by another transaction:
 814	 */
 815      waitLock:
 816	/* Only locks on ipimap or ipaimap should reach here */
 817	/* assert(jfs_ip->fileset == AGGREGATE_I); */
 818	if (jfs_ip->fileset != AGGREGATE_I) {
 819		printk(KERN_ERR "txLock: trying to lock locked page!");
 820		print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
 821			       ip, sizeof(*ip), 0);
 822		print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
 823			       mp, sizeof(*mp), 0);
 824		print_hex_dump(KERN_ERR, "Locker's tblock: ",
 825			       DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
 826			       sizeof(struct tblock), 0);
 827		print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
 828			       tlck, sizeof(*tlck), 0);
 829		BUG();
 830	}
 831	INCREMENT(stattx.waitlock);	/* statistics */
 832	TXN_UNLOCK();
 833	release_metapage(mp);
 834	TXN_LOCK();
 835	xtid = tlck->tid;	/* reacquire after dropping TXN_LOCK */
 836
 837	jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
 838		 tid, xtid, lid);
 839
 840	/* Recheck everything since dropping TXN_LOCK */
 841	if (xtid && (tlck->mp == mp) && (mp->lid == lid))
 842		TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
 843	else
 844		TXN_UNLOCK();
 845	jfs_info("txLock: awakened     tid = %d, lid = %d", tid, lid);
 846
 847	return NULL;
 848}
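/*
 * Caller sketch (paraphrasing the xtree code): btree routines that
 * modify a metapage take the tlock, then cast its lock area to the
 * type-specific linelock before recording which slots changed:
 *
 *	tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
 *	xtlck = (struct xtlock *) & tlck->lock;
 */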
 849
 850/*
 851 * NAME:	txRelease()
 852 *
 853 * FUNCTION:	Release buffers associated with transaction locks, but don't
  854 *		mark homeok yet.  This allows other transactions to modify
 855 *		buffers, but won't let them go to disk until commit record
 856 *		actually gets written.
 857 *
 858 * PARAMETER:
 859 *		tblk	-
 860 *
 861 * RETURN:	Errors from subroutines.
 862 */
 863static void txRelease(struct tblock * tblk)
 864{
 865	struct metapage *mp;
 866	lid_t lid;
 867	struct tlock *tlck;
 868
 869	TXN_LOCK();
 870
 871	for (lid = tblk->next; lid; lid = tlck->next) {
 872		tlck = lid_to_tlock(lid);
 873		if ((mp = tlck->mp) != NULL &&
 874		    (tlck->type & tlckBTROOT) == 0) {
 875			assert(mp->xflag & COMMIT_PAGE);
 876			mp->lid = 0;
 877		}
 878	}
 879
 880	/*
 881	 * wakeup transactions waiting on a page locked
 882	 * by the current transaction
 883	 */
 884	TXN_WAKEUP(&tblk->waitor);
 885
 886	TXN_UNLOCK();
 887}
 888
 889/*
 890 * NAME:	txUnlock()
 891 *
 892 * FUNCTION:	Initiates pageout of pages modified by tid in journalled
 893 *		objects and frees their lockwords.
 894 */
 895static void txUnlock(struct tblock * tblk)
 896{
 897	struct tlock *tlck;
 898	struct linelock *linelock;
 899	lid_t lid, next, llid, k;
 900	struct metapage *mp;
 901	struct jfs_log *log;
 902	int difft, diffp;
 903	unsigned long flags;
 904
 905	jfs_info("txUnlock: tblk = 0x%p", tblk);
 906	log = JFS_SBI(tblk->sb)->log;
 907
 908	/*
 909	 * mark page under tlock homeok (its log has been written):
 910	 */
 911	for (lid = tblk->next; lid; lid = next) {
 912		tlck = lid_to_tlock(lid);
 913		next = tlck->next;
 914
 915		jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck);
 916
 917		/* unbind page from tlock */
 918		if ((mp = tlck->mp) != NULL &&
 919		    (tlck->type & tlckBTROOT) == 0) {
 920			assert(mp->xflag & COMMIT_PAGE);
 921
 922			/* hold buffer
 923			 */
 924			hold_metapage(mp);
 925
 926			assert(mp->nohomeok > 0);
 927			_metapage_homeok(mp);
 928
 929			/* inherit younger/larger clsn */
 930			LOGSYNC_LOCK(log, flags);
 931			if (mp->clsn) {
 932				logdiff(difft, tblk->clsn, log);
 933				logdiff(diffp, mp->clsn, log);
 934				if (difft > diffp)
 935					mp->clsn = tblk->clsn;
 936			} else
 937				mp->clsn = tblk->clsn;
 938			LOGSYNC_UNLOCK(log, flags);
 939
 940			assert(!(tlck->flag & tlckFREEPAGE));
 941
 942			put_metapage(mp);
 943		}
 944
 945		/* insert tlock, and linelock(s) of the tlock if any,
 946		 * at head of freelist
 947		 */
 948		TXN_LOCK();
 949
 950		llid = ((struct linelock *) & tlck->lock)->next;
 951		while (llid) {
 952			linelock = (struct linelock *) lid_to_tlock(llid);
 953			k = linelock->next;
 954			txLockFree(llid);
 955			llid = k;
 956		}
 957		txLockFree(lid);
 958
 959		TXN_UNLOCK();
 960	}
 961	tblk->next = tblk->last = 0;
 962
 963	/*
 964	 * remove tblock from logsynclist
  965 * (allocation map pages inherited the lsn of tblk and
  966 * have been inserted in the logsync list at txUpdateMap())
 967	 */
 968	if (tblk->lsn) {
 969		LOGSYNC_LOCK(log, flags);
 970		log->count--;
 971		list_del(&tblk->synclist);
 972		LOGSYNC_UNLOCK(log, flags);
 973	}
 974}
 975
 976/*
 977 *	txMaplock()
 978 *
 979 * function: allocate a transaction lock for freed page/entry;
 980 *	for freed page, maplock is used as xtlock/dtlock type;
 981 */
 982struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 983{
 984	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 985	lid_t lid;
 986	struct tblock *tblk;
 987	struct tlock *tlck;
 988	struct maplock *maplock;
 989
 990	TXN_LOCK();
 991
 992	/*
 993	 * allocate a tlock
 994	 */
 995	lid = txLockAlloc();
 996	tlck = lid_to_tlock(lid);
 997
 998	/*
 999	 * initialize tlock
1000	 */
1001	tlck->tid = tid;
1002
1003	/* bind the tlock and the object */
1004	tlck->flag = tlckINODELOCK;
1005	if (S_ISDIR(ip->i_mode))
1006		tlck->flag |= tlckDIRECTORY;
1007	tlck->ip = ip;
1008	tlck->mp = NULL;
1009
1010	tlck->type = type;
1011
1012	/*
1013	 * enqueue transaction lock to transaction/inode
1014	 */
1015	/* insert the tlock at tail of transaction tlock list */
1016	if (tid) {
1017		tblk = tid_to_tblock(tid);
1018		if (tblk->next)
1019			lid_to_tlock(tblk->last)->next = lid;
1020		else
1021			tblk->next = lid;
1022		tlck->next = 0;
1023		tblk->last = lid;
1024	}
1025	/* anonymous transaction:
1026	 * insert the tlock at head of inode anonymous tlock list
1027	 */
1028	else {
1029		tlck->next = jfs_ip->atlhead;
1030		jfs_ip->atlhead = lid;
1031		if (tlck->next == 0) {
1032			/* This inode's first anonymous transaction */
1033			jfs_ip->atltail = lid;
1034			list_add_tail(&jfs_ip->anon_inode_list,
1035				      &TxAnchor.anon_list);
1036		}
1037	}
1038
1039	TXN_UNLOCK();
1040
1041	/* initialize type dependent area for maplock */
1042	maplock = (struct maplock *) & tlck->lock;
1043	maplock->next = 0;
1044	maplock->maxcnt = 0;
1045	maplock->index = 0;
1046
1047	return tlck;
1048}
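/*
 * Caller sketch (paraphrasing the truncate path): when a whole page or
 * extent is freed, the caller formats the returned lock area as a
 * pxd_lock describing the extent to be freed:
 *
 *	tlck = txMaplock(tid, ip, tlckMAP);
 *	pxdlock = (struct pxd_lock *) & tlck->lock;
 *	pxdlock->flag = mlckFREEPXD;
 *	PXDaddress(&pxdlock->pxd, xaddr);
 *	PXDlength(&pxdlock->pxd, xlen);
 *	pxdlock->index = 1;
 */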
1049
1050/*
1051 *	txLinelock()
1052 *
1053 * function: allocate a transaction lock for log vector list
1054 */
1055struct linelock *txLinelock(struct linelock * tlock)
1056{
1057	lid_t lid;
1058	struct tlock *tlck;
1059	struct linelock *linelock;
1060
1061	TXN_LOCK();
1062
1063	/* allocate a TxLock structure */
1064	lid = txLockAlloc();
1065	tlck = lid_to_tlock(lid);
1066
1067	TXN_UNLOCK();
1068
1069	/* initialize linelock */
1070	linelock = (struct linelock *) tlck;
1071	linelock->next = 0;
1072	linelock->flag = tlckLINELOCK;
1073	linelock->maxcnt = TLOCKLONG;
1074	linelock->index = 0;
1075	if (tlck->flag & tlckDIRECTORY)
1076		linelock->flag |= tlckDIRECTORY;
1077
1078	/* append linelock after tlock */
1079	linelock->next = tlock->next;
1080	tlock->next = lid;
1081
1082	return linelock;
1083}
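/*
 * Usage sketch (paraphrasing the dtree code): when a tlock's in-line
 * lock area is full (index has reached maxcnt), callers chain an
 * overflow linelock and keep logging into the new one:
 *
 *	if (dtlck->index >= dtlck->maxcnt)
 *		dtlck = (struct dt_lock *) txLinelock((struct linelock *) dtlck);
 */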
1084
1085/*
1086 *		transaction commit management
1087 *		-----------------------------
1088 */
1089
1090/*
1091 * NAME:	txCommit()
1092 *
1093 * FUNCTION:	commit the changes to the objects specified in
1094 *		clist.  For journalled segments only the
 1095 *		changes of the caller are committed, i.e., by tid.
1096 *		for non-journalled segments the data are flushed to
1097 *		disk and then the change to the disk inode and indirect
1098 *		blocks committed (so blocks newly allocated to the
1099 *		segment will be made a part of the segment atomically).
1100 *
1101 *		all of the segments specified in clist must be in
1102 *		one file system. no more than 6 segments are needed
1103 *		to handle all unix svcs.
1104 *
1105 *		if the i_nlink field (i.e. disk inode link count)
1106 *		is zero, and the type of inode is a regular file or
 1107 *		directory, or symbolic link, the inode is truncated
1108 *		to zero length. the truncation is committed but the
1109 *		VM resources are unaffected until it is closed (see
1110 *		iput and iclose).
1111 *
1112 * PARAMETER:
1113 *
1114 * RETURN:
1115 *
1116 * serialization:
1117 *		on entry the inode lock on each segment is assumed
1118 *		to be held.
1119 *
1120 * i/o error:
1121 */
1122int txCommit(tid_t tid,		/* transaction identifier */
1123	     int nip,		/* number of inodes to commit */
1124	     struct inode **iplist,	/* list of inode to commit */
1125	     int flag)
1126{
1127	int rc = 0;
1128	struct commit cd;
1129	struct jfs_log *log;
1130	struct tblock *tblk;
1131	struct lrd *lrd;
1132	struct inode *ip;
1133	struct jfs_inode_info *jfs_ip;
1134	int k, n;
1135	ino_t top;
1136	struct super_block *sb;
1137
1138	jfs_info("txCommit, tid = %d, flag = %d", tid, flag);
1139	/* is read-only file system ? */
1140	if (isReadOnly(iplist[0])) {
1141		rc = -EROFS;
1142		goto TheEnd;
1143	}
1144
1145	sb = cd.sb = iplist[0]->i_sb;
1146	cd.tid = tid;
1147
1148	if (tid == 0)
1149		tid = txBegin(sb, 0);
1150	tblk = tid_to_tblock(tid);
1151
1152	/*
1153	 * initialize commit structure
1154	 */
1155	log = JFS_SBI(sb)->log;
1156	cd.log = log;
1157
1158	/* initialize log record descriptor in commit */
1159	lrd = &cd.lrd;
1160	lrd->logtid = cpu_to_le32(tblk->logtid);
1161	lrd->backchain = 0;
1162
1163	tblk->xflag |= flag;
1164
1165	if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
1166		tblk->xflag |= COMMIT_LAZY;
1167	/*
1168	 *	prepare non-journaled objects for commit
1169	 *
1170	 * flush data pages of non-journaled file
 1171 * to prevent the file from getting uninitialized disk blocks
 1172 * in case of a crash.
1173	 * (new blocks - )
1174	 */
1175	cd.iplist = iplist;
1176	cd.nip = nip;
1177
1178	/*
1179	 *	acquire transaction lock on (on-disk) inodes
1180	 *
1181	 * update on-disk inode from in-memory inode
1182	 * acquiring transaction locks for AFTER records
1183	 * on the on-disk inode of file object
1184	 *
1185	 * sort the inodes array by inode number in descending order
1186	 * to prevent deadlock when acquiring transaction lock
1187	 * of on-disk inodes on multiple on-disk inode pages by
1188	 * multiple concurrent transactions
1189	 */
1190	for (k = 0; k < cd.nip; k++) {
1191		top = (cd.iplist[k])->i_ino;
1192		for (n = k + 1; n < cd.nip; n++) {
1193			ip = cd.iplist[n];
1194			if (ip->i_ino > top) {
1195				top = ip->i_ino;
1196				cd.iplist[n] = cd.iplist[k];
1197				cd.iplist[k] = ip;
1198			}
1199		}
1200
1201		ip = cd.iplist[k];
1202		jfs_ip = JFS_IP(ip);
1203
1204		/*
1205		 * BUGBUG - This code has temporarily been removed.  The
1206		 * intent is to ensure that any file data is written before
1207		 * the metadata is committed to the journal.  This prevents
1208		 * uninitialized data from appearing in a file after the
1209		 * journal has been replayed.  (The uninitialized data
1210		 * could be sensitive data removed by another user.)
1211		 *
1212		 * The problem now is that we are holding the IWRITELOCK
1213		 * on the inode, and calling filemap_fdatawrite on an
1214		 * unmapped page will cause a deadlock in jfs_get_block.
1215		 *
1216		 * The long term solution is to pare down the use of
1217		 * IWRITELOCK.  We are currently holding it too long.
1218		 * We could also be smarter about which data pages need
1219		 * to be written before the transaction is committed and
1220		 * when we don't need to worry about it at all.
1221		 *
1222		 * if ((!S_ISDIR(ip->i_mode))
1223		 *    && (tblk->flag & COMMIT_DELETE) == 0)
1224		 *	filemap_write_and_wait(ip->i_mapping);
1225		 */
1226
1227		/*
1228		 * Mark inode as not dirty.  It will still be on the dirty
1229		 * inode list, but we'll know not to commit it again unless
1230		 * it gets marked dirty again
1231		 */
1232		clear_cflag(COMMIT_Dirty, ip);
1233
1234		/* inherit anonymous tlock(s) of inode */
1235		if (jfs_ip->atlhead) {
1236			lid_to_tlock(jfs_ip->atltail)->next = tblk->next;
1237			tblk->next = jfs_ip->atlhead;
1238			if (!tblk->last)
1239				tblk->last = jfs_ip->atltail;
1240			jfs_ip->atlhead = jfs_ip->atltail = 0;
1241			TXN_LOCK();
1242			list_del_init(&jfs_ip->anon_inode_list);
1243			TXN_UNLOCK();
1244		}
1245
1246		/*
1247		 * acquire transaction lock on on-disk inode page
1248		 * (become first tlock of the tblk's tlock list)
1249		 */
1250		if (((rc = diWrite(tid, ip))))
1251			goto out;
1252	}
1253
1254	/*
1255	 *	write log records from transaction locks
1256	 *
1257	 * txUpdateMap() resets XAD_NEW in XAD.
1258	 */
1259	if ((rc = txLog(log, tblk, &cd)))
1260		goto TheEnd;
1261
1262	/*
1263	 * Ensure that inode isn't reused before
1264	 * lazy commit thread finishes processing
1265	 */
1266	if (tblk->xflag & COMMIT_DELETE) {
1267		ihold(tblk->u.ip);
1268		/*
1269		 * Avoid a rare deadlock
1270		 *
1271		 * If the inode is locked, we may be blocked in
1272		 * jfs_commit_inode.  If so, we don't want the
1273		 * lazy_commit thread doing the last iput() on the inode
1274		 * since that may block on the locked inode.  Instead,
1275		 * commit the transaction synchronously, so the last iput
1276		 * will be done by the calling thread (or later)
1277		 */
1278		/*
1279		 * I believe this code is no longer needed.  Splitting I_LOCK
1280		 * into two bits, I_NEW and I_SYNC should prevent this
1281		 * deadlock as well.  But since I don't have a JFS testload
1282		 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
1283		 * Joern
1284		 */
1285		if (tblk->u.ip->i_state & I_SYNC)
1286			tblk->xflag &= ~COMMIT_LAZY;
1287	}
1288
1289	ASSERT((!(tblk->xflag & COMMIT_DELETE)) ||
1290	       ((tblk->u.ip->i_nlink == 0) &&
1291		!test_cflag(COMMIT_Nolink, tblk->u.ip)));
1292
1293	/*
1294	 *	write COMMIT log record
1295	 */
1296	lrd->type = cpu_to_le16(LOG_COMMIT);
1297	lrd->length = 0;
1298	lmLog(log, tblk, lrd, NULL);
1299
1300	lmGroupCommit(log, tblk);
1301
1302	/*
1303	 *	- transaction is now committed -
1304	 */
1305
1306	/*
1307	 * force pages in careful update
1308	 * (imap addressing structure update)
1309	 */
1310	if (flag & COMMIT_FORCE)
1311		txForce(tblk);
1312
1313	/*
1314	 *	update allocation map.
1315	 *
1316	 * update inode allocation map and inode:
1317	 * free pager lock on memory object of inode if any.
1318	 * update block allocation map.
1319	 *
1320	 * txUpdateMap() resets XAD_NEW in XAD.
1321	 */
1322	if (tblk->xflag & COMMIT_FORCE)
1323		txUpdateMap(tblk);
1324
1325	/*
1326	 *	free transaction locks and pageout/free pages
1327	 */
1328	txRelease(tblk);
1329
1330	if ((tblk->flag & tblkGC_LAZY) == 0)
1331		txUnlock(tblk);
1332
1333
1334	/*
1335	 *	reset in-memory object state
1336	 */
1337	for (k = 0; k < cd.nip; k++) {
1338		ip = cd.iplist[k];
1339		jfs_ip = JFS_IP(ip);
1340
1341		/*
1342		 * reset in-memory inode state
1343		 */
1344		jfs_ip->bxflag = 0;
1345		jfs_ip->blid = 0;
1346	}
1347
1348      out:
1349	if (rc != 0)
1350		txAbort(tid, 1);
1351
1352      TheEnd:
1353	jfs_info("txCommit: tid = %d, returning %d", tid, rc);
1354	return rc;
1355}
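/*
 * Caller sketch (paraphrasing jfs_commit_inode): the sync flag decides
 * whether the commit may be completed lazily by the commit thread:
 *
 *	rc = txCommit(tid, 1, &inode, wait ? COMMIT_SYNC : 0);
 */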
1356
1357/*
1358 * NAME:	txLog()
1359 *
1360 * FUNCTION:	Writes AFTER log records for all lines modified
1361 *		by tid for segments specified by inodes in comdata.
1362 *		Code assumes only WRITELOCKS are recorded in lockwords.
1363 *
1364 * PARAMETERS:
1365 *
1366 * RETURN :
1367 */
1368static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
1369{
1370	int rc = 0;
1371	struct inode *ip;
1372	lid_t lid;
1373	struct tlock *tlck;
1374	struct lrd *lrd = &cd->lrd;
1375
1376	/*
1377	 * write log record(s) for each tlock of transaction,
1378	 */
1379	for (lid = tblk->next; lid; lid = tlck->next) {
1380		tlck = lid_to_tlock(lid);
1381
1382		tlck->flag |= tlckLOG;
1383
1384		/* initialize lrd common */
1385		ip = tlck->ip;
1386		lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate);
1387		lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset);
1388		lrd->log.redopage.inode = cpu_to_le32(ip->i_ino);
1389
1390		/* write log record of page from the tlock */
1391		switch (tlck->type & tlckTYPE) {
1392		case tlckXTREE:
1393			xtLog(log, tblk, lrd, tlck);
1394			break;
1395
1396		case tlckDTREE:
1397			dtLog(log, tblk, lrd, tlck);
1398			break;
1399
1400		case tlckINODE:
1401			diLog(log, tblk, lrd, tlck, cd);
1402			break;
1403
1404		case tlckMAP:
1405			mapLog(log, tblk, lrd, tlck);
1406			break;
1407
1408		case tlckDATA:
1409			dataLog(log, tblk, lrd, tlck);
1410			break;
1411
1412		default:
1413			jfs_err("UFO tlock:0x%p", tlck);
1414		}
1415	}
1416
1417	return rc;
1418}
1419
1420/*
1421 *	diLog()
1422 *
1423 * function:	log inode tlock and format maplock to update bmap;
1424 */
1425static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1426		 struct tlock * tlck, struct commit * cd)
1427{
1428	int rc = 0;
1429	struct metapage *mp;
1430	pxd_t *pxd;
1431	struct pxd_lock *pxdlock;
1432
1433	mp = tlck->mp;
1434
1435	/* initialize as REDOPAGE record format */
1436	lrd->log.redopage.type = cpu_to_le16(LOG_INODE);
1437	lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE);
1438
1439	pxd = &lrd->log.redopage.pxd;
1440
1441	/*
1442	 *	inode after image
1443	 */
1444	if (tlck->type & tlckENTRY) {
1445		/* log after-image for logredo(): */
1446		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1447		PXDaddress(pxd, mp->index);
1448		PXDlength(pxd,
1449			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1450		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1451
1452		/* mark page as homeward bound */
1453		tlck->flag |= tlckWRITEPAGE;
1454	} else if (tlck->type & tlckFREE) {
1455		/*
1456		 *	free inode extent
1457		 *
1458		 * (pages of the freed inode extent have been invalidated and
1459		 * a maplock for free of the extent has been formatted at
1460		 * txLock() time);
1461		 *
1462		 * the tlock had been acquired on the inode allocation map page
1463		 * (iag) that specifies the freed extent, even though the map
1464		 * page is not itself logged, to prevent pageout of the map
1465		 * page before the log;
1466		 */
1467
1468		/* log LOG_NOREDOINOEXT of the freed inode extent for
1469		 * logredo() to start NoRedoPage filters, and to update
1470		 * imap and bmap for free of the extent;
1471		 */
1472		lrd->type = cpu_to_le16(LOG_NOREDOINOEXT);
1473		/*
1474		 * For the LOG_NOREDOINOEXT record, we need
1475		 * to pass the IAG number and inode extent
1476		 * index (within that IAG) from which the
1477		 * the extent being released.  These have been
1478		 * passed to us in the iplist[1] and iplist[2].
1479		 */
1480		lrd->log.noredoinoext.iagnum =
1481		    cpu_to_le32((u32) (size_t) cd->iplist[1]);
1482		lrd->log.noredoinoext.inoext_idx =
1483		    cpu_to_le32((u32) (size_t) cd->iplist[2]);
1484
1485		pxdlock = (struct pxd_lock *) & tlck->lock;
1486		*pxd = pxdlock->pxd;
1487		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1488
1489		/* update bmap */
1490		tlck->flag |= tlckUPDATEMAP;
1491
1492		/* mark page as homeward bound */
1493		tlck->flag |= tlckWRITEPAGE;
1494	} else
1495		jfs_err("diLog: UFO type tlck:0x%p", tlck);
1496#ifdef  _JFS_WIP
1497	/*
1498	 *	alloc/free external EA extent
1499	 *
1500	 * a maplock for txUpdateMap() to update bPWMAP for alloc/free
1501	 * of the extent has been formatted at txLock() time;
1502	 */
1503	else {
1504		assert(tlck->type & tlckEA);
1505
1506		/* log LOG_UPDATEMAP for logredo() to update bmap for
1507		 * alloc of new (and free of old) external EA extent;
1508		 */
1509		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1510		pxdlock = (struct pxd_lock *) & tlck->lock;
1511		nlock = pxdlock->index;
1512		for (i = 0; i < nlock; i++, pxdlock++) {
1513			if (pxdlock->flag & mlckALLOCPXD)
1514				lrd->log.updatemap.type =
1515				    cpu_to_le16(LOG_ALLOCPXD);
1516			else
1517				lrd->log.updatemap.type =
1518				    cpu_to_le16(LOG_FREEPXD);
1519			lrd->log.updatemap.nxd = cpu_to_le16(1);
1520			lrd->log.updatemap.pxd = pxdlock->pxd;
1521			lrd->backchain =
1522			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1523		}
1524
1525		/* update bmap */
1526		tlck->flag |= tlckUPDATEMAP;
1527	}
1528#endif				/* _JFS_WIP */
1529
1530	return rc;
1531}
1532
1533/*
1534 *	dataLog()
1535 *
1536 * function:	log data tlock
1537 */
1538static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1539	    struct tlock * tlck)
1540{
1541	struct metapage *mp;
1542	pxd_t *pxd;
1543
1544	mp = tlck->mp;
1545
1546	/* initialize as REDOPAGE record format */
1547	lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
1548	lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);
1549
1550	pxd = &lrd->log.redopage.pxd;
1551
1552	/* log after-image for logredo(): */
1553	lrd->type = cpu_to_le16(LOG_REDOPAGE);
1554
1555	if (jfs_dirtable_inline(tlck->ip)) {
1556		/*
 1557		 * The table has been truncated; we must have deleted
1558		 * the last entry, so don't bother logging this
1559		 */
1560		mp->lid = 0;
1561		grab_metapage(mp);
1562		metapage_homeok(mp);
1563		discard_metapage(mp);
1564		tlck->mp = NULL;
1565		return 0;
1566	}
1567
1568	PXDaddress(pxd, mp->index);
1569	PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);
1570
1571	lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1572
1573	/* mark page as homeward bound */
1574	tlck->flag |= tlckWRITEPAGE;
1575
1576	return 0;
1577}
1578
1579/*
1580 *	dtLog()
1581 *
1582 * function:	log dtree tlock and format maplock to update bmap;
1583 */
1584static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1585	   struct tlock * tlck)
1586{
1587	struct metapage *mp;
1588	struct pxd_lock *pxdlock;
1589	pxd_t *pxd;
1590
1591	mp = tlck->mp;
1592
1593	/* initialize as REDOPAGE/NOREDOPAGE record format */
1594	lrd->log.redopage.type = cpu_to_le16(LOG_DTREE);
1595	lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE);
1596
1597	pxd = &lrd->log.redopage.pxd;
1598
1599	if (tlck->type & tlckBTROOT)
1600		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1601
1602	/*
1603	 *	page extension via relocation: entry insertion;
1604	 *	page extension in-place: entry insertion;
1605	 *	new right page from page split, reinitialized in-line
1606	 *	root from root page split: entry insertion;
1607	 */
1608	if (tlck->type & (tlckNEW | tlckEXTEND)) {
1609		/* log after-image of the new page for logredo():
1610		 * mark log (LOG_NEW) for logredo() to initialize
1611		 * freelist and update bmap for alloc of the new page;
1612		 */
1613		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1614		if (tlck->type & tlckEXTEND)
1615			lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
1616		else
1617			lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
1618		PXDaddress(pxd, mp->index);
1619		PXDlength(pxd,
1620			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1621		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1622
1623		/* format a maplock for txUpdateMap() to update bPMAP for
1624		 * alloc of the new page;
1625		 */
1626		if (tlck->type & tlckBTROOT)
1627			return;
1628		tlck->flag |= tlckUPDATEMAP;
1629		pxdlock = (struct pxd_lock *) & tlck->lock;
1630		pxdlock->flag = mlckALLOCPXD;
1631		pxdlock->pxd = *pxd;
1632
1633		pxdlock->index = 1;
1634
1635		/* mark page as homeward bound */
1636		tlck->flag |= tlckWRITEPAGE;
1637		return;
1638	}
1639
1640	/*
1641	 *	entry insertion/deletion,
1642	 *	sibling page link update (old right page before split);
1643	 */
1644	if (tlck->type & (tlckENTRY | tlckRELINK)) {
1645		/* log after-image for logredo(): */
1646		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1647		PXDaddress(pxd, mp->index);
1648		PXDlength(pxd,
1649			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1650		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1651
1652		/* mark page as homeward bound */
1653		tlck->flag |= tlckWRITEPAGE;
1654		return;
1655	}
1656
1657	/*
1658	 *	page deletion: page has been invalidated
1659	 *	page relocation: source extent
1660	 *
1661	 *	a maplock for free of the page has been formatted
1662	 *	at txLock() time);
1663	 */
1664	if (tlck->type & (tlckFREE | tlckRELOCATE)) {
1665		/* log LOG_NOREDOPAGE of the deleted page for logredo()
1666		 * to start NoRedoPage filter and to update bmap for free
 1667		 * of the deleted page
1668		 */
1669		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1670		pxdlock = (struct pxd_lock *) & tlck->lock;
1671		*pxd = pxdlock->pxd;
1672		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1673
1674		/* a maplock for txUpdateMap() for free of the page
1675		 * has been formatted at txLock() time;
1676		 */
1677		tlck->flag |= tlckUPDATEMAP;
1678	}
1679	return;
1680}
1681
1682/*
1683 *	xtLog()
1684 *
1685 * function:	log xtree tlock and format maplock to update bmap;
1686 */
1687static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1688	   struct tlock * tlck)
1689{
1690	struct inode *ip;
1691	struct metapage *mp;
1692	xtpage_t *p;
1693	struct xtlock *xtlck;
1694	struct maplock *maplock;
1695	struct xdlistlock *xadlock;
1696	struct pxd_lock *pxdlock;
1697	pxd_t *page_pxd;
1698	int next, lwm, hwm;
1699
1700	ip = tlck->ip;
1701	mp = tlck->mp;
1702
1703	/* initialize as REDOPAGE/NOREDOPAGE record format */
1704	lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
1705	lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);
1706
1707	page_pxd = &lrd->log.redopage.pxd;
1708
1709	if (tlck->type & tlckBTROOT) {
1710		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1711		p = &JFS_IP(ip)->i_xtroot;
1712		if (S_ISDIR(ip->i_mode))
1713			lrd->log.redopage.type |=
1714			    cpu_to_le16(LOG_DIR_XTREE);
1715	} else
1716		p = (xtpage_t *) mp->data;
1717	next = le16_to_cpu(p->header.nextindex);
1718
1719	xtlck = (struct xtlock *) & tlck->lock;
1720
1721	maplock = (struct maplock *) & tlck->lock;
1722	xadlock = (struct xdlistlock *) maplock;
1723
1724	/*
1725	 *	entry insertion/extension;
1726	 *	sibling page link update (old right page before split);
1727	 */
1728	if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
1729		/* log after-image for logredo():
1730		 * logredo() will update bmap for alloc of new/extended
1731		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1732		 * after-image of XADlist;
1733		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1734		 * applying the after-image to the meta-data page.
1735		 */
1736		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1737		PXDaddress(page_pxd, mp->index);
1738		PXDlength(page_pxd,
1739			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1740		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1741
1742		/* format a maplock for txUpdateMap() to update bPMAP
1743		 * for alloc of new/extended extents of XAD[lwm:next)
1744		 * from the page itself;
1745		 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
1746		 */
1747		lwm = xtlck->lwm.offset;
1748		if (lwm == 0)
1749			lwm = XTPAGEMAXSLOT;
1750
1751		if (lwm == next)
1752			goto out;
1753		if (lwm > next) {
1754			jfs_err("xtLog: lwm > next");
1755			goto out;
1756		}
1757		tlck->flag |= tlckUPDATEMAP;
1758		xadlock->flag = mlckALLOCXADLIST;
1759		xadlock->count = next - lwm;
1760		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1761			int i;
1762			pxd_t *pxd;
1763			/*
1764			 * Lazy commit may allow xtree to be modified before
1765			 * txUpdateMap runs.  Copy xad into linelock to
1766			 * preserve correct data.
1767			 *
 1768			 * We can fit twice as many pxd's as xads in the lock
1769			 */
1770			xadlock->flag = mlckALLOCPXDLIST;
1771			pxd = xadlock->xdlist = &xtlck->pxdlock;
1772			for (i = 0; i < xadlock->count; i++) {
1773				PXDaddress(pxd, addressXAD(&p->xad[lwm + i]));
1774				PXDlength(pxd, lengthXAD(&p->xad[lwm + i]));
1775				p->xad[lwm + i].flag &=
1776				    ~(XAD_NEW | XAD_EXTENDED);
1777				pxd++;
1778			}
1779		} else {
1780			/*
 1781			 * xdlist will point into the inode's xtree; ensure
 1782			 * that the transaction is not committed lazily.
1783			 */
1784			xadlock->flag = mlckALLOCXADLIST;
1785			xadlock->xdlist = &p->xad[lwm];
1786			tblk->xflag &= ~COMMIT_LAZY;
1787		}
1788		jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d count:%d",
1789			 tlck->ip, mp, tlck, lwm, xadlock->count);
1790
1791		maplock->index = 1;
1792
1793	      out:
1794		/* mark page as homeward bound */
1795		tlck->flag |= tlckWRITEPAGE;
1796
1797		return;
1798	}
1799
1800	/*
1801	 *	page deletion: file deletion/truncation (ref. xtTruncate())
1802	 *
1803	 * (page will be invalidated after log is written and bmap
1804	 * is updated from the page);
1805	 */
1806	if (tlck->type & tlckFREE) {
1807		/* LOG_NOREDOPAGE log for NoRedoPage filter:
1808		 * if page free from file delete, NoRedoFile filter from
1809		 * inode image of zero link count will subsume NoRedoPage
1810		 * filters for each page;
 1811		 * if page free from file truncation, write NoRedoPage
1812		 * filter;
1813		 *
 1814		 * update of block allocation map for the page itself:
1815		 * if page free from deletion and truncation, LOG_UPDATEMAP
1816		 * log for the page itself is generated from processing
1817		 * its parent page xad entries;
1818		 */
1819		/* if page free from file truncation, log LOG_NOREDOPAGE
1820		 * of the deleted page for logredo() to start NoRedoPage
1821		 * filter for the page;
1822		 */
1823		if (tblk->xflag & COMMIT_TRUNCATE) {
1824			/* write NOREDOPAGE for the page */
1825			lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1826			PXDaddress(page_pxd, mp->index);
1827			PXDlength(page_pxd,
1828				  mp->logical_size >> tblk->sb->
1829				  s_blocksize_bits);
1830			lrd->backchain =
1831			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1832
1833			if (tlck->type & tlckBTROOT) {
1834				/* Empty xtree must be logged */
1835				lrd->type = cpu_to_le16(LOG_REDOPAGE);
1836				lrd->backchain =
1837				    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1838			}
1839		}
1840
1841		/* init LOG_UPDATEMAP of the freed extents
1842		 * XAD[XTENTRYSTART:hwm) from the deleted page itself
1843		 * for logredo() to update bmap;
1844		 */
1845		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1846		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST);
1847		xtlck = (struct xtlock *) & tlck->lock;
1848		hwm = xtlck->hwm.offset;
1849		lrd->log.updatemap.nxd =
1850		    cpu_to_le16(hwm - XTENTRYSTART + 1);
1851		/* reformat linelock for lmLog() */
1852		xtlck->header.offset = XTENTRYSTART;
1853		xtlck->header.length = hwm - XTENTRYSTART + 1;
1854		xtlck->index = 1;
1855		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1856
1857		/* format a maplock for txUpdateMap() to update bmap
1858		 * to free extents of XAD[XTENTRYSTART:hwm) from the
1859		 * deleted page itself;
1860		 */
1861		tlck->flag |= tlckUPDATEMAP;
1862		xadlock->count = hwm - XTENTRYSTART + 1;
1863		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1864			int i;
1865			pxd_t *pxd;
1866			/*
1867			 * Lazy commit may allow xtree to be modified before
1868			 * txUpdateMap runs.  Copy xad into linelock to
1869			 * preserve correct data.
1870			 *
 1871			 * We can fit twice as many pxd's as xads in the lock
1872			 */
1873			xadlock->flag = mlckFREEPXDLIST;
1874			pxd = xadlock->xdlist = &xtlck->pxdlock;
1875			for (i = 0; i < xadlock->count; i++) {
1876				PXDaddress(pxd,
1877					addressXAD(&p->xad[XTENTRYSTART + i]));
1878				PXDlength(pxd,
1879					lengthXAD(&p->xad[XTENTRYSTART + i]));
1880				pxd++;
1881			}
1882		} else {
1883			/*
1884			 * xdlist will point to into inode's xtree, ensure
1885			 * that transaction is not committed lazily.
1886			 */
1887			xadlock->flag = mlckFREEXADLIST;
1888			xadlock->xdlist = &p->xad[XTENTRYSTART];
1889			tblk->xflag &= ~COMMIT_LAZY;
1890		}
1891		jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2",
1892			 tlck->ip, mp, xadlock->count);
1893
1894		maplock->index = 1;
1895
1896		/* mark page as invalid */
1897		if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode))
1898		    && !(tlck->type & tlckBTROOT))
1899			tlck->flag |= tlckFREEPAGE;
1900		/*
1901		   else (tblk->xflag & COMMIT_PMAP)
1902		   ? release the page;
1903		 */
1904		return;
1905	}
1906
1907	/*
1908	 *	page/entry truncation: file truncation (ref. xtTruncate())
1909	 *
1910	 *	|----------+------+------+---------------|
1911	 *		   |      |      |
1912	 *		   |      |     hwm - hwm before truncation
1913	 *		   |     next - truncation point
1914	 *		  lwm - lwm before truncation
1915	 * header ?
1916	 */
1917	if (tlck->type & tlckTRUNCATE) {
1918		pxd_t pxd;	/* truncated extent of xad */
1919		int twm;
1920
1921		/*
1922		 * For truncation the entire linelock may be used, so it would
1923		 * be difficult to store xad list in linelock itself.
1924		 * Therefore, we'll just force transaction to be committed
1925		 * synchronously, so that xtree pages won't be changed before
1926		 * txUpdateMap runs.
1927		 */
1928		tblk->xflag &= ~COMMIT_LAZY;
1929		lwm = xtlck->lwm.offset;
1930		if (lwm == 0)
1931			lwm = XTPAGEMAXSLOT;
1932		hwm = xtlck->hwm.offset;
1933		twm = xtlck->twm.offset;
1934
1935		/*
1936		 *	write log records
1937		 */
1938		/* log after-image for logredo():
1939		 *
1940		 * logredo() will update bmap for alloc of new/extended
1941		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1942		 * after-image of XADlist;
1943		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1944		 * applying the after-image to the meta-data page.
1945		 */
1946		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1947		PXDaddress(page_pxd, mp->index);
1948		PXDlength(page_pxd,
1949			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1950		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1951
1952		/*
1953		 * truncate entry XAD[twm == next - 1]:
1954		 */
1955		if (twm == next - 1) {
1956			/* init LOG_UPDATEMAP for logredo() to update bmap for
1957			 * free of truncated delta extent of the truncated
1958			 * entry XAD[next - 1]:
1959			 * (xtlck->pxdlock = truncated delta extent);
1960			 */
1961			pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
1962			/* assert(pxdlock->type & tlckTRUNCATE); */
1963			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1964			lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
1965			lrd->log.updatemap.nxd = cpu_to_le16(1);
1966			lrd->log.updatemap.pxd = pxdlock->pxd;
1967			pxd = pxdlock->pxd;	/* save to format maplock */
1968			lrd->backchain =
1969			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1970		}
1971
1972		/*
1973		 * free entries XAD[next:hwm]:
1974		 */
1975		if (hwm >= next) {
1976			/* init LOG_UPDATEMAP of the freed extents
1977			 * XAD[next:hwm] from the deleted page itself
1978			 * for logredo() to update bmap;
1979			 */
1980			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1981			lrd->log.updatemap.type =
1982			    cpu_to_le16(LOG_FREEXADLIST);
1983			xtlck = (struct xtlock *) & tlck->lock;
1984			hwm = xtlck->hwm.offset;
1985			lrd->log.updatemap.nxd =
1986			    cpu_to_le16(hwm - next + 1);
1987			/* reformat linelock for lmLog() */
1988			xtlck->header.offset = next;
1989			xtlck->header.length = hwm - next + 1;
1990			xtlck->index = 1;
1991			lrd->backchain =
1992			    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1993		}
1994
1995		/*
1996		 *	format maplock(s) for txUpdateMap() to update bmap
1997		 */
1998		maplock->index = 0;
1999
2000		/*
2001		 * allocate entries XAD[lwm:next):
2002		 */
2003		if (lwm < next) {
2004			/* format a maplock for txUpdateMap() to update bPMAP
2005			 * for alloc of new/extended extents of XAD[lwm:next)
2006			 * from the page itself;
2007			 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
2008			 */
2009			tlck->flag |= tlckUPDATEMAP;
2010			xadlock->flag = mlckALLOCXADLIST;
2011			xadlock->count = next - lwm;
2012			xadlock->xdlist = &p->xad[lwm];
2013
2014			jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d lwm:%d next:%d",
2015				 tlck->ip, mp, xadlock->count, lwm, next);
2016			maplock->index++;
2017			xadlock++;
2018		}
2019
2020		/*
2021		 * truncate entry XAD[twm == next - 1]:
2022		 */
2023		if (twm == next - 1) {
2024			/* format a maplock for txUpdateMap() to update bmap
2025			 * to free truncated delta extent of the truncated
2026			 * entry XAD[next - 1];
2027			 * (xtlck->pxdlock = truncated delta extent);
2028			 */
2029			tlck->flag |= tlckUPDATEMAP;
2030			pxdlock = (struct pxd_lock *) xadlock;
2031			pxdlock->flag = mlckFREEPXD;
2032			pxdlock->count = 1;
2033			pxdlock->pxd = pxd;
2034
2035			jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d hwm:%d",
2036				 ip, mp, pxdlock->count, hwm);
2037			maplock->index++;
2038			xadlock++;
2039		}
2040
2041		/*
2042		 * free entries XAD[next:hwm]:
2043		 */
2044		if (hwm >= next) {
2045			/* format a maplock for txUpdateMap() to update bmap
2046			 * to free extents of XAD[next:hwm] from the deleted
2047			 * page itself;
2048			 */
2049			tlck->flag |= tlckUPDATEMAP;
2050			xadlock->flag = mlckFREEXADLIST;
2051			xadlock->count = hwm - next + 1;
2052			xadlock->xdlist = &p->xad[next];
2053
2054			jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d next:%d hwm:%d",
2055				 tlck->ip, mp, xadlock->count, next, hwm);
2056			maplock->index++;
2057		}
2058
2059		/* mark page as homeward bound */
2060		tlck->flag |= tlckWRITEPAGE;
2061	}
2062	return;
2063}
2064
2065/*
2066 *	mapLog()
2067 *
2068 * function:	log from maplock of freed data extents;
2069 */
2070static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2071		   struct tlock * tlck)
2072{
2073	struct pxd_lock *pxdlock;
2074	int i, nlock;
2075	pxd_t *pxd;
2076
2077	/*
2078	 *	page relocation: free the source page extent
2079	 *
2080	 * a maplock for txUpdateMap() for free of the page
2081	 * has been formatted at txLock() time saving the src
2082	 * relocated page address;
2083	 */
2084	if (tlck->type & tlckRELOCATE) {
2085		/* log LOG_NOREDOPAGE of the old relocated page
2086		 * for logredo() to start NoRedoPage filter;
2087		 */
2088		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
2089		pxdlock = (struct pxd_lock *) & tlck->lock;
2090		pxd = &lrd->log.redopage.pxd;
2091		*pxd = pxdlock->pxd;
2092		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2093
2094		/* (N.B. currently, logredo() does NOT update bmap
2095		 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE);
2096		 * if page free from relocation, LOG_UPDATEMAP log is
2097		 * specifically generated now for logredo()
2098		 * to update bmap for free of src relocated page;
2099		 * (new flag LOG_RELOCATE may be introduced which will
2100		 * inform logredo() to start NORedoPage filter and also
2101		 * update block allocation map at the same time, thus
2102		 * avoiding an extra log write);
2103		 */
2104		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2105		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
2106		lrd->log.updatemap.nxd = cpu_to_le16(1);
2107		lrd->log.updatemap.pxd = pxdlock->pxd;
2108		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2109
2110		/* a maplock for txUpdateMap() for free of the page
2111		 * has been formatted at txLock() time;
2112		 */
2113		tlck->flag |= tlckUPDATEMAP;
2114		return;
2115	}
2116	/*
2117	 * Otherwise it's not a relocate request
2118	 */
2121	else {
2122		/* log LOG_UPDATEMAP for logredo() to update bmap for
2123		 * free of truncated/relocated delta extent of the data;
2124		 * e.g.: external EA extent, relocated/truncated extent
2125		 * from xtTailgate();
2126		 */
2127		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2128		pxdlock = (struct pxd_lock *) & tlck->lock;
2129		nlock = pxdlock->index;
2130		for (i = 0; i < nlock; i++, pxdlock++) {
2131			if (pxdlock->flag & mlckALLOCPXD)
2132				lrd->log.updatemap.type =
2133				    cpu_to_le16(LOG_ALLOCPXD);
2134			else
2135				lrd->log.updatemap.type =
2136				    cpu_to_le16(LOG_FREEPXD);
2137			lrd->log.updatemap.nxd = cpu_to_le16(1);
2138			lrd->log.updatemap.pxd = pxdlock->pxd;
2139			lrd->backchain =
2140			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2141			jfs_info("mapLog: xaddr:0x%lx xlen:0x%x",
2142				 (ulong) addressPXD(&pxdlock->pxd),
2143				 lengthPXD(&pxdlock->pxd));
2144		}
2145
2146		/* update bmap */
2147		tlck->flag |= tlckUPDATEMAP;
2148	}
2149}
2150
2151/*
2152 *	txEA()
2153 *
2154 * function:	acquire maplock for EA/ACL extents or
2155 *		set COMMIT_INLINE flag;
2156 */
2157void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
2158{
2159	struct tlock *tlck = NULL;
2160	struct pxd_lock *maplock = NULL, *pxdlock = NULL;
2161
2162	/*
2163	 * format maplock for alloc of new EA extent
2164	 */
2165	if (newea) {
2166		/* Since the newea could be a completely zeroed entry, we need to
2167		 * check for the two flags which indicate we should actually
2168		 * commit new EA data
2169		 */
2170		if (newea->flag & DXD_EXTENT) {
2171			tlck = txMaplock(tid, ip, tlckMAP);
2172			maplock = (struct pxd_lock *) & tlck->lock;
2173			pxdlock = (struct pxd_lock *) maplock;
2174			pxdlock->flag = mlckALLOCPXD;
2175			PXDaddress(&pxdlock->pxd, addressDXD(newea));
2176			PXDlength(&pxdlock->pxd, lengthDXD(newea));
2177			pxdlock++;
2178			maplock->index = 1;
2179		} else if (newea->flag & DXD_INLINE) {
2180			tlck = NULL;
2181
2182			set_cflag(COMMIT_Inlineea, ip);
2183		}
2184	}
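	/*
	 * If an alloc entry was formatted above, pxdlock now points at the
	 * second maplock slot of the same tlock, so a free of the old
	 * extent below is appended there and counted by maplock->index.
	 */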
2185
2186	/*
2187	 * format maplock for free of old EA extent
2188	 */
2189	if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) {
2190		if (tlck == NULL) {
2191			tlck = txMaplock(tid, ip, tlckMAP);
2192			maplock = (struct pxd_lock *) & tlck->lock;
2193			pxdlock = (struct pxd_lock *) maplock;
2194			maplock->index = 0;
2195		}
2196		pxdlock->flag = mlckFREEPXD;
2197		PXDaddress(&pxdlock->pxd, addressDXD(oldea));
2198		PXDlength(&pxdlock->pxd, lengthDXD(oldea));
2199		maplock->index++;
2200	}
2201}
2202
2203/*
2204 *	txForce()
2205 *
2206 * function: synchronously write pages locked by transaction
2207 *	     after txLog() but before txUpdateMap();
2208 */
2209static void txForce(struct tblock * tblk)
2210{
2211	struct tlock *tlck;
2212	lid_t lid, next;
2213	struct metapage *mp;
2214
2215	/*
2216	 * reverse the order of transaction tlocks in
2217	 * careful update order of address index pages
2218	 * (right to left, bottom up)
2219	 */
2220	tlck = lid_to_tlock(tblk->next);
2221	lid = tlck->next;
2222	tlck->next = 0;
2223	while (lid) {
2224		tlck = lid_to_tlock(lid);
2225		next = tlck->next;
2226		tlck->next = tblk->next;
2227		tblk->next = lid;
2228		lid = next;
2229	}
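	/* the lid list has been reversed in place by head insertion:
	 * tblk->next now names what was the last tlock
	 */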
2230
2231	/*
2232	 * synchronously write the page, and
2233	 * hold the page for txUpdateMap();
2234	 */
2235	for (lid = tblk->next; lid; lid = next) {
2236		tlck = lid_to_tlock(lid);
2237		next = tlck->next;
2238
2239		if ((mp = tlck->mp) != NULL &&
2240		    (tlck->type & tlckBTROOT) == 0) {
2241			assert(mp->xflag & COMMIT_PAGE);
2242
2243			if (tlck->flag & tlckWRITEPAGE) {
2244				tlck->flag &= ~tlckWRITEPAGE;
2245
2246				/* do not release page to freelist */
2247				force_metapage(mp);
2248#if 0
2249				/*
2250				 * The "right" thing to do here is to
2251				 * synchronously write the metadata.
2252				 * With the current implementation this
2253				 * is hard since write_metapage requires
2254				 * us to kunmap & remap the page.  If we
2255				 * have tlocks pointing into the metadata
2256				 * pages, we don't want to do this.  I think
2257				 * we can get by with synchronously writing
2258				 * the pages when they are released.
2259				 */
2260				assert(mp->nohomeok);
2261				set_bit(META_dirty, &mp->flag);
2262				set_bit(META_sync, &mp->flag);
2263#endif
2264			}
2265		}
2266	}
2267}
2268
2269/*
2270 *	txUpdateMap()
2271 *
2272 * function:	update persistent allocation map (and working map
2273 *		if appropriate);
2274 *
2275 * parameter:
2276 */
2277static void txUpdateMap(struct tblock * tblk)
2278{
2279	struct inode *ip;
2280	struct inode *ipimap;
2281	lid_t lid;
2282	struct tlock *tlck;
2283	struct maplock *maplock;
2284	struct pxd_lock pxdlock;
2285	int maptype;
2286	int k, nlock;
2287	struct metapage *mp = NULL;
2288
2289	ipimap = JFS_SBI(tblk->sb)->ipimap;
2290
2291	maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP;
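	/* COMMIT_PMAP: free only from the persistent map here (working-map
	 * frees are deferred, e.g. to the last release of a deleted file);
	 * COMMIT_PWMAP: free from both persistent and working maps
	 */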
2292
2293
2294	/*
2295	 *	update block allocation map
2296	 *
2297	 * update allocation state in pmap (and wmap) and
2298	 * update lsn of the pmap page;
2299	 */
2300	/*
2301	 * scan each tlock/page of transaction for block allocation/free:
2302	 *
2303	 * for each tlock/page of transaction, update map.
2304	 *  ? are there tlocks for pmap and pwmap at the same time ?
2305	 */
2306	for (lid = tblk->next; lid; lid = tlck->next) {
2307		tlck = lid_to_tlock(lid);
2308
2309		if ((tlck->flag & tlckUPDATEMAP) == 0)
2310			continue;
2311
2312		if (tlck->flag & tlckFREEPAGE) {
2313			/*
2314			 * Another thread may attempt to reuse freed space
2315			 * immediately, so we want to get rid of the metapage
2316			 * before anyone else has a chance to get it.
2317			 * Lock metapage, update maps, then invalidate
2318			 * the metapage.
2319			 */
2320			mp = tlck->mp;
2321			ASSERT(mp->xflag & COMMIT_PAGE);
2322			grab_metapage(mp);
2323		}
2324
2325		/*
2326		 * extent list:
2327		 * . in-line PXD list:
2328		 * . out-of-line XAD list:
2329		 */
2330		maplock = (struct maplock *) & tlck->lock;
2331		nlock = maplock->index;
2332
2333		for (k = 0; k < nlock; k++, maplock++) {
2334			/*
2335			 * allocate blocks in persistent map:
2336			 *
2337			 * blocks have been allocated from wmap at alloc time;
2338			 */
2339			if (maplock->flag & mlckALLOC) {
2340				txAllocPMap(ipimap, maplock, tblk);
2341			}
2342			/*
2343			 * free blocks in persistent and working map:
2344			 * blocks will be freed in pmap and then in wmap;
2345			 *
2346			 * ? tblock specifies the PMAP/PWMAP based upon
2347			 * transaction
2348			 *
2349			 * free blocks in persistent map:
2350			 * blocks will be freed from wmap at last reference
2351			 * release of the object for regular files;
2352			 *
2353			 * Always free blocks from both persistent & working
2354			 * maps for directories
2355			 */
2356			else {	/* (maplock->flag & mlckFREE) */
2357
2358				if (tlck->flag & tlckDIRECTORY)
2359					txFreeMap(ipimap, maplock,
2360						  tblk, COMMIT_PWMAP);
2361				else
2362					txFreeMap(ipimap, maplock,
2363						  tblk, maptype);
2364			}
2365		}
2366		if (tlck->flag & tlckFREEPAGE) {
2367			if (!(tblk->flag & tblkGC_LAZY)) {
2368				/* This is equivalent to txRelease */
2369				ASSERT(mp->lid == lid);
2370				tlck->mp->lid = 0;
2371			}
2372			assert(mp->nohomeok == 1);
2373			metapage_homeok(mp);
2374			discard_metapage(mp);
2375			tlck->mp = NULL;
2376		}
2377	}
2378	/*
2379	 *	update inode allocation map
2380	 *
2381	 * update allocation state in pmap and
2382	 * update lsn of the pmap page;
2383	 * update in-memory inode flag/state
2384	 *
2385	 * unlock mapper/write lock
2386	 */
2387	if (tblk->xflag & COMMIT_CREATE) {
2388		diUpdatePMap(ipimap, tblk->ino, false, tblk);
2389		/* update persistent block allocation map
2390		 * for the allocation of inode extent;
2391		 */
2392		pxdlock.flag = mlckALLOCPXD;
2393		pxdlock.pxd = tblk->u.ixpxd;
2394		pxdlock.index = 1;
2395		txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk);
2396	} else if (tblk->xflag & COMMIT_DELETE) {
2397		ip = tblk->u.ip;
2398		diUpdatePMap(ipimap, ip->i_ino, true, tblk);
2399		iput(ip);
2400	}
2401}
2402
2403/*
2404 *	txAllocPMap()
2405 *
2406 * function: allocate from persistent map;
2407 *
2408 * parameter:
2409 *	ipbmap	-
2410 *	maplock	-
2411 *		xad list:
2412 *		pxd:
2413 *
2414 *	maptype -
2415 *		allocate from persistent map;
2416 *		free from persistent map;
2417 *		(e.g., tmp file - free from working map at release
2418 *		 of last reference);
2419 *		free from persistent and working map;
2420 *
2421 *	lsn	- log sequence number;
2422 */
2423static void txAllocPMap(struct inode *ip, struct maplock * maplock,
2424			struct tblock * tblk)
2425{
2426	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2427	struct xdlistlock *xadlistlock;
2428	xad_t *xad;
2429	s64 xaddr;
2430	int xlen;
2431	struct pxd_lock *pxdlock;
2432	struct xdlistlock *pxdlistlock;
2433	pxd_t *pxd;
2434	int n;
2435
2436	/*
2437	 * allocate from persistent map;
2438	 */
2439	if (maplock->flag & mlckALLOCXADLIST) {
2440		xadlistlock = (struct xdlistlock *) maplock;
2441		xad = xadlistlock->xdlist;
2442		for (n = 0; n < xadlistlock->count; n++, xad++) {
2443			if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
2444				xaddr = addressXAD(xad);
2445				xlen = lengthXAD(xad);
2446				dbUpdatePMap(ipbmap, false, xaddr,
2447					     (s64) xlen, tblk);
2448				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
2449				jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2450					 (ulong) xaddr, xlen);
2451			}
2452		}
2453	} else if (maplock->flag & mlckALLOCPXD) {
2454		pxdlock = (struct pxd_lock *) maplock;
2455		xaddr = addressPXD(&pxdlock->pxd);
2456		xlen = lengthPXD(&pxdlock->pxd);
2457		dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk);
2458		jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen);
2459	} else {		/* (maplock->flag & mlckALLOCPXDLIST) */
2460
2461		pxdlistlock = (struct xdlistlock *) maplock;
2462		pxd = pxdlistlock->xdlist;
2463		for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2464			xaddr = addressPXD(pxd);
2465			xlen = lengthPXD(pxd);
2466			dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen,
2467				     tblk);
2468			jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2469				 (ulong) xaddr, xlen);
2470		}
2471	}
2472}
2473
2474/*
2475 *	txFreeMap()
2476 *
2477 * function:	free from persistent and/or working map;
2478 *
2479 * todo: optimization
2480 */
2481void txFreeMap(struct inode *ip,
2482	       struct maplock * maplock, struct tblock * tblk, int maptype)
2483{
2484	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2485	struct xdlistlock *xadlistlock;
2486	xad_t *xad;
2487	s64 xaddr;
2488	int xlen;
2489	struct pxd_lock *pxdlock;
2490	struct xdlistlock *pxdlistlock;
2491	pxd_t *pxd;
2492	int n;
2493
2494	jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x",
2495		 tblk, maplock, maptype);
2496
2497	/*
2498	 * free from persistent map;
2499	 */
2500	if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) {
2501		if (maplock->flag & mlckFREEXADLIST) {
2502			xadlistlock = (struct xdlistlock *) maplock;
2503			xad = xadlistlock->xdlist;
2504			for (n = 0; n < xadlistlock->count; n++, xad++) {
2505				if (!(xad->flag & XAD_NEW)) {
2506					xaddr = addressXAD(xad);
2507					xlen = lengthXAD(xad);
2508					dbUpdatePMap(ipbmap, true, xaddr,
2509						     (s64) xlen, tblk);
2510					jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2511						 (ulong) xaddr, xlen);
2512				}
2513			}
2514		} else if (maplock->flag & mlckFREEPXD) {
2515			pxdlock = (struct pxd_lock *) maplock;
2516			xaddr = addressPXD(&pxdlock->pxd);
2517			xlen = lengthPXD(&pxdlock->pxd);
2518			dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen,
2519				     tblk);
2520			jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2521				 (ulong) xaddr, xlen);
2522		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
2523
2524			pxdlistlock = (struct xdlistlock *) maplock;
2525			pxd = pxdlistlock->xdlist;
2526			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2527				xaddr = addressPXD(pxd);
2528				xlen = lengthPXD(pxd);
2529				dbUpdatePMap(ipbmap, true, xaddr,
2530					     (s64) xlen, tblk);
2531				jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2532					 (ulong) xaddr, xlen);
2533			}
2534		}
2535	}
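	/* for COMMIT_PWMAP both branches run: blocks leave the persistent
	 * map above and the working map below
	 */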
2536
2537	/*
2538	 * free from working map;
2539	 */
2540	if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) {
2541		if (maplock->flag & mlckFREEXADLIST) {
2542			xadlistlock = (struct xdlistlock *) maplock;
2543			xad = xadlistlock->xdlist;
2544			for (n = 0; n < xadlistlock->count; n++, xad++) {
2545				xaddr = addressXAD(xad);
2546				xlen = lengthXAD(xad);
2547				dbFree(ip, xaddr, (s64) xlen);
2548				xad->flag = 0;
2549				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2550					 (ulong) xaddr, xlen);
2551			}
2552		} else if (maplock->flag & mlckFREEPXD) {
2553			pxdlock = (struct pxd_lock *) maplock;
2554			xaddr = addressPXD(&pxdlock->pxd);
2555			xlen = lengthPXD(&pxdlock->pxd);
2556			dbFree(ip, xaddr, (s64) xlen);
2557			jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2558				 (ulong) xaddr, xlen);
2559		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
2560
2561			pxdlistlock = (struct xdlistlock *) maplock;
2562			pxd = pxdlistlock->xdlist;
2563			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2564				xaddr = addressPXD(pxd);
2565				xlen = lengthPXD(pxd);
2566				dbFree(ip, xaddr, (s64) xlen);
2567				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2568					 (ulong) xaddr, xlen);
2569			}
2570		}
2571	}
2572}
2573
2574/*
2575 *	txFreelock()
2576 *
2577 * function:	remove tlock from inode anonymous locklist
2578 */
2579void txFreelock(struct inode *ip)
2580{
2581	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2582	struct tlock *xtlck, *tlck;
2583	lid_t xlid = 0, lid;
2584
2585	if (!jfs_ip->atlhead)
2586		return;
2587
2588	TXN_LOCK();
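	/* treat &jfs_ip->atlhead as a pseudo-tlock so the loop below can
	 * unlink entries uniformly; this relies on 'next' being the first
	 * member of struct tlock
	 */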
2589	xtlck = (struct tlock *) &jfs_ip->atlhead;
2590
2591	while ((lid = xtlck->next) != 0) {
2592		tlck = lid_to_tlock(lid);
2593		if (tlck->flag & tlckFREELOCK) {
2594			xtlck->next = tlck->next;
2595			txLockFree(lid);
2596		} else {
2597			xtlck = tlck;
2598			xlid = lid;
2599		}
2600	}
2601
2602	if (jfs_ip->atlhead)
2603		jfs_ip->atltail = xlid;
2604	else {
2605		jfs_ip->atltail = 0;
2606		/*
2607		 * If inode was on anon_list, remove it
2608		 */
2609		list_del_init(&jfs_ip->anon_inode_list);
2610	}
2611	TXN_UNLOCK();
2612}
2613
2614/*
2615 *	txAbort()
2616 *
2617 * function: abort tx before commit;
2618 *
2619 * frees line-locks and segment locks for all
2620 * segments in the comdata structure.
2621 * Optionally sets state of file-system to FM_DIRTY in super-block.
2622 * log ages of in-memory page-frames locked by the caller
2623 * are reset to 0 (to avoid log wrap).
2624 */
2625void txAbort(tid_t tid, int dirty)
2626{
2627	lid_t lid, next;
2628	struct metapage *mp;
2629	struct tblock *tblk = tid_to_tblock(tid);
2630	struct tlock *tlck;
2631
2632	/*
2633	 * free tlocks of the transaction
2634	 */
2635	for (lid = tblk->next; lid; lid = next) {
2636		tlck = lid_to_tlock(lid);
2637		next = tlck->next;
2638		mp = tlck->mp;
2639		JFS_IP(tlck->ip)->xtlid = 0;
2640
2641		if (mp) {
2642			mp->lid = 0;
2643
2644			/*
2645			 * reset lsn of page to avoid logwrap:
2646			 *
2647			 * (page may have been previously committed by another
2648			 * transaction(s) but has not been paged, i.e.,
2649			 * it may be on logsync list even though it has not
2650			 * been logged for the current tx.)
2651			 */
2652			if (mp->xflag & COMMIT_PAGE && mp->lsn)
2653				LogSyncRelease(mp);
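				/* i.e. drop the nohomeok hold via
				 * metapage_homeok() so the previously-logged
				 * page may be written home
				 */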
2654		}
2655		/* insert tlock at head of freelist */
2656		TXN_LOCK();
2657		txLockFree(lid);
2658		TXN_UNLOCK();
2659	}
2660
2661	/* caller will free the transaction block */
2662
2663	tblk->next = tblk->last = 0;
2664
2665	/*
2666	 * mark filesystem dirty
2667	 */
2668	if (dirty)
2669		jfs_error(tblk->sb, "\n");
2670
2671	return;
2672}
2673
2674/*
2675 *	txLazyCommit(void)
2676 *
2677 *	All transactions except those changing ipimap (COMMIT_FORCE) are
2678 *	processed by this routine.  This ensures that the inode and block
2679 *	allocation maps are updated in order.  For synchronous transactions,
2680 *	let the user thread finish processing after txUpdateMap() is called.
2681 */
2682static void txLazyCommit(struct tblock * tblk)
2683{
2684	struct jfs_log *log;
2685
2686	while (((tblk->flag & tblkGC_READY) == 0) &&
2687	       ((tblk->flag & tblkGC_UNLOCKED) == 0)) {
2688		/* We must have gotten ahead of the user thread
2689		 */
2690		jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk);
2691		yield();
2692	}
2693
2694	jfs_info("txLazyCommit: processing tblk 0x%p", tblk);
2695
2696	txUpdateMap(tblk);
2697
2698	log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
2699
2700	spin_lock_irq(&log->gclock);	// LOGGC_LOCK
2701
2702	tblk->flag |= tblkGC_COMMITTED;
2703
2704	if (tblk->flag & tblkGC_READY)
2705		log->gcrtc--;
2706
2707	wake_up_all(&tblk->gcwait);	// LOGGC_WAKEUP
2708
2709	/*
2710	 * Can't release log->gclock until we've tested tblk->flag
2711	 */
2712	if (tblk->flag & tblkGC_LAZY) {
2713		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
2714		txUnlock(tblk);
2715		tblk->flag &= ~tblkGC_LAZY;
2716		txEnd(tblk - TxBlock);	/* Convert back to tid */
2717	} else
2718		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
2719
2720	jfs_info("txLazyCommit: done: tblk = 0x%p", tblk);
2721}
2722
2723/*
2724 *	jfs_lazycommit(void)
2725 *
2726 *	To be run as a kernel daemon.  Since lbmIODone may be called in
2727 *	interrupt context, or where blocking is not wanted, this routine
2728 *	processes committed transactions from the unlock queue on its behalf.
2729 */
2730int jfs_lazycommit(void *arg)
2731{
2732	int WorkDone;
2733	struct tblock *tblk;
2734	unsigned long flags;
2735	struct jfs_sb_info *sbi;
2736
2737	do {
2738		LAZY_LOCK(flags);
2739		jfs_commit_thread_waking = 0;	/* OK to wake another thread */
2740		while (!list_empty(&TxAnchor.unlock_queue)) {
2741			WorkDone = 0;
2742			list_for_each_entry(tblk, &TxAnchor.unlock_queue,
2743					    cqueue) {
2744
2745				sbi = JFS_SBI(tblk->sb);
2746				/*
2747				 * For each volume, the transactions must be
2748				 * handled in order.  If another commit thread
2749				 * is handling a tblk for this superblock,
2750				 * skip it
2751				 */
2752				if (sbi->commit_state & IN_LAZYCOMMIT)
2753					continue;
2754
2755				sbi->commit_state |= IN_LAZYCOMMIT;
2756				WorkDone = 1;
2757
2758				/*
2759				 * Remove transaction from queue
2760				 */
2761				list_del(&tblk->cqueue);
2762
2763				LAZY_UNLOCK(flags);
2764				txLazyCommit(tblk);
2765				LAZY_LOCK(flags);
2766
2767				sbi->commit_state &= ~IN_LAZYCOMMIT;
2768				/*
2769				 * Don't continue in the for loop.  (We can't
2770				 * anyway, it's unsafe!)  We want to go back to
2771				 * the beginning of the list.
2772				 */
2773				break;
2774			}
2775
2776			/* If there was nothing to do, don't continue */
2777			if (!WorkDone)
2778				break;
2779		}
2780		/* In case a wakeup came while all threads were active */
2781		jfs_commit_thread_waking = 0;
2782
2783		if (freezing(current)) {
2784			LAZY_UNLOCK(flags);
2785			try_to_freeze();
2786		} else {
2787			DECLARE_WAITQUEUE(wq, current);
2788
2789			add_wait_queue(&jfs_commit_thread_wait, &wq);
2790			set_current_state(TASK_INTERRUPTIBLE);
2791			LAZY_UNLOCK(flags);
2792			schedule();
2793			remove_wait_queue(&jfs_commit_thread_wait, &wq);
2794		}
2795	} while (!kthread_should_stop());
2796
2797	if (!list_empty(&TxAnchor.unlock_queue))
2798		jfs_err("jfs_lazycommit being killed w/pending transactions!");
2799	else
2800		jfs_info("jfs_lazycommit being killed");
2801	return 0;
2802}
2803
2804void txLazyUnlock(struct tblock * tblk)
2805{
2806	unsigned long flags;
2807
2808	LAZY_LOCK(flags);
2809
2810	list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue);
2811	/*
2812	 * Don't wake up a commit thread if there is already one servicing
2813	 * this superblock, or if the last one we woke up hasn't started yet.
2814	 */
2815	if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) &&
2816	    !jfs_commit_thread_waking) {
2817		jfs_commit_thread_waking = 1;
2818		wake_up(&jfs_commit_thread_wait);
2819	}
2820	LAZY_UNLOCK(flags);
2821}
2822
2823static void LogSyncRelease(struct metapage * mp)
2824{
2825	struct jfs_log *log = mp->log;
2826
2827	assert(mp->nohomeok);
2828	assert(log);
2829	metapage_homeok(mp);
2830}
2831
2832/*
2833 *	txQuiesce
2834 *
2835 *	Block all new transactions and push anonymous transactions to
2836 *	completion
2837 *
2838 *	This does almost the same thing as jfs_sync below.  We don't
2839 *	worry about deadlocking when jfs_tlocks_low is set, since we would
2840 *	expect jfs_sync to get us out of that jam.
2841 */
2842void txQuiesce(struct super_block *sb)
2843{
2844	struct inode *ip;
2845	struct jfs_inode_info *jfs_ip;
2846	struct jfs_log *log = JFS_SBI(sb)->log;
2847	tid_t tid;
2848
2849	set_bit(log_QUIESCE, &log->flag);
2850
2851	TXN_LOCK();
2852restart:
2853	while (!list_empty(&TxAnchor.anon_list)) {
2854		jfs_ip = list_entry(TxAnchor.anon_list.next,
2855				    struct jfs_inode_info,
2856				    anon_inode_list);
2857		ip = &jfs_ip->vfs_inode;
2858
2859		/*
2860		 * inode will be removed from anonymous list
2861		 * when it is committed
2862		 */
2863		TXN_UNLOCK();
2864		tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
2865		mutex_lock(&jfs_ip->commit_mutex);
2866		txCommit(tid, 1, &ip, 0);
2867		txEnd(tid);
2868		mutex_unlock(&jfs_ip->commit_mutex);
2869		/*
2870		 * Just to be safe.  I don't know how
2871		 * long we can run without blocking
2872		 */
2873		cond_resched();
2874		TXN_LOCK();
2875	}
2876
2877	/*
2878	 * If jfs_sync is running in parallel, there could be some inodes
2879	 * on anon_list2.  Let's check.
2880	 */
2881	if (!list_empty(&TxAnchor.anon_list2)) {
2882		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2883		goto restart;
2884	}
2885	TXN_UNLOCK();
2886
2887	/*
2888	 * We may need to kick off the group commit
2889	 */
2890	jfs_flush_journal(log, 0);
2891}
2892
2893/*
2894 * txResume()
2895 *
2896 * Allows transactions to start again following txQuiesce
2897 */
2898void txResume(struct super_block *sb)
2899{
2900	struct jfs_log *log = JFS_SBI(sb)->log;
2901
2902	clear_bit(log_QUIESCE, &log->flag);
2903	TXN_WAKEUP(&log->syncwait);
2904}
2905
2906/*
2907 *	jfs_sync(void)
2908 *
2909 *	To be run as a kernel daemon.  This is awakened when tlocks run low.
2910 *	We write any inodes that have anonymous tlocks so they will become
2911 *	available.
2912 */
2913int jfs_sync(void *arg)
2914{
2915	struct inode *ip;
2916	struct jfs_inode_info *jfs_ip;
2917	tid_t tid;
2918
2919	do {
2920		/*
2921		 * write each inode on the anonymous inode list
2922		 */
2923		TXN_LOCK();
2924		while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) {
2925			jfs_ip = list_entry(TxAnchor.anon_list.next,
2926					    struct jfs_inode_info,
2927					    anon_inode_list);
2928			ip = &jfs_ip->vfs_inode;
2929
2930			if (! igrab(ip)) {
2931				/*
2932				 * Inode is being freed
2933				 */
2934				list_del_init(&jfs_ip->anon_inode_list);
2935			} else if (mutex_trylock(&jfs_ip->commit_mutex)) {
2936				/*
2937				 * inode will be removed from anonymous list
2938				 * when it is committed
2939				 */
2940				TXN_UNLOCK();
2941				tid = txBegin(ip->i_sb, COMMIT_INODE);
2942				txCommit(tid, 1, &ip, 0);
2943				txEnd(tid);
2944				mutex_unlock(&jfs_ip->commit_mutex);
2945
2946				iput(ip);
2947				/*
2948				 * Just to be safe.  I don't know how
2949				 * long we can run without blocking
2950				 */
2951				cond_resched();
2952				TXN_LOCK();
2953			} else {
2954				/* We can't get the commit mutex.  It may
2955				 * be held by a thread waiting for tlock's
2956				 * so let's not block here.  Save it to
2957				 * put back on the anon_list.
2958				 */
2959
2960				/* Move from anon_list to anon_list2 */
2961				list_move(&jfs_ip->anon_inode_list,
2962					  &TxAnchor.anon_list2);
2963
2964				TXN_UNLOCK();
2965				iput(ip);
2966				TXN_LOCK();
2967			}
2968		}
2969		/* Add anon_list2 back to anon_list */
2970		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2971
2972		if (freezing(current)) {
2973			TXN_UNLOCK();
2974			try_to_freeze();
2975		} else {
2976			set_current_state(TASK_INTERRUPTIBLE);
2977			TXN_UNLOCK();
2978			schedule();
2979		}
2980	} while (!kthread_should_stop());
2981
2982	jfs_info("jfs_sync being killed");
2983	return 0;
2984}
2985
2986#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
2987int jfs_txanchor_proc_show(struct seq_file *m, void *v)
2988{
2989	char *freewait;
2990	char *freelockwait;
2991	char *lowlockwait;
2992
2993	freewait =
2994	    waitqueue_active(&TxAnchor.freewait) ? "active" : "empty";
2995	freelockwait =
2996	    waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty";
2997	lowlockwait =
2998	    waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty";
2999
3000	seq_printf(m,
3001		       "JFS TxAnchor\n"
3002		       "============\n"
3003		       "freetid = %d\n"
3004		       "freewait = %s\n"
3005		       "freelock = %d\n"
3006		       "freelockwait = %s\n"
3007		       "lowlockwait = %s\n"
3008		       "tlocksInUse = %d\n"
3009		       "jfs_tlocks_low = %d\n"
3010		       "unlock_queue is %sempty\n",
3011		       TxAnchor.freetid,
3012		       freewait,
3013		       TxAnchor.freelock,
3014		       freelockwait,
3015		       lowlockwait,
3016		       TxAnchor.tlocksInUse,
3017		       jfs_tlocks_low,
3018		       list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
3019	return 0;
3020}
3021#endif
3022
3023#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
3024int jfs_txstats_proc_show(struct seq_file *m, void *v)
3025{
3026	seq_printf(m,
3027		       "JFS TxStats\n"
3028		       "===========\n"
3029		       "calls to txBegin = %d\n"
3030		       "txBegin blocked by sync barrier = %d\n"
3031		       "txBegin blocked by tlocks low = %d\n"
3032		       "txBegin blocked by no free tid = %d\n"
3033		       "calls to txBeginAnon = %d\n"
3034		       "txBeginAnon blocked by sync barrier = %d\n"
3035		       "txBeginAnon blocked by tlocks low = %d\n"
3036		       "calls to txLockAlloc = %d\n"
3037		       "txLockAlloc blocked by no free lock = %d\n"
3038		       TxStat.txBegin,
3039		       TxStat.txBegin_barrier,
3040		       TxStat.txBegin_lockslow,
3041		       TxStat.txBegin_freetid,
3042		       TxStat.txBeginAnon,
3043		       TxStat.txBeginAnon_barrier,
3044		       TxStat.txBeginAnon_lockslow,
3045		       TxStat.txLockAlloc,
3046		       TxStat.txLockAlloc_freelock);
3047	return 0;
3048}
3049#endif
v3.1
 
   1/*
   2 *   Copyright (C) International Business Machines Corp., 2000-2005
   3 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   4 *
   5 *   This program is free software;  you can redistribute it and/or modify
   6 *   it under the terms of the GNU General Public License as published by
   7 *   the Free Software Foundation; either version 2 of the License, or
   8 *   (at your option) any later version.
   9 *
  10 *   This program is distributed in the hope that it will be useful,
  11 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  13 *   the GNU General Public License for more details.
  14 *
  15 *   You should have received a copy of the GNU General Public License
  16 *   along with this program;  if not, write to the Free Software
  17 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18 */
  19
  20/*
  21 *	jfs_txnmgr.c: transaction manager
  22 *
  23 * notes:
  24 * transaction starts with txBegin() and ends with txCommit()
  25 * or txAbort().
  26 *
  27 * tlock is acquired at the time of update;
  28 * (obviate scan at commit time for xtree and dtree)
  29 * tlock and mp points to each other;
  30 * (no hashlist for mp -> tlock).
  31 *
  32 * special cases:
  33 * tlock on in-memory inode:
  34 * in-place tlock in the in-memory inode itself;
  35 * converted to page lock by iWrite() at commit time.
  36 *
  37 * tlock during write()/mmap() under anonymous transaction (tid = 0):
  38 * transferred (?) to transaction at commit time.
  39 *
  40 * use the page itself to update allocation maps
  41 * (obviate intermediate replication of allocation/deallocation data)
  42 * hold on to mp+lock thru update of maps
  43 */
  44
  45#include <linux/fs.h>
  46#include <linux/vmalloc.h>
  47#include <linux/completion.h>
  48#include <linux/freezer.h>
  49#include <linux/module.h>
  50#include <linux/moduleparam.h>
  51#include <linux/kthread.h>
  52#include <linux/seq_file.h>
  53#include "jfs_incore.h"
  54#include "jfs_inode.h"
  55#include "jfs_filsys.h"
  56#include "jfs_metapage.h"
  57#include "jfs_dinode.h"
  58#include "jfs_imap.h"
  59#include "jfs_dmap.h"
  60#include "jfs_superblock.h"
  61#include "jfs_debug.h"
  62
  63/*
  64 *	transaction management structures
  65 */
  66static struct {
  67	int freetid;		/* index of a free tid structure */
  68	int freelock;		/* index first free lock word */
  69	wait_queue_head_t freewait;	/* eventlist of free tblock */
  70	wait_queue_head_t freelockwait;	/* eventlist of free tlock */
  71	wait_queue_head_t lowlockwait;	/* eventlist of ample tlocks */
  72	int tlocksInUse;	/* Number of tlocks in use */
  73	spinlock_t LazyLock;	/* synchronize sync_queue & unlock_queue */
  74/*	struct tblock *sync_queue; * Transactions waiting for data sync */
  75	struct list_head unlock_queue;	/* Txns waiting to be released */
  76	struct list_head anon_list;	/* inodes having anonymous txns */
  77	struct list_head anon_list2;	/* inodes having anonymous txns
  78					   that couldn't be sync'ed */
  79} TxAnchor;
  80
  81int jfs_tlocks_low;		/* Indicates low number of available tlocks */
  82
  83#ifdef CONFIG_JFS_STATISTICS
  84static struct {
  85	uint txBegin;
  86	uint txBegin_barrier;
  87	uint txBegin_lockslow;
  88	uint txBegin_freetid;
  89	uint txBeginAnon;
  90	uint txBeginAnon_barrier;
  91	uint txBeginAnon_lockslow;
  92	uint txLockAlloc;
  93	uint txLockAlloc_freelock;
  94} TxStat;
  95#endif
  96
  97static int nTxBlock = -1;	/* number of transaction blocks */
  98module_param(nTxBlock, int, 0);
  99MODULE_PARM_DESC(nTxBlock,
 100		 "Number of transaction blocks (max:65536)");
 101
 102static int nTxLock = -1;	/* number of transaction locks */
 103module_param(nTxLock, int, 0);
 104MODULE_PARM_DESC(nTxLock,
 105		 "Number of transaction locks (max:65536)");
 106
 107struct tblock *TxBlock;	/* transaction block table */
 108static int TxLockLWM;	/* Low water mark for number of txLocks used */
 109static int TxLockHWM;	/* High water mark for number of txLocks used */
 110static int TxLockVHWM;	/* Very High water mark */
 111struct tlock *TxLock;	/* transaction lock table */
 112
 113/*
 114 *	transaction management lock
 115 */
 116static DEFINE_SPINLOCK(jfsTxnLock);
 117
 118#define TXN_LOCK()		spin_lock(&jfsTxnLock)
 119#define TXN_UNLOCK()		spin_unlock(&jfsTxnLock)
 120
 121#define LAZY_LOCK_INIT()	spin_lock_init(&TxAnchor.LazyLock);
 122#define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
 123#define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)
 124
 125static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
 126static int jfs_commit_thread_waking;
 127
 128/*
 129 * Retry logic exist outside these macros to protect from spurrious wakeups.
 130 */
 131static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
 132{
 133	DECLARE_WAITQUEUE(wait, current);
 134
 135	add_wait_queue(event, &wait);
 136	set_current_state(TASK_UNINTERRUPTIBLE);
 137	TXN_UNLOCK();
 138	io_schedule();
 139	__set_current_state(TASK_RUNNING);
 140	remove_wait_queue(event, &wait);
 141}
 142
 143#define TXN_SLEEP(event)\
 144{\
 145	TXN_SLEEP_DROP_LOCK(event);\
 146	TXN_LOCK();\
 147}
 148
 149#define TXN_WAKEUP(event) wake_up_all(event)
 150
 151/*
 152 *	statistics
 153 */
 154static struct {
 155	tid_t maxtid;		/* 4: biggest tid ever used */
 156	lid_t maxlid;		/* 4: biggest lid ever used */
 157	int ntid;		/* 4: # of transactions performed */
 158	int nlid;		/* 4: # of tlocks acquired */
 159	int waitlock;		/* 4: # of tlock wait */
 160} stattx;
 161
 162/*
 163 * forward references
 164 */
 165static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 166		struct tlock * tlck, struct commit * cd);
 167static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 168		struct tlock * tlck);
 169static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 170		struct tlock * tlck);
 171static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 172		struct tlock * tlck);
 173static void txAllocPMap(struct inode *ip, struct maplock * maplock,
 174		struct tblock * tblk);
 175static void txForce(struct tblock * tblk);
 176static int txLog(struct jfs_log * log, struct tblock * tblk,
 177		struct commit * cd);
 178static void txUpdateMap(struct tblock * tblk);
 179static void txRelease(struct tblock * tblk);
 180static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 181	   struct tlock * tlck);
 182static void LogSyncRelease(struct metapage * mp);
 183
 184/*
 185 *		transaction block/lock management
 186 *		---------------------------------
 187 */
 188
 189/*
 190 * Get a transaction lock from the free list.  If the number in use is
 191 * greater than the high water mark, wake up the sync daemon.  This should
 192 * free some anonymous transaction locks.  (TXN_LOCK must be held.)
 193 */
 194static lid_t txLockAlloc(void)
 195{
 196	lid_t lid;
 197
 198	INCREMENT(TxStat.txLockAlloc);
 199	if (!TxAnchor.freelock) {
 200		INCREMENT(TxStat.txLockAlloc_freelock);
 201	}
 202
 203	while (!(lid = TxAnchor.freelock))
 204		TXN_SLEEP(&TxAnchor.freelockwait);
 205	TxAnchor.freelock = TxLock[lid].next;
 206	HIGHWATERMARK(stattx.maxlid, lid);
 207	if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) {
 208		jfs_info("txLockAlloc tlocks low");
 209		jfs_tlocks_low = 1;
 210		wake_up_process(jfsSyncThread);
 211	}
 212
 213	return lid;
 214}
 215
 216static void txLockFree(lid_t lid)
 217{
 218	TxLock[lid].tid = 0;
 219	TxLock[lid].next = TxAnchor.freelock;
 220	TxAnchor.freelock = lid;
 221	TxAnchor.tlocksInUse--;
 222	if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) {
 223		jfs_info("txLockFree jfs_tlocks_low no more");
 224		jfs_tlocks_low = 0;
 225		TXN_WAKEUP(&TxAnchor.lowlockwait);
 226	}
 227	TXN_WAKEUP(&TxAnchor.freelockwait);
 228}
 229
 230/*
 231 * NAME:	txInit()
 232 *
 233 * FUNCTION:	initialize transaction management structures
 234 *
 235 * RETURN:
 236 *
 237 * serialization: single thread at jfs_init()
 238 */
 239int txInit(void)
 240{
 241	int k, size;
 242	struct sysinfo si;
 243
 244	/* Set defaults for nTxLock and nTxBlock if unset */
 245
 246	if (nTxLock == -1) {
 247		if (nTxBlock == -1) {
 248			/* Base default on memory size */
 249			si_meminfo(&si);
 250			if (si.totalram > (256 * 1024)) /* 1 GB */
 251				nTxLock = 64 * 1024;
 252			else
 253				nTxLock = si.totalram >> 2;
 254		} else if (nTxBlock > (8 * 1024))
 255			nTxLock = 64 * 1024;
 256		else
 257			nTxLock = nTxBlock << 3;
 258	}
 259	if (nTxBlock == -1)
 260		nTxBlock = nTxLock >> 3;
 261
 262	/* Verify tunable parameters */
 263	if (nTxBlock < 16)
 264		nTxBlock = 16;	/* No one should set it this low */
 265	if (nTxBlock > 65536)
 266		nTxBlock = 65536;
 267	if (nTxLock < 256)
 268		nTxLock = 256;	/* No one should set it this low */
 269	if (nTxLock > 65536)
 270		nTxLock = 65536;
 271
 272	printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n",
 273	       nTxBlock, nTxLock);
 274	/*
 275	 * initialize transaction block (tblock) table
 276	 *
 277	 * transaction id (tid) = tblock index
 278	 * tid = 0 is reserved.
 279	 */
 280	TxLockLWM = (nTxLock * 4) / 10;
 281	TxLockHWM = (nTxLock * 7) / 10;
 282	TxLockVHWM = (nTxLock * 8) / 10;
 283
 284	size = sizeof(struct tblock) * nTxBlock;
 285	TxBlock = vmalloc(size);
 286	if (TxBlock == NULL)
 287		return -ENOMEM;
 288
 289	for (k = 1; k < nTxBlock - 1; k++) {
 290		TxBlock[k].next = k + 1;
 291		init_waitqueue_head(&TxBlock[k].gcwait);
 292		init_waitqueue_head(&TxBlock[k].waitor);
 293	}
 294	TxBlock[k].next = 0;
 295	init_waitqueue_head(&TxBlock[k].gcwait);
 296	init_waitqueue_head(&TxBlock[k].waitor);
 297
 298	TxAnchor.freetid = 1;
 299	init_waitqueue_head(&TxAnchor.freewait);
 300
 301	stattx.maxtid = 1;	/* statistics */
 302
 303	/*
 304	 * initialize transaction lock (tlock) table
 305	 *
 306	 * transaction lock id = tlock index
 307	 * tlock id = 0 is reserved.
 308	 */
 309	size = sizeof(struct tlock) * nTxLock;
 310	TxLock = vmalloc(size);
 311	if (TxLock == NULL) {
 312		vfree(TxBlock);
 313		return -ENOMEM;
 314	}
 315
 316	/* initialize tlock table */
 317	for (k = 1; k < nTxLock - 1; k++)
 318		TxLock[k].next = k + 1;
 319	TxLock[k].next = 0;
 320	init_waitqueue_head(&TxAnchor.freelockwait);
 321	init_waitqueue_head(&TxAnchor.lowlockwait);
 322
 323	TxAnchor.freelock = 1;
 324	TxAnchor.tlocksInUse = 0;
 325	INIT_LIST_HEAD(&TxAnchor.anon_list);
 326	INIT_LIST_HEAD(&TxAnchor.anon_list2);
 327
 328	LAZY_LOCK_INIT();
 329	INIT_LIST_HEAD(&TxAnchor.unlock_queue);
 330
 331	stattx.maxlid = 1;	/* statistics */
 332
 333	return 0;
 334}
 335
 336/*
 337 * NAME:	txExit()
 338 *
 339 * FUNCTION:	clean up when module is unloaded
 340 */
 341void txExit(void)
 342{
 343	vfree(TxLock);
 344	TxLock = NULL;
 345	vfree(TxBlock);
 346	TxBlock = NULL;
 347}
 348
 349/*
 350 * NAME:	txBegin()
 351 *
 352 * FUNCTION:	start a transaction.
 353 *
 354 * PARAMETER:	sb	- superblock
 355 *		flag	- force for nested tx;
 356 *
 357 * RETURN:	tid	- transaction id
 358 *
 359 * note: flag force allows to start tx for nested tx
 360 * to prevent deadlock on logsync barrier;
 361 */
 362tid_t txBegin(struct super_block *sb, int flag)
 363{
 364	tid_t t;
 365	struct tblock *tblk;
 366	struct jfs_log *log;
 367
 368	jfs_info("txBegin: flag = 0x%x", flag);
 369	log = JFS_SBI(sb)->log;
 370
 371	TXN_LOCK();
 372
 373	INCREMENT(TxStat.txBegin);
 374
 375      retry:
 376	if (!(flag & COMMIT_FORCE)) {
 377		/*
 378		 * synchronize with logsync barrier
 379		 */
 380		if (test_bit(log_SYNCBARRIER, &log->flag) ||
 381		    test_bit(log_QUIESCE, &log->flag)) {
 382			INCREMENT(TxStat.txBegin_barrier);
 383			TXN_SLEEP(&log->syncwait);
 384			goto retry;
 385		}
 386	}
 387	if (flag == 0) {
 388		/*
 389		 * Don't begin transaction if we're getting starved for tlocks
 390		 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
 391		 * free tlocks)
 392		 */
 393		if (TxAnchor.tlocksInUse > TxLockVHWM) {
 394			INCREMENT(TxStat.txBegin_lockslow);
 395			TXN_SLEEP(&TxAnchor.lowlockwait);
 396			goto retry;
 397		}
 398	}
 399
 400	/*
 401	 * allocate transaction id/block
 402	 */
 403	if ((t = TxAnchor.freetid) == 0) {
 404		jfs_info("txBegin: waiting for free tid");
 405		INCREMENT(TxStat.txBegin_freetid);
 406		TXN_SLEEP(&TxAnchor.freewait);
 407		goto retry;
 408	}
 409
 410	tblk = tid_to_tblock(t);
 411
 412	if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) {
 413		/* Don't let a non-forced transaction take the last tblk */
 414		jfs_info("txBegin: waiting for free tid");
 415		INCREMENT(TxStat.txBegin_freetid);
 416		TXN_SLEEP(&TxAnchor.freewait);
 417		goto retry;
 418	}
 419
 420	TxAnchor.freetid = tblk->next;
 421
 422	/*
 423	 * initialize transaction
 424	 */
 425
 426	/*
 427	 * We can't zero the whole thing or we screw up another thread being
 428	 * awakened after sleeping on tblk->waitor
 429	 *
 430	 * memset(tblk, 0, sizeof(struct tblock));
 431	 */
 432	tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0;
 433
 434	tblk->sb = sb;
 435	++log->logtid;
 436	tblk->logtid = log->logtid;
 437
 438	++log->active;
 439
 440	HIGHWATERMARK(stattx.maxtid, t);	/* statistics */
 441	INCREMENT(stattx.ntid);	/* statistics */
 442
 443	TXN_UNLOCK();
 444
 445	jfs_info("txBegin: returning tid = %d", t);
 446
 447	return t;
 448}
 449
 450/*
 451 * NAME:	txBeginAnon()
 452 *
 453 * FUNCTION:	start an anonymous transaction.
 454 *		Blocks if logsync or available tlocks are low to prevent
 455 *		anonymous tlocks from depleting supply.
 456 *
 457 * PARAMETER:	sb	- superblock
 458 *
 459 * RETURN:	none
 460 */
 461void txBeginAnon(struct super_block *sb)
 462{
 463	struct jfs_log *log;
 464
 465	log = JFS_SBI(sb)->log;
 466
 467	TXN_LOCK();
 468	INCREMENT(TxStat.txBeginAnon);
 469
 470      retry:
 471	/*
 472	 * synchronize with logsync barrier
 473	 */
 474	if (test_bit(log_SYNCBARRIER, &log->flag) ||
 475	    test_bit(log_QUIESCE, &log->flag)) {
 476		INCREMENT(TxStat.txBeginAnon_barrier);
 477		TXN_SLEEP(&log->syncwait);
 478		goto retry;
 479	}
 480
 481	/*
 482	 * Don't begin transaction if we're getting starved for tlocks
 483	 */
 484	if (TxAnchor.tlocksInUse > TxLockVHWM) {
 485		INCREMENT(TxStat.txBeginAnon_lockslow);
 486		TXN_SLEEP(&TxAnchor.lowlockwait);
 487		goto retry;
 488	}
 489	TXN_UNLOCK();
 490}
 491
 492/*
 493 *	txEnd()
 494 *
 495 * function: free specified transaction block.
 496 *
 497 *	logsync barrier processing:
 498 *
 499 * serialization:
 500 */
 501void txEnd(tid_t tid)
 502{
 503	struct tblock *tblk = tid_to_tblock(tid);
 504	struct jfs_log *log;
 505
 506	jfs_info("txEnd: tid = %d", tid);
 507	TXN_LOCK();
 508
 509	/*
 510	 * wakeup transactions waiting on the page locked
 511	 * by the current transaction
 512	 */
 513	TXN_WAKEUP(&tblk->waitor);
 514
 515	log = JFS_SBI(tblk->sb)->log;
 516
 517	/*
 518	 * Lazy commit thread can't free this guy until we mark it UNLOCKED,
 519	 * otherwise, we would be left with a transaction that may have been
 520	 * reused.
 521	 *
 522	 * Lazy commit thread will turn off tblkGC_LAZY before calling this
 523	 * routine.
 524	 */
 525	if (tblk->flag & tblkGC_LAZY) {
 526		jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk);
 527		TXN_UNLOCK();
 528
 529		spin_lock_irq(&log->gclock);	// LOGGC_LOCK
 530		tblk->flag |= tblkGC_UNLOCKED;
 531		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
 532		return;
 533	}
 534
 535	jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk);
 536
 537	assert(tblk->next == 0);
 538
 539	/*
 540	 * insert tblock back on freelist
 541	 */
 542	tblk->next = TxAnchor.freetid;
 543	TxAnchor.freetid = tid;
 544
 545	/*
 546	 * mark the tblock not active
 547	 */
 548	if (--log->active == 0) {
 549		clear_bit(log_FLUSH, &log->flag);
 550
 551		/*
 552		 * synchronize with logsync barrier
 553		 */
 554		if (test_bit(log_SYNCBARRIER, &log->flag)) {
 555			TXN_UNLOCK();
 556
 557			/* write dirty metadata & forward log syncpt */
 558			jfs_syncpt(log, 1);
 559
 560			jfs_info("log barrier off: 0x%x", log->lsn);
 561
 562			/* enable new transactions start */
 563			clear_bit(log_SYNCBARRIER, &log->flag);
 564
 565			/* wakeup all waitors for logsync barrier */
 566			TXN_WAKEUP(&log->syncwait);
 567
 568			goto wakeup;
 569		}
 570	}
 571
 572	TXN_UNLOCK();
 573wakeup:
 574	/*
 575	 * wakeup all waitors for a free tblock
 576	 */
 577	TXN_WAKEUP(&TxAnchor.freewait);
 578}
 579
 580/*
 581 *	txLock()
 582 *
 583 * function: acquire a transaction lock on the specified <mp>
 584 *
 585 * parameter:
 586 *
 587 * return:	transaction lock id
 588 *
 589 * serialization:
 590 */
 591struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 592		     int type)
 593{
 594	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 595	int dir_xtree = 0;
 596	lid_t lid;
 597	tid_t xtid;
 598	struct tlock *tlck;
 599	struct xtlock *xtlck;
 600	struct linelock *linelock;
 601	xtpage_t *p;
 602	struct tblock *tblk;
 603
 604	TXN_LOCK();
 605
 606	if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) &&
 607	    !(mp->xflag & COMMIT_PAGE)) {
 608		/*
 609		 * Directory inode is special.  It can have both an xtree tlock
 610		 * and a dtree tlock associated with it.
 611		 */
 612		dir_xtree = 1;
 613		lid = jfs_ip->xtlid;
 614	} else
 615		lid = mp->lid;
 616
 617	/* is page not locked by a transaction ? */
 618	if (lid == 0)
 619		goto allocateLock;
 620
 621	jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid);
 622
 623	/* is page locked by the requester transaction ? */
 624	tlck = lid_to_tlock(lid);
 625	if ((xtid = tlck->tid) == tid) {
 626		TXN_UNLOCK();
 627		goto grantLock;
 628	}
 629
 630	/*
 631	 * is page locked by anonymous transaction/lock ?
 632	 *
 633	 * (page update without transaction (i.e., file write) is
 634	 * locked under anonymous transaction tid = 0:
 635	 * anonymous tlocks maintained on anonymous tlock list of
 636	 * the inode of the page and available to all anonymous
 637	 * transactions until txCommit() time at which point
 638	 * they are transferred to the transaction tlock list of
 639	 * the committing transaction of the inode)
 640	 */
 641	if (xtid == 0) {
 642		tlck->tid = tid;
 643		TXN_UNLOCK();
 644		tblk = tid_to_tblock(tid);
 645		/*
 646		 * The order of the tlocks in the transaction is important
 647		 * (during truncate, child xtree pages must be freed before
 648		 * parent's tlocks change the working map).
 649		 * Take tlock off anonymous list and add to tail of
 650		 * transaction list
 651		 *
 652		 * Note:  We really need to get rid of the tid & lid and
 653		 * use list_head's.  This code is getting UGLY!
 654		 */
 655		if (jfs_ip->atlhead == lid) {
 656			if (jfs_ip->atltail == lid) {
 657				/* only anonymous txn.
 658				 * Remove from anon_list
 659				 */
 660				TXN_LOCK();
 661				list_del_init(&jfs_ip->anon_inode_list);
 662				TXN_UNLOCK();
 663			}
 664			jfs_ip->atlhead = tlck->next;
 665		} else {
 666			lid_t last;
 667			for (last = jfs_ip->atlhead;
 668			     lid_to_tlock(last)->next != lid;
 669			     last = lid_to_tlock(last)->next) {
 670				assert(last);
 671			}
 672			lid_to_tlock(last)->next = tlck->next;
 673			if (jfs_ip->atltail == lid)
 674				jfs_ip->atltail = last;
 675		}
 676
 677		/* insert the tlock at tail of transaction tlock list */
 678
 679		if (tblk->next)
 680			lid_to_tlock(tblk->last)->next = lid;
 681		else
 682			tblk->next = lid;
 683		tlck->next = 0;
 684		tblk->last = lid;
 685
 686		goto grantLock;
 687	}
 688
 689	goto waitLock;
 690
 691	/*
 692	 * allocate a tlock
 693	 */
 694      allocateLock:
 695	lid = txLockAlloc();
 696	tlck = lid_to_tlock(lid);
 697
 698	/*
 699	 * initialize tlock
 700	 */
 701	tlck->tid = tid;
 702
 703	TXN_UNLOCK();
 704
 705	/* mark tlock for meta-data page */
 706	if (mp->xflag & COMMIT_PAGE) {
 707
 708		tlck->flag = tlckPAGELOCK;
 709
 710		/* mark the page dirty and nohomeok */
 711		metapage_nohomeok(mp);
 712
 713		jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
 714			 mp, mp->nohomeok, tid, tlck);
 715
 716		/* if anonymous transaction, and buffer is on the group
 717		 * commit synclist, mark inode to show this.  This will
 718		 * prevent the buffer from being marked nohomeok for too
 719		 * long a time.
 720		 */
 721		if ((tid == 0) && mp->lsn)
 722			set_cflag(COMMIT_Synclist, ip);
 723	}
 724	/* mark tlock for in-memory inode */
 725	else
 726		tlck->flag = tlckINODELOCK;
 727
 728	if (S_ISDIR(ip->i_mode))
 729		tlck->flag |= tlckDIRECTORY;
 730
 731	tlck->type = 0;
 732
 733	/* bind the tlock and the page */
 734	tlck->ip = ip;
 735	tlck->mp = mp;
 736	if (dir_xtree)
 737		jfs_ip->xtlid = lid;
 738	else
 739		mp->lid = lid;
 740
 741	/*
 742	 * enqueue transaction lock to transaction/inode
 743	 */
 744	/* insert the tlock at tail of transaction tlock list */
 745	if (tid) {
 746		tblk = tid_to_tblock(tid);
 747		if (tblk->next)
 748			lid_to_tlock(tblk->last)->next = lid;
 749		else
 750			tblk->next = lid;
 751		tlck->next = 0;
 752		tblk->last = lid;
 753	}
 754	/* anonymous transaction:
 755	 * insert the tlock at head of inode anonymous tlock list
 756	 */
 757	else {
 758		tlck->next = jfs_ip->atlhead;
 759		jfs_ip->atlhead = lid;
 760		if (tlck->next == 0) {
 761			/* This inode's first anonymous transaction */
 762			jfs_ip->atltail = lid;
 763			TXN_LOCK();
 764			list_add_tail(&jfs_ip->anon_inode_list,
 765				      &TxAnchor.anon_list);
 766			TXN_UNLOCK();
 767		}
 768	}
 769
 770	/* initialize type dependent area for linelock */
 771	linelock = (struct linelock *) & tlck->lock;
 772	linelock->next = 0;
 773	linelock->flag = tlckLINELOCK;
 774	linelock->maxcnt = TLOCKSHORT;
 775	linelock->index = 0;
 776
 777	switch (type & tlckTYPE) {
 778	case tlckDTREE:
 779		linelock->l2linesize = L2DTSLOTSIZE;
 780		break;
 781
 782	case tlckXTREE:
 783		linelock->l2linesize = L2XTSLOTSIZE;
 784
 785		xtlck = (struct xtlock *) linelock;
 786		xtlck->header.offset = 0;
 787		xtlck->header.length = 2;
 788
 789		if (type & tlckNEW) {
 790			xtlck->lwm.offset = XTENTRYSTART;
 791		} else {
 792			if (mp->xflag & COMMIT_PAGE)
 793				p = (xtpage_t *) mp->data;
 794			else
 795				p = &jfs_ip->i_xtroot;
 796			xtlck->lwm.offset =
 797			    le16_to_cpu(p->header.nextindex);
 798		}
 799		xtlck->lwm.length = 0;	/* ! */
 800		xtlck->twm.offset = 0;
 801		xtlck->hwm.offset = 0;
 802
 803		xtlck->index = 2;
 804		break;
 805
 806	case tlckINODE:
 807		linelock->l2linesize = L2INODESLOTSIZE;
 808		break;
 809
 810	case tlckDATA:
 811		linelock->l2linesize = L2DATASLOTSIZE;
 812		break;
 813
 814	default:
 815		jfs_err("UFO tlock:0x%p", tlck);
 816	}
 817
 818	/*
 819	 * update tlock vector
 820	 */
 821      grantLock:
 822	tlck->type |= type;
 823
 824	return tlck;
 825
 826	/*
 827	 * page is being locked by another transaction:
 828	 */
 829      waitLock:
 830	/* Only locks on ipimap or ipaimap should reach here */
 831	/* assert(jfs_ip->fileset == AGGREGATE_I); */
 832	if (jfs_ip->fileset != AGGREGATE_I) {
 833		printk(KERN_ERR "txLock: trying to lock locked page!");
 834		print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
 835			       ip, sizeof(*ip), 0);
 836		print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
 837			       mp, sizeof(*mp), 0);
 838		print_hex_dump(KERN_ERR, "Locker's tblock: ",
 839			       DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
 840			       sizeof(struct tblock), 0);
 841		print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
 842			       tlck, sizeof(*tlck), 0);
 843		BUG();
 844	}
 845	INCREMENT(stattx.waitlock);	/* statistics */
 846	TXN_UNLOCK();
 847	release_metapage(mp);
 848	TXN_LOCK();
 849	xtid = tlck->tid;	/* reacquire after dropping TXN_LOCK */
 850
 851	jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
 852		 tid, xtid, lid);
 853
 854	/* Recheck everything since dropping TXN_LOCK */
 855	if (xtid && (tlck->mp == mp) && (mp->lid == lid))
 856		TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
 857	else
 858		TXN_UNLOCK();
 859	jfs_info("txLock: awakened     tid = %d, lid = %d", tid, lid);
 860
 861	return NULL;
 862}
 863
 864/*
 865 * NAME:	txRelease()
 866 *
 867 * FUNCTION:	Release buffers associated with transaction locks, but don't
 868 *		mark homeok yet.  The allows other transactions to modify
 869 *		buffers, but won't let them go to disk until commit record
 870 *		actually gets written.
 871 *
 872 * PARAMETER:
 873 *		tblk	-
 874 *
 875 * RETURN:	Errors from subroutines.
 876 */
 877static void txRelease(struct tblock * tblk)
 878{
 879	struct metapage *mp;
 880	lid_t lid;
 881	struct tlock *tlck;
 882
 883	TXN_LOCK();
 884
 885	for (lid = tblk->next; lid; lid = tlck->next) {
 886		tlck = lid_to_tlock(lid);
 887		if ((mp = tlck->mp) != NULL &&
 888		    (tlck->type & tlckBTROOT) == 0) {
 889			assert(mp->xflag & COMMIT_PAGE);
 890			mp->lid = 0;
 891		}
 892	}
 893
 894	/*
 895	 * wakeup transactions waiting on a page locked
 896	 * by the current transaction
 897	 */
 898	TXN_WAKEUP(&tblk->waitor);
 899
 900	TXN_UNLOCK();
 901}
 902
 903/*
 904 * NAME:	txUnlock()
 905 *
 906 * FUNCTION:	Initiates pageout of pages modified by tid in journalled
 907 *		objects and frees their lockwords.
 908 */
 909static void txUnlock(struct tblock * tblk)
 910{
 911	struct tlock *tlck;
 912	struct linelock *linelock;
 913	lid_t lid, next, llid, k;
 914	struct metapage *mp;
 915	struct jfs_log *log;
 916	int difft, diffp;
 917	unsigned long flags;
 918
 919	jfs_info("txUnlock: tblk = 0x%p", tblk);
 920	log = JFS_SBI(tblk->sb)->log;
 921
 922	/*
 923	 * mark page under tlock homeok (its log has been written):
 924	 */
 925	for (lid = tblk->next; lid; lid = next) {
 926		tlck = lid_to_tlock(lid);
 927		next = tlck->next;
 928
 929		jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck);
 930
 931		/* unbind page from tlock */
 932		if ((mp = tlck->mp) != NULL &&
 933		    (tlck->type & tlckBTROOT) == 0) {
 934			assert(mp->xflag & COMMIT_PAGE);
 935
 936			/* hold buffer
 937			 */
 938			hold_metapage(mp);
 939
 940			assert(mp->nohomeok > 0);
 941			_metapage_homeok(mp);
 942
 943			/* inherit younger/larger clsn */
 944			LOGSYNC_LOCK(log, flags);
 945			if (mp->clsn) {
 946				logdiff(difft, tblk->clsn, log);
 947				logdiff(diffp, mp->clsn, log);
 948				if (difft > diffp)
 949					mp->clsn = tblk->clsn;
 950			} else
 951				mp->clsn = tblk->clsn;
 952			LOGSYNC_UNLOCK(log, flags);
 953
 954			assert(!(tlck->flag & tlckFREEPAGE));
 955
 956			put_metapage(mp);
 957		}
 958
 959		/* insert tlock, and linelock(s) of the tlock if any,
 960		 * at head of freelist
 961		 */
 962		TXN_LOCK();
 963
 964		llid = ((struct linelock *) & tlck->lock)->next;
 965		while (llid) {
 966			linelock = (struct linelock *) lid_to_tlock(llid);
 967			k = linelock->next;
 968			txLockFree(llid);
 969			llid = k;
 970		}
 971		txLockFree(lid);
 972
 973		TXN_UNLOCK();
 974	}
 975	tblk->next = tblk->last = 0;
 976
 977	/*
 978	 * remove tblock from logsynclist
 979	 * (allocation map pages inherited lsn of tblk and
 980	 * have been inserted in logsync list at txUpdateMap())
 981	 */
 982	if (tblk->lsn) {
 983		LOGSYNC_LOCK(log, flags);
 984		log->count--;
 985		list_del(&tblk->synclist);
 986		LOGSYNC_UNLOCK(log, flags);
 987	}
 988}
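
/*
 * Illustrative example of the clsn inheritance above, assuming logdiff()
 * yields the wrap-adjusted distance of an lsn from log->syncpt: with
 * log->syncpt == 100, tblk->clsn == 180 and mp->clsn == 140, difft (80)
 * exceeds diffp (40), so the page inherits the younger commit lsn 180
 * and stays on the log sync list until that later commit record
 * reaches disk.
 */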
 989
 990/*
 991 *	txMaplock()
 992 *
 993 * function: allocate a transaction lock for freed page/entry;
 994 *	for freed page, maplock is used as xtlock/dtlock type;
 995 */
 996struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 997{
 998	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 999	lid_t lid;
1000	struct tblock *tblk;
1001	struct tlock *tlck;
1002	struct maplock *maplock;
1003
1004	TXN_LOCK();
1005
1006	/*
1007	 * allocate a tlock
1008	 */
1009	lid = txLockAlloc();
1010	tlck = lid_to_tlock(lid);
1011
1012	/*
1013	 * initialize tlock
1014	 */
1015	tlck->tid = tid;
1016
1017	/* bind the tlock and the object */
1018	tlck->flag = tlckINODELOCK;
1019	if (S_ISDIR(ip->i_mode))
1020		tlck->flag |= tlckDIRECTORY;
1021	tlck->ip = ip;
1022	tlck->mp = NULL;
1023
1024	tlck->type = type;
1025
1026	/*
1027	 * enqueue transaction lock to transaction/inode
1028	 */
1029	/* insert the tlock at tail of transaction tlock list */
1030	if (tid) {
1031		tblk = tid_to_tblock(tid);
1032		if (tblk->next)
1033			lid_to_tlock(tblk->last)->next = lid;
1034		else
1035			tblk->next = lid;
1036		tlck->next = 0;
1037		tblk->last = lid;
1038	}
1039	/* anonymous transaction:
1040	 * insert the tlock at head of inode anonymous tlock list
1041	 */
1042	else {
1043		tlck->next = jfs_ip->atlhead;
1044		jfs_ip->atlhead = lid;
1045		if (tlck->next == 0) {
1046			/* This inode's first anonymous transaction */
1047			jfs_ip->atltail = lid;
1048			list_add_tail(&jfs_ip->anon_inode_list,
1049				      &TxAnchor.anon_list);
1050		}
1051	}
1052
1053	TXN_UNLOCK();
1054
1055	/* initialize type dependent area for maplock */
1056	maplock = (struct maplock *) & tlck->lock;
1057	maplock->next = 0;
1058	maplock->maxcnt = 0;
1059	maplock->index = 0;
1060
1061	return tlck;
1062}
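
/*
 * Usage sketch (illustrative; it follows the same pattern txEA() uses
 * later in this file): a caller freeing an extent obtains a maplock and
 * formats it as a free-PXD lock so that txUpdateMap() releases the
 * blocks at commit time.  xaddr/xlen stand in for the extent being
 * freed:
 *
 *	struct tlock *tlck = txMaplock(tid, ip, tlckMAP);
 *	struct pxd_lock *pxdlock = (struct pxd_lock *) &tlck->lock;
 *
 *	pxdlock->flag = mlckFREEPXD;
 *	PXDaddress(&pxdlock->pxd, xaddr);
 *	PXDlength(&pxdlock->pxd, xlen);
 *	pxdlock->index = 1;
 */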
1063
1064/*
1065 *	txLinelock()
1066 *
1067 * function: allocate a transaction lock for log vector list
1068 */
1069struct linelock *txLinelock(struct linelock * tlock)
1070{
1071	lid_t lid;
1072	struct tlock *tlck;
1073	struct linelock *linelock;
1074
1075	TXN_LOCK();
1076
1077	/* allocate a TxLock structure */
1078	lid = txLockAlloc();
1079	tlck = lid_to_tlock(lid);
1080
1081	TXN_UNLOCK();
1082
1083	/* initialize linelock */
1084	linelock = (struct linelock *) tlck;
1085	linelock->next = 0;
1086	linelock->flag = tlckLINELOCK;
1087	linelock->maxcnt = TLOCKLONG;
1088	linelock->index = 0;
1089	if (tlck->flag & tlckDIRECTORY)
1090		linelock->flag |= tlckDIRECTORY;
1091
1092	/* append linelock after tlock */
1093	linelock->next = tlock->next;
1094	tlock->next = lid;
1095
1096	return linelock;
1097}
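
/*
 * Usage sketch (illustrative): callers that run out of room in a
 * tlock's in-line linelock chain an overflow linelock and continue
 * recording modified line ranges there.  The dtree/xtree update paths
 * are believed to follow this pattern; llck and lv are illustrative
 * names:
 *
 *	if (llck->index >= llck->maxcnt)
 *		llck = txLinelock(llck);
 *	lv = &llck->lv[llck->index];
 *	lv->offset = offset;
 *	lv->length = length;
 *	llck->index++;
 */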
1098
1099/*
1100 *		transaction commit management
1101 *		-----------------------------
1102 */
1103
1104/*
1105 * NAME:	txCommit()
1106 *
1107 * FUNCTION:	commit the changes to the objects specified in
1108 *		clist.  For journalled segments only the
 1109 *		changes of the caller are committed, i.e., by tid.
 1110 *		For non-journalled segments the data are flushed to
1111 *		disk and then the change to the disk inode and indirect
1112 *		blocks committed (so blocks newly allocated to the
1113 *		segment will be made a part of the segment atomically).
1114 *
1115 *		all of the segments specified in clist must be in
 1116 *		one file system.  No more than 6 segments are needed
 1117 *		to handle all UNIX services.
1118 *
1119 *		if the i_nlink field (i.e. disk inode link count)
1120 *		is zero, and the type of inode is a regular file or
 1121 *		directory, or symbolic link, the inode is truncated
1122 *		to zero length. the truncation is committed but the
1123 *		VM resources are unaffected until it is closed (see
1124 *		iput and iclose).
1125 *
1126 * PARAMETER:
1127 *
1128 * RETURN:
1129 *
1130 * serialization:
1131 *		on entry the inode lock on each segment is assumed
1132 *		to be held.
1133 *
1134 * i/o error:
1135 */
1136int txCommit(tid_t tid,		/* transaction identifier */
1137	     int nip,		/* number of inodes to commit */
1138	     struct inode **iplist,	/* list of inode to commit */
1139	     int flag)
1140{
1141	int rc = 0;
1142	struct commit cd;
1143	struct jfs_log *log;
1144	struct tblock *tblk;
1145	struct lrd *lrd;
1146	struct inode *ip;
1147	struct jfs_inode_info *jfs_ip;
1148	int k, n;
1149	ino_t top;
1150	struct super_block *sb;
1151
1152	jfs_info("txCommit, tid = %d, flag = %d", tid, flag);
1153	/* is read-only file system ? */
1154	if (isReadOnly(iplist[0])) {
1155		rc = -EROFS;
1156		goto TheEnd;
1157	}
1158
1159	sb = cd.sb = iplist[0]->i_sb;
1160	cd.tid = tid;
1161
1162	if (tid == 0)
1163		tid = txBegin(sb, 0);
1164	tblk = tid_to_tblock(tid);
1165
1166	/*
1167	 * initialize commit structure
1168	 */
1169	log = JFS_SBI(sb)->log;
1170	cd.log = log;
1171
1172	/* initialize log record descriptor in commit */
1173	lrd = &cd.lrd;
1174	lrd->logtid = cpu_to_le32(tblk->logtid);
1175	lrd->backchain = 0;
1176
1177	tblk->xflag |= flag;
1178
1179	if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
1180		tblk->xflag |= COMMIT_LAZY;
1181	/*
1182	 *	prepare non-journaled objects for commit
1183	 *
 1184	 * flush data pages of non-journaled files
 1185	 * to prevent the file from getting uninitialized disk blocks
 1186	 * in case of a crash
 1187	 * (newly allocated blocks could otherwise expose stale data).
1188	 */
1189	cd.iplist = iplist;
1190	cd.nip = nip;
1191
1192	/*
1193	 *	acquire transaction lock on (on-disk) inodes
1194	 *
1195	 * update on-disk inode from in-memory inode
1196	 * acquiring transaction locks for AFTER records
1197	 * on the on-disk inode of file object
1198	 *
1199	 * sort the inodes array by inode number in descending order
1200	 * to prevent deadlock when acquiring transaction lock
1201	 * of on-disk inodes on multiple on-disk inode pages by
1202	 * multiple concurrent transactions
1203	 */
1204	for (k = 0; k < cd.nip; k++) {
1205		top = (cd.iplist[k])->i_ino;
1206		for (n = k + 1; n < cd.nip; n++) {
1207			ip = cd.iplist[n];
1208			if (ip->i_ino > top) {
1209				top = ip->i_ino;
1210				cd.iplist[n] = cd.iplist[k];
1211				cd.iplist[k] = ip;
1212			}
1213		}
1214
1215		ip = cd.iplist[k];
1216		jfs_ip = JFS_IP(ip);
1217
1218		/*
1219		 * BUGBUG - This code has temporarily been removed.  The
1220		 * intent is to ensure that any file data is written before
1221		 * the metadata is committed to the journal.  This prevents
1222		 * uninitialized data from appearing in a file after the
1223		 * journal has been replayed.  (The uninitialized data
1224		 * could be sensitive data removed by another user.)
1225		 *
1226		 * The problem now is that we are holding the IWRITELOCK
1227		 * on the inode, and calling filemap_fdatawrite on an
1228		 * unmapped page will cause a deadlock in jfs_get_block.
1229		 *
1230		 * The long term solution is to pare down the use of
1231		 * IWRITELOCK.  We are currently holding it too long.
1232		 * We could also be smarter about which data pages need
1233		 * to be written before the transaction is committed and
1234		 * when we don't need to worry about it at all.
1235		 *
1236		 * if ((!S_ISDIR(ip->i_mode))
1237		 *    && (tblk->flag & COMMIT_DELETE) == 0)
1238		 *	filemap_write_and_wait(ip->i_mapping);
1239		 */
1240
1241		/*
1242		 * Mark inode as not dirty.  It will still be on the dirty
1243		 * inode list, but we'll know not to commit it again unless
1244		 * it gets marked dirty again
1245		 */
1246		clear_cflag(COMMIT_Dirty, ip);
1247
1248		/* inherit anonymous tlock(s) of inode */
1249		if (jfs_ip->atlhead) {
1250			lid_to_tlock(jfs_ip->atltail)->next = tblk->next;
1251			tblk->next = jfs_ip->atlhead;
1252			if (!tblk->last)
1253				tblk->last = jfs_ip->atltail;
1254			jfs_ip->atlhead = jfs_ip->atltail = 0;
1255			TXN_LOCK();
1256			list_del_init(&jfs_ip->anon_inode_list);
1257			TXN_UNLOCK();
1258		}
1259
1260		/*
1261		 * acquire transaction lock on on-disk inode page
1262		 * (become first tlock of the tblk's tlock list)
1263		 */
1264		if (((rc = diWrite(tid, ip))))
1265			goto out;
1266	}
1267
1268	/*
1269	 *	write log records from transaction locks
1270	 *
1271	 * txUpdateMap() resets XAD_NEW in XAD.
1272	 */
1273	if ((rc = txLog(log, tblk, &cd)))
1274		goto TheEnd;
1275
1276	/*
1277	 * Ensure that inode isn't reused before
1278	 * lazy commit thread finishes processing
1279	 */
1280	if (tblk->xflag & COMMIT_DELETE) {
1281		ihold(tblk->u.ip);
1282		/*
1283		 * Avoid a rare deadlock
1284		 *
1285		 * If the inode is locked, we may be blocked in
1286		 * jfs_commit_inode.  If so, we don't want the
1287		 * lazy_commit thread doing the last iput() on the inode
1288		 * since that may block on the locked inode.  Instead,
1289		 * commit the transaction synchronously, so the last iput
1290		 * will be done by the calling thread (or later)
1291		 */
1292		/*
1293		 * I believe this code is no longer needed.  Splitting I_LOCK
1294		 * into two bits, I_NEW and I_SYNC should prevent this
1295		 * deadlock as well.  But since I don't have a JFS testload
1296		 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
1297		 * Joern
1298		 */
1299		if (tblk->u.ip->i_state & I_SYNC)
1300			tblk->xflag &= ~COMMIT_LAZY;
1301	}
1302
1303	ASSERT((!(tblk->xflag & COMMIT_DELETE)) ||
1304	       ((tblk->u.ip->i_nlink == 0) &&
1305		!test_cflag(COMMIT_Nolink, tblk->u.ip)));
1306
1307	/*
1308	 *	write COMMIT log record
1309	 */
1310	lrd->type = cpu_to_le16(LOG_COMMIT);
1311	lrd->length = 0;
1312	lmLog(log, tblk, lrd, NULL);
1313
1314	lmGroupCommit(log, tblk);
1315
1316	/*
1317	 *	- transaction is now committed -
1318	 */
1319
1320	/*
1321	 * force pages in careful update
1322	 * (imap addressing structure update)
1323	 */
1324	if (flag & COMMIT_FORCE)
1325		txForce(tblk);
1326
1327	/*
1328	 *	update allocation map.
1329	 *
1330	 * update inode allocation map and inode:
1331	 * free pager lock on memory object of inode if any.
1332	 * update block allocation map.
1333	 *
1334	 * txUpdateMap() resets XAD_NEW in XAD.
1335	 */
1336	if (tblk->xflag & COMMIT_FORCE)
1337		txUpdateMap(tblk);
1338
1339	/*
1340	 *	free transaction locks and pageout/free pages
1341	 */
1342	txRelease(tblk);
1343
1344	if ((tblk->flag & tblkGC_LAZY) == 0)
1345		txUnlock(tblk);
1346
1347
1348	/*
1349	 *	reset in-memory object state
1350	 */
1351	for (k = 0; k < cd.nip; k++) {
1352		ip = cd.iplist[k];
1353		jfs_ip = JFS_IP(ip);
1354
1355		/*
1356		 * reset in-memory inode state
1357		 */
1358		jfs_ip->bxflag = 0;
1359		jfs_ip->blid = 0;
1360	}
1361
1362      out:
1363	if (rc != 0)
1364		txAbort(tid, 1);
1365
1366      TheEnd:
1367	jfs_info("txCommit: tid = %d, returning %d", tid, rc);
1368	return rc;
1369}
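
/*
 * Caller sketch (illustrative; it is the same sequence txQuiesce() uses
 * later in this file): commit a single inode under its commit_mutex.
 *
 *	tid = txBegin(ip->i_sb, 0);
 *	mutex_lock(&JFS_IP(ip)->commit_mutex);
 *	... modify the inode, acquiring tlocks via txLock() ...
 *	rc = txCommit(tid, 1, &ip, 0);
 *	txEnd(tid);
 *	mutex_unlock(&JFS_IP(ip)->commit_mutex);
 */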
1370
1371/*
1372 * NAME:	txLog()
1373 *
1374 * FUNCTION:	Writes AFTER log records for all lines modified
1375 *		by tid for segments specified by inodes in comdata.
1376 *		Code assumes only WRITELOCKS are recorded in lockwords.
1377 *
1378 * PARAMETERS:
1379 *
1380 * RETURN :
1381 */
1382static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
1383{
1384	int rc = 0;
1385	struct inode *ip;
1386	lid_t lid;
1387	struct tlock *tlck;
1388	struct lrd *lrd = &cd->lrd;
1389
1390	/*
1391	 * write log record(s) for each tlock of transaction,
1392	 */
1393	for (lid = tblk->next; lid; lid = tlck->next) {
1394		tlck = lid_to_tlock(lid);
1395
1396		tlck->flag |= tlckLOG;
1397
1398		/* initialize lrd common */
1399		ip = tlck->ip;
1400		lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate);
1401		lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset);
1402		lrd->log.redopage.inode = cpu_to_le32(ip->i_ino);
1403
1404		/* write log record of page from the tlock */
1405		switch (tlck->type & tlckTYPE) {
1406		case tlckXTREE:
1407			xtLog(log, tblk, lrd, tlck);
1408			break;
1409
1410		case tlckDTREE:
1411			dtLog(log, tblk, lrd, tlck);
1412			break;
1413
1414		case tlckINODE:
1415			diLog(log, tblk, lrd, tlck, cd);
1416			break;
1417
1418		case tlckMAP:
1419			mapLog(log, tblk, lrd, tlck);
1420			break;
1421
1422		case tlckDATA:
1423			dataLog(log, tblk, lrd, tlck);
1424			break;
1425
1426		default:
1427			jfs_err("UFO tlock:0x%p", tlck);
1428		}
1429	}
1430
1431	return rc;
1432}
1433
1434/*
1435 *	diLog()
1436 *
1437 * function:	log inode tlock and format maplock to update bmap;
1438 */
1439static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1440		 struct tlock * tlck, struct commit * cd)
1441{
1442	int rc = 0;
1443	struct metapage *mp;
1444	pxd_t *pxd;
1445	struct pxd_lock *pxdlock;
1446
1447	mp = tlck->mp;
1448
1449	/* initialize as REDOPAGE record format */
1450	lrd->log.redopage.type = cpu_to_le16(LOG_INODE);
1451	lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE);
1452
1453	pxd = &lrd->log.redopage.pxd;
1454
1455	/*
1456	 *	inode after image
1457	 */
1458	if (tlck->type & tlckENTRY) {
1459		/* log after-image for logredo(): */
1460		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1461		PXDaddress(pxd, mp->index);
1462		PXDlength(pxd,
1463			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1464		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1465
1466		/* mark page as homeward bound */
1467		tlck->flag |= tlckWRITEPAGE;
1468	} else if (tlck->type & tlckFREE) {
1469		/*
1470		 *	free inode extent
1471		 *
1472		 * (pages of the freed inode extent have been invalidated and
1473		 * a maplock for free of the extent has been formatted at
1474		 * txLock() time);
1475		 *
1476		 * the tlock had been acquired on the inode allocation map page
1477		 * (iag) that specifies the freed extent, even though the map
1478		 * page is not itself logged, to prevent pageout of the map
1479		 * page before the log;
1480		 */
1481
1482		/* log LOG_NOREDOINOEXT of the freed inode extent for
1483		 * logredo() to start NoRedoPage filters, and to update
1484		 * imap and bmap for free of the extent;
1485		 */
1486		lrd->type = cpu_to_le16(LOG_NOREDOINOEXT);
1487		/*
1488		 * For the LOG_NOREDOINOEXT record, we need
1489		 * to pass the IAG number and inode extent
 1490		 * index (within that IAG) from which the
 1491		 * extent is being released.  These have been
 1492		 * passed to us in iplist[1] and iplist[2].
1493		 */
1494		lrd->log.noredoinoext.iagnum =
1495		    cpu_to_le32((u32) (size_t) cd->iplist[1]);
1496		lrd->log.noredoinoext.inoext_idx =
1497		    cpu_to_le32((u32) (size_t) cd->iplist[2]);
1498
1499		pxdlock = (struct pxd_lock *) & tlck->lock;
1500		*pxd = pxdlock->pxd;
1501		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1502
1503		/* update bmap */
1504		tlck->flag |= tlckUPDATEMAP;
1505
1506		/* mark page as homeward bound */
1507		tlck->flag |= tlckWRITEPAGE;
1508	} else
1509		jfs_err("diLog: UFO type tlck:0x%p", tlck);
1510#ifdef  _JFS_WIP
1511	/*
1512	 *	alloc/free external EA extent
1513	 *
1514	 * a maplock for txUpdateMap() to update bPWMAP for alloc/free
1515	 * of the extent has been formatted at txLock() time;
1516	 */
1517	else {
1518		assert(tlck->type & tlckEA);
1519
1520		/* log LOG_UPDATEMAP for logredo() to update bmap for
1521		 * alloc of new (and free of old) external EA extent;
1522		 */
1523		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1524		pxdlock = (struct pxd_lock *) & tlck->lock;
1525		nlock = pxdlock->index;
1526		for (i = 0; i < nlock; i++, pxdlock++) {
1527			if (pxdlock->flag & mlckALLOCPXD)
1528				lrd->log.updatemap.type =
1529				    cpu_to_le16(LOG_ALLOCPXD);
1530			else
1531				lrd->log.updatemap.type =
1532				    cpu_to_le16(LOG_FREEPXD);
1533			lrd->log.updatemap.nxd = cpu_to_le16(1);
1534			lrd->log.updatemap.pxd = pxdlock->pxd;
1535			lrd->backchain =
1536			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1537		}
1538
1539		/* update bmap */
1540		tlck->flag |= tlckUPDATEMAP;
1541	}
1542#endif				/* _JFS_WIP */
1543
1544	return rc;
1545}
1546
1547/*
1548 *	dataLog()
1549 *
1550 * function:	log data tlock
1551 */
1552static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1553	    struct tlock * tlck)
1554{
1555	struct metapage *mp;
1556	pxd_t *pxd;
1557
1558	mp = tlck->mp;
1559
1560	/* initialize as REDOPAGE record format */
1561	lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
1562	lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);
1563
1564	pxd = &lrd->log.redopage.pxd;
1565
1566	/* log after-image for logredo(): */
1567	lrd->type = cpu_to_le16(LOG_REDOPAGE);
1568
1569	if (jfs_dirtable_inline(tlck->ip)) {
1570		/*
 1571		 * The table has been truncated; we must have deleted
 1572		 * the last entry, so don't bother logging this.
1573		 */
1574		mp->lid = 0;
1575		grab_metapage(mp);
1576		metapage_homeok(mp);
1577		discard_metapage(mp);
1578		tlck->mp = NULL;
1579		return 0;
1580	}
1581
1582	PXDaddress(pxd, mp->index);
1583	PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);
1584
1585	lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1586
1587	/* mark page as homeward bound */
1588	tlck->flag |= tlckWRITEPAGE;
1589
1590	return 0;
1591}
1592
1593/*
1594 *	dtLog()
1595 *
1596 * function:	log dtree tlock and format maplock to update bmap;
1597 */
1598static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1599	   struct tlock * tlck)
1600{
1601	struct metapage *mp;
1602	struct pxd_lock *pxdlock;
1603	pxd_t *pxd;
1604
1605	mp = tlck->mp;
1606
1607	/* initialize as REDOPAGE/NOREDOPAGE record format */
1608	lrd->log.redopage.type = cpu_to_le16(LOG_DTREE);
1609	lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE);
1610
1611	pxd = &lrd->log.redopage.pxd;
1612
1613	if (tlck->type & tlckBTROOT)
1614		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1615
1616	/*
1617	 *	page extension via relocation: entry insertion;
1618	 *	page extension in-place: entry insertion;
1619	 *	new right page from page split, reinitialized in-line
1620	 *	root from root page split: entry insertion;
1621	 */
1622	if (tlck->type & (tlckNEW | tlckEXTEND)) {
1623		/* log after-image of the new page for logredo():
1624		 * mark log (LOG_NEW) for logredo() to initialize
1625		 * freelist and update bmap for alloc of the new page;
1626		 */
1627		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1628		if (tlck->type & tlckEXTEND)
1629			lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
1630		else
1631			lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
1632		PXDaddress(pxd, mp->index);
1633		PXDlength(pxd,
1634			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1635		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1636
1637		/* format a maplock for txUpdateMap() to update bPMAP for
1638		 * alloc of the new page;
1639		 */
1640		if (tlck->type & tlckBTROOT)
1641			return;
1642		tlck->flag |= tlckUPDATEMAP;
1643		pxdlock = (struct pxd_lock *) & tlck->lock;
1644		pxdlock->flag = mlckALLOCPXD;
1645		pxdlock->pxd = *pxd;
1646
1647		pxdlock->index = 1;
1648
1649		/* mark page as homeward bound */
1650		tlck->flag |= tlckWRITEPAGE;
1651		return;
1652	}
1653
1654	/*
1655	 *	entry insertion/deletion,
1656	 *	sibling page link update (old right page before split);
1657	 */
1658	if (tlck->type & (tlckENTRY | tlckRELINK)) {
1659		/* log after-image for logredo(): */
1660		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1661		PXDaddress(pxd, mp->index);
1662		PXDlength(pxd,
1663			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1664		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1665
1666		/* mark page as homeward bound */
1667		tlck->flag |= tlckWRITEPAGE;
1668		return;
1669	}
1670
1671	/*
1672	 *	page deletion: page has been invalidated
1673	 *	page relocation: source extent
1674	 *
1675	 *	a maplock for free of the page has been formatted
1676	 *	at txLock() time);
1677	 */
1678	if (tlck->type & (tlckFREE | tlckRELOCATE)) {
1679		/* log LOG_NOREDOPAGE of the deleted page for logredo()
1680		 * to start NoRedoPage filter and to update bmap for free
 1681		 * of the deleted page
1682		 */
1683		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1684		pxdlock = (struct pxd_lock *) & tlck->lock;
1685		*pxd = pxdlock->pxd;
1686		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1687
1688		/* a maplock for txUpdateMap() for free of the page
1689		 * has been formatted at txLock() time;
1690		 */
1691		tlck->flag |= tlckUPDATEMAP;
1692	}
1693	return;
1694}
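
/*
 * Summary sketch of the dtLog() cases above (BTROOT pages skip the
 * maplock formatting):
 *
 *	tlckNEW|tlckEXTEND    -> LOG_REDOPAGE (+LOG_NEW/LOG_EXTEND),
 *				 mlckALLOCPXD maplock, tlckWRITEPAGE
 *	tlckENTRY|tlckRELINK  -> LOG_REDOPAGE after-image, tlckWRITEPAGE
 *	tlckFREE|tlckRELOCATE -> LOG_NOREDOPAGE; the free maplock was
 *				 already formatted at txLock() time
 */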
1695
1696/*
1697 *	xtLog()
1698 *
1699 * function:	log xtree tlock and format maplock to update bmap;
1700 */
1701static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1702	   struct tlock * tlck)
1703{
1704	struct inode *ip;
1705	struct metapage *mp;
1706	xtpage_t *p;
1707	struct xtlock *xtlck;
1708	struct maplock *maplock;
1709	struct xdlistlock *xadlock;
1710	struct pxd_lock *pxdlock;
1711	pxd_t *page_pxd;
1712	int next, lwm, hwm;
1713
1714	ip = tlck->ip;
1715	mp = tlck->mp;
1716
1717	/* initialize as REDOPAGE/NOREDOPAGE record format */
1718	lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
1719	lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);
1720
1721	page_pxd = &lrd->log.redopage.pxd;
1722
1723	if (tlck->type & tlckBTROOT) {
1724		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1725		p = &JFS_IP(ip)->i_xtroot;
1726		if (S_ISDIR(ip->i_mode))
1727			lrd->log.redopage.type |=
1728			    cpu_to_le16(LOG_DIR_XTREE);
1729	} else
1730		p = (xtpage_t *) mp->data;
1731	next = le16_to_cpu(p->header.nextindex);
1732
1733	xtlck = (struct xtlock *) & tlck->lock;
1734
1735	maplock = (struct maplock *) & tlck->lock;
1736	xadlock = (struct xdlistlock *) maplock;
1737
1738	/*
1739	 *	entry insertion/extension;
1740	 *	sibling page link update (old right page before split);
1741	 */
1742	if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
1743		/* log after-image for logredo():
1744		 * logredo() will update bmap for alloc of new/extended
1745		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1746		 * after-image of XADlist;
1747		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1748		 * applying the after-image to the meta-data page.
1749		 */
1750		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1751		PXDaddress(page_pxd, mp->index);
1752		PXDlength(page_pxd,
1753			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1754		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1755
1756		/* format a maplock for txUpdateMap() to update bPMAP
1757		 * for alloc of new/extended extents of XAD[lwm:next)
1758		 * from the page itself;
1759		 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
1760		 */
1761		lwm = xtlck->lwm.offset;
1762		if (lwm == 0)
1763			lwm = XTPAGEMAXSLOT;
1764
1765		if (lwm == next)
1766			goto out;
1767		if (lwm > next) {
 1768			jfs_err("xtLog: lwm > next");
1769			goto out;
1770		}
1771		tlck->flag |= tlckUPDATEMAP;
1772		xadlock->flag = mlckALLOCXADLIST;
1773		xadlock->count = next - lwm;
1774		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1775			int i;
1776			pxd_t *pxd;
1777			/*
1778			 * Lazy commit may allow xtree to be modified before
1779			 * txUpdateMap runs.  Copy xad into linelock to
1780			 * preserve correct data.
1781			 *
 1782			 * We can fit twice as many pxd's as xads in the lock
1783			 */
1784			xadlock->flag = mlckALLOCPXDLIST;
1785			pxd = xadlock->xdlist = &xtlck->pxdlock;
1786			for (i = 0; i < xadlock->count; i++) {
1787				PXDaddress(pxd, addressXAD(&p->xad[lwm + i]));
1788				PXDlength(pxd, lengthXAD(&p->xad[lwm + i]));
1789				p->xad[lwm + i].flag &=
1790				    ~(XAD_NEW | XAD_EXTENDED);
1791				pxd++;
1792			}
1793		} else {
1794			/*
 1795			 * xdlist will point into the inode's xtree; ensure
 1796			 * that the transaction is not committed lazily.
1797			 */
1798			xadlock->flag = mlckALLOCXADLIST;
1799			xadlock->xdlist = &p->xad[lwm];
1800			tblk->xflag &= ~COMMIT_LAZY;
1801		}
1802		jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d "
1803			 "count:%d", tlck->ip, mp, tlck, lwm, xadlock->count);
1804
1805		maplock->index = 1;
1806
1807	      out:
1808		/* mark page as homeward bound */
1809		tlck->flag |= tlckWRITEPAGE;
1810
1811		return;
1812	}
1813
1814	/*
1815	 *	page deletion: file deletion/truncation (ref. xtTruncate())
1816	 *
1817	 * (page will be invalidated after log is written and bmap
1818	 * is updated from the page);
1819	 */
1820	if (tlck->type & tlckFREE) {
1821		/* LOG_NOREDOPAGE log for NoRedoPage filter:
1822		 * if page free from file delete, NoRedoFile filter from
1823		 * inode image of zero link count will subsume NoRedoPage
1824		 * filters for each page;
 1825		 * if page free from file truncation, write NoRedoPage
1826		 * filter;
1827		 *
 1828		 * update of block allocation map for the page itself:
1829		 * if page free from deletion and truncation, LOG_UPDATEMAP
1830		 * log for the page itself is generated from processing
1831		 * its parent page xad entries;
1832		 */
1833		/* if page free from file truncation, log LOG_NOREDOPAGE
1834		 * of the deleted page for logredo() to start NoRedoPage
1835		 * filter for the page;
1836		 */
1837		if (tblk->xflag & COMMIT_TRUNCATE) {
1838			/* write NOREDOPAGE for the page */
1839			lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1840			PXDaddress(page_pxd, mp->index);
1841			PXDlength(page_pxd,
1842				  mp->logical_size >> tblk->sb->
1843				  s_blocksize_bits);
1844			lrd->backchain =
1845			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1846
1847			if (tlck->type & tlckBTROOT) {
1848				/* Empty xtree must be logged */
1849				lrd->type = cpu_to_le16(LOG_REDOPAGE);
1850				lrd->backchain =
1851				    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1852			}
1853		}
1854
1855		/* init LOG_UPDATEMAP of the freed extents
1856		 * XAD[XTENTRYSTART:hwm) from the deleted page itself
1857		 * for logredo() to update bmap;
1858		 */
1859		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1860		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST);
1861		xtlck = (struct xtlock *) & tlck->lock;
1862		hwm = xtlck->hwm.offset;
1863		lrd->log.updatemap.nxd =
1864		    cpu_to_le16(hwm - XTENTRYSTART + 1);
1865		/* reformat linelock for lmLog() */
1866		xtlck->header.offset = XTENTRYSTART;
1867		xtlck->header.length = hwm - XTENTRYSTART + 1;
1868		xtlck->index = 1;
1869		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1870
1871		/* format a maplock for txUpdateMap() to update bmap
1872		 * to free extents of XAD[XTENTRYSTART:hwm) from the
1873		 * deleted page itself;
1874		 */
1875		tlck->flag |= tlckUPDATEMAP;
1876		xadlock->count = hwm - XTENTRYSTART + 1;
1877		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1878			int i;
1879			pxd_t *pxd;
1880			/*
1881			 * Lazy commit may allow xtree to be modified before
1882			 * txUpdateMap runs.  Copy xad into linelock to
1883			 * preserve correct data.
1884			 *
 1885			 * We can fit twice as many pxd's as xads in the lock
1886			 */
1887			xadlock->flag = mlckFREEPXDLIST;
1888			pxd = xadlock->xdlist = &xtlck->pxdlock;
1889			for (i = 0; i < xadlock->count; i++) {
1890				PXDaddress(pxd,
1891					addressXAD(&p->xad[XTENTRYSTART + i]));
1892				PXDlength(pxd,
1893					lengthXAD(&p->xad[XTENTRYSTART + i]));
1894				pxd++;
1895			}
1896		} else {
1897			/*
 1898			 * xdlist will point into the inode's xtree; ensure
 1899			 * that the transaction is not committed lazily.
1900			 */
1901			xadlock->flag = mlckFREEXADLIST;
1902			xadlock->xdlist = &p->xad[XTENTRYSTART];
1903			tblk->xflag &= ~COMMIT_LAZY;
1904		}
1905		jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2",
1906			 tlck->ip, mp, xadlock->count);
1907
1908		maplock->index = 1;
1909
1910		/* mark page as invalid */
1911		if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode))
1912		    && !(tlck->type & tlckBTROOT))
1913			tlck->flag |= tlckFREEPAGE;
1914		/*
1915		   else (tblk->xflag & COMMIT_PMAP)
1916		   ? release the page;
1917		 */
1918		return;
1919	}
1920
1921	/*
1922	 *	page/entry truncation: file truncation (ref. xtTruncate())
1923	 *
1924	 *	|----------+------+------+---------------|
1925	 *		   |      |      |
1926	 *		   |      |     hwm - hwm before truncation
1927	 *		   |     next - truncation point
1928	 *		  lwm - lwm before truncation
1929	 * header ?
1930	 */
1931	if (tlck->type & tlckTRUNCATE) {
1932		/* This odd declaration suppresses a bogus gcc warning */
1933		pxd_t pxd = pxd;	/* truncated extent of xad */
1934		int twm;
1935
1936		/*
1937		 * For truncation the entire linelock may be used, so it would
1938		 * be difficult to store xad list in linelock itself.
1939		 * Therefore, we'll just force transaction to be committed
1940		 * synchronously, so that xtree pages won't be changed before
1941		 * txUpdateMap runs.
1942		 */
1943		tblk->xflag &= ~COMMIT_LAZY;
1944		lwm = xtlck->lwm.offset;
1945		if (lwm == 0)
1946			lwm = XTPAGEMAXSLOT;
1947		hwm = xtlck->hwm.offset;
1948		twm = xtlck->twm.offset;
1949
1950		/*
1951		 *	write log records
1952		 */
1953		/* log after-image for logredo():
1954		 *
1955		 * logredo() will update bmap for alloc of new/extended
1956		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1957		 * after-image of XADlist;
1958		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1959		 * applying the after-image to the meta-data page.
1960		 */
1961		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1962		PXDaddress(page_pxd, mp->index);
1963		PXDlength(page_pxd,
1964			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1965		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1966
1967		/*
1968		 * truncate entry XAD[twm == next - 1]:
1969		 */
1970		if (twm == next - 1) {
1971			/* init LOG_UPDATEMAP for logredo() to update bmap for
1972			 * free of truncated delta extent of the truncated
1973			 * entry XAD[next - 1]:
1974			 * (xtlck->pxdlock = truncated delta extent);
1975			 */
1976			pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
1977			/* assert(pxdlock->type & tlckTRUNCATE); */
1978			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1979			lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
1980			lrd->log.updatemap.nxd = cpu_to_le16(1);
1981			lrd->log.updatemap.pxd = pxdlock->pxd;
1982			pxd = pxdlock->pxd;	/* save to format maplock */
1983			lrd->backchain =
1984			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1985		}
1986
1987		/*
1988		 * free entries XAD[next:hwm]:
1989		 */
1990		if (hwm >= next) {
1991			/* init LOG_UPDATEMAP of the freed extents
1992			 * XAD[next:hwm] from the deleted page itself
1993			 * for logredo() to update bmap;
1994			 */
1995			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1996			lrd->log.updatemap.type =
1997			    cpu_to_le16(LOG_FREEXADLIST);
1998			xtlck = (struct xtlock *) & tlck->lock;
1999			hwm = xtlck->hwm.offset;
2000			lrd->log.updatemap.nxd =
2001			    cpu_to_le16(hwm - next + 1);
2002			/* reformat linelock for lmLog() */
2003			xtlck->header.offset = next;
2004			xtlck->header.length = hwm - next + 1;
2005			xtlck->index = 1;
2006			lrd->backchain =
2007			    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
2008		}
2009
2010		/*
2011		 *	format maplock(s) for txUpdateMap() to update bmap
2012		 */
2013		maplock->index = 0;
2014
2015		/*
2016		 * allocate entries XAD[lwm:next):
2017		 */
2018		if (lwm < next) {
2019			/* format a maplock for txUpdateMap() to update bPMAP
2020			 * for alloc of new/extended extents of XAD[lwm:next)
2021			 * from the page itself;
2022			 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
2023			 */
2024			tlck->flag |= tlckUPDATEMAP;
2025			xadlock->flag = mlckALLOCXADLIST;
2026			xadlock->count = next - lwm;
2027			xadlock->xdlist = &p->xad[lwm];
2028
2029			jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d "
2030				 "lwm:%d next:%d",
2031				 tlck->ip, mp, xadlock->count, lwm, next);
2032			maplock->index++;
2033			xadlock++;
2034		}
2035
2036		/*
2037		 * truncate entry XAD[twm == next - 1]:
2038		 */
2039		if (twm == next - 1) {
2040			/* format a maplock for txUpdateMap() to update bmap
2041			 * to free truncated delta extent of the truncated
2042			 * entry XAD[next - 1];
2043			 * (xtlck->pxdlock = truncated delta extent);
2044			 */
2045			tlck->flag |= tlckUPDATEMAP;
2046			pxdlock = (struct pxd_lock *) xadlock;
2047			pxdlock->flag = mlckFREEPXD;
2048			pxdlock->count = 1;
2049			pxdlock->pxd = pxd;
2050
2051			jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d "
2052				 "hwm:%d", ip, mp, pxdlock->count, hwm);
2053			maplock->index++;
2054			xadlock++;
2055		}
2056
2057		/*
2058		 * free entries XAD[next:hwm]:
2059		 */
2060		if (hwm >= next) {
2061			/* format a maplock for txUpdateMap() to update bmap
 2062			 * to free extents of XAD[next:hwm] from the deleted
2063			 * page itself;
2064			 */
2065			tlck->flag |= tlckUPDATEMAP;
2066			xadlock->flag = mlckFREEXADLIST;
2067			xadlock->count = hwm - next + 1;
2068			xadlock->xdlist = &p->xad[next];
2069
2070			jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d "
2071				 "next:%d hwm:%d",
2072				 tlck->ip, mp, xadlock->count, next, hwm);
2073			maplock->index++;
2074		}
2075
2076		/* mark page as homeward bound */
2077		tlck->flag |= tlckWRITEPAGE;
2078	}
2079	return;
2080}
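
/*
 * Worked example of the truncation bookkeeping above (illustrative
 * values): take lwm == 4, twm == 6, next == 7, hwm == 9.  The code
 * then logs the page after-image, a LOG_FREEPXD for the delta extent
 * of the truncated entry XAD[6] (twm == next - 1), and a
 * LOG_FREEXADLIST covering XAD[7:9] (nxd == hwm - next + 1 == 3).
 * It formats three maplocks for txUpdateMap(): alloc of XAD[4:7)
 * (count == next - lwm == 3), free of the truncated delta extent,
 * and free of XAD[7:9], leaving maplock->index == 3.
 */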
2081
2082/*
2083 *	mapLog()
2084 *
2085 * function:	log from maplock of freed data extents;
2086 */
2087static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2088		   struct tlock * tlck)
2089{
2090	struct pxd_lock *pxdlock;
2091	int i, nlock;
2092	pxd_t *pxd;
2093
2094	/*
2095	 *	page relocation: free the source page extent
2096	 *
2097	 * a maplock for txUpdateMap() for free of the page
2098	 * has been formatted at txLock() time saving the src
2099	 * relocated page address;
2100	 */
2101	if (tlck->type & tlckRELOCATE) {
2102		/* log LOG_NOREDOPAGE of the old relocated page
2103		 * for logredo() to start NoRedoPage filter;
2104		 */
2105		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
2106		pxdlock = (struct pxd_lock *) & tlck->lock;
2107		pxd = &lrd->log.redopage.pxd;
2108		*pxd = pxdlock->pxd;
2109		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2110
2111		/* (N.B. currently, logredo() does NOT update bmap
2112		 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE);
2113		 * if page free from relocation, LOG_UPDATEMAP log is
2114		 * specifically generated now for logredo()
2115		 * to update bmap for free of src relocated page;
2116		 * (new flag LOG_RELOCATE may be introduced which will
2117		 * inform logredo() to start NORedoPage filter and also
2118		 * update block allocation map at the same time, thus
2119		 * avoiding an extra log write);
2120		 */
2121		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2122		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
2123		lrd->log.updatemap.nxd = cpu_to_le16(1);
2124		lrd->log.updatemap.pxd = pxdlock->pxd;
2125		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2126
2127		/* a maplock for txUpdateMap() for free of the page
2128		 * has been formatted at txLock() time;
2129		 */
2130		tlck->flag |= tlckUPDATEMAP;
2131		return;
2132	}
2133	/*
 2134	 *
2135	 * Otherwise it's not a relocate request
2136	 *
2137	 */
2138	else {
2139		/* log LOG_UPDATEMAP for logredo() to update bmap for
2140		 * free of truncated/relocated delta extent of the data;
2141		 * e.g.: external EA extent, relocated/truncated extent
2142		 * from xtTailgate();
2143		 */
2144		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2145		pxdlock = (struct pxd_lock *) & tlck->lock;
2146		nlock = pxdlock->index;
2147		for (i = 0; i < nlock; i++, pxdlock++) {
2148			if (pxdlock->flag & mlckALLOCPXD)
2149				lrd->log.updatemap.type =
2150				    cpu_to_le16(LOG_ALLOCPXD);
2151			else
2152				lrd->log.updatemap.type =
2153				    cpu_to_le16(LOG_FREEPXD);
2154			lrd->log.updatemap.nxd = cpu_to_le16(1);
2155			lrd->log.updatemap.pxd = pxdlock->pxd;
2156			lrd->backchain =
2157			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2158			jfs_info("mapLog: xaddr:0x%lx xlen:0x%x",
2159				 (ulong) addressPXD(&pxdlock->pxd),
2160				 lengthPXD(&pxdlock->pxd));
2161		}
2162
2163		/* update bmap */
2164		tlck->flag |= tlckUPDATEMAP;
2165	}
2166}
2167
2168/*
2169 *	txEA()
2170 *
2171 * function:	acquire maplock for EA/ACL extents or
2172 *		set COMMIT_INLINE flag;
2173 */
2174void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
2175{
2176	struct tlock *tlck = NULL;
2177	struct pxd_lock *maplock = NULL, *pxdlock = NULL;
2178
2179	/*
2180	 * format maplock for alloc of new EA extent
2181	 */
2182	if (newea) {
2183		/* Since the newea could be a completely zeroed entry we need to
2184		 * check for the two flags which indicate we should actually
2185		 * commit new EA data
2186		 */
2187		if (newea->flag & DXD_EXTENT) {
2188			tlck = txMaplock(tid, ip, tlckMAP);
2189			maplock = (struct pxd_lock *) & tlck->lock;
2190			pxdlock = (struct pxd_lock *) maplock;
2191			pxdlock->flag = mlckALLOCPXD;
2192			PXDaddress(&pxdlock->pxd, addressDXD(newea));
2193			PXDlength(&pxdlock->pxd, lengthDXD(newea));
2194			pxdlock++;
2195			maplock->index = 1;
2196		} else if (newea->flag & DXD_INLINE) {
2197			tlck = NULL;
2198
2199			set_cflag(COMMIT_Inlineea, ip);
2200		}
2201	}
2202
2203	/*
2204	 * format maplock for free of old EA extent
2205	 */
2206	if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) {
2207		if (tlck == NULL) {
2208			tlck = txMaplock(tid, ip, tlckMAP);
2209			maplock = (struct pxd_lock *) & tlck->lock;
2210			pxdlock = (struct pxd_lock *) maplock;
2211			maplock->index = 0;
2212		}
2213		pxdlock->flag = mlckFREEPXD;
2214		PXDaddress(&pxdlock->pxd, addressDXD(oldea));
2215		PXDlength(&pxdlock->pxd, lengthDXD(oldea));
2216		maplock->index++;
2217	}
2218}
2219
2220/*
2221 *	txForce()
2222 *
2223 * function: synchronously write pages locked by transaction
2224 *	     after txLog() but before txUpdateMap();
2225 */
2226static void txForce(struct tblock * tblk)
2227{
2228	struct tlock *tlck;
2229	lid_t lid, next;
2230	struct metapage *mp;
2231
2232	/*
2233	 * reverse the order of transaction tlocks in
2234	 * careful update order of address index pages
2235	 * (right to left, bottom up)
2236	 */
2237	tlck = lid_to_tlock(tblk->next);
2238	lid = tlck->next;
2239	tlck->next = 0;
2240	while (lid) {
2241		tlck = lid_to_tlock(lid);
2242		next = tlck->next;
2243		tlck->next = tblk->next;
2244		tblk->next = lid;
2245		lid = next;
2246	}
2247
2248	/*
2249	 * synchronously write the page, and
2250	 * hold the page for txUpdateMap();
2251	 */
2252	for (lid = tblk->next; lid; lid = next) {
2253		tlck = lid_to_tlock(lid);
2254		next = tlck->next;
2255
2256		if ((mp = tlck->mp) != NULL &&
2257		    (tlck->type & tlckBTROOT) == 0) {
2258			assert(mp->xflag & COMMIT_PAGE);
2259
2260			if (tlck->flag & tlckWRITEPAGE) {
2261				tlck->flag &= ~tlckWRITEPAGE;
2262
2263				/* do not release page to freelist */
2264				force_metapage(mp);
2265#if 0
2266				/*
2267				 * The "right" thing to do here is to
2268				 * synchronously write the metadata.
2269				 * With the current implementation this
2270				 * is hard since write_metapage requires
2271				 * us to kunmap & remap the page.  If we
2272				 * have tlocks pointing into the metadata
2273				 * pages, we don't want to do this.  I think
2274				 * we can get by with synchronously writing
2275				 * the pages when they are released.
2276				 */
2277				assert(mp->nohomeok);
2278				set_bit(META_dirty, &mp->flag);
2279				set_bit(META_sync, &mp->flag);
2280#endif
2281			}
2282		}
2283	}
2284}
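
/*
 * Note on the reversal loop above: it turns the singly-linked tlock
 * list tblk->next: A -> B -> C into C -> B -> A.  Starting with A, its
 * next is cleared; B is then relinked ahead of A, and C ahead of B, so
 * pages are forced in careful-update order (right to left, bottom up).
 */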
2285
2286/*
2287 *	txUpdateMap()
2288 *
2289 * function:	update persistent allocation map (and working map
2290 *		if appropriate);
2291 *
2292 * parameter:
2293 */
2294static void txUpdateMap(struct tblock * tblk)
2295{
2296	struct inode *ip;
2297	struct inode *ipimap;
2298	lid_t lid;
2299	struct tlock *tlck;
2300	struct maplock *maplock;
2301	struct pxd_lock pxdlock;
2302	int maptype;
2303	int k, nlock;
2304	struct metapage *mp = NULL;
2305
2306	ipimap = JFS_SBI(tblk->sb)->ipimap;
2307
2308	maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP;
2309
2310
2311	/*
2312	 *	update block allocation map
2313	 *
2314	 * update allocation state in pmap (and wmap) and
2315	 * update lsn of the pmap page;
2316	 */
2317	/*
2318	 * scan each tlock/page of transaction for block allocation/free:
2319	 *
2320	 * for each tlock/page of transaction, update map.
2321	 *  ? are there tlock for pmap and pwmap at the same time ?
2322	 */
2323	for (lid = tblk->next; lid; lid = tlck->next) {
2324		tlck = lid_to_tlock(lid);
2325
2326		if ((tlck->flag & tlckUPDATEMAP) == 0)
2327			continue;
2328
2329		if (tlck->flag & tlckFREEPAGE) {
2330			/*
2331			 * Another thread may attempt to reuse freed space
2332			 * immediately, so we want to get rid of the metapage
2333			 * before anyone else has a chance to get it.
2334			 * Lock metapage, update maps, then invalidate
2335			 * the metapage.
2336			 */
2337			mp = tlck->mp;
2338			ASSERT(mp->xflag & COMMIT_PAGE);
2339			grab_metapage(mp);
2340		}
2341
2342		/*
2343		 * extent list:
2344		 * . in-line PXD list:
2345		 * . out-of-line XAD list:
2346		 */
2347		maplock = (struct maplock *) & tlck->lock;
2348		nlock = maplock->index;
2349
2350		for (k = 0; k < nlock; k++, maplock++) {
2351			/*
2352			 * allocate blocks in persistent map:
2353			 *
2354			 * blocks have been allocated from wmap at alloc time;
2355			 */
2356			if (maplock->flag & mlckALLOC) {
2357				txAllocPMap(ipimap, maplock, tblk);
2358			}
2359			/*
2360			 * free blocks in persistent and working map:
2361			 * blocks will be freed in pmap and then in wmap;
2362			 *
2363			 * ? tblock specifies the PMAP/PWMAP based upon
2364			 * transaction
2365			 *
2366			 * free blocks in persistent map:
2367			 * blocks will be freed from wmap at last reference
2368			 * release of the object for regular files;
2369			 *
 2370			 * Always free blocks from both persistent & working
2371			 * maps for directories
2372			 */
2373			else {	/* (maplock->flag & mlckFREE) */
2374
2375				if (tlck->flag & tlckDIRECTORY)
2376					txFreeMap(ipimap, maplock,
2377						  tblk, COMMIT_PWMAP);
2378				else
2379					txFreeMap(ipimap, maplock,
2380						  tblk, maptype);
2381			}
2382		}
2383		if (tlck->flag & tlckFREEPAGE) {
2384			if (!(tblk->flag & tblkGC_LAZY)) {
2385				/* This is equivalent to txRelease */
2386				ASSERT(mp->lid == lid);
2387				tlck->mp->lid = 0;
2388			}
2389			assert(mp->nohomeok == 1);
2390			metapage_homeok(mp);
2391			discard_metapage(mp);
2392			tlck->mp = NULL;
2393		}
2394	}
2395	/*
2396	 *	update inode allocation map
2397	 *
2398	 * update allocation state in pmap and
2399	 * update lsn of the pmap page;
2400	 * update in-memory inode flag/state
2401	 *
2402	 * unlock mapper/write lock
2403	 */
2404	if (tblk->xflag & COMMIT_CREATE) {
2405		diUpdatePMap(ipimap, tblk->ino, false, tblk);
2406		/* update persistent block allocation map
2407		 * for the allocation of inode extent;
2408		 */
2409		pxdlock.flag = mlckALLOCPXD;
2410		pxdlock.pxd = tblk->u.ixpxd;
2411		pxdlock.index = 1;
2412		txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk);
2413	} else if (tblk->xflag & COMMIT_DELETE) {
2414		ip = tblk->u.ip;
2415		diUpdatePMap(ipimap, ip->i_ino, true, tblk);
2416		iput(ip);
2417	}
2418}
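
/*
 * Note on maptype above (a reading of the loop, not a spec):
 * COMMIT_PMAP frees blocks in the persistent map only, with the
 * working map updated later (e.g. when the last reference to a
 * deleted regular file is released), while COMMIT_PWMAP frees in both
 * maps at once; directory tlocks always take the COMMIT_PWMAP path,
 * as the loop above forces.
 */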
2419
2420/*
2421 *	txAllocPMap()
2422 *
2423 * function: allocate from persistent map;
2424 *
2425 * parameter:
2426 *	ipbmap	-
2427 *	malock	-
2428 *		xad list:
2429 *		pxd:
2430 *
2431 *	maptype -
2432 *		allocate from persistent map;
2433 *		free from persistent map;
 2434 *		(e.g., tmp file - free from working map at release
2435 *		 of last reference);
2436 *		free from persistent and working map;
2437 *
2438 *	lsn	- log sequence number;
2439 */
2440static void txAllocPMap(struct inode *ip, struct maplock * maplock,
2441			struct tblock * tblk)
2442{
2443	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2444	struct xdlistlock *xadlistlock;
2445	xad_t *xad;
2446	s64 xaddr;
2447	int xlen;
2448	struct pxd_lock *pxdlock;
2449	struct xdlistlock *pxdlistlock;
2450	pxd_t *pxd;
2451	int n;
2452
2453	/*
2454	 * allocate from persistent map;
2455	 */
2456	if (maplock->flag & mlckALLOCXADLIST) {
2457		xadlistlock = (struct xdlistlock *) maplock;
2458		xad = xadlistlock->xdlist;
2459		for (n = 0; n < xadlistlock->count; n++, xad++) {
2460			if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
2461				xaddr = addressXAD(xad);
2462				xlen = lengthXAD(xad);
2463				dbUpdatePMap(ipbmap, false, xaddr,
2464					     (s64) xlen, tblk);
2465				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
2466				jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2467					 (ulong) xaddr, xlen);
2468			}
2469		}
2470	} else if (maplock->flag & mlckALLOCPXD) {
2471		pxdlock = (struct pxd_lock *) maplock;
2472		xaddr = addressPXD(&pxdlock->pxd);
2473		xlen = lengthPXD(&pxdlock->pxd);
2474		dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk);
2475		jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen);
2476	} else {		/* (maplock->flag & mlckALLOCPXDLIST) */
2477
2478		pxdlistlock = (struct xdlistlock *) maplock;
2479		pxd = pxdlistlock->xdlist;
2480		for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2481			xaddr = addressPXD(pxd);
2482			xlen = lengthPXD(pxd);
2483			dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen,
2484				     tblk);
2485			jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2486				 (ulong) xaddr, xlen);
2487		}
2488	}
2489}
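
/*
 * Usage sketch (illustrative; it mirrors the COMMIT_CREATE path in
 * txUpdateMap() above): a single extent already allocated in the
 * working map can be committed to the persistent map by formatting an
 * on-stack pxd_lock as the maplock.  extent_pxd stands in for the
 * extent's pxd_t:
 *
 *	struct pxd_lock pxdlock;
 *
 *	pxdlock.flag = mlckALLOCPXD;
 *	pxdlock.pxd = extent_pxd;
 *	pxdlock.index = 1;
 *	txAllocPMap(ipimap, (struct maplock *) &pxdlock, tblk);
 */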
2490
2491/*
2492 *	txFreeMap()
2493 *
2494 * function:	free from persistent and/or working map;
2495 *
2496 * todo: optimization
2497 */
2498void txFreeMap(struct inode *ip,
2499	       struct maplock * maplock, struct tblock * tblk, int maptype)
2500{
2501	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2502	struct xdlistlock *xadlistlock;
2503	xad_t *xad;
2504	s64 xaddr;
2505	int xlen;
2506	struct pxd_lock *pxdlock;
2507	struct xdlistlock *pxdlistlock;
2508	pxd_t *pxd;
2509	int n;
2510
2511	jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x",
2512		 tblk, maplock, maptype);
2513
2514	/*
2515	 * free from persistent map;
2516	 */
2517	if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) {
2518		if (maplock->flag & mlckFREEXADLIST) {
2519			xadlistlock = (struct xdlistlock *) maplock;
2520			xad = xadlistlock->xdlist;
2521			for (n = 0; n < xadlistlock->count; n++, xad++) {
2522				if (!(xad->flag & XAD_NEW)) {
2523					xaddr = addressXAD(xad);
2524					xlen = lengthXAD(xad);
2525					dbUpdatePMap(ipbmap, true, xaddr,
2526						     (s64) xlen, tblk);
2527					jfs_info("freePMap: xaddr:0x%lx "
2528						 "xlen:%d",
2529						 (ulong) xaddr, xlen);
2530				}
2531			}
2532		} else if (maplock->flag & mlckFREEPXD) {
2533			pxdlock = (struct pxd_lock *) maplock;
2534			xaddr = addressPXD(&pxdlock->pxd);
2535			xlen = lengthPXD(&pxdlock->pxd);
2536			dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen,
2537				     tblk);
2538			jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2539				 (ulong) xaddr, xlen);
2540		} else {	/* (maplock->flag & mlckALLOCPXDLIST) */
2541
2542			pxdlistlock = (struct xdlistlock *) maplock;
2543			pxd = pxdlistlock->xdlist;
2544			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2545				xaddr = addressPXD(pxd);
2546				xlen = lengthPXD(pxd);
2547				dbUpdatePMap(ipbmap, true, xaddr,
2548					     (s64) xlen, tblk);
2549				jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2550					 (ulong) xaddr, xlen);
2551			}
2552		}
2553	}
2554
2555	/*
2556	 * free from working map;
2557	 */
2558	if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) {
2559		if (maplock->flag & mlckFREEXADLIST) {
2560			xadlistlock = (struct xdlistlock *) maplock;
2561			xad = xadlistlock->xdlist;
2562			for (n = 0; n < xadlistlock->count; n++, xad++) {
2563				xaddr = addressXAD(xad);
2564				xlen = lengthXAD(xad);
2565				dbFree(ip, xaddr, (s64) xlen);
2566				xad->flag = 0;
2567				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2568					 (ulong) xaddr, xlen);
2569			}
2570		} else if (maplock->flag & mlckFREEPXD) {
2571			pxdlock = (struct pxd_lock *) maplock;
2572			xaddr = addressPXD(&pxdlock->pxd);
2573			xlen = lengthPXD(&pxdlock->pxd);
2574			dbFree(ip, xaddr, (s64) xlen);
2575			jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2576				 (ulong) xaddr, xlen);
2577		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
2578
2579			pxdlistlock = (struct xdlistlock *) maplock;
2580			pxd = pxdlistlock->xdlist;
2581			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2582				xaddr = addressPXD(pxd);
2583				xlen = lengthPXD(pxd);
2584				dbFree(ip, xaddr, (s64) xlen);
2585				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2586					 (ulong) xaddr, xlen);
2587			}
2588		}
2589	}
2590}
2591
2592/*
2593 *	txFreelock()
2594 *
2595 * function:	remove tlock from inode anonymous locklist
2596 */
2597void txFreelock(struct inode *ip)
2598{
2599	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2600	struct tlock *xtlck, *tlck;
2601	lid_t xlid = 0, lid;
2602
2603	if (!jfs_ip->atlhead)
2604		return;
2605
2606	TXN_LOCK();
2607	xtlck = (struct tlock *) &jfs_ip->atlhead;
2608
2609	while ((lid = xtlck->next) != 0) {
2610		tlck = lid_to_tlock(lid);
2611		if (tlck->flag & tlckFREELOCK) {
2612			xtlck->next = tlck->next;
2613			txLockFree(lid);
2614		} else {
2615			xtlck = tlck;
2616			xlid = lid;
2617		}
2618	}
2619
2620	if (jfs_ip->atlhead)
2621		jfs_ip->atltail = xlid;
2622	else {
2623		jfs_ip->atltail = 0;
2624		/*
2625		 * If inode was on anon_list, remove it
2626		 */
2627		list_del_init(&jfs_ip->anon_inode_list);
2628	}
2629	TXN_UNLOCK();
2630}
2631
2632/*
2633 *	txAbort()
2634 *
2635 * function: abort tx before commit;
2636 *
2637 * frees line-locks and segment locks for all
2638 * segments in comdata structure.
2639 * Optionally sets state of file-system to FM_DIRTY in super-block.
 2640 * The log age of in-memory page frames locked by the caller
 2641 * is reset to 0 (to avoid logwrap).
2642 */
2643void txAbort(tid_t tid, int dirty)
2644{
2645	lid_t lid, next;
2646	struct metapage *mp;
2647	struct tblock *tblk = tid_to_tblock(tid);
2648	struct tlock *tlck;
2649
2650	/*
2651	 * free tlocks of the transaction
2652	 */
2653	for (lid = tblk->next; lid; lid = next) {
2654		tlck = lid_to_tlock(lid);
2655		next = tlck->next;
2656		mp = tlck->mp;
2657		JFS_IP(tlck->ip)->xtlid = 0;
2658
2659		if (mp) {
2660			mp->lid = 0;
2661
2662			/*
 2663			 * reset lsn of page to avoid logwrap:
2664			 *
2665			 * (page may have been previously committed by another
2666			 * transaction(s) but has not been paged, i.e.,
2667			 * it may be on logsync list even though it has not
2668			 * been logged for the current tx.)
2669			 */
2670			if (mp->xflag & COMMIT_PAGE && mp->lsn)
2671				LogSyncRelease(mp);
2672		}
2673		/* insert tlock at head of freelist */
2674		TXN_LOCK();
2675		txLockFree(lid);
2676		TXN_UNLOCK();
2677	}
2678
2679	/* caller will free the transaction block */
2680
2681	tblk->next = tblk->last = 0;
2682
2683	/*
2684	 * mark filesystem dirty
2685	 */
2686	if (dirty)
2687		jfs_error(tblk->sb, "txAbort");
2688
2689	return;
2690}
2691
2692/*
2693 *	txLazyCommit(void)
2694 *
2695 *	All transactions except those changing ipimap (COMMIT_FORCE) are
 2696 *	processed by this routine.  This ensures that the inode and block
2697 *	allocation maps are updated in order.  For synchronous transactions,
2698 *	let the user thread finish processing after txUpdateMap() is called.
2699 */
2700static void txLazyCommit(struct tblock * tblk)
2701{
2702	struct jfs_log *log;
2703
2704	while (((tblk->flag & tblkGC_READY) == 0) &&
2705	       ((tblk->flag & tblkGC_UNLOCKED) == 0)) {
2706		/* We must have gotten ahead of the user thread
2707		 */
2708		jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk);
2709		yield();
2710	}
2711
2712	jfs_info("txLazyCommit: processing tblk 0x%p", tblk);
2713
2714	txUpdateMap(tblk);
2715
2716	log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
2717
2718	spin_lock_irq(&log->gclock);	// LOGGC_LOCK
2719
2720	tblk->flag |= tblkGC_COMMITTED;
2721
2722	if (tblk->flag & tblkGC_READY)
2723		log->gcrtc--;
2724
2725	wake_up_all(&tblk->gcwait);	// LOGGC_WAKEUP
2726
2727	/*
2728	 * Can't release log->gclock until we've tested tblk->flag
2729	 */
2730	if (tblk->flag & tblkGC_LAZY) {
2731		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
2732		txUnlock(tblk);
2733		tblk->flag &= ~tblkGC_LAZY;
2734		txEnd(tblk - TxBlock);	/* Convert back to tid */
2735	} else
2736		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
2737
2738	jfs_info("txLazyCommit: done: tblk = 0x%p", tblk);
2739}
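
/*
 * Note on the tid conversion above: tblocks live in the global
 * TxBlock[] table, and tid_to_tblock() is taken to be &TxBlock[tid],
 * so pointer arithmetic recovers the id:
 *
 *	tid = tblk - TxBlock;
 */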
2740
2741/*
2742 *	jfs_lazycommit(void)
2743 *
2744 *	To be run as a kernel daemon.  If lbmIODone is called in an interrupt
2745 *	context, or where blocking is not wanted, this routine will process
2746 *	committed transactions from the unlock queue.
2747 */
2748int jfs_lazycommit(void *arg)
2749{
2750	int WorkDone;
2751	struct tblock *tblk;
2752	unsigned long flags;
2753	struct jfs_sb_info *sbi;
2754
2755	do {
2756		LAZY_LOCK(flags);
2757		jfs_commit_thread_waking = 0;	/* OK to wake another thread */
2758		while (!list_empty(&TxAnchor.unlock_queue)) {
2759			WorkDone = 0;
2760			list_for_each_entry(tblk, &TxAnchor.unlock_queue,
2761					    cqueue) {
2762
2763				sbi = JFS_SBI(tblk->sb);
2764				/*
2765				 * For each volume, the transactions must be
2766				 * handled in order.  If another commit thread
2767				 * is handling a tblk for this superblock,
2768				 * skip it
2769				 */
2770				if (sbi->commit_state & IN_LAZYCOMMIT)
2771					continue;
2772
2773				sbi->commit_state |= IN_LAZYCOMMIT;
2774				WorkDone = 1;
2775
2776				/*
2777				 * Remove transaction from queue
2778				 */
2779				list_del(&tblk->cqueue);
2780
2781				LAZY_UNLOCK(flags);
2782				txLazyCommit(tblk);
2783				LAZY_LOCK(flags);
2784
2785				sbi->commit_state &= ~IN_LAZYCOMMIT;
2786				/*
2787				 * Don't continue in the for loop.  (We can't
2788				 * anyway, it's unsafe!)  We want to go back to
2789				 * the beginning of the list.
2790				 */
2791				break;
2792			}
2793
2794			/* If there was nothing to do, don't continue */
2795			if (!WorkDone)
2796				break;
2797		}
2798		/* In case a wakeup came while all threads were active */
2799		jfs_commit_thread_waking = 0;
2800
2801		if (freezing(current)) {
2802			LAZY_UNLOCK(flags);
2803			refrigerator();
2804		} else {
2805			DECLARE_WAITQUEUE(wq, current);
2806
2807			add_wait_queue(&jfs_commit_thread_wait, &wq);
2808			set_current_state(TASK_INTERRUPTIBLE);
2809			LAZY_UNLOCK(flags);
2810			schedule();
2811			__set_current_state(TASK_RUNNING);
2812			remove_wait_queue(&jfs_commit_thread_wait, &wq);
2813		}
2814	} while (!kthread_should_stop());
2815
2816	if (!list_empty(&TxAnchor.unlock_queue))
2817		jfs_err("jfs_lazycommit being killed w/pending transactions!");
2818	else
 2819		jfs_info("jfs_lazycommit being killed");
2820	return 0;
2821}
2822
2823void txLazyUnlock(struct tblock * tblk)
2824{
2825	unsigned long flags;
2826
2827	LAZY_LOCK(flags);
2828
2829	list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue);
2830	/*
2831	 * Don't wake up a commit thread if there is already one servicing
2832	 * this superblock, or if the last one we woke up hasn't started yet.
2833	 */
2834	if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) &&
2835	    !jfs_commit_thread_waking) {
2836		jfs_commit_thread_waking = 1;
2837		wake_up(&jfs_commit_thread_wait);
2838	}
2839	LAZY_UNLOCK(flags);
2840}
2841
2842static void LogSyncRelease(struct metapage * mp)
2843{
2844	struct jfs_log *log = mp->log;
2845
2846	assert(mp->nohomeok);
2847	assert(log);
2848	metapage_homeok(mp);
2849}

/*
 *	txQuiesce
 *
 *	Block all new transactions and push anonymous transactions to
 *	completion
 *
 *	This does almost the same thing as jfs_sync below.  We don't
 *	worry about deadlocking when jfs_tlocks_low is set, since we would
 *	expect jfs_sync to get us out of that jam.
 */
void txQuiesce(struct super_block *sb)
{
	struct inode *ip;
	struct jfs_inode_info *jfs_ip;
	struct jfs_log *log = JFS_SBI(sb)->log;
	tid_t tid;

	set_bit(log_QUIESCE, &log->flag);

	TXN_LOCK();
restart:
	while (!list_empty(&TxAnchor.anon_list)) {
		jfs_ip = list_entry(TxAnchor.anon_list.next,
				    struct jfs_inode_info,
				    anon_inode_list);
		ip = &jfs_ip->vfs_inode;

		/*
		 * inode will be removed from anonymous list
		 * when it is committed
		 */
		TXN_UNLOCK();
		tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
		mutex_lock(&jfs_ip->commit_mutex);
		txCommit(tid, 1, &ip, 0);
		txEnd(tid);
		mutex_unlock(&jfs_ip->commit_mutex);
		/*
		 * Yield the CPU periodically; this loop may otherwise
		 * run for a long time without blocking.
		 */
		cond_resched();
		TXN_LOCK();
	}

	/*
	 * If jfs_sync is running in parallel, there could be some inodes
	 * on anon_list2.  Let's check.
	 */
	if (!list_empty(&TxAnchor.anon_list2)) {
		list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list);
		INIT_LIST_HEAD(&TxAnchor.anon_list2);
		goto restart;
	}
	TXN_UNLOCK();

	/*
	 * We may need to kick off the group commit
	 */
	jfs_flush_journal(log, 0);
}
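
/*
 * Illustrative pairing of txQuiesce()/txResume() (a sketch only; the
 * real callers are the filesystem freeze/remount paths in jfs/super.c):
 *
 *	txQuiesce(sb);		// block new txns, flush anonymous ones
 *	// ... operate on a quiesced filesystem, e.g. shut down the log
 *	txResume(sb);		// allow transactions to start again
 */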

/*
 * txResume()
 *
 * Allows transactions to start again following txQuiesce
 */
void txResume(struct super_block *sb)
{
	struct jfs_log *log = JFS_SBI(sb)->log;

	clear_bit(log_QUIESCE, &log->flag);
	TXN_WAKEUP(&log->syncwait);
}

/*
 *	jfs_sync(void)
 *
 *	To be run as a kernel thread.  This is awakened when tlocks run low.
 *	We write any inodes that have anonymous tlocks so they will become
 *	available.
 */
int jfs_sync(void *arg)
{
	struct inode *ip;
	struct jfs_inode_info *jfs_ip;
	tid_t tid;

	do {
		/*
		 * write each inode on the anonymous inode list
		 */
		TXN_LOCK();
		while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) {
			jfs_ip = list_entry(TxAnchor.anon_list.next,
					    struct jfs_inode_info,
					    anon_inode_list);
			ip = &jfs_ip->vfs_inode;

			if (!igrab(ip)) {
				/*
				 * Inode is being freed
				 */
				list_del_init(&jfs_ip->anon_inode_list);
			} else if (mutex_trylock(&jfs_ip->commit_mutex)) {
				/*
				 * inode will be removed from anonymous list
				 * when it is committed
				 */
				TXN_UNLOCK();
				tid = txBegin(ip->i_sb, COMMIT_INODE);
				txCommit(tid, 1, &ip, 0);
				txEnd(tid);
				mutex_unlock(&jfs_ip->commit_mutex);

				iput(ip);
				/*
				 * Yield the CPU periodically; this loop may
				 * otherwise run for a long time without
				 * blocking.
				 */
				cond_resched();
				TXN_LOCK();
			} else {
				/* We can't get the commit mutex.  It may
				 * be held by a thread waiting for tlocks,
				 * so let's not block here.  Save it to
				 * put back on the anon_list.
				 */

				/* Take off anon_list */
				list_del(&jfs_ip->anon_inode_list);

				/* Put on anon_list2 */
				list_add(&jfs_ip->anon_inode_list,
					 &TxAnchor.anon_list2);

				TXN_UNLOCK();
				iput(ip);
				TXN_LOCK();
			}
		}
		/* Add anon_list2 back to anon_list */
		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);

		if (freezing(current)) {
			TXN_UNLOCK();
			refrigerator();
		} else {
			set_current_state(TASK_INTERRUPTIBLE);
			TXN_UNLOCK();
			schedule();
			__set_current_state(TASK_RUNNING);
		}
	} while (!kthread_should_stop());

	jfs_info("jfs_sync being killed");
	return 0;
}
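
/*
 * How this thread gets woken (a sketch; the producer side lives in
 * txLockAlloc() earlier in this file): when tlocksInUse crosses
 * TxLockHWM, txLockAlloc() sets jfs_tlocks_low and wakes the jfs_sync
 * thread, which then commits anonymous transactions until enough
 * tlocks are freed and jfs_tlocks_low is cleared again.
 */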

#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
static int jfs_txanchor_proc_show(struct seq_file *m, void *v)
{
	char *freewait;
	char *freelockwait;
	char *lowlockwait;

	freewait =
	    waitqueue_active(&TxAnchor.freewait) ? "active" : "empty";
	freelockwait =
	    waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty";
	lowlockwait =
	    waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty";

	seq_printf(m,
		   "JFS TxAnchor\n"
		   "============\n"
		   "freetid = %d\n"
		   "freewait = %s\n"
		   "freelock = %d\n"
		   "freelockwait = %s\n"
		   "lowlockwait = %s\n"
		   "tlocksInUse = %d\n"
		   "jfs_tlocks_low = %d\n"
		   "unlock_queue is %sempty\n",
		   TxAnchor.freetid,
		   freewait,
		   TxAnchor.freelock,
		   freelockwait,
		   lowlockwait,
		   TxAnchor.tlocksInUse,
		   jfs_tlocks_low,
		   list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
	return 0;
}
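
/*
 * This seq_file is registered by the debug code (jfs_proc_init() in
 * jfs_debug.c) and is typically readable as /proc/fs/jfs/TxAnchor.
 * The exact path and registration details are an assumption here;
 * they depend on the kernel version.
 */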

static int jfs_txanchor_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, jfs_txanchor_proc_show, NULL);
}

const struct file_operations jfs_txanchor_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= jfs_txanchor_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif

#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
static int jfs_txstats_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m,
		   "JFS TxStats\n"
		   "===========\n"
		   "calls to txBegin = %d\n"
		   "txBegin blocked by sync barrier = %d\n"
		   "txBegin blocked by tlocks low = %d\n"
		   "txBegin blocked by no free tid = %d\n"
		   "calls to txBeginAnon = %d\n"
		   "txBeginAnon blocked by sync barrier = %d\n"
		   "txBeginAnon blocked by tlocks low = %d\n"
		   "calls to txLockAlloc = %d\n"
		   "txLockAlloc blocked by no free lock = %d\n",
		   TxStat.txBegin,
		   TxStat.txBegin_barrier,
		   TxStat.txBegin_lockslow,
		   TxStat.txBegin_freetid,
		   TxStat.txBeginAnon,
		   TxStat.txBeginAnon_barrier,
		   TxStat.txBeginAnon_lockslow,
		   TxStat.txLockAlloc,
		   TxStat.txLockAlloc_freelock);
	return 0;
}
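
/*
 * Each counter here corresponds to a TxStat field bumped with
 * INCREMENT() in the hot paths above; the output is typically exposed
 * as /proc/fs/jfs/TxStats (registration assumed to be in jfs_debug.c).
 */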

static int jfs_txstats_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, jfs_txstats_proc_show, NULL);
}

const struct file_operations jfs_txstats_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= jfs_txstats_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif