   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *   Copyright (C) International Business Machines Corp., 2000-2005
    4 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
    5 */
   6
   7/*
   8 *	jfs_txnmgr.c: transaction manager
   9 *
  10 * notes:
  11 * transaction starts with txBegin() and ends with txCommit()
  12 * or txAbort().
  13 *
  14 * tlock is acquired at the time of update;
  15 * (obviate scan at commit time for xtree and dtree)
   16 * tlock and mp point to each other;
  17 * (no hashlist for mp -> tlock).
  18 *
  19 * special cases:
  20 * tlock on in-memory inode:
  21 * in-place tlock in the in-memory inode itself;
  22 * converted to page lock by iWrite() at commit time.
  23 *
  24 * tlock during write()/mmap() under anonymous transaction (tid = 0):
  25 * transferred (?) to transaction at commit time.
  26 *
  27 * use the page itself to update allocation maps
  28 * (obviate intermediate replication of allocation/deallocation data)
  29 * hold on to mp+lock thru update of maps
  30 */
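/*
 * Illustrative sketch (not part of the original source): the lifecycle
 * described above as the calling code sees it, assuming an inode 'ip' and
 * a metapage 'mp' about to be modified; error handling omitted.
 *
 *	tid_t tid = txBegin(ip->i_sb, 0);	- allocate tid/tblock
 *	struct tlock *tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
 *	... modify the page covered by the tlock ...
 *	rc = txCommit(tid, 1, &ip, 0);		- write log records + COMMIT
 *	txEnd(tid);				- return the tblock to the freelist
 */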
  31
  32#include <linux/fs.h>
  33#include <linux/vmalloc.h>
  34#include <linux/completion.h>
  35#include <linux/freezer.h>
  36#include <linux/module.h>
  37#include <linux/moduleparam.h>
  38#include <linux/kthread.h>
  39#include <linux/seq_file.h>
  40#include "jfs_incore.h"
  41#include "jfs_inode.h"
  42#include "jfs_filsys.h"
  43#include "jfs_metapage.h"
  44#include "jfs_dinode.h"
  45#include "jfs_imap.h"
  46#include "jfs_dmap.h"
  47#include "jfs_superblock.h"
  48#include "jfs_debug.h"
  49
  50/*
  51 *	transaction management structures
  52 */
  53static struct {
  54	int freetid;		/* index of a free tid structure */
  55	int freelock;		/* index first free lock word */
  56	wait_queue_head_t freewait;	/* eventlist of free tblock */
  57	wait_queue_head_t freelockwait;	/* eventlist of free tlock */
  58	wait_queue_head_t lowlockwait;	/* eventlist of ample tlocks */
  59	int tlocksInUse;	/* Number of tlocks in use */
  60	spinlock_t LazyLock;	/* synchronize sync_queue & unlock_queue */
  61/*	struct tblock *sync_queue; * Transactions waiting for data sync */
  62	struct list_head unlock_queue;	/* Txns waiting to be released */
  63	struct list_head anon_list;	/* inodes having anonymous txns */
  64	struct list_head anon_list2;	/* inodes having anonymous txns
  65					   that couldn't be sync'ed */
  66} TxAnchor;
  67
  68int jfs_tlocks_low;		/* Indicates low number of available tlocks */
  69
  70#ifdef CONFIG_JFS_STATISTICS
  71static struct {
  72	uint txBegin;
  73	uint txBegin_barrier;
  74	uint txBegin_lockslow;
  75	uint txBegin_freetid;
  76	uint txBeginAnon;
  77	uint txBeginAnon_barrier;
  78	uint txBeginAnon_lockslow;
  79	uint txLockAlloc;
  80	uint txLockAlloc_freelock;
  81} TxStat;
  82#endif
  83
  84static int nTxBlock = -1;	/* number of transaction blocks */
  85module_param(nTxBlock, int, 0);
  86MODULE_PARM_DESC(nTxBlock,
  87		 "Number of transaction blocks (max:65536)");
  88
  89static int nTxLock = -1;	/* number of transaction locks */
  90module_param(nTxLock, int, 0);
  91MODULE_PARM_DESC(nTxLock,
  92		 "Number of transaction locks (max:65536)");
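/*
 * Usage note (illustrative): when jfs is built as a module, both tunables
 * can be set at load time, e.g. "modprobe jfs nTxLock=8192 nTxBlock=1024";
 * when built in, the same parameters are passed on the kernel command line
 * as "jfs.nTxLock=8192".  The default of -1 lets txInit() size the tables
 * from available memory.
 */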
  93
  94struct tblock *TxBlock;	/* transaction block table */
  95static int TxLockLWM;	/* Low water mark for number of txLocks used */
  96static int TxLockHWM;	/* High water mark for number of txLocks used */
  97static int TxLockVHWM;	/* Very High water mark */
  98struct tlock *TxLock;	/* transaction lock table */
  99
 100/*
 101 *	transaction management lock
 102 */
 103static DEFINE_SPINLOCK(jfsTxnLock);
 104
 105#define TXN_LOCK()		spin_lock(&jfsTxnLock)
 106#define TXN_UNLOCK()		spin_unlock(&jfsTxnLock)
 107
 108#define LAZY_LOCK_INIT()	spin_lock_init(&TxAnchor.LazyLock)
 109#define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
 110#define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)
 111
 112static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
 113static int jfs_commit_thread_waking;
 114
 115/*
  116 * Retry logic exists outside these macros to protect from spurious wakeups.
 117 */
 118static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
 119{
 120	DECLARE_WAITQUEUE(wait, current);
 121
 122	add_wait_queue(event, &wait);
 123	set_current_state(TASK_UNINTERRUPTIBLE);
 124	TXN_UNLOCK();
  125	io_schedule();
  126	remove_wait_queue(event, &wait);
 127}
 128
 129#define TXN_SLEEP(event)\
 130{\
 131	TXN_SLEEP_DROP_LOCK(event);\
 132	TXN_LOCK();\
 133}
 134
 135#define TXN_WAKEUP(event) wake_up_all(event)
 136
 137/*
 138 *	statistics
 139 */
 140static struct {
 141	tid_t maxtid;		/* 4: biggest tid ever used */
 142	lid_t maxlid;		/* 4: biggest lid ever used */
 143	int ntid;		/* 4: # of transactions performed */
 144	int nlid;		/* 4: # of tlocks acquired */
 145	int waitlock;		/* 4: # of tlock wait */
 146} stattx;
 147
 148/*
 149 * forward references
 150 */
 151static void diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd,
 152		struct tlock *tlck, struct commit *cd);
 153static void dataLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd,
 154		struct tlock *tlck);
 155static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 156		struct tlock * tlck);
 157static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 158		struct tlock * tlck);
 159static void txAllocPMap(struct inode *ip, struct maplock * maplock,
 160		struct tblock * tblk);
 161static void txForce(struct tblock * tblk);
 162static void txLog(struct jfs_log *log, struct tblock *tblk,
 163		struct commit *cd);
 164static void txUpdateMap(struct tblock * tblk);
 165static void txRelease(struct tblock * tblk);
 166static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 167	   struct tlock * tlck);
 168static void LogSyncRelease(struct metapage * mp);
 169
 170/*
 171 *		transaction block/lock management
 172 *		---------------------------------
 173 */
 174
 175/*
 176 * Get a transaction lock from the free list.  If the number in use is
 177 * greater than the high water mark, wake up the sync daemon.  This should
 178 * free some anonymous transaction locks.  (TXN_LOCK must be held.)
 179 */
 180static lid_t txLockAlloc(void)
 181{
 182	lid_t lid;
 183
 184	INCREMENT(TxStat.txLockAlloc);
 185	if (!TxAnchor.freelock) {
 186		INCREMENT(TxStat.txLockAlloc_freelock);
 187	}
 188
 189	while (!(lid = TxAnchor.freelock))
 190		TXN_SLEEP(&TxAnchor.freelockwait);
 191	TxAnchor.freelock = TxLock[lid].next;
 192	HIGHWATERMARK(stattx.maxlid, lid);
 193	if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) {
 194		jfs_info("txLockAlloc tlocks low");
 195		jfs_tlocks_low = 1;
 196		wake_up_process(jfsSyncThread);
 197	}
 198
 199	return lid;
 200}
 201
 202static void txLockFree(lid_t lid)
 203{
 204	TxLock[lid].tid = 0;
 205	TxLock[lid].next = TxAnchor.freelock;
 206	TxAnchor.freelock = lid;
 207	TxAnchor.tlocksInUse--;
 208	if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) {
 209		jfs_info("txLockFree jfs_tlocks_low no more");
 210		jfs_tlocks_low = 0;
 211		TXN_WAKEUP(&TxAnchor.lowlockwait);
 212	}
 213	TXN_WAKEUP(&TxAnchor.freelockwait);
 214}
 215
 216/*
 217 * NAME:	txInit()
 218 *
 219 * FUNCTION:	initialize transaction management structures
 220 *
 221 * RETURN:
 222 *
 223 * serialization: single thread at jfs_init()
 224 */
 225int txInit(void)
 226{
 227	int k, size;
 228	struct sysinfo si;
 229
 230	/* Set defaults for nTxLock and nTxBlock if unset */
 231
 232	if (nTxLock == -1) {
 233		if (nTxBlock == -1) {
 234			/* Base default on memory size */
 235			si_meminfo(&si);
 236			if (si.totalram > (256 * 1024)) /* 1 GB */
 237				nTxLock = 64 * 1024;
 238			else
 239				nTxLock = si.totalram >> 2;
 240		} else if (nTxBlock > (8 * 1024))
 241			nTxLock = 64 * 1024;
 242		else
 243			nTxLock = nTxBlock << 3;
 244	}
 245	if (nTxBlock == -1)
 246		nTxBlock = nTxLock >> 3;
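	/*
	 * Worked example (assuming 4 KiB pages): on a 512 MiB machine
	 * si.totalram is 131072 pages, so nTxLock defaults to
	 * 131072 >> 2 = 32768 and nTxBlock to 32768 >> 3 = 4096.
	 */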
 247
 248	/* Verify tunable parameters */
 249	if (nTxBlock < 16)
 250		nTxBlock = 16;	/* No one should set it this low */
 251	if (nTxBlock > 65536)
 252		nTxBlock = 65536;
 253	if (nTxLock < 256)
 254		nTxLock = 256;	/* No one should set it this low */
 255	if (nTxLock > 65536)
 256		nTxLock = 65536;
 257
 258	printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n",
 259	       nTxBlock, nTxLock);
 260	/*
 261	 * initialize transaction block (tblock) table
 262	 *
 263	 * transaction id (tid) = tblock index
 264	 * tid = 0 is reserved.
 265	 */
 266	TxLockLWM = (nTxLock * 4) / 10;
 267	TxLockHWM = (nTxLock * 7) / 10;
 268	TxLockVHWM = (nTxLock * 8) / 10;
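	/*
	 * Worked example: with the large-memory default of nTxLock = 65536,
	 * these marks come out to LWM = 26214 (40%), HWM = 45875 (70%) and
	 * VHWM = 52428 (80%) tlocks in use.
	 */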
 269
 270	size = sizeof(struct tblock) * nTxBlock;
 271	TxBlock = vmalloc(size);
 272	if (TxBlock == NULL)
 273		return -ENOMEM;
 274
 275	for (k = 1; k < nTxBlock - 1; k++) {
 276		TxBlock[k].next = k + 1;
 277		init_waitqueue_head(&TxBlock[k].gcwait);
 278		init_waitqueue_head(&TxBlock[k].waitor);
 279	}
 280	TxBlock[k].next = 0;
 281	init_waitqueue_head(&TxBlock[k].gcwait);
 282	init_waitqueue_head(&TxBlock[k].waitor);
 283
 284	TxAnchor.freetid = 1;
 285	init_waitqueue_head(&TxAnchor.freewait);
 286
 287	stattx.maxtid = 1;	/* statistics */
 288
 289	/*
 290	 * initialize transaction lock (tlock) table
 291	 *
 292	 * transaction lock id = tlock index
 293	 * tlock id = 0 is reserved.
 294	 */
 295	size = sizeof(struct tlock) * nTxLock;
 296	TxLock = vmalloc(size);
 297	if (TxLock == NULL) {
 298		vfree(TxBlock);
 299		return -ENOMEM;
 300	}
 301
 302	/* initialize tlock table */
 303	for (k = 1; k < nTxLock - 1; k++)
 304		TxLock[k].next = k + 1;
 305	TxLock[k].next = 0;
 306	init_waitqueue_head(&TxAnchor.freelockwait);
 307	init_waitqueue_head(&TxAnchor.lowlockwait);
 308
 309	TxAnchor.freelock = 1;
 310	TxAnchor.tlocksInUse = 0;
 311	INIT_LIST_HEAD(&TxAnchor.anon_list);
 312	INIT_LIST_HEAD(&TxAnchor.anon_list2);
 313
 314	LAZY_LOCK_INIT();
 315	INIT_LIST_HEAD(&TxAnchor.unlock_queue);
 316
 317	stattx.maxlid = 1;	/* statistics */
 318
 319	return 0;
 320}
 321
 322/*
 323 * NAME:	txExit()
 324 *
 325 * FUNCTION:	clean up when module is unloaded
 326 */
 327void txExit(void)
 328{
 329	vfree(TxLock);
 330	TxLock = NULL;
 331	vfree(TxBlock);
 332	TxBlock = NULL;
 333}
 334
 335/*
 336 * NAME:	txBegin()
 337 *
 338 * FUNCTION:	start a transaction.
 339 *
 340 * PARAMETER:	sb	- superblock
 341 *		flag	- force for nested tx;
 342 *
 343 * RETURN:	tid	- transaction id
 344 *
  345 * note: the force flag allows starting a tx for a nested tx
  346 * to prevent deadlock on the logsync barrier;
 347 */
 348tid_t txBegin(struct super_block *sb, int flag)
 349{
 350	tid_t t;
 351	struct tblock *tblk;
 352	struct jfs_log *log;
 353
 354	jfs_info("txBegin: flag = 0x%x", flag);
 355	log = JFS_SBI(sb)->log;
 356
 357	if (!log) {
 358		jfs_error(sb, "read-only filesystem\n");
 359		return 0;
 360	}
 361
 362	TXN_LOCK();
 363
 364	INCREMENT(TxStat.txBegin);
 365
 366      retry:
 367	if (!(flag & COMMIT_FORCE)) {
 368		/*
 369		 * synchronize with logsync barrier
 370		 */
 371		if (test_bit(log_SYNCBARRIER, &log->flag) ||
 372		    test_bit(log_QUIESCE, &log->flag)) {
 373			INCREMENT(TxStat.txBegin_barrier);
 374			TXN_SLEEP(&log->syncwait);
 375			goto retry;
 376		}
 377	}
 378	if (flag == 0) {
 379		/*
 380		 * Don't begin transaction if we're getting starved for tlocks
 381		 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
 382		 * free tlocks)
 383		 */
 384		if (TxAnchor.tlocksInUse > TxLockVHWM) {
 385			INCREMENT(TxStat.txBegin_lockslow);
 386			TXN_SLEEP(&TxAnchor.lowlockwait);
 387			goto retry;
 388		}
 389	}
 390
 391	/*
 392	 * allocate transaction id/block
 393	 */
 394	if ((t = TxAnchor.freetid) == 0) {
 395		jfs_info("txBegin: waiting for free tid");
 396		INCREMENT(TxStat.txBegin_freetid);
 397		TXN_SLEEP(&TxAnchor.freewait);
 398		goto retry;
 399	}
 400
 401	tblk = tid_to_tblock(t);
 402
 403	if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) {
 404		/* Don't let a non-forced transaction take the last tblk */
 405		jfs_info("txBegin: waiting for free tid");
 406		INCREMENT(TxStat.txBegin_freetid);
 407		TXN_SLEEP(&TxAnchor.freewait);
 408		goto retry;
 409	}
 410
 411	TxAnchor.freetid = tblk->next;
 412
 413	/*
 414	 * initialize transaction
 415	 */
 416
 417	/*
 418	 * We can't zero the whole thing or we screw up another thread being
 419	 * awakened after sleeping on tblk->waitor
 420	 *
 421	 * memset(tblk, 0, sizeof(struct tblock));
 422	 */
 423	tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0;
 424
 425	tblk->sb = sb;
 426	++log->logtid;
 427	tblk->logtid = log->logtid;
 428
 429	++log->active;
 430
 431	HIGHWATERMARK(stattx.maxtid, t);	/* statistics */
 432	INCREMENT(stattx.ntid);	/* statistics */
 433
 434	TXN_UNLOCK();
 435
 436	jfs_info("txBegin: returning tid = %d", t);
 437
 438	return t;
 439}
 440
 441/*
 442 * NAME:	txBeginAnon()
 443 *
 444 * FUNCTION:	start an anonymous transaction.
 445 *		Blocks if logsync or available tlocks are low to prevent
 446 *		anonymous tlocks from depleting supply.
 447 *
 448 * PARAMETER:	sb	- superblock
 449 *
 450 * RETURN:	none
 451 */
 452void txBeginAnon(struct super_block *sb)
 453{
 454	struct jfs_log *log;
 455
 456	log = JFS_SBI(sb)->log;
 457
 458	TXN_LOCK();
 459	INCREMENT(TxStat.txBeginAnon);
 460
 461      retry:
 462	/*
 463	 * synchronize with logsync barrier
 464	 */
 465	if (test_bit(log_SYNCBARRIER, &log->flag) ||
 466	    test_bit(log_QUIESCE, &log->flag)) {
 467		INCREMENT(TxStat.txBeginAnon_barrier);
 468		TXN_SLEEP(&log->syncwait);
 469		goto retry;
 470	}
 471
 472	/*
 473	 * Don't begin transaction if we're getting starved for tlocks
 474	 */
 475	if (TxAnchor.tlocksInUse > TxLockVHWM) {
 476		INCREMENT(TxStat.txBeginAnon_lockslow);
 477		TXN_SLEEP(&TxAnchor.lowlockwait);
 478		goto retry;
 479	}
 480	TXN_UNLOCK();
 481}
 482
 483/*
 484 *	txEnd()
 485 *
 486 * function: free specified transaction block.
 487 *
 488 *	logsync barrier processing:
 489 *
 490 * serialization:
 491 */
 492void txEnd(tid_t tid)
 493{
 494	struct tblock *tblk = tid_to_tblock(tid);
 495	struct jfs_log *log;
 496
 497	jfs_info("txEnd: tid = %d", tid);
 498	TXN_LOCK();
 499
 500	/*
 501	 * wakeup transactions waiting on the page locked
 502	 * by the current transaction
 503	 */
 504	TXN_WAKEUP(&tblk->waitor);
 505
 506	log = JFS_SBI(tblk->sb)->log;
 507
 508	/*
 509	 * Lazy commit thread can't free this guy until we mark it UNLOCKED,
 510	 * otherwise, we would be left with a transaction that may have been
 511	 * reused.
 512	 *
 513	 * Lazy commit thread will turn off tblkGC_LAZY before calling this
 514	 * routine.
 515	 */
 516	if (tblk->flag & tblkGC_LAZY) {
 517		jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk);
 518		TXN_UNLOCK();
 519
 520		spin_lock_irq(&log->gclock);	// LOGGC_LOCK
 521		tblk->flag |= tblkGC_UNLOCKED;
 522		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
 523		return;
 524	}
 525
 526	jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk);
 527
 528	assert(tblk->next == 0);
 529
 530	/*
 531	 * insert tblock back on freelist
 532	 */
 533	tblk->next = TxAnchor.freetid;
 534	TxAnchor.freetid = tid;
 535
 536	/*
 537	 * mark the tblock not active
 538	 */
 539	if (--log->active == 0) {
 540		clear_bit(log_FLUSH, &log->flag);
 541
 542		/*
 543		 * synchronize with logsync barrier
 544		 */
 545		if (test_bit(log_SYNCBARRIER, &log->flag)) {
 546			TXN_UNLOCK();
 547
 548			/* write dirty metadata & forward log syncpt */
 549			jfs_syncpt(log, 1);
 550
 551			jfs_info("log barrier off: 0x%x", log->lsn);
 552
 553			/* enable new transactions start */
 554			clear_bit(log_SYNCBARRIER, &log->flag);
 555
 556			/* wakeup all waitors for logsync barrier */
 557			TXN_WAKEUP(&log->syncwait);
 558
 559			goto wakeup;
 560		}
 561	}
 562
 563	TXN_UNLOCK();
 564wakeup:
 565	/*
 566	 * wakeup all waitors for a free tblock
 567	 */
 568	TXN_WAKEUP(&TxAnchor.freewait);
 569}
 570
 571/*
 572 *	txLock()
 573 *
 574 * function: acquire a transaction lock on the specified <mp>
 575 *
 576 * parameter:
 577 *
 578 * return:	transaction lock id
 579 *
 580 * serialization:
 581 */
 582struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 583		     int type)
 584{
 585	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 586	int dir_xtree = 0;
 587	lid_t lid;
 588	tid_t xtid;
 589	struct tlock *tlck;
 590	struct xtlock *xtlck;
 591	struct linelock *linelock;
 592	xtpage_t *p;
 593	struct tblock *tblk;
 594
 595	TXN_LOCK();
 596
 597	if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) &&
 598	    !(mp->xflag & COMMIT_PAGE)) {
 599		/*
 600		 * Directory inode is special.  It can have both an xtree tlock
 601		 * and a dtree tlock associated with it.
 602		 */
 603		dir_xtree = 1;
 604		lid = jfs_ip->xtlid;
 605	} else
 606		lid = mp->lid;
 607
 608	/* is page not locked by a transaction ? */
 609	if (lid == 0)
 610		goto allocateLock;
 611
 612	jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid);
 613
 614	/* is page locked by the requester transaction ? */
 615	tlck = lid_to_tlock(lid);
 616	if ((xtid = tlck->tid) == tid) {
 617		TXN_UNLOCK();
 618		goto grantLock;
 619	}
 620
 621	/*
 622	 * is page locked by anonymous transaction/lock ?
 623	 *
 624	 * (page update without transaction (i.e., file write) is
 625	 * locked under anonymous transaction tid = 0:
 626	 * anonymous tlocks maintained on anonymous tlock list of
 627	 * the inode of the page and available to all anonymous
 628	 * transactions until txCommit() time at which point
 629	 * they are transferred to the transaction tlock list of
 630	 * the committing transaction of the inode)
 631	 */
 632	if (xtid == 0) {
 633		tlck->tid = tid;
 634		TXN_UNLOCK();
 635		tblk = tid_to_tblock(tid);
 636		/*
 637		 * The order of the tlocks in the transaction is important
 638		 * (during truncate, child xtree pages must be freed before
 639		 * parent's tlocks change the working map).
 640		 * Take tlock off anonymous list and add to tail of
 641		 * transaction list
 642		 *
 643		 * Note:  We really need to get rid of the tid & lid and
 644		 * use list_head's.  This code is getting UGLY!
 645		 */
 646		if (jfs_ip->atlhead == lid) {
 647			if (jfs_ip->atltail == lid) {
 648				/* only anonymous txn.
 649				 * Remove from anon_list
 650				 */
 651				TXN_LOCK();
 652				list_del_init(&jfs_ip->anon_inode_list);
 653				TXN_UNLOCK();
 654			}
 655			jfs_ip->atlhead = tlck->next;
 656		} else {
 657			lid_t last;
 658			for (last = jfs_ip->atlhead;
 659			     lid_to_tlock(last)->next != lid;
 660			     last = lid_to_tlock(last)->next) {
 661				assert(last);
 662			}
 663			lid_to_tlock(last)->next = tlck->next;
 664			if (jfs_ip->atltail == lid)
 665				jfs_ip->atltail = last;
 666		}
 667
 668		/* insert the tlock at tail of transaction tlock list */
 669
 670		if (tblk->next)
 671			lid_to_tlock(tblk->last)->next = lid;
 672		else
 673			tblk->next = lid;
 674		tlck->next = 0;
 675		tblk->last = lid;
 676
 677		goto grantLock;
 678	}
 679
 680	goto waitLock;
 681
 682	/*
 683	 * allocate a tlock
 684	 */
 685      allocateLock:
 686	lid = txLockAlloc();
 687	tlck = lid_to_tlock(lid);
 688
 689	/*
 690	 * initialize tlock
 691	 */
 692	tlck->tid = tid;
 693
 694	TXN_UNLOCK();
 695
 696	/* mark tlock for meta-data page */
 697	if (mp->xflag & COMMIT_PAGE) {
 698
 699		tlck->flag = tlckPAGELOCK;
 700
 701		/* mark the page dirty and nohomeok */
 702		metapage_nohomeok(mp);
 703
 704		jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
 705			 mp, mp->nohomeok, tid, tlck);
 706
 707		/* if anonymous transaction, and buffer is on the group
 708		 * commit synclist, mark inode to show this.  This will
 709		 * prevent the buffer from being marked nohomeok for too
 710		 * long a time.
 711		 */
 712		if ((tid == 0) && mp->lsn)
 713			set_cflag(COMMIT_Synclist, ip);
 714	}
 715	/* mark tlock for in-memory inode */
 716	else
 717		tlck->flag = tlckINODELOCK;
 718
 719	if (S_ISDIR(ip->i_mode))
 720		tlck->flag |= tlckDIRECTORY;
 721
 722	tlck->type = 0;
 723
 724	/* bind the tlock and the page */
 725	tlck->ip = ip;
 726	tlck->mp = mp;
 727	if (dir_xtree)
 728		jfs_ip->xtlid = lid;
 729	else
 730		mp->lid = lid;
 731
 732	/*
 733	 * enqueue transaction lock to transaction/inode
 734	 */
 735	/* insert the tlock at tail of transaction tlock list */
 736	if (tid) {
 737		tblk = tid_to_tblock(tid);
 738		if (tblk->next)
 739			lid_to_tlock(tblk->last)->next = lid;
 740		else
 741			tblk->next = lid;
 742		tlck->next = 0;
 743		tblk->last = lid;
 744	}
 745	/* anonymous transaction:
 746	 * insert the tlock at head of inode anonymous tlock list
 747	 */
 748	else {
 749		tlck->next = jfs_ip->atlhead;
 750		jfs_ip->atlhead = lid;
 751		if (tlck->next == 0) {
 752			/* This inode's first anonymous transaction */
 753			jfs_ip->atltail = lid;
 754			TXN_LOCK();
 755			list_add_tail(&jfs_ip->anon_inode_list,
 756				      &TxAnchor.anon_list);
 757			TXN_UNLOCK();
 758		}
 759	}
 760
 761	/* initialize type dependent area for linelock */
 762	linelock = (struct linelock *) & tlck->lock;
 763	linelock->next = 0;
 764	linelock->flag = tlckLINELOCK;
 765	linelock->maxcnt = TLOCKSHORT;
 766	linelock->index = 0;
 767
 768	switch (type & tlckTYPE) {
 769	case tlckDTREE:
 770		linelock->l2linesize = L2DTSLOTSIZE;
 771		break;
 772
 773	case tlckXTREE:
 774		linelock->l2linesize = L2XTSLOTSIZE;
 775
 776		xtlck = (struct xtlock *) linelock;
 777		xtlck->header.offset = 0;
 778		xtlck->header.length = 2;
 779
 780		if (type & tlckNEW) {
 781			xtlck->lwm.offset = XTENTRYSTART;
 782		} else {
 783			if (mp->xflag & COMMIT_PAGE)
 784				p = (xtpage_t *) mp->data;
 785			else
 786				p = (xtpage_t *) &jfs_ip->i_xtroot;
 787			xtlck->lwm.offset =
 788			    le16_to_cpu(p->header.nextindex);
 789		}
 790		xtlck->lwm.length = 0;	/* ! */
 791		xtlck->twm.offset = 0;
 792		xtlck->hwm.offset = 0;
 793
 794		xtlck->index = 2;
 795		break;
 796
 797	case tlckINODE:
 798		linelock->l2linesize = L2INODESLOTSIZE;
 799		break;
 800
 801	case tlckDATA:
 802		linelock->l2linesize = L2DATASLOTSIZE;
 803		break;
 804
 805	default:
 806		jfs_err("UFO tlock:0x%p", tlck);
 807	}
 808
 809	/*
 810	 * update tlock vector
 811	 */
 812      grantLock:
 813	tlck->type |= type;
 814
 815	return tlck;
 816
 817	/*
 818	 * page is being locked by another transaction:
 819	 */
 820      waitLock:
 821	/* Only locks on ipimap or ipaimap should reach here */
 822	/* assert(jfs_ip->fileset == AGGREGATE_I); */
 823	if (jfs_ip->fileset != AGGREGATE_I) {
 824		printk(KERN_ERR "txLock: trying to lock locked page!");
 825		print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
 826			       ip, sizeof(*ip), 0);
 827		print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
 828			       mp, sizeof(*mp), 0);
 829		print_hex_dump(KERN_ERR, "Locker's tblock: ",
 830			       DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
 831			       sizeof(struct tblock), 0);
 832		print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
 833			       tlck, sizeof(*tlck), 0);
 834		BUG();
 835	}
 836	INCREMENT(stattx.waitlock);	/* statistics */
 837	TXN_UNLOCK();
 838	release_metapage(mp);
 839	TXN_LOCK();
 840	xtid = tlck->tid;	/* reacquire after dropping TXN_LOCK */
 841
 842	jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
 843		 tid, xtid, lid);
 844
 845	/* Recheck everything since dropping TXN_LOCK */
 846	if (xtid && (tlck->mp == mp) && (mp->lid == lid))
 847		TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
 848	else
 849		TXN_UNLOCK();
 850	jfs_info("txLock: awakened     tid = %d, lid = %d", tid, lid);
 851
 852	return NULL;
 853}
 854
 855/*
 856 * NAME:	txRelease()
 857 *
 858 * FUNCTION:	Release buffers associated with transaction locks, but don't
  859 *		mark homeok yet.  This allows other transactions to modify
  860 *		buffers, but won't let them go to disk until the commit record
 861 *		actually gets written.
 862 *
 863 * PARAMETER:
 864 *		tblk	-
 865 *
 866 * RETURN:	Errors from subroutines.
 867 */
 868static void txRelease(struct tblock * tblk)
 869{
 870	struct metapage *mp;
 871	lid_t lid;
 872	struct tlock *tlck;
 873
 874	TXN_LOCK();
 875
 876	for (lid = tblk->next; lid; lid = tlck->next) {
 877		tlck = lid_to_tlock(lid);
 878		if ((mp = tlck->mp) != NULL &&
 879		    (tlck->type & tlckBTROOT) == 0) {
 880			assert(mp->xflag & COMMIT_PAGE);
 881			mp->lid = 0;
 882		}
 883	}
 884
 885	/*
 886	 * wakeup transactions waiting on a page locked
 887	 * by the current transaction
 888	 */
 889	TXN_WAKEUP(&tblk->waitor);
 890
 891	TXN_UNLOCK();
 892}
 893
 894/*
 895 * NAME:	txUnlock()
 896 *
 897 * FUNCTION:	Initiates pageout of pages modified by tid in journalled
 898 *		objects and frees their lockwords.
 899 */
 900static void txUnlock(struct tblock * tblk)
 901{
 902	struct tlock *tlck;
 903	struct linelock *linelock;
 904	lid_t lid, next, llid, k;
 905	struct metapage *mp;
 906	struct jfs_log *log;
 907	int difft, diffp;
 908	unsigned long flags;
 909
 910	jfs_info("txUnlock: tblk = 0x%p", tblk);
 911	log = JFS_SBI(tblk->sb)->log;
 912
 913	/*
 914	 * mark page under tlock homeok (its log has been written):
 915	 */
 916	for (lid = tblk->next; lid; lid = next) {
 917		tlck = lid_to_tlock(lid);
 918		next = tlck->next;
 919
 920		jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck);
 921
 922		/* unbind page from tlock */
 923		if ((mp = tlck->mp) != NULL &&
 924		    (tlck->type & tlckBTROOT) == 0) {
 925			assert(mp->xflag & COMMIT_PAGE);
 926
 927			/* hold buffer
 928			 */
 929			hold_metapage(mp);
 930
 931			assert(mp->nohomeok > 0);
 932			_metapage_homeok(mp);
 933
 934			/* inherit younger/larger clsn */
 935			LOGSYNC_LOCK(log, flags);
 936			if (mp->clsn) {
 937				logdiff(difft, tblk->clsn, log);
 938				logdiff(diffp, mp->clsn, log);
 939				if (difft > diffp)
 940					mp->clsn = tblk->clsn;
 941			} else
 942				mp->clsn = tblk->clsn;
 943			LOGSYNC_UNLOCK(log, flags);
 944
 945			assert(!(tlck->flag & tlckFREEPAGE));
 946
 947			put_metapage(mp);
 948		}
 949
 950		/* insert tlock, and linelock(s) of the tlock if any,
 951		 * at head of freelist
 952		 */
 953		TXN_LOCK();
 954
 955		llid = ((struct linelock *) & tlck->lock)->next;
 956		while (llid) {
 957			linelock = (struct linelock *) lid_to_tlock(llid);
 958			k = linelock->next;
 959			txLockFree(llid);
 960			llid = k;
 961		}
 962		txLockFree(lid);
 963
 964		TXN_UNLOCK();
 965	}
 966	tblk->next = tblk->last = 0;
 967
 968	/*
 969	 * remove tblock from logsynclist
  970 * (allocation map pages inherited the lsn of tblk, which has
  971 * been inserted in the logsync list at txUpdateMap())
 972	 */
 973	if (tblk->lsn) {
 974		LOGSYNC_LOCK(log, flags);
 975		log->count--;
 976		list_del(&tblk->synclist);
 977		LOGSYNC_UNLOCK(log, flags);
 978	}
 979}
 980
 981/*
 982 *	txMaplock()
 983 *
 984 * function: allocate a transaction lock for freed page/entry;
 985 *	for freed page, maplock is used as xtlock/dtlock type;
 986 */
 987struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 988{
 989	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 990	lid_t lid;
 991	struct tblock *tblk;
 992	struct tlock *tlck;
 993	struct maplock *maplock;
 994
 995	TXN_LOCK();
 996
 997	/*
 998	 * allocate a tlock
 999	 */
1000	lid = txLockAlloc();
1001	tlck = lid_to_tlock(lid);
1002
1003	/*
1004	 * initialize tlock
1005	 */
1006	tlck->tid = tid;
1007
1008	/* bind the tlock and the object */
1009	tlck->flag = tlckINODELOCK;
1010	if (S_ISDIR(ip->i_mode))
1011		tlck->flag |= tlckDIRECTORY;
1012	tlck->ip = ip;
1013	tlck->mp = NULL;
1014
1015	tlck->type = type;
1016
1017	/*
1018	 * enqueue transaction lock to transaction/inode
1019	 */
1020	/* insert the tlock at tail of transaction tlock list */
1021	if (tid) {
1022		tblk = tid_to_tblock(tid);
1023		if (tblk->next)
1024			lid_to_tlock(tblk->last)->next = lid;
1025		else
1026			tblk->next = lid;
1027		tlck->next = 0;
1028		tblk->last = lid;
1029	}
1030	/* anonymous transaction:
1031	 * insert the tlock at head of inode anonymous tlock list
1032	 */
1033	else {
1034		tlck->next = jfs_ip->atlhead;
1035		jfs_ip->atlhead = lid;
1036		if (tlck->next == 0) {
1037			/* This inode's first anonymous transaction */
1038			jfs_ip->atltail = lid;
1039			list_add_tail(&jfs_ip->anon_inode_list,
1040				      &TxAnchor.anon_list);
1041		}
1042	}
1043
1044	TXN_UNLOCK();
1045
1046	/* initialize type dependent area for maplock */
1047	maplock = (struct maplock *) & tlck->lock;
1048	maplock->next = 0;
1049	maplock->maxcnt = 0;
1050	maplock->index = 0;
1051
1052	return tlck;
1053}
1054
1055/*
1056 *	txLinelock()
1057 *
1058 * function: allocate a transaction lock for log vector list
1059 */
1060struct linelock *txLinelock(struct linelock * tlock)
1061{
1062	lid_t lid;
1063	struct tlock *tlck;
1064	struct linelock *linelock;
1065
1066	TXN_LOCK();
1067
1068	/* allocate a TxLock structure */
1069	lid = txLockAlloc();
1070	tlck = lid_to_tlock(lid);
1071
1072	TXN_UNLOCK();
1073
1074	/* initialize linelock */
1075	linelock = (struct linelock *) tlck;
1076	linelock->next = 0;
1077	linelock->flag = tlckLINELOCK;
1078	linelock->maxcnt = TLOCKLONG;
1079	linelock->index = 0;
1080	if (tlck->flag & tlckDIRECTORY)
1081		linelock->flag |= tlckDIRECTORY;
1082
1083	/* append linelock after tlock */
1084	linelock->next = tlock->next;
1085	tlock->next = lid;
1086
1087	return linelock;
1088}
1089
1090/*
1091 *		transaction commit management
1092 *		-----------------------------
1093 */
1094
1095/*
1096 * NAME:	txCommit()
1097 *
1098 * FUNCTION:	commit the changes to the objects specified in
1099 *		clist.  For journalled segments only the
1100 *		changes of the caller are committed, ie by tid.
1101 *		for non-journalled segments the data are flushed to
1102 *		disk and then the change to the disk inode and indirect
1103 *		blocks committed (so blocks newly allocated to the
1104 *		segment will be made a part of the segment atomically).
1105 *
1106 *		all of the segments specified in clist must be in
1107 *		one file system. no more than 6 segments are needed
1108 *		to handle all unix svcs.
1109 *
1110 *		if the i_nlink field (i.e. disk inode link count)
1111 *		is zero, and the type of inode is a regular file or
 1112 *		directory, or symbolic link, the inode is truncated
1113 *		to zero length. the truncation is committed but the
1114 *		VM resources are unaffected until it is closed (see
1115 *		iput and iclose).
1116 *
1117 * PARAMETER:
1118 *
1119 * RETURN:
1120 *
1121 * serialization:
1122 *		on entry the inode lock on each segment is assumed
1123 *		to be held.
1124 *
1125 * i/o error:
1126 */
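/*
 * Illustrative caller sketch (not part of the original source): a
 * namei-style operation committing a parent directory 'dip' and a child
 * inode 'ip' together; per-inode commit mutexes and error handling omitted.
 *
 *	struct inode *iplist[2];
 *	tid_t tid = txBegin(dip->i_sb, 0);
 *	... directory and inode updates take tlocks under this tid ...
 *	iplist[0] = dip;
 *	iplist[1] = ip;
 *	rc = txCommit(tid, 2, iplist, 0);
 *	txEnd(tid);
 */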
1127int txCommit(tid_t tid,		/* transaction identifier */
1128	     int nip,		/* number of inodes to commit */
1129	     struct inode **iplist,	/* list of inode to commit */
1130	     int flag)
1131{
1132	int rc = 0;
1133	struct commit cd;
1134	struct jfs_log *log;
1135	struct tblock *tblk;
1136	struct lrd *lrd;
1137	struct inode *ip;
1138	struct jfs_inode_info *jfs_ip;
1139	int k, n;
1140	ino_t top;
1141	struct super_block *sb;
1142
1143	jfs_info("txCommit, tid = %d, flag = %d", tid, flag);
1144	/* is read-only file system ? */
1145	if (isReadOnly(iplist[0])) {
1146		rc = -EROFS;
1147		goto TheEnd;
1148	}
1149
1150	sb = cd.sb = iplist[0]->i_sb;
1151	cd.tid = tid;
1152
1153	if (tid == 0)
1154		tid = txBegin(sb, 0);
1155	tblk = tid_to_tblock(tid);
1156
1157	/*
1158	 * initialize commit structure
1159	 */
1160	log = JFS_SBI(sb)->log;
1161	cd.log = log;
1162
1163	/* initialize log record descriptor in commit */
1164	lrd = &cd.lrd;
1165	lrd->logtid = cpu_to_le32(tblk->logtid);
1166	lrd->backchain = 0;
1167
1168	tblk->xflag |= flag;
1169
1170	if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
1171		tblk->xflag |= COMMIT_LAZY;
1172	/*
1173	 *	prepare non-journaled objects for commit
1174	 *
1175	 * flush data pages of non-journaled file
1176	 * to prevent the file getting non-initialized disk blocks
1177	 * in case of crash.
1178	 * (new blocks - )
1179	 */
1180	cd.iplist = iplist;
1181	cd.nip = nip;
1182
1183	/*
1184	 *	acquire transaction lock on (on-disk) inodes
1185	 *
1186	 * update on-disk inode from in-memory inode
1187	 * acquiring transaction locks for AFTER records
1188	 * on the on-disk inode of file object
1189	 *
1190	 * sort the inodes array by inode number in descending order
1191	 * to prevent deadlock when acquiring transaction lock
1192	 * of on-disk inodes on multiple on-disk inode pages by
1193	 * multiple concurrent transactions
1194	 */
1195	for (k = 0; k < cd.nip; k++) {
1196		top = (cd.iplist[k])->i_ino;
1197		for (n = k + 1; n < cd.nip; n++) {
1198			ip = cd.iplist[n];
1199			if (ip->i_ino > top) {
1200				top = ip->i_ino;
1201				cd.iplist[n] = cd.iplist[k];
1202				cd.iplist[k] = ip;
1203			}
1204		}
1205
1206		ip = cd.iplist[k];
1207		jfs_ip = JFS_IP(ip);
1208
1209		/*
1210		 * BUGBUG - This code has temporarily been removed.  The
1211		 * intent is to ensure that any file data is written before
1212		 * the metadata is committed to the journal.  This prevents
1213		 * uninitialized data from appearing in a file after the
1214		 * journal has been replayed.  (The uninitialized data
1215		 * could be sensitive data removed by another user.)
1216		 *
1217		 * The problem now is that we are holding the IWRITELOCK
1218		 * on the inode, and calling filemap_fdatawrite on an
1219		 * unmapped page will cause a deadlock in jfs_get_block.
1220		 *
1221		 * The long term solution is to pare down the use of
1222		 * IWRITELOCK.  We are currently holding it too long.
1223		 * We could also be smarter about which data pages need
1224		 * to be written before the transaction is committed and
1225		 * when we don't need to worry about it at all.
1226		 *
1227		 * if ((!S_ISDIR(ip->i_mode))
1228		 *    && (tblk->flag & COMMIT_DELETE) == 0)
1229		 *	filemap_write_and_wait(ip->i_mapping);
1230		 */
1231
1232		/*
1233		 * Mark inode as not dirty.  It will still be on the dirty
1234		 * inode list, but we'll know not to commit it again unless
1235		 * it gets marked dirty again
1236		 */
1237		clear_cflag(COMMIT_Dirty, ip);
1238
1239		/* inherit anonymous tlock(s) of inode */
1240		if (jfs_ip->atlhead) {
1241			lid_to_tlock(jfs_ip->atltail)->next = tblk->next;
1242			tblk->next = jfs_ip->atlhead;
1243			if (!tblk->last)
1244				tblk->last = jfs_ip->atltail;
1245			jfs_ip->atlhead = jfs_ip->atltail = 0;
1246			TXN_LOCK();
1247			list_del_init(&jfs_ip->anon_inode_list);
1248			TXN_UNLOCK();
1249		}
1250
1251		/*
1252		 * acquire transaction lock on on-disk inode page
1253		 * (become first tlock of the tblk's tlock list)
1254		 */
1255		if (((rc = diWrite(tid, ip))))
1256			goto out;
1257	}
1258
1259	/*
1260	 *	write log records from transaction locks
1261	 *
1262	 * txUpdateMap() resets XAD_NEW in XAD.
1263	 */
 1264	txLog(log, tblk, &cd);
 1265
1266	/*
1267	 * Ensure that inode isn't reused before
1268	 * lazy commit thread finishes processing
1269	 */
1270	if (tblk->xflag & COMMIT_DELETE) {
1271		ihold(tblk->u.ip);
1272		/*
1273		 * Avoid a rare deadlock
1274		 *
1275		 * If the inode is locked, we may be blocked in
1276		 * jfs_commit_inode.  If so, we don't want the
1277		 * lazy_commit thread doing the last iput() on the inode
1278		 * since that may block on the locked inode.  Instead,
1279		 * commit the transaction synchronously, so the last iput
1280		 * will be done by the calling thread (or later)
1281		 */
1282		/*
1283		 * I believe this code is no longer needed.  Splitting I_LOCK
1284		 * into two bits, I_NEW and I_SYNC should prevent this
1285		 * deadlock as well.  But since I don't have a JFS testload
1286		 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
1287		 * Joern
1288		 */
1289		if (tblk->u.ip->i_state & I_SYNC)
1290			tblk->xflag &= ~COMMIT_LAZY;
1291	}
1292
1293	ASSERT((!(tblk->xflag & COMMIT_DELETE)) ||
1294	       ((tblk->u.ip->i_nlink == 0) &&
1295		!test_cflag(COMMIT_Nolink, tblk->u.ip)));
1296
1297	/*
1298	 *	write COMMIT log record
1299	 */
1300	lrd->type = cpu_to_le16(LOG_COMMIT);
1301	lrd->length = 0;
1302	lmLog(log, tblk, lrd, NULL);
1303
1304	lmGroupCommit(log, tblk);
1305
1306	/*
1307	 *	- transaction is now committed -
1308	 */
1309
1310	/*
1311	 * force pages in careful update
1312	 * (imap addressing structure update)
1313	 */
1314	if (flag & COMMIT_FORCE)
1315		txForce(tblk);
1316
1317	/*
1318	 *	update allocation map.
1319	 *
1320	 * update inode allocation map and inode:
1321	 * free pager lock on memory object of inode if any.
1322	 * update block allocation map.
1323	 *
1324	 * txUpdateMap() resets XAD_NEW in XAD.
1325	 */
1326	if (tblk->xflag & COMMIT_FORCE)
1327		txUpdateMap(tblk);
1328
1329	/*
1330	 *	free transaction locks and pageout/free pages
1331	 */
1332	txRelease(tblk);
1333
1334	if ((tblk->flag & tblkGC_LAZY) == 0)
1335		txUnlock(tblk);
1336
1337
1338	/*
1339	 *	reset in-memory object state
1340	 */
1341	for (k = 0; k < cd.nip; k++) {
1342		ip = cd.iplist[k];
1343		jfs_ip = JFS_IP(ip);
1344
1345		/*
1346		 * reset in-memory inode state
1347		 */
1348		jfs_ip->bxflag = 0;
1349		jfs_ip->blid = 0;
1350	}
1351
1352      out:
1353	if (rc != 0)
1354		txAbort(tid, 1);
1355
1356      TheEnd:
1357	jfs_info("txCommit: tid = %d, returning %d", tid, rc);
1358	return rc;
1359}
1360
1361/*
1362 * NAME:	txLog()
1363 *
1364 * FUNCTION:	Writes AFTER log records for all lines modified
1365 *		by tid for segments specified by inodes in comdata.
1366 *		Code assumes only WRITELOCKS are recorded in lockwords.
1367 *
1368 * PARAMETERS:
1369 *
1370 * RETURN :
1371 */
1372static void txLog(struct jfs_log *log, struct tblock *tblk, struct commit *cd)
 1373{
 1374	struct inode *ip;
1375	lid_t lid;
1376	struct tlock *tlck;
1377	struct lrd *lrd = &cd->lrd;
1378
1379	/*
1380	 * write log record(s) for each tlock of transaction,
1381	 */
1382	for (lid = tblk->next; lid; lid = tlck->next) {
1383		tlck = lid_to_tlock(lid);
1384
1385		tlck->flag |= tlckLOG;
1386
1387		/* initialize lrd common */
1388		ip = tlck->ip;
1389		lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate);
1390		lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset);
1391		lrd->log.redopage.inode = cpu_to_le32(ip->i_ino);
1392
1393		/* write log record of page from the tlock */
1394		switch (tlck->type & tlckTYPE) {
1395		case tlckXTREE:
1396			xtLog(log, tblk, lrd, tlck);
1397			break;
1398
1399		case tlckDTREE:
1400			dtLog(log, tblk, lrd, tlck);
1401			break;
1402
1403		case tlckINODE:
1404			diLog(log, tblk, lrd, tlck, cd);
1405			break;
1406
1407		case tlckMAP:
1408			mapLog(log, tblk, lrd, tlck);
1409			break;
1410
1411		case tlckDATA:
1412			dataLog(log, tblk, lrd, tlck);
1413			break;
1414
1415		default:
1416			jfs_err("UFO tlock:0x%p", tlck);
1417		}
1418	}
1419
1420	return;
1421}
1422
1423/*
1424 *	diLog()
1425 *
1426 * function:	log inode tlock and format maplock to update bmap;
1427 */
1428static void diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd,
1429		 struct tlock *tlck, struct commit *cd)
 1430{
 1431	struct metapage *mp;
1432	pxd_t *pxd;
1433	struct pxd_lock *pxdlock;
1434
1435	mp = tlck->mp;
1436
1437	/* initialize as REDOPAGE record format */
1438	lrd->log.redopage.type = cpu_to_le16(LOG_INODE);
1439	lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE);
1440
1441	pxd = &lrd->log.redopage.pxd;
1442
1443	/*
1444	 *	inode after image
1445	 */
1446	if (tlck->type & tlckENTRY) {
1447		/* log after-image for logredo(): */
1448		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1449		PXDaddress(pxd, mp->index);
1450		PXDlength(pxd,
1451			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1452		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1453
1454		/* mark page as homeward bound */
1455		tlck->flag |= tlckWRITEPAGE;
1456	} else if (tlck->type & tlckFREE) {
1457		/*
1458		 *	free inode extent
1459		 *
1460		 * (pages of the freed inode extent have been invalidated and
1461		 * a maplock for free of the extent has been formatted at
1462		 * txLock() time);
1463		 *
1464		 * the tlock had been acquired on the inode allocation map page
1465		 * (iag) that specifies the freed extent, even though the map
1466		 * page is not itself logged, to prevent pageout of the map
1467		 * page before the log;
1468		 */
1469
1470		/* log LOG_NOREDOINOEXT of the freed inode extent for
1471		 * logredo() to start NoRedoPage filters, and to update
1472		 * imap and bmap for free of the extent;
1473		 */
1474		lrd->type = cpu_to_le16(LOG_NOREDOINOEXT);
1475		/*
1476		 * For the LOG_NOREDOINOEXT record, we need
1477		 * to pass the IAG number and inode extent
1478		 * index (within that IAG) from which the
1479		 * extent is being released.  These have been
1480		 * passed to us in the iplist[1] and iplist[2].
1481		 */
1482		lrd->log.noredoinoext.iagnum =
1483		    cpu_to_le32((u32) (size_t) cd->iplist[1]);
1484		lrd->log.noredoinoext.inoext_idx =
1485		    cpu_to_le32((u32) (size_t) cd->iplist[2]);
1486
1487		pxdlock = (struct pxd_lock *) & tlck->lock;
1488		*pxd = pxdlock->pxd;
1489		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1490
1491		/* update bmap */
1492		tlck->flag |= tlckUPDATEMAP;
1493
1494		/* mark page as homeward bound */
1495		tlck->flag |= tlckWRITEPAGE;
1496	} else
1497		jfs_err("diLog: UFO type tlck:0x%p", tlck);
 1498	return;
 1499}
1500
1501/*
1502 *	dataLog()
1503 *
1504 * function:	log data tlock
1505 */
1506static void dataLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd,
1507	    struct tlock *tlck)
1508{
1509	struct metapage *mp;
1510	pxd_t *pxd;
1511
1512	mp = tlck->mp;
1513
1514	/* initialize as REDOPAGE record format */
1515	lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
1516	lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);
1517
1518	pxd = &lrd->log.redopage.pxd;
1519
1520	/* log after-image for logredo(): */
1521	lrd->type = cpu_to_le16(LOG_REDOPAGE);
1522
1523	if (jfs_dirtable_inline(tlck->ip)) {
1524		/*
 1525		 * The table has been truncated; we must have deleted
 1526		 * the last entry, so don't bother logging this
1527		 */
1528		mp->lid = 0;
1529		grab_metapage(mp);
1530		metapage_homeok(mp);
1531		discard_metapage(mp);
1532		tlck->mp = NULL;
1533		return;
1534	}
1535
1536	PXDaddress(pxd, mp->index);
1537	PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);
1538
1539	lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1540
1541	/* mark page as homeward bound */
1542	tlck->flag |= tlckWRITEPAGE;
1543
1544	return;
1545}
1546
1547/*
1548 *	dtLog()
1549 *
1550 * function:	log dtree tlock and format maplock to update bmap;
1551 */
1552static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1553	   struct tlock * tlck)
1554{
1555	struct metapage *mp;
1556	struct pxd_lock *pxdlock;
1557	pxd_t *pxd;
1558
1559	mp = tlck->mp;
1560
1561	/* initialize as REDOPAGE/NOREDOPAGE record format */
1562	lrd->log.redopage.type = cpu_to_le16(LOG_DTREE);
1563	lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE);
1564
1565	pxd = &lrd->log.redopage.pxd;
1566
1567	if (tlck->type & tlckBTROOT)
1568		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1569
1570	/*
1571	 *	page extension via relocation: entry insertion;
1572	 *	page extension in-place: entry insertion;
1573	 *	new right page from page split, reinitialized in-line
1574	 *	root from root page split: entry insertion;
1575	 */
1576	if (tlck->type & (tlckNEW | tlckEXTEND)) {
1577		/* log after-image of the new page for logredo():
1578		 * mark log (LOG_NEW) for logredo() to initialize
1579		 * freelist and update bmap for alloc of the new page;
1580		 */
1581		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1582		if (tlck->type & tlckEXTEND)
1583			lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
1584		else
1585			lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
1586		PXDaddress(pxd, mp->index);
1587		PXDlength(pxd,
1588			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1589		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1590
1591		/* format a maplock for txUpdateMap() to update bPMAP for
1592		 * alloc of the new page;
1593		 */
1594		if (tlck->type & tlckBTROOT)
1595			return;
1596		tlck->flag |= tlckUPDATEMAP;
1597		pxdlock = (struct pxd_lock *) & tlck->lock;
1598		pxdlock->flag = mlckALLOCPXD;
1599		pxdlock->pxd = *pxd;
1600
1601		pxdlock->index = 1;
1602
1603		/* mark page as homeward bound */
1604		tlck->flag |= tlckWRITEPAGE;
1605		return;
1606	}
1607
1608	/*
1609	 *	entry insertion/deletion,
1610	 *	sibling page link update (old right page before split);
1611	 */
1612	if (tlck->type & (tlckENTRY | tlckRELINK)) {
1613		/* log after-image for logredo(): */
1614		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1615		PXDaddress(pxd, mp->index);
1616		PXDlength(pxd,
1617			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1618		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1619
1620		/* mark page as homeward bound */
1621		tlck->flag |= tlckWRITEPAGE;
1622		return;
1623	}
1624
1625	/*
1626	 *	page deletion: page has been invalidated
1627	 *	page relocation: source extent
1628	 *
1629	 *	a maplock for free of the page has been formatted
1630	 *	at txLock() time);
1631	 */
1632	if (tlck->type & (tlckFREE | tlckRELOCATE)) {
1633		/* log LOG_NOREDOPAGE of the deleted page for logredo()
1634		 * to start NoRedoPage filter and to update bmap for free
 1635		 * of the deleted page
1636		 */
1637		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1638		pxdlock = (struct pxd_lock *) & tlck->lock;
1639		*pxd = pxdlock->pxd;
1640		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1641
1642		/* a maplock for txUpdateMap() for free of the page
1643		 * has been formatted at txLock() time;
1644		 */
1645		tlck->flag |= tlckUPDATEMAP;
1646	}
1647	return;
1648}
1649
1650/*
1651 *	xtLog()
1652 *
1653 * function:	log xtree tlock and format maplock to update bmap;
1654 */
1655static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1656	   struct tlock * tlck)
1657{
1658	struct inode *ip;
1659	struct metapage *mp;
1660	xtpage_t *p;
1661	struct xtlock *xtlck;
1662	struct maplock *maplock;
1663	struct xdlistlock *xadlock;
1664	struct pxd_lock *pxdlock;
1665	pxd_t *page_pxd;
1666	int next, lwm, hwm;
1667
1668	ip = tlck->ip;
1669	mp = tlck->mp;
1670
1671	/* initialize as REDOPAGE/NOREDOPAGE record format */
1672	lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
1673	lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);
1674
1675	page_pxd = &lrd->log.redopage.pxd;
1676
1677	if (tlck->type & tlckBTROOT) {
1678		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1679		p = (xtpage_t *) &JFS_IP(ip)->i_xtroot;
1680		if (S_ISDIR(ip->i_mode))
1681			lrd->log.redopage.type |=
1682			    cpu_to_le16(LOG_DIR_XTREE);
1683	} else
1684		p = (xtpage_t *) mp->data;
1685	next = le16_to_cpu(p->header.nextindex);
1686
1687	xtlck = (struct xtlock *) & tlck->lock;
1688
1689	maplock = (struct maplock *) & tlck->lock;
1690	xadlock = (struct xdlistlock *) maplock;
1691
1692	/*
1693	 *	entry insertion/extension;
1694	 *	sibling page link update (old right page before split);
1695	 */
1696	if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
1697		/* log after-image for logredo():
1698		 * logredo() will update bmap for alloc of new/extended
1699		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1700		 * after-image of XADlist;
1701		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1702		 * applying the after-image to the meta-data page.
1703		 */
1704		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1705		PXDaddress(page_pxd, mp->index);
1706		PXDlength(page_pxd,
1707			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1708		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1709
1710		/* format a maplock for txUpdateMap() to update bPMAP
1711		 * for alloc of new/extended extents of XAD[lwm:next)
1712		 * from the page itself;
1713		 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
1714		 */
1715		lwm = xtlck->lwm.offset;
1716		if (lwm == 0)
1717			lwm = XTPAGEMAXSLOT;
1718
1719		if (lwm == next)
1720			goto out;
1721		if (lwm > next) {
1722			jfs_err("xtLog: lwm > next");
1723			goto out;
1724		}
1725		tlck->flag |= tlckUPDATEMAP;
1726		xadlock->flag = mlckALLOCXADLIST;
1727		xadlock->count = next - lwm;
1728		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1729			int i;
1730			pxd_t *pxd;
1731			/*
1732			 * Lazy commit may allow xtree to be modified before
1733			 * txUpdateMap runs.  Copy xad into linelock to
1734			 * preserve correct data.
1735			 *
 1736			 * We can fit twice as many pxd's as xads in the lock
1737			 */
1738			xadlock->flag = mlckALLOCPXDLIST;
1739			pxd = xadlock->xdlist = &xtlck->pxdlock;
1740			for (i = 0; i < xadlock->count; i++) {
1741				PXDaddress(pxd, addressXAD(&p->xad[lwm + i]));
1742				PXDlength(pxd, lengthXAD(&p->xad[lwm + i]));
1743				p->xad[lwm + i].flag &=
1744				    ~(XAD_NEW | XAD_EXTENDED);
1745				pxd++;
1746			}
1747		} else {
1748			/*
 1749			 * xdlist will point into the inode's xtree; ensure
 1750			 * that the transaction is not committed lazily.
1751			 */
1752			xadlock->flag = mlckALLOCXADLIST;
1753			xadlock->xdlist = &p->xad[lwm];
1754			tblk->xflag &= ~COMMIT_LAZY;
1755		}
1756		jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d count:%d",
1757			 tlck->ip, mp, tlck, lwm, xadlock->count);
1758
1759		maplock->index = 1;
1760
1761	      out:
1762		/* mark page as homeward bound */
1763		tlck->flag |= tlckWRITEPAGE;
1764
1765		return;
1766	}
1767
1768	/*
1769	 *	page deletion: file deletion/truncation (ref. xtTruncate())
1770	 *
1771	 * (page will be invalidated after log is written and bmap
1772	 * is updated from the page);
1773	 */
1774	if (tlck->type & tlckFREE) {
1775		/* LOG_NOREDOPAGE log for NoRedoPage filter:
1776		 * if page free from file delete, NoRedoFile filter from
1777		 * inode image of zero link count will subsume NoRedoPage
1778		 * filters for each page;
 1779		 * if page free from file truncation, write NoRedoPage
 1780		 * filter;
 1781		 *
 1782		 * update of block allocation map for the page itself:
1783		 * if page free from deletion and truncation, LOG_UPDATEMAP
1784		 * log for the page itself is generated from processing
1785		 * its parent page xad entries;
1786		 */
1787		/* if page free from file truncation, log LOG_NOREDOPAGE
1788		 * of the deleted page for logredo() to start NoRedoPage
1789		 * filter for the page;
1790		 */
1791		if (tblk->xflag & COMMIT_TRUNCATE) {
1792			/* write NOREDOPAGE for the page */
1793			lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1794			PXDaddress(page_pxd, mp->index);
1795			PXDlength(page_pxd,
1796				  mp->logical_size >> tblk->sb->
1797				  s_blocksize_bits);
1798			lrd->backchain =
1799			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1800
1801			if (tlck->type & tlckBTROOT) {
1802				/* Empty xtree must be logged */
1803				lrd->type = cpu_to_le16(LOG_REDOPAGE);
1804				lrd->backchain =
1805				    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1806			}
1807		}
1808
1809		/* init LOG_UPDATEMAP of the freed extents
1810		 * XAD[XTENTRYSTART:hwm) from the deleted page itself
1811		 * for logredo() to update bmap;
1812		 */
1813		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1814		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST);
1815		xtlck = (struct xtlock *) & tlck->lock;
1816		hwm = xtlck->hwm.offset;
1817		lrd->log.updatemap.nxd =
1818		    cpu_to_le16(hwm - XTENTRYSTART + 1);
1819		/* reformat linelock for lmLog() */
1820		xtlck->header.offset = XTENTRYSTART;
1821		xtlck->header.length = hwm - XTENTRYSTART + 1;
1822		xtlck->index = 1;
1823		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1824
1825		/* format a maplock for txUpdateMap() to update bmap
1826		 * to free extents of XAD[XTENTRYSTART:hwm) from the
1827		 * deleted page itself;
1828		 */
1829		tlck->flag |= tlckUPDATEMAP;
1830		xadlock->count = hwm - XTENTRYSTART + 1;
1831		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1832			int i;
1833			pxd_t *pxd;
1834			/*
1835			 * Lazy commit may allow xtree to be modified before
1836			 * txUpdateMap runs.  Copy xad into linelock to
1837			 * preserve correct data.
1838			 *
 1839			 * We can fit twice as many pxd's as xads in the lock
1840			 */
1841			xadlock->flag = mlckFREEPXDLIST;
1842			pxd = xadlock->xdlist = &xtlck->pxdlock;
1843			for (i = 0; i < xadlock->count; i++) {
1844				PXDaddress(pxd,
1845					addressXAD(&p->xad[XTENTRYSTART + i]));
1846				PXDlength(pxd,
1847					lengthXAD(&p->xad[XTENTRYSTART + i]));
1848				pxd++;
1849			}
1850		} else {
1851			/*
 1852			 * xdlist will point into the inode's xtree; ensure
 1853			 * that the transaction is not committed lazily.
1854			 */
1855			xadlock->flag = mlckFREEXADLIST;
1856			xadlock->xdlist = &p->xad[XTENTRYSTART];
1857			tblk->xflag &= ~COMMIT_LAZY;
1858		}
1859		jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2",
1860			 tlck->ip, mp, xadlock->count);
1861
1862		maplock->index = 1;
1863
1864		/* mark page as invalid */
1865		if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode))
1866		    && !(tlck->type & tlckBTROOT))
1867			tlck->flag |= tlckFREEPAGE;
1868		/*
1869		   else (tblk->xflag & COMMIT_PMAP)
1870		   ? release the page;
1871		 */
1872		return;
1873	}
1874
1875	/*
1876	 *	page/entry truncation: file truncation (ref. xtTruncate())
1877	 *
1878	 *	|----------+------+------+---------------|
1879	 *		   |      |      |
1880	 *		   |      |     hwm - hwm before truncation
1881	 *		   |     next - truncation point
1882	 *		  lwm - lwm before truncation
1883	 * header ?
1884	 */
1885	if (tlck->type & tlckTRUNCATE) {
 1886		pxd_t pxd;	/* truncated extent of xad */
 1887		int twm;
1888
1889		/*
1890		 * For truncation the entire linelock may be used, so it would
1891		 * be difficult to store xad list in linelock itself.
1892		 * Therefore, we'll just force transaction to be committed
1893		 * synchronously, so that xtree pages won't be changed before
1894		 * txUpdateMap runs.
1895		 */
1896		tblk->xflag &= ~COMMIT_LAZY;
1897		lwm = xtlck->lwm.offset;
1898		if (lwm == 0)
1899			lwm = XTPAGEMAXSLOT;
1900		hwm = xtlck->hwm.offset;
1901		twm = xtlck->twm.offset;
1902
1903		/*
1904		 *	write log records
1905		 */
1906		/* log after-image for logredo():
1907		 *
1908		 * logredo() will update bmap for alloc of new/extended
1909		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1910		 * after-image of XADlist;
1911		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1912		 * applying the after-image to the meta-data page.
1913		 */
1914		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1915		PXDaddress(page_pxd, mp->index);
1916		PXDlength(page_pxd,
1917			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1918		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1919
1920		/*
1921		 * truncate entry XAD[twm == next - 1]:
1922		 */
1923		if (twm == next - 1) {
1924			/* init LOG_UPDATEMAP for logredo() to update bmap for
1925			 * free of truncated delta extent of the truncated
1926			 * entry XAD[next - 1]:
1927			 * (xtlck->pxdlock = truncated delta extent);
1928			 */
1929			pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
1930			/* assert(pxdlock->type & tlckTRUNCATE); */
1931			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1932			lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
1933			lrd->log.updatemap.nxd = cpu_to_le16(1);
1934			lrd->log.updatemap.pxd = pxdlock->pxd;
1935			pxd = pxdlock->pxd;	/* save to format maplock */
1936			lrd->backchain =
1937			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1938		}
1939
1940		/*
1941		 * free entries XAD[next:hwm]:
1942		 */
1943		if (hwm >= next) {
1944			/* init LOG_UPDATEMAP of the freed extents
1945			 * XAD[next:hwm] from the deleted page itself
1946			 * for logredo() to update bmap;
1947			 */
1948			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1949			lrd->log.updatemap.type =
1950			    cpu_to_le16(LOG_FREEXADLIST);
1951			xtlck = (struct xtlock *) & tlck->lock;
1952			hwm = xtlck->hwm.offset;
1953			lrd->log.updatemap.nxd =
1954			    cpu_to_le16(hwm - next + 1);
1955			/* reformat linelock for lmLog() */
1956			xtlck->header.offset = next;
1957			xtlck->header.length = hwm - next + 1;
1958			xtlck->index = 1;
1959			lrd->backchain =
1960			    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1961		}
1962
1963		/*
1964		 *	format maplock(s) for txUpdateMap() to update bmap
1965		 */
1966		maplock->index = 0;
1967
1968		/*
1969		 * allocate entries XAD[lwm:next):
1970		 */
1971		if (lwm < next) {
1972			/* format a maplock for txUpdateMap() to update bPMAP
1973			 * for alloc of new/extended extents of XAD[lwm:next)
1974			 * from the page itself;
1975			 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
1976			 */
1977			tlck->flag |= tlckUPDATEMAP;
1978			xadlock->flag = mlckALLOCXADLIST;
1979			xadlock->count = next - lwm;
1980			xadlock->xdlist = &p->xad[lwm];
1981
1982			jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d lwm:%d next:%d",
1983				 tlck->ip, mp, xadlock->count, lwm, next);
1984			maplock->index++;
1985			xadlock++;
1986		}
1987
1988		/*
1989		 * truncate entry XAD[twm == next - 1]:
1990		 */
1991		if (twm == next - 1) {
1992			/* format a maplock for txUpdateMap() to update bmap
1993			 * to free truncated delta extent of the truncated
1994			 * entry XAD[next - 1];
1995			 * (xtlck->pxdlock = truncated delta extent);
1996			 */
1997			tlck->flag |= tlckUPDATEMAP;
1998			pxdlock = (struct pxd_lock *) xadlock;
1999			pxdlock->flag = mlckFREEPXD;
2000			pxdlock->count = 1;
2001			pxdlock->pxd = pxd;
2002
2003			jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d hwm:%d",
2004				 ip, mp, pxdlock->count, hwm);
2005			maplock->index++;
2006			xadlock++;
2007		}
2008
2009		/*
2010		 * free entries XAD[next:hwm]:
2011		 */
2012		if (hwm >= next) {
2013			/* format a maplock for txUpdateMap() to update bmap
2014			 * to free extents of XAD[next:hwm] from the deleted
2015			 * page itself;
2016			 */
2017			tlck->flag |= tlckUPDATEMAP;
2018			xadlock->flag = mlckFREEXADLIST;
2019			xadlock->count = hwm - next + 1;
2020			xadlock->xdlist = &p->xad[next];
2021
2022			jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d next:%d hwm:%d",
2023				 tlck->ip, mp, xadlock->count, next, hwm);
2024			maplock->index++;
2025		}
2026
2027		/* mark page as homeward bound */
2028		tlck->flag |= tlckWRITEPAGE;
2029	}
2030	return;
2031}
2032
2033/*
2034 *	mapLog()
2035 *
2036 * function:	log from maplock of freed data extents;
2037 */
2038static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2039		   struct tlock * tlck)
2040{
2041	struct pxd_lock *pxdlock;
2042	int i, nlock;
2043	pxd_t *pxd;
2044
2045	/*
2046	 *	page relocation: free the source page extent
2047	 *
2048	 * a maplock for txUpdateMap() for free of the page
2049	 * has been formatted at txLock() time saving the src
2050	 * relocated page address;
2051	 */
2052	if (tlck->type & tlckRELOCATE) {
2053		/* log LOG_NOREDOPAGE of the old relocated page
2054		 * for logredo() to start NoRedoPage filter;
2055		 */
2056		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
2057		pxdlock = (struct pxd_lock *) & tlck->lock;
2058		pxd = &lrd->log.redopage.pxd;
2059		*pxd = pxdlock->pxd;
2060		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2061
2062		/* (N.B. currently, logredo() does NOT update bmap
2063		 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE);
2064		 * if page free from relocation, LOG_UPDATEMAP log is
2065		 * specifically generated now for logredo()
2066		 * to update bmap for free of src relocated page;
2067		 * (new flag LOG_RELOCATE may be introduced which will
2068		 * inform logredo() to start NORedoPage filter and also
2069		 * update block allocation map at the same time, thus
2070		 * avoiding an extra log write);
2071		 */
2072		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2073		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
2074		lrd->log.updatemap.nxd = cpu_to_le16(1);
2075		lrd->log.updatemap.pxd = pxdlock->pxd;
2076		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2077
2078		/* a maplock for txUpdateMap() for free of the page
2079		 * has been formatted at txLock() time;
2080		 */
2081		tlck->flag |= tlckUPDATEMAP;
2082		return;
2083	}
2084	/*
2085	 * Otherwise it's not a relocate request
2086	 *
2087	 */
2089	else {
2090		/* log LOG_UPDATEMAP for logredo() to update bmap for
2091		 * free of truncated/relocated delta extent of the data;
2092		 * e.g.: external EA extent, relocated/truncated extent
2093		 * from xtTailgate();
2094		 */
2095		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2096		pxdlock = (struct pxd_lock *) & tlck->lock;
2097		nlock = pxdlock->index;
2098		for (i = 0; i < nlock; i++, pxdlock++) {
2099			if (pxdlock->flag & mlckALLOCPXD)
2100				lrd->log.updatemap.type =
2101				    cpu_to_le16(LOG_ALLOCPXD);
2102			else
2103				lrd->log.updatemap.type =
2104				    cpu_to_le16(LOG_FREEPXD);
2105			lrd->log.updatemap.nxd = cpu_to_le16(1);
2106			lrd->log.updatemap.pxd = pxdlock->pxd;
2107			lrd->backchain =
2108			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2109			jfs_info("mapLog: xaddr:0x%lx xlen:0x%x",
2110				 (ulong) addressPXD(&pxdlock->pxd),
2111				 lengthPXD(&pxdlock->pxd));
2112		}
2113
2114		/* update bmap */
2115		tlck->flag |= tlckUPDATEMAP;
2116	}
2117}
2118
2119/*
2120 *	txEA()
2121 *
2122 * function:	acquire maplock for EA/ACL extents or
2123 *		set COMMIT_INLINE flag;
2124 */
2125void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
2126{
2127	struct tlock *tlck = NULL;
2128	struct pxd_lock *maplock = NULL, *pxdlock = NULL;
2129
2130	/*
2131	 * format maplock for alloc of new EA extent
2132	 */
2133	if (newea) {
2134		/* Since the newea could be a completely zeroed entry we need to
2135		 * check for the two flags which indicate we should actually
2136		 * commit new EA data
2137		 */
2138		if (newea->flag & DXD_EXTENT) {
2139			tlck = txMaplock(tid, ip, tlckMAP);
2140			maplock = (struct pxd_lock *) & tlck->lock;
2141			pxdlock = (struct pxd_lock *) maplock;
2142			pxdlock->flag = mlckALLOCPXD;
2143			PXDaddress(&pxdlock->pxd, addressDXD(newea));
2144			PXDlength(&pxdlock->pxd, lengthDXD(newea));
2145			pxdlock++;
2146			maplock->index = 1;
2147		} else if (newea->flag & DXD_INLINE) {
2148			tlck = NULL;
2149
2150			set_cflag(COMMIT_Inlineea, ip);
2151		}
2152	}
2153
2154	/*
2155	 * format maplock for free of old EA extent
2156	 */
2157	if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) {
2158		if (tlck == NULL) {
2159			tlck = txMaplock(tid, ip, tlckMAP);
2160			maplock = (struct pxd_lock *) & tlck->lock;
2161			pxdlock = (struct pxd_lock *) maplock;
2162			maplock->index = 0;
2163		}
2164		pxdlock->flag = mlckFREEPXD;
2165		PXDaddress(&pxdlock->pxd, addressDXD(oldea));
2166		PXDlength(&pxdlock->pxd, lengthDXD(oldea));
2167		maplock->index++;
2168	}
2169}
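
/*
 * Editorial usage sketch (not part of the original source): a caller
 * replacing an inode's extended attribute is expected to do something
 * roughly like
 *
 *	txEA(tid, ip, &JFS_IP(ip)->ea, &new_ea);
 *	JFS_IP(ip)->ea = new_ea;
 *
 * where new_ea is a dxd_t flagged either DXD_EXTENT (an out-of-line
 * extent, for which the maplocks above are formatted) or DXD_INLINE
 * (data kept in the inode and committed via COMMIT_Inlineea).
 */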
2170
2171/*
2172 *	txForce()
2173 *
2174 * function: synchronously write pages locked by transaction
2175 *	     after txLog() but before txUpdateMap();
2176 */
2177static void txForce(struct tblock * tblk)
2178{
2179	struct tlock *tlck;
2180	lid_t lid, next;
2181	struct metapage *mp;
2182
2183	/*
2184	 * reverse the order of transaction tlocks so that the
2185	 * address/index pages are written in careful update order
2186	 * (right to left, bottom up)
2187	 */
2188	tlck = lid_to_tlock(tblk->next);
2189	lid = tlck->next;
2190	tlck->next = 0;
2191	while (lid) {
2192		tlck = lid_to_tlock(lid);
2193		next = tlck->next;
2194		tlck->next = tblk->next;
2195		tblk->next = lid;
2196		lid = next;
2197	}
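
	/*
	 * Editorial note: the loop above is a standard singly linked list
	 * reversal of the lid chain, e.g. tblk->next = A -> B -> C becomes
	 * C -> B -> A, so the synchronous writes below visit the pages in
	 * the reverse of the order in which they were locked.
	 */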
2198
2199	/*
2200	 * synchronously write the page, and
2201	 * hold the page for txUpdateMap();
2202	 */
2203	for (lid = tblk->next; lid; lid = next) {
2204		tlck = lid_to_tlock(lid);
2205		next = tlck->next;
2206
2207		if ((mp = tlck->mp) != NULL &&
2208		    (tlck->type & tlckBTROOT) == 0) {
2209			assert(mp->xflag & COMMIT_PAGE);
2210
2211			if (tlck->flag & tlckWRITEPAGE) {
2212				tlck->flag &= ~tlckWRITEPAGE;
2213
2214				/* do not release page to freelist */
2215				force_metapage(mp);
2216#if 0
2217				/*
2218				 * The "right" thing to do here is to
2219				 * synchronously write the metadata.
2220				 * With the current implementation this
2221				 * is hard since write_metapage requires
2222				 * us to kunmap & remap the page.  If we
2223				 * have tlocks pointing into the metadata
2224				 * pages, we don't want to do this.  I think
2225				 * we can get by with synchronously writing
2226				 * the pages when they are released.
2227				 */
2228				assert(mp->nohomeok);
2229				set_bit(META_dirty, &mp->flag);
2230				set_bit(META_sync, &mp->flag);
2231#endif
2232			}
2233		}
2234	}
2235}
2236
2237/*
2238 *	txUpdateMap()
2239 *
2240 * function:	update persistent allocation map (and working map
2241 *		if appropriate);
2242 *
2243 * parameter:	tblk	- transaction block whose allocation maps are updated;
2244 */
2245static void txUpdateMap(struct tblock * tblk)
2246{
2247	struct inode *ip;
2248	struct inode *ipimap;
2249	lid_t lid;
2250	struct tlock *tlck;
2251	struct maplock *maplock;
2252	struct pxd_lock pxdlock;
2253	int maptype;
2254	int k, nlock;
2255	struct metapage *mp = NULL;
2256
2257	ipimap = JFS_SBI(tblk->sb)->ipimap;
2258
2259	maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP;
2260
2261
2262	/*
2263	 *	update block allocation map
2264	 *
2265	 * update allocation state in pmap (and wmap) and
2266	 * update lsn of the pmap page;
2267	 */
2268	/*
2269	 * scan each tlock/page of transaction for block allocation/free:
2270	 *
2271	 * for each tlock/page of transaction, update map.
2272	 *  ? are there tlock for pmap and pwmap at the same time ?
2273	 */
2274	for (lid = tblk->next; lid; lid = tlck->next) {
2275		tlck = lid_to_tlock(lid);
2276
2277		if ((tlck->flag & tlckUPDATEMAP) == 0)
2278			continue;
2279
2280		if (tlck->flag & tlckFREEPAGE) {
2281			/*
2282			 * Another thread may attempt to reuse freed space
2283			 * immediately, so we want to get rid of the metapage
2284			 * before anyone else has a chance to get it.
2285			 * Lock metapage, update maps, then invalidate
2286			 * the metapage.
2287			 */
2288			mp = tlck->mp;
2289			ASSERT(mp->xflag & COMMIT_PAGE);
2290			grab_metapage(mp);
2291		}
2292
2293		/*
2294		 * extent list:
2295		 * . in-line PXD list:
2296		 * . out-of-line XAD list:
2297		 */
2298		maplock = (struct maplock *) & tlck->lock;
2299		nlock = maplock->index;
2300
2301		for (k = 0; k < nlock; k++, maplock++) {
2302			/*
2303			 * allocate blocks in persistent map:
2304			 *
2305			 * blocks have been allocated from wmap at alloc time;
2306			 */
2307			if (maplock->flag & mlckALLOC) {
2308				txAllocPMap(ipimap, maplock, tblk);
2309			}
2310			/*
2311			 * free blocks in persistent and working map:
2312			 * blocks will be freed in pmap and then in wmap;
2313			 *
2314			 * ? tblock specifies the PMAP/PWMAP based upon
2315			 * transaction
2316			 *
2317			 * free blocks in persistent map:
2318			 * blocks will be freed from wmap at last reference
2319			 * release of the object for regular files;
2320			 *
2321			 * Always free blocks from both persistent & working
2322			 * maps for directories
2323			 */
2324			else {	/* (maplock->flag & mlckFREE) */
2325
2326				if (tlck->flag & tlckDIRECTORY)
2327					txFreeMap(ipimap, maplock,
2328						  tblk, COMMIT_PWMAP);
2329				else
2330					txFreeMap(ipimap, maplock,
2331						  tblk, maptype);
2332			}
2333		}
2334		if (tlck->flag & tlckFREEPAGE) {
2335			if (!(tblk->flag & tblkGC_LAZY)) {
2336				/* This is equivalent to txRelease */
2337				ASSERT(mp->lid == lid);
2338				tlck->mp->lid = 0;
2339			}
2340			assert(mp->nohomeok == 1);
2341			metapage_homeok(mp);
2342			discard_metapage(mp);
2343			tlck->mp = NULL;
2344		}
2345	}
2346	/*
2347	 *	update inode allocation map
2348	 *
2349	 * update allocation state in pmap and
2350	 * update lsn of the pmap page;
2351	 * update in-memory inode flag/state
2352	 *
2353	 * unlock mapper/write lock
2354	 */
2355	if (tblk->xflag & COMMIT_CREATE) {
2356		diUpdatePMap(ipimap, tblk->ino, false, tblk);
2357		/* update persistent block allocation map
2358		 * for the allocation of inode extent;
2359		 */
2360		pxdlock.flag = mlckALLOCPXD;
2361		pxdlock.pxd = tblk->u.ixpxd;
2362		pxdlock.index = 1;
2363		txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk);
2364	} else if (tblk->xflag & COMMIT_DELETE) {
2365		ip = tblk->u.ip;
2366		diUpdatePMap(ipimap, ip->i_ino, true, tblk);
2367		iput(ip);
2368	}
2369}
2370
2371/*
2372 *	txAllocPMap()
2373 *
2374 * function: allocate from persistent map;
2375 *
2376 * parameter:
2377 *	ip	- inode used to locate the block allocation map;
2378 *	maplock	- maplock describing the allocated extent(s):
2379 *		  xad list, single pxd, or pxd list;
2380 *	tblk	- transaction block;
2381 *
2382 *	blocks were already allocated from the working map at
2383 *	allocation time; freeing (e.g. a tmp file released from the
2384 *	working map at last reference) is handled by txFreeMap();
2390 */
2391static void txAllocPMap(struct inode *ip, struct maplock * maplock,
2392			struct tblock * tblk)
2393{
2394	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2395	struct xdlistlock *xadlistlock;
2396	xad_t *xad;
2397	s64 xaddr;
2398	int xlen;
2399	struct pxd_lock *pxdlock;
2400	struct xdlistlock *pxdlistlock;
2401	pxd_t *pxd;
2402	int n;
2403
2404	/*
2405	 * allocate from persistent map;
2406	 */
2407	if (maplock->flag & mlckALLOCXADLIST) {
2408		xadlistlock = (struct xdlistlock *) maplock;
2409		xad = xadlistlock->xdlist;
2410		for (n = 0; n < xadlistlock->count; n++, xad++) {
2411			if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
2412				xaddr = addressXAD(xad);
2413				xlen = lengthXAD(xad);
2414				dbUpdatePMap(ipbmap, false, xaddr,
2415					     (s64) xlen, tblk);
2416				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
2417				jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2418					 (ulong) xaddr, xlen);
2419			}
2420		}
2421	} else if (maplock->flag & mlckALLOCPXD) {
2422		pxdlock = (struct pxd_lock *) maplock;
2423		xaddr = addressPXD(&pxdlock->pxd);
2424		xlen = lengthPXD(&pxdlock->pxd);
2425		dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk);
2426		jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen);
2427	} else {		/* (maplock->flag & mlckALLOCPXDLIST) */
2428
2429		pxdlistlock = (struct xdlistlock *) maplock;
2430		pxd = pxdlistlock->xdlist;
2431		for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2432			xaddr = addressPXD(pxd);
2433			xlen = lengthPXD(pxd);
2434			dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen,
2435				     tblk);
2436			jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2437				 (ulong) xaddr, xlen);
2438		}
2439	}
2440}
2441
2442/*
2443 *	txFreeMap()
2444 *
2445 * function:	free from persistent and/or working map;
2446 *
2447 * todo: optimization
2448 */
2449void txFreeMap(struct inode *ip,
2450	       struct maplock * maplock, struct tblock * tblk, int maptype)
2451{
2452	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2453	struct xdlistlock *xadlistlock;
2454	xad_t *xad;
2455	s64 xaddr;
2456	int xlen;
2457	struct pxd_lock *pxdlock;
2458	struct xdlistlock *pxdlistlock;
2459	pxd_t *pxd;
2460	int n;
2461
2462	jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x",
2463		 tblk, maplock, maptype);
2464
2465	/*
2466	 * free from persistent map;
2467	 */
2468	if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) {
2469		if (maplock->flag & mlckFREEXADLIST) {
2470			xadlistlock = (struct xdlistlock *) maplock;
2471			xad = xadlistlock->xdlist;
2472			for (n = 0; n < xadlistlock->count; n++, xad++) {
2473				if (!(xad->flag & XAD_NEW)) {
2474					xaddr = addressXAD(xad);
2475					xlen = lengthXAD(xad);
2476					dbUpdatePMap(ipbmap, true, xaddr,
2477						     (s64) xlen, tblk);
2478					jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2479						 (ulong) xaddr, xlen);
2480				}
2481			}
2482		} else if (maplock->flag & mlckFREEPXD) {
2483			pxdlock = (struct pxd_lock *) maplock;
2484			xaddr = addressPXD(&pxdlock->pxd);
2485			xlen = lengthPXD(&pxdlock->pxd);
2486			dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen,
2487				     tblk);
2488			jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2489				 (ulong) xaddr, xlen);
2490		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
2491
2492			pxdlistlock = (struct xdlistlock *) maplock;
2493			pxd = pxdlistlock->xdlist;
2494			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2495				xaddr = addressPXD(pxd);
2496				xlen = lengthPXD(pxd);
2497				dbUpdatePMap(ipbmap, true, xaddr,
2498					     (s64) xlen, tblk);
2499				jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2500					 (ulong) xaddr, xlen);
2501			}
2502		}
2503	}
2504
2505	/*
2506	 * free from working map;
2507	 */
2508	if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) {
2509		if (maplock->flag & mlckFREEXADLIST) {
2510			xadlistlock = (struct xdlistlock *) maplock;
2511			xad = xadlistlock->xdlist;
2512			for (n = 0; n < xadlistlock->count; n++, xad++) {
2513				xaddr = addressXAD(xad);
2514				xlen = lengthXAD(xad);
2515				dbFree(ip, xaddr, (s64) xlen);
2516				xad->flag = 0;
2517				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2518					 (ulong) xaddr, xlen);
2519			}
2520		} else if (maplock->flag & mlckFREEPXD) {
2521			pxdlock = (struct pxd_lock *) maplock;
2522			xaddr = addressPXD(&pxdlock->pxd);
2523			xlen = lengthPXD(&pxdlock->pxd);
2524			dbFree(ip, xaddr, (s64) xlen);
2525			jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2526				 (ulong) xaddr, xlen);
2527		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
2528
2529			pxdlistlock = (struct xdlistlock *) maplock;
2530			pxd = pxdlistlock->xdlist;
2531			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2532				xaddr = addressPXD(pxd);
2533				xlen = lengthPXD(pxd);
2534				dbFree(ip, xaddr, (s64) xlen);
2535				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2536					 (ulong) xaddr, xlen);
2537			}
2538		}
2539	}
2540}
2541
2542/*
2543 *	txFreelock()
2544 *
2545 * function:	remove tlock from inode anonymous locklist
2546 */
2547void txFreelock(struct inode *ip)
2548{
2549	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2550	struct tlock *xtlck, *tlck;
2551	lid_t xlid = 0, lid;
2552
2553	if (!jfs_ip->atlhead)
2554		return;
2555
2556	TXN_LOCK();
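	/*
	 * Editorial note: &jfs_ip->atlhead is treated as a dummy tlock;
	 * this relies on 'next' being the first member of struct tlock,
	 * so xtlck->next below aliases atlhead and the unlink loop needs
	 * no special case for the first element.
	 */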
2557	xtlck = (struct tlock *) &jfs_ip->atlhead;
2558
2559	while ((lid = xtlck->next) != 0) {
2560		tlck = lid_to_tlock(lid);
2561		if (tlck->flag & tlckFREELOCK) {
2562			xtlck->next = tlck->next;
2563			txLockFree(lid);
2564		} else {
2565			xtlck = tlck;
2566			xlid = lid;
2567		}
2568	}
2569
2570	if (jfs_ip->atlhead)
2571		jfs_ip->atltail = xlid;
2572	else {
2573		jfs_ip->atltail = 0;
2574		/*
2575		 * If inode was on anon_list, remove it
2576		 */
2577		list_del_init(&jfs_ip->anon_inode_list);
2578	}
2579	TXN_UNLOCK();
2580}
2581
2582/*
2583 *	txAbort()
2584 *
2585 * function: abort tx before commit;
2586 *
2587 * frees line-locks and segment locks for all
2588 * segments updated by the transaction.
2589 * Optionally sets state of file-system to FM_DIRTY in super-block.
2590 * The log age of in-memory page frames written by the transaction
2591 * is reset to 0 (to avoid logwrap).
2592 */
2593void txAbort(tid_t tid, int dirty)
2594{
2595	lid_t lid, next;
2596	struct metapage *mp;
2597	struct tblock *tblk = tid_to_tblock(tid);
2598	struct tlock *tlck;
2599
2600	/*
2601	 * free tlocks of the transaction
2602	 */
2603	for (lid = tblk->next; lid; lid = next) {
2604		tlck = lid_to_tlock(lid);
2605		next = tlck->next;
2606		mp = tlck->mp;
2607		JFS_IP(tlck->ip)->xtlid = 0;
2608
2609		if (mp) {
2610			mp->lid = 0;
2611
2612			/*
2613			 * reset lsn of page to avoid logwrap:
2614			 *
2615			 * (page may have been previously committed by another
2616			 * transaction(s) but has not been paged, i.e.,
2617			 * it may be on logsync list even though it has not
2618			 * been logged for the current tx.)
2619			 */
2620			if (mp->xflag & COMMIT_PAGE && mp->lsn)
2621				LogSyncRelease(mp);
2622		}
2623		/* insert tlock at head of freelist */
2624		TXN_LOCK();
2625		txLockFree(lid);
2626		TXN_UNLOCK();
2627	}
2628
2629	/* caller will free the transaction block */
2630
2631	tblk->next = tblk->last = 0;
2632
2633	/*
2634	 * mark filesystem dirty
2635	 */
2636	if (dirty)
2637		jfs_error(tblk->sb, "\n");
2638
2639	return;
2640}
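
/*
 * Editorial usage sketch (not part of the original source): callers are
 * expected to abort and then release the transaction id, passing a
 * non-zero 'dirty' only when metadata may already be inconsistent, e.g.
 *
 *	txAbort(tid, rc == -EIO ? 1 : 0);
 *	txEnd(tid);
 */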
2641
2642/*
2643 *	txLazyCommit(void)
2644 *
2645 *	All transactions except those changing ipimap (COMMIT_FORCE) are
2646 *	processed by this routine.  This ensures that the inode and block
2647 *	allocation maps are updated in order.  For synchronous transactions,
2648 *	let the user thread finish processing after txUpdateMap() is called.
2649 */
2650static void txLazyCommit(struct tblock * tblk)
2651{
2652	struct jfs_log *log;
2653
2654	while (((tblk->flag & tblkGC_READY) == 0) &&
2655	       ((tblk->flag & tblkGC_UNLOCKED) == 0)) {
2656		/* We must have gotten ahead of the user thread
2657		 */
2658		jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk);
2659		yield();
2660	}
2661
2662	jfs_info("txLazyCommit: processing tblk 0x%p", tblk);
2663
2664	txUpdateMap(tblk);
2665
2666	log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
2667
2668	spin_lock_irq(&log->gclock);	// LOGGC_LOCK
2669
2670	tblk->flag |= tblkGC_COMMITTED;
2671
2672	if (tblk->flag & tblkGC_READY)
2673		log->gcrtc--;
2674
2675	wake_up_all(&tblk->gcwait);	// LOGGC_WAKEUP
2676
2677	/*
2678	 * Can't release log->gclock until we've tested tblk->flag
2679	 */
2680	if (tblk->flag & tblkGC_LAZY) {
2681		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
2682		txUnlock(tblk);
2683		tblk->flag &= ~tblkGC_LAZY;
2684		txEnd(tblk - TxBlock);	/* Convert back to tid */
2685	} else
2686		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
2687
2688	jfs_info("txLazyCommit: done: tblk = 0x%p", tblk);
2689}
2690
2691/*
2692 *	jfs_lazycommit(void)
2693 *
2694 *	To be run as a kernel daemon.  Committed transactions that cannot be
2695 *	processed in lbmIODone (interrupt context, or where blocking is not
2696 *	wanted) are placed on the unlock queue and processed by this routine.
2697 */
2698int jfs_lazycommit(void *arg)
2699{
2700	int WorkDone;
2701	struct tblock *tblk;
2702	unsigned long flags;
2703	struct jfs_sb_info *sbi;
2704
2705	set_freezable();
2706	do {
2707		LAZY_LOCK(flags);
2708		jfs_commit_thread_waking = 0;	/* OK to wake another thread */
2709		while (!list_empty(&TxAnchor.unlock_queue)) {
2710			WorkDone = 0;
2711			list_for_each_entry(tblk, &TxAnchor.unlock_queue,
2712					    cqueue) {
2713
2714				sbi = JFS_SBI(tblk->sb);
2715				/*
2716				 * For each volume, the transactions must be
2717				 * handled in order.  If another commit thread
2718				 * is handling a tblk for this superblock,
2719				 * skip it
2720				 */
2721				if (sbi->commit_state & IN_LAZYCOMMIT)
2722					continue;
2723
2724				sbi->commit_state |= IN_LAZYCOMMIT;
2725				WorkDone = 1;
2726
2727				/*
2728				 * Remove transaction from queue
2729				 */
2730				list_del(&tblk->cqueue);
2731
2732				LAZY_UNLOCK(flags);
2733				txLazyCommit(tblk);
2734				LAZY_LOCK(flags);
2735
2736				sbi->commit_state &= ~IN_LAZYCOMMIT;
2737				/*
2738				 * Don't continue in the for loop.  (We can't
2739				 * anyway, it's unsafe!)  We want to go back to
2740				 * the beginning of the list.
2741				 */
2742				break;
2743			}
2744
2745			/* If there was nothing to do, don't continue */
2746			if (!WorkDone)
2747				break;
2748		}
2749		/* In case a wakeup came while all threads were active */
2750		jfs_commit_thread_waking = 0;
2751
2752		if (freezing(current)) {
2753			LAZY_UNLOCK(flags);
2754			try_to_freeze();
2755		} else {
2756			DECLARE_WAITQUEUE(wq, current);
2757
2758			add_wait_queue(&jfs_commit_thread_wait, &wq);
2759			set_current_state(TASK_INTERRUPTIBLE);
2760			LAZY_UNLOCK(flags);
2761			schedule();
2762			remove_wait_queue(&jfs_commit_thread_wait, &wq);
2763		}
2764	} while (!kthread_should_stop());
2765
2766	if (!list_empty(&TxAnchor.unlock_queue))
2767		jfs_err("jfs_lazycommit being killed w/pending transactions!");
2768	else
2769		jfs_info("jfs_lazycommit being killed");
2770	return 0;
2771}
2772
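/*
 *	txLazyUnlock()
 *
 * function:	queue a committed transaction block on the unlock queue
 *		and, when appropriate, wake a lazy commit thread to
 *		process it (editorial summary added for clarity)
 */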
2773void txLazyUnlock(struct tblock * tblk)
2774{
2775	unsigned long flags;
2776
2777	LAZY_LOCK(flags);
2778
2779	list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue);
2780	/*
2781	 * Don't wake up a commit thread if there is already one servicing
2782	 * this superblock, or if the last one we woke up hasn't started yet.
2783	 */
2784	if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) &&
2785	    !jfs_commit_thread_waking) {
2786		jfs_commit_thread_waking = 1;
2787		wake_up(&jfs_commit_thread_wait);
2788	}
2789	LAZY_UNLOCK(flags);
2790}
2791
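/*
 *	LogSyncRelease()
 *
 * function:	drop one nohomeok reference on a metapage tracked by the
 *		log, allowing it to be written home once all references
 *		are released (editorial summary added for clarity)
 */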
2792static void LogSyncRelease(struct metapage * mp)
2793{
2794	struct jfs_log *log = mp->log;
2795
2796	assert(mp->nohomeok);
2797	assert(log);
2798	metapage_homeok(mp);
2799}
2800
2801/*
2802 *	txQuiesce
2803 *
2804 *	Block all new transactions and push anonymous transactions to
2805 *	completion
2806 *
2807 *	This does almost the same thing as jfs_sync below.  We don't
2808 *	worry about deadlocking when jfs_tlocks_low is set, since we would
2809 *	expect jfs_sync to get us out of that jam.
2810 */
2811void txQuiesce(struct super_block *sb)
2812{
2813	struct inode *ip;
2814	struct jfs_inode_info *jfs_ip;
2815	struct jfs_log *log = JFS_SBI(sb)->log;
2816	tid_t tid;
2817
2818	set_bit(log_QUIESCE, &log->flag);
2819
2820	TXN_LOCK();
2821restart:
2822	while (!list_empty(&TxAnchor.anon_list)) {
2823		jfs_ip = list_entry(TxAnchor.anon_list.next,
2824				    struct jfs_inode_info,
2825				    anon_inode_list);
2826		ip = &jfs_ip->vfs_inode;
2827
2828		/*
2829		 * inode will be removed from anonymous list
2830		 * when it is committed
2831		 */
2832		TXN_UNLOCK();
2833		tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
2834		mutex_lock(&jfs_ip->commit_mutex);
2835		txCommit(tid, 1, &ip, 0);
2836		txEnd(tid);
2837		mutex_unlock(&jfs_ip->commit_mutex);
2838		/*
2839		 * Just to be safe.  I don't know how
2840		 * long we can run without blocking
2841		 */
2842		cond_resched();
2843		TXN_LOCK();
2844	}
2845
2846	/*
2847	 * If jfs_sync is running in parallel, there could be some inodes
2848	 * on anon_list2.  Let's check.
2849	 */
2850	if (!list_empty(&TxAnchor.anon_list2)) {
2851		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2852		goto restart;
2853	}
2854	TXN_UNLOCK();
2855
2856	/*
2857	 * We may need to kick off the group commit
2858	 */
2859	jfs_flush_journal(log, 0);
2860}
2861
2862/*
2863 * txResume()
2864 *
2865 * Allows transactions to start again following txQuiesce
2866 */
2867void txResume(struct super_block *sb)
2868{
2869	struct jfs_log *log = JFS_SBI(sb)->log;
2870
2871	clear_bit(log_QUIESCE, &log->flag);
2872	TXN_WAKEUP(&log->syncwait);
2873}
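
/*
 * Editorial usage sketch (not part of the original source): txQuiesce()
 * and txResume() are intended to bracket operations that must see no
 * active transactions, e.g. freezing or resizing the file system:
 *
 *	txQuiesce(sb);
 *	... work that requires a quiesced transaction manager ...
 *	txResume(sb);
 */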
2874
2875/*
2876 *	jfs_sync(void)
2877 *
2878 *	To be run as a kernel daemon.  This is awakened when tlocks run low.
2879 *	We write any inodes that have anonymous tlocks so they will become
2880 *	available.
2881 */
2882int jfs_sync(void *arg)
2883{
2884	struct inode *ip;
2885	struct jfs_inode_info *jfs_ip;
2886	tid_t tid;
2887
2888	set_freezable();
2889	do {
2890		/*
2891		 * write each inode on the anonymous inode list
2892		 */
2893		TXN_LOCK();
2894		while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) {
2895			jfs_ip = list_entry(TxAnchor.anon_list.next,
2896					    struct jfs_inode_info,
2897					    anon_inode_list);
2898			ip = &jfs_ip->vfs_inode;
2899
2900			if (! igrab(ip)) {
2901				/*
2902				 * Inode is being freed
2903				 */
2904				list_del_init(&jfs_ip->anon_inode_list);
2905			} else if (mutex_trylock(&jfs_ip->commit_mutex)) {
2906				/*
2907				 * inode will be removed from anonymous list
2908				 * when it is committed
2909				 */
2910				TXN_UNLOCK();
2911				tid = txBegin(ip->i_sb, COMMIT_INODE);
2912				txCommit(tid, 1, &ip, 0);
2913				txEnd(tid);
2914				mutex_unlock(&jfs_ip->commit_mutex);
2915
2916				iput(ip);
2917				/*
2918				 * Just to be safe.  I don't know how
2919				 * long we can run without blocking
2920				 */
2921				cond_resched();
2922				TXN_LOCK();
2923			} else {
2924				/* We can't get the commit mutex.  It may
2925				 * be held by a thread waiting for tlock's
2926				 * so let's not block here.  Save it to
2927				 * put back on the anon_list.
2928				 */
2929
2930				/* Move from anon_list to anon_list2 */
2931				list_move(&jfs_ip->anon_inode_list,
2932					  &TxAnchor.anon_list2);
2933
2934				TXN_UNLOCK();
2935				iput(ip);
2936				TXN_LOCK();
2937			}
2938		}
2939		/* Add anon_list2 back to anon_list */
2940		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2941
2942		if (freezing(current)) {
2943			TXN_UNLOCK();
2944			try_to_freeze();
2945		} else {
2946			set_current_state(TASK_INTERRUPTIBLE);
2947			TXN_UNLOCK();
2948			schedule();
2949		}
2950	} while (!kthread_should_stop());
2951
2952	jfs_info("jfs_sync being killed");
2953	return 0;
2954}
2955
2956#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
2957int jfs_txanchor_proc_show(struct seq_file *m, void *v)
2958{
2959	char *freewait;
2960	char *freelockwait;
2961	char *lowlockwait;
2962
2963	freewait =
2964	    waitqueue_active(&TxAnchor.freewait) ? "active" : "empty";
2965	freelockwait =
2966	    waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty";
2967	lowlockwait =
2968	    waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty";
2969
2970	seq_printf(m,
2971		       "JFS TxAnchor\n"
2972		       "============\n"
2973		       "freetid = %d\n"
2974		       "freewait = %s\n"
2975		       "freelock = %d\n"
2976		       "freelockwait = %s\n"
2977		       "lowlockwait = %s\n"
2978		       "tlocksInUse = %d\n"
2979		       "jfs_tlocks_low = %d\n"
2980		       "unlock_queue is %sempty\n",
2981		       TxAnchor.freetid,
2982		       freewait,
2983		       TxAnchor.freelock,
2984		       freelockwait,
2985		       lowlockwait,
2986		       TxAnchor.tlocksInUse,
2987		       jfs_tlocks_low,
2988		       list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
2989	return 0;
2990}
2991#endif
2992
2993#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
2994int jfs_txstats_proc_show(struct seq_file *m, void *v)
2995{
2996	seq_printf(m,
2997		       "JFS TxStats\n"
2998		       "===========\n"
2999		       "calls to txBegin = %d\n"
3000		       "txBegin blocked by sync barrier = %d\n"
3001		       "txBegin blocked by tlocks low = %d\n"
3002		       "txBegin blocked by no free tid = %d\n"
3003		       "calls to txBeginAnon = %d\n"
3004		       "txBeginAnon blocked by sync barrier = %d\n"
3005		       "txBeginAnon blocked by tlocks low = %d\n"
3006		       "calls to txLockAlloc = %d\n"
3007		       "txLockAlloc blocked by no free lock = %d\n",
3008		       TxStat.txBegin,
3009		       TxStat.txBegin_barrier,
3010		       TxStat.txBegin_lockslow,
3011		       TxStat.txBegin_freetid,
3012		       TxStat.txBeginAnon,
3013		       TxStat.txBeginAnon_barrier,
3014		       TxStat.txBeginAnon_lockslow,
3015		       TxStat.txLockAlloc,
3016		       TxStat.txLockAlloc_freelock);
3017	return 0;
3018}
3019#endif
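
/*
 * Editorial note: when built with CONFIG_JFS_DEBUG and/or
 * CONFIG_JFS_STATISTICS, the show routines above are expected to be
 * registered under /proc/fs/jfs by jfs_debug.c (e.g. /proc/fs/jfs/TxAnchor
 * and /proc/fs/jfs/txstats), so this state can be inspected at runtime.
 */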
v3.5.6
 
   1/*
   2 *   Copyright (C) International Business Machines Corp., 2000-2005
   3 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   4 *
   5 *   This program is free software;  you can redistribute it and/or modify
   6 *   it under the terms of the GNU General Public License as published by
   7 *   the Free Software Foundation; either version 2 of the License, or
   8 *   (at your option) any later version.
   9 *
  10 *   This program is distributed in the hope that it will be useful,
  11 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  13 *   the GNU General Public License for more details.
  14 *
  15 *   You should have received a copy of the GNU General Public License
  16 *   along with this program;  if not, write to the Free Software
  17 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18 */
  19
  20/*
  21 *	jfs_txnmgr.c: transaction manager
  22 *
  23 * notes:
  24 * transaction starts with txBegin() and ends with txCommit()
  25 * or txAbort().
  26 *
  27 * tlock is acquired at the time of update;
  28 * (obviate scan at commit time for xtree and dtree)
  29 * tlock and mp points to each other;
  30 * (no hashlist for mp -> tlock).
  31 *
  32 * special cases:
  33 * tlock on in-memory inode:
  34 * in-place tlock in the in-memory inode itself;
  35 * converted to page lock by iWrite() at commit time.
  36 *
  37 * tlock during write()/mmap() under anonymous transaction (tid = 0):
  38 * transferred (?) to transaction at commit time.
  39 *
  40 * use the page itself to update allocation maps
  41 * (obviate intermediate replication of allocation/deallocation data)
  42 * hold on to mp+lock thru update of maps
  43 */
  44
  45#include <linux/fs.h>
  46#include <linux/vmalloc.h>
  47#include <linux/completion.h>
  48#include <linux/freezer.h>
  49#include <linux/module.h>
  50#include <linux/moduleparam.h>
  51#include <linux/kthread.h>
  52#include <linux/seq_file.h>
  53#include "jfs_incore.h"
  54#include "jfs_inode.h"
  55#include "jfs_filsys.h"
  56#include "jfs_metapage.h"
  57#include "jfs_dinode.h"
  58#include "jfs_imap.h"
  59#include "jfs_dmap.h"
  60#include "jfs_superblock.h"
  61#include "jfs_debug.h"
  62
  63/*
  64 *	transaction management structures
  65 */
  66static struct {
  67	int freetid;		/* index of a free tid structure */
  68	int freelock;		/* index first free lock word */
  69	wait_queue_head_t freewait;	/* eventlist of free tblock */
  70	wait_queue_head_t freelockwait;	/* eventlist of free tlock */
  71	wait_queue_head_t lowlockwait;	/* eventlist of ample tlocks */
  72	int tlocksInUse;	/* Number of tlocks in use */
  73	spinlock_t LazyLock;	/* synchronize sync_queue & unlock_queue */
  74/*	struct tblock *sync_queue; * Transactions waiting for data sync */
  75	struct list_head unlock_queue;	/* Txns waiting to be released */
  76	struct list_head anon_list;	/* inodes having anonymous txns */
  77	struct list_head anon_list2;	/* inodes having anonymous txns
  78					   that couldn't be sync'ed */
  79} TxAnchor;
  80
  81int jfs_tlocks_low;		/* Indicates low number of available tlocks */
  82
  83#ifdef CONFIG_JFS_STATISTICS
  84static struct {
  85	uint txBegin;
  86	uint txBegin_barrier;
  87	uint txBegin_lockslow;
  88	uint txBegin_freetid;
  89	uint txBeginAnon;
  90	uint txBeginAnon_barrier;
  91	uint txBeginAnon_lockslow;
  92	uint txLockAlloc;
  93	uint txLockAlloc_freelock;
  94} TxStat;
  95#endif
  96
  97static int nTxBlock = -1;	/* number of transaction blocks */
  98module_param(nTxBlock, int, 0);
  99MODULE_PARM_DESC(nTxBlock,
 100		 "Number of transaction blocks (max:65536)");
 101
 102static int nTxLock = -1;	/* number of transaction locks */
 103module_param(nTxLock, int, 0);
 104MODULE_PARM_DESC(nTxLock,
 105		 "Number of transaction locks (max:65536)");
 106
 107struct tblock *TxBlock;	/* transaction block table */
 108static int TxLockLWM;	/* Low water mark for number of txLocks used */
 109static int TxLockHWM;	/* High water mark for number of txLocks used */
 110static int TxLockVHWM;	/* Very High water mark */
 111struct tlock *TxLock;	/* transaction lock table */
 112
 113/*
 114 *	transaction management lock
 115 */
 116static DEFINE_SPINLOCK(jfsTxnLock);
 117
 118#define TXN_LOCK()		spin_lock(&jfsTxnLock)
 119#define TXN_UNLOCK()		spin_unlock(&jfsTxnLock)
 120
 121#define LAZY_LOCK_INIT()	spin_lock_init(&TxAnchor.LazyLock);
 122#define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
 123#define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)
 124
 125static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
 126static int jfs_commit_thread_waking;
 127
 128/*
 129 * Retry logic exist outside these macros to protect from spurrious wakeups.
 130 */
 131static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
 132{
 133	DECLARE_WAITQUEUE(wait, current);
 134
 135	add_wait_queue(event, &wait);
 136	set_current_state(TASK_UNINTERRUPTIBLE);
 137	TXN_UNLOCK();
 138	io_schedule();
 139	__set_current_state(TASK_RUNNING);
 140	remove_wait_queue(event, &wait);
 141}
 142
 143#define TXN_SLEEP(event)\
 144{\
 145	TXN_SLEEP_DROP_LOCK(event);\
 146	TXN_LOCK();\
 147}
 148
 149#define TXN_WAKEUP(event) wake_up_all(event)
 150
 151/*
 152 *	statistics
 153 */
 154static struct {
 155	tid_t maxtid;		/* 4: biggest tid ever used */
 156	lid_t maxlid;		/* 4: biggest lid ever used */
 157	int ntid;		/* 4: # of transactions performed */
 158	int nlid;		/* 4: # of tlocks acquired */
 159	int waitlock;		/* 4: # of tlock wait */
 160} stattx;
 161
 162/*
 163 * forward references
 164 */
 165static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 166		struct tlock * tlck, struct commit * cd);
 167static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 168		struct tlock * tlck);
 169static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 170		struct tlock * tlck);
 171static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 172		struct tlock * tlck);
 173static void txAllocPMap(struct inode *ip, struct maplock * maplock,
 174		struct tblock * tblk);
 175static void txForce(struct tblock * tblk);
 176static int txLog(struct jfs_log * log, struct tblock * tblk,
 177		struct commit * cd);
 178static void txUpdateMap(struct tblock * tblk);
 179static void txRelease(struct tblock * tblk);
 180static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 181	   struct tlock * tlck);
 182static void LogSyncRelease(struct metapage * mp);
 183
 184/*
 185 *		transaction block/lock management
 186 *		---------------------------------
 187 */
 188
 189/*
 190 * Get a transaction lock from the free list.  If the number in use is
 191 * greater than the high water mark, wake up the sync daemon.  This should
 192 * free some anonymous transaction locks.  (TXN_LOCK must be held.)
 193 */
 194static lid_t txLockAlloc(void)
 195{
 196	lid_t lid;
 197
 198	INCREMENT(TxStat.txLockAlloc);
 199	if (!TxAnchor.freelock) {
 200		INCREMENT(TxStat.txLockAlloc_freelock);
 201	}
 202
 203	while (!(lid = TxAnchor.freelock))
 204		TXN_SLEEP(&TxAnchor.freelockwait);
 205	TxAnchor.freelock = TxLock[lid].next;
 206	HIGHWATERMARK(stattx.maxlid, lid);
 207	if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) {
 208		jfs_info("txLockAlloc tlocks low");
 209		jfs_tlocks_low = 1;
 210		wake_up_process(jfsSyncThread);
 211	}
 212
 213	return lid;
 214}
 215
 216static void txLockFree(lid_t lid)
 217{
 218	TxLock[lid].tid = 0;
 219	TxLock[lid].next = TxAnchor.freelock;
 220	TxAnchor.freelock = lid;
 221	TxAnchor.tlocksInUse--;
 222	if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) {
 223		jfs_info("txLockFree jfs_tlocks_low no more");
 224		jfs_tlocks_low = 0;
 225		TXN_WAKEUP(&TxAnchor.lowlockwait);
 226	}
 227	TXN_WAKEUP(&TxAnchor.freelockwait);
 228}
 229
 230/*
 231 * NAME:	txInit()
 232 *
 233 * FUNCTION:	initialize transaction management structures
 234 *
 235 * RETURN:
 236 *
 237 * serialization: single thread at jfs_init()
 238 */
 239int txInit(void)
 240{
 241	int k, size;
 242	struct sysinfo si;
 243
 244	/* Set defaults for nTxLock and nTxBlock if unset */
 245
 246	if (nTxLock == -1) {
 247		if (nTxBlock == -1) {
 248			/* Base default on memory size */
 249			si_meminfo(&si);
 250			if (si.totalram > (256 * 1024)) /* 1 GB */
 251				nTxLock = 64 * 1024;
 252			else
 253				nTxLock = si.totalram >> 2;
 254		} else if (nTxBlock > (8 * 1024))
 255			nTxLock = 64 * 1024;
 256		else
 257			nTxLock = nTxBlock << 3;
 258	}
 259	if (nTxBlock == -1)
 260		nTxBlock = nTxLock >> 3;
 261
 262	/* Verify tunable parameters */
 263	if (nTxBlock < 16)
 264		nTxBlock = 16;	/* No one should set it this low */
 265	if (nTxBlock > 65536)
 266		nTxBlock = 65536;
 267	if (nTxLock < 256)
 268		nTxLock = 256;	/* No one should set it this low */
 269	if (nTxLock > 65536)
 270		nTxLock = 65536;
 271
 272	printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n",
 273	       nTxBlock, nTxLock);
 274	/*
 275	 * initialize transaction block (tblock) table
 276	 *
 277	 * transaction id (tid) = tblock index
 278	 * tid = 0 is reserved.
 279	 */
 280	TxLockLWM = (nTxLock * 4) / 10;
 281	TxLockHWM = (nTxLock * 7) / 10;
 282	TxLockVHWM = (nTxLock * 8) / 10;
 283
 284	size = sizeof(struct tblock) * nTxBlock;
 285	TxBlock = vmalloc(size);
 286	if (TxBlock == NULL)
 287		return -ENOMEM;
 288
 289	for (k = 1; k < nTxBlock - 1; k++) {
 290		TxBlock[k].next = k + 1;
 291		init_waitqueue_head(&TxBlock[k].gcwait);
 292		init_waitqueue_head(&TxBlock[k].waitor);
 293	}
 294	TxBlock[k].next = 0;
 295	init_waitqueue_head(&TxBlock[k].gcwait);
 296	init_waitqueue_head(&TxBlock[k].waitor);
 297
 298	TxAnchor.freetid = 1;
 299	init_waitqueue_head(&TxAnchor.freewait);
 300
 301	stattx.maxtid = 1;	/* statistics */
 302
 303	/*
 304	 * initialize transaction lock (tlock) table
 305	 *
 306	 * transaction lock id = tlock index
 307	 * tlock id = 0 is reserved.
 308	 */
 309	size = sizeof(struct tlock) * nTxLock;
 310	TxLock = vmalloc(size);
 311	if (TxLock == NULL) {
 312		vfree(TxBlock);
 313		return -ENOMEM;
 314	}
 315
 316	/* initialize tlock table */
 317	for (k = 1; k < nTxLock - 1; k++)
 318		TxLock[k].next = k + 1;
 319	TxLock[k].next = 0;
 320	init_waitqueue_head(&TxAnchor.freelockwait);
 321	init_waitqueue_head(&TxAnchor.lowlockwait);
 322
 323	TxAnchor.freelock = 1;
 324	TxAnchor.tlocksInUse = 0;
 325	INIT_LIST_HEAD(&TxAnchor.anon_list);
 326	INIT_LIST_HEAD(&TxAnchor.anon_list2);
 327
 328	LAZY_LOCK_INIT();
 329	INIT_LIST_HEAD(&TxAnchor.unlock_queue);
 330
 331	stattx.maxlid = 1;	/* statistics */
 332
 333	return 0;
 334}
 335
 336/*
 337 * NAME:	txExit()
 338 *
 339 * FUNCTION:	clean up when module is unloaded
 340 */
 341void txExit(void)
 342{
 343	vfree(TxLock);
 344	TxLock = NULL;
 345	vfree(TxBlock);
 346	TxBlock = NULL;
 347}
 348
 349/*
 350 * NAME:	txBegin()
 351 *
 352 * FUNCTION:	start a transaction.
 353 *
 354 * PARAMETER:	sb	- superblock
 355 *		flag	- force for nested tx;
 356 *
 357 * RETURN:	tid	- transaction id
 358 *
 359 * note: flag force allows to start tx for nested tx
 360 * to prevent deadlock on logsync barrier;
 361 */
 362tid_t txBegin(struct super_block *sb, int flag)
 363{
 364	tid_t t;
 365	struct tblock *tblk;
 366	struct jfs_log *log;
 367
 368	jfs_info("txBegin: flag = 0x%x", flag);
 369	log = JFS_SBI(sb)->log;
 370
 
 
 
 
 
 371	TXN_LOCK();
 372
 373	INCREMENT(TxStat.txBegin);
 374
 375      retry:
 376	if (!(flag & COMMIT_FORCE)) {
 377		/*
 378		 * synchronize with logsync barrier
 379		 */
 380		if (test_bit(log_SYNCBARRIER, &log->flag) ||
 381		    test_bit(log_QUIESCE, &log->flag)) {
 382			INCREMENT(TxStat.txBegin_barrier);
 383			TXN_SLEEP(&log->syncwait);
 384			goto retry;
 385		}
 386	}
 387	if (flag == 0) {
 388		/*
 389		 * Don't begin transaction if we're getting starved for tlocks
 390		 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
 391		 * free tlocks)
 392		 */
 393		if (TxAnchor.tlocksInUse > TxLockVHWM) {
 394			INCREMENT(TxStat.txBegin_lockslow);
 395			TXN_SLEEP(&TxAnchor.lowlockwait);
 396			goto retry;
 397		}
 398	}
 399
 400	/*
 401	 * allocate transaction id/block
 402	 */
 403	if ((t = TxAnchor.freetid) == 0) {
 404		jfs_info("txBegin: waiting for free tid");
 405		INCREMENT(TxStat.txBegin_freetid);
 406		TXN_SLEEP(&TxAnchor.freewait);
 407		goto retry;
 408	}
 409
 410	tblk = tid_to_tblock(t);
 411
 412	if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) {
 413		/* Don't let a non-forced transaction take the last tblk */
 414		jfs_info("txBegin: waiting for free tid");
 415		INCREMENT(TxStat.txBegin_freetid);
 416		TXN_SLEEP(&TxAnchor.freewait);
 417		goto retry;
 418	}
 419
 420	TxAnchor.freetid = tblk->next;
 421
 422	/*
 423	 * initialize transaction
 424	 */
 425
 426	/*
 427	 * We can't zero the whole thing or we screw up another thread being
 428	 * awakened after sleeping on tblk->waitor
 429	 *
 430	 * memset(tblk, 0, sizeof(struct tblock));
 431	 */
 432	tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0;
 433
 434	tblk->sb = sb;
 435	++log->logtid;
 436	tblk->logtid = log->logtid;
 437
 438	++log->active;
 439
 440	HIGHWATERMARK(stattx.maxtid, t);	/* statistics */
 441	INCREMENT(stattx.ntid);	/* statistics */
 442
 443	TXN_UNLOCK();
 444
 445	jfs_info("txBegin: returning tid = %d", t);
 446
 447	return t;
 448}
 449
 450/*
 451 * NAME:	txBeginAnon()
 452 *
 453 * FUNCTION:	start an anonymous transaction.
 454 *		Blocks if logsync or available tlocks are low to prevent
 455 *		anonymous tlocks from depleting supply.
 456 *
 457 * PARAMETER:	sb	- superblock
 458 *
 459 * RETURN:	none
 460 */
 461void txBeginAnon(struct super_block *sb)
 462{
 463	struct jfs_log *log;
 464
 465	log = JFS_SBI(sb)->log;
 466
 467	TXN_LOCK();
 468	INCREMENT(TxStat.txBeginAnon);
 469
 470      retry:
 471	/*
 472	 * synchronize with logsync barrier
 473	 */
 474	if (test_bit(log_SYNCBARRIER, &log->flag) ||
 475	    test_bit(log_QUIESCE, &log->flag)) {
 476		INCREMENT(TxStat.txBeginAnon_barrier);
 477		TXN_SLEEP(&log->syncwait);
 478		goto retry;
 479	}
 480
 481	/*
 482	 * Don't begin transaction if we're getting starved for tlocks
 483	 */
 484	if (TxAnchor.tlocksInUse > TxLockVHWM) {
 485		INCREMENT(TxStat.txBeginAnon_lockslow);
 486		TXN_SLEEP(&TxAnchor.lowlockwait);
 487		goto retry;
 488	}
 489	TXN_UNLOCK();
 490}
 491
 492/*
 493 *	txEnd()
 494 *
 495 * function: free specified transaction block.
 496 *
 497 *	logsync barrier processing:
 498 *
 499 * serialization:
 500 */
 501void txEnd(tid_t tid)
 502{
 503	struct tblock *tblk = tid_to_tblock(tid);
 504	struct jfs_log *log;
 505
 506	jfs_info("txEnd: tid = %d", tid);
 507	TXN_LOCK();
 508
 509	/*
 510	 * wakeup transactions waiting on the page locked
 511	 * by the current transaction
 512	 */
 513	TXN_WAKEUP(&tblk->waitor);
 514
 515	log = JFS_SBI(tblk->sb)->log;
 516
 517	/*
 518	 * Lazy commit thread can't free this guy until we mark it UNLOCKED,
 519	 * otherwise, we would be left with a transaction that may have been
 520	 * reused.
 521	 *
 522	 * Lazy commit thread will turn off tblkGC_LAZY before calling this
 523	 * routine.
 524	 */
 525	if (tblk->flag & tblkGC_LAZY) {
 526		jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk);
 527		TXN_UNLOCK();
 528
 529		spin_lock_irq(&log->gclock);	// LOGGC_LOCK
 530		tblk->flag |= tblkGC_UNLOCKED;
 531		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
 532		return;
 533	}
 534
 535	jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk);
 536
 537	assert(tblk->next == 0);
 538
 539	/*
 540	 * insert tblock back on freelist
 541	 */
 542	tblk->next = TxAnchor.freetid;
 543	TxAnchor.freetid = tid;
 544
 545	/*
 546	 * mark the tblock not active
 547	 */
 548	if (--log->active == 0) {
 549		clear_bit(log_FLUSH, &log->flag);
 550
 551		/*
 552		 * synchronize with logsync barrier
 553		 */
 554		if (test_bit(log_SYNCBARRIER, &log->flag)) {
 555			TXN_UNLOCK();
 556
 557			/* write dirty metadata & forward log syncpt */
 558			jfs_syncpt(log, 1);
 559
 560			jfs_info("log barrier off: 0x%x", log->lsn);
 561
 562			/* enable new transactions start */
 563			clear_bit(log_SYNCBARRIER, &log->flag);
 564
 565			/* wakeup all waitors for logsync barrier */
 566			TXN_WAKEUP(&log->syncwait);
 567
 568			goto wakeup;
 569		}
 570	}
 571
 572	TXN_UNLOCK();
 573wakeup:
 574	/*
 575	 * wakeup all waitors for a free tblock
 576	 */
 577	TXN_WAKEUP(&TxAnchor.freewait);
 578}
 579
 580/*
 581 *	txLock()
 582 *
 583 * function: acquire a transaction lock on the specified <mp>
 584 *
 585 * parameter:
 586 *
 587 * return:	transaction lock id
 588 *
 589 * serialization:
 590 */
 591struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 592		     int type)
 593{
 594	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 595	int dir_xtree = 0;
 596	lid_t lid;
 597	tid_t xtid;
 598	struct tlock *tlck;
 599	struct xtlock *xtlck;
 600	struct linelock *linelock;
 601	xtpage_t *p;
 602	struct tblock *tblk;
 603
 604	TXN_LOCK();
 605
 606	if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) &&
 607	    !(mp->xflag & COMMIT_PAGE)) {
 608		/*
 609		 * Directory inode is special.  It can have both an xtree tlock
 610		 * and a dtree tlock associated with it.
 611		 */
 612		dir_xtree = 1;
 613		lid = jfs_ip->xtlid;
 614	} else
 615		lid = mp->lid;
 616
 617	/* is page not locked by a transaction ? */
 618	if (lid == 0)
 619		goto allocateLock;
 620
 621	jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid);
 622
 623	/* is page locked by the requester transaction ? */
 624	tlck = lid_to_tlock(lid);
 625	if ((xtid = tlck->tid) == tid) {
 626		TXN_UNLOCK();
 627		goto grantLock;
 628	}
 629
 630	/*
 631	 * is page locked by anonymous transaction/lock ?
 632	 *
 633	 * (page update without transaction (i.e., file write) is
 634	 * locked under anonymous transaction tid = 0:
 635	 * anonymous tlocks maintained on anonymous tlock list of
 636	 * the inode of the page and available to all anonymous
 637	 * transactions until txCommit() time at which point
 638	 * they are transferred to the transaction tlock list of
 639	 * the committing transaction of the inode)
 640	 */
 641	if (xtid == 0) {
 642		tlck->tid = tid;
 643		TXN_UNLOCK();
 644		tblk = tid_to_tblock(tid);
 645		/*
 646		 * The order of the tlocks in the transaction is important
 647		 * (during truncate, child xtree pages must be freed before
 648		 * parent's tlocks change the working map).
 649		 * Take tlock off anonymous list and add to tail of
 650		 * transaction list
 651		 *
 652		 * Note:  We really need to get rid of the tid & lid and
 653		 * use list_head's.  This code is getting UGLY!
 654		 */
 655		if (jfs_ip->atlhead == lid) {
 656			if (jfs_ip->atltail == lid) {
 657				/* only anonymous txn.
 658				 * Remove from anon_list
 659				 */
 660				TXN_LOCK();
 661				list_del_init(&jfs_ip->anon_inode_list);
 662				TXN_UNLOCK();
 663			}
 664			jfs_ip->atlhead = tlck->next;
 665		} else {
 666			lid_t last;
 667			for (last = jfs_ip->atlhead;
 668			     lid_to_tlock(last)->next != lid;
 669			     last = lid_to_tlock(last)->next) {
 670				assert(last);
 671			}
 672			lid_to_tlock(last)->next = tlck->next;
 673			if (jfs_ip->atltail == lid)
 674				jfs_ip->atltail = last;
 675		}
 676
 677		/* insert the tlock at tail of transaction tlock list */
 678
 679		if (tblk->next)
 680			lid_to_tlock(tblk->last)->next = lid;
 681		else
 682			tblk->next = lid;
 683		tlck->next = 0;
 684		tblk->last = lid;
 685
 686		goto grantLock;
 687	}
 688
 689	goto waitLock;
 690
 691	/*
 692	 * allocate a tlock
 693	 */
 694      allocateLock:
 695	lid = txLockAlloc();
 696	tlck = lid_to_tlock(lid);
 697
 698	/*
 699	 * initialize tlock
 700	 */
 701	tlck->tid = tid;
 702
 703	TXN_UNLOCK();
 704
 705	/* mark tlock for meta-data page */
 706	if (mp->xflag & COMMIT_PAGE) {
 707
 708		tlck->flag = tlckPAGELOCK;
 709
 710		/* mark the page dirty and nohomeok */
 711		metapage_nohomeok(mp);
 712
 713		jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
 714			 mp, mp->nohomeok, tid, tlck);
 715
 716		/* if anonymous transaction, and buffer is on the group
 717		 * commit synclist, mark inode to show this.  This will
 718		 * prevent the buffer from being marked nohomeok for too
 719		 * long a time.
 720		 */
 721		if ((tid == 0) && mp->lsn)
 722			set_cflag(COMMIT_Synclist, ip);
 723	}
 724	/* mark tlock for in-memory inode */
 725	else
 726		tlck->flag = tlckINODELOCK;
 727
 728	if (S_ISDIR(ip->i_mode))
 729		tlck->flag |= tlckDIRECTORY;
 730
 731	tlck->type = 0;
 732
 733	/* bind the tlock and the page */
 734	tlck->ip = ip;
 735	tlck->mp = mp;
 736	if (dir_xtree)
 737		jfs_ip->xtlid = lid;
 738	else
 739		mp->lid = lid;
 740
 741	/*
 742	 * enqueue transaction lock to transaction/inode
 743	 */
 744	/* insert the tlock at tail of transaction tlock list */
 745	if (tid) {
 746		tblk = tid_to_tblock(tid);
 747		if (tblk->next)
 748			lid_to_tlock(tblk->last)->next = lid;
 749		else
 750			tblk->next = lid;
 751		tlck->next = 0;
 752		tblk->last = lid;
 753	}
 754	/* anonymous transaction:
 755	 * insert the tlock at head of inode anonymous tlock list
 756	 */
 757	else {
 758		tlck->next = jfs_ip->atlhead;
 759		jfs_ip->atlhead = lid;
 760		if (tlck->next == 0) {
 761			/* This inode's first anonymous transaction */
 762			jfs_ip->atltail = lid;
 763			TXN_LOCK();
 764			list_add_tail(&jfs_ip->anon_inode_list,
 765				      &TxAnchor.anon_list);
 766			TXN_UNLOCK();
 767		}
 768	}
 769
 770	/* initialize type dependent area for linelock */
 771	linelock = (struct linelock *) & tlck->lock;
 772	linelock->next = 0;
 773	linelock->flag = tlckLINELOCK;
 774	linelock->maxcnt = TLOCKSHORT;
 775	linelock->index = 0;
 776
 777	switch (type & tlckTYPE) {
 778	case tlckDTREE:
 779		linelock->l2linesize = L2DTSLOTSIZE;
 780		break;
 781
 782	case tlckXTREE:
 783		linelock->l2linesize = L2XTSLOTSIZE;
 784
 785		xtlck = (struct xtlock *) linelock;
 786		xtlck->header.offset = 0;
 787		xtlck->header.length = 2;
 788
 789		if (type & tlckNEW) {
 790			xtlck->lwm.offset = XTENTRYSTART;
 791		} else {
 792			if (mp->xflag & COMMIT_PAGE)
 793				p = (xtpage_t *) mp->data;
 794			else
 795				p = &jfs_ip->i_xtroot;
 796			xtlck->lwm.offset =
 797			    le16_to_cpu(p->header.nextindex);
 798		}
 799		xtlck->lwm.length = 0;	/* ! */
 800		xtlck->twm.offset = 0;
 801		xtlck->hwm.offset = 0;
 802
 803		xtlck->index = 2;
 804		break;
 805
 806	case tlckINODE:
 807		linelock->l2linesize = L2INODESLOTSIZE;
 808		break;
 809
 810	case tlckDATA:
 811		linelock->l2linesize = L2DATASLOTSIZE;
 812		break;
 813
 814	default:
 815		jfs_err("UFO tlock:0x%p", tlck);
 816	}
 817
 818	/*
 819	 * update tlock vector
 820	 */
 821      grantLock:
 822	tlck->type |= type;
 823
 824	return tlck;
 825
 826	/*
 827	 * page is being locked by another transaction:
 828	 */
 829      waitLock:
 830	/* Only locks on ipimap or ipaimap should reach here */
 831	/* assert(jfs_ip->fileset == AGGREGATE_I); */
 832	if (jfs_ip->fileset != AGGREGATE_I) {
 833		printk(KERN_ERR "txLock: trying to lock locked page!");
 834		print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
 835			       ip, sizeof(*ip), 0);
 836		print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
 837			       mp, sizeof(*mp), 0);
 838		print_hex_dump(KERN_ERR, "Locker's tblock: ",
 839			       DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
 840			       sizeof(struct tblock), 0);
 841		print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
 842			       tlck, sizeof(*tlck), 0);
 843		BUG();
 844	}
 845	INCREMENT(stattx.waitlock);	/* statistics */
 846	TXN_UNLOCK();
 847	release_metapage(mp);
 848	TXN_LOCK();
 849	xtid = tlck->tid;	/* reacquire after dropping TXN_LOCK */
 850
 851	jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
 852		 tid, xtid, lid);
 853
 854	/* Recheck everything since dropping TXN_LOCK */
 855	if (xtid && (tlck->mp == mp) && (mp->lid == lid))
 856		TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
 857	else
 858		TXN_UNLOCK();
 859	jfs_info("txLock: awakened     tid = %d, lid = %d", tid, lid);
 860
 861	return NULL;
 862}
 863
 864/*
 865 * NAME:	txRelease()
 866 *
 867 * FUNCTION:	Release buffers associated with transaction locks, but don't
 868 *		mark homeok yet.  The allows other transactions to modify
 869 *		buffers, but won't let them go to disk until commit record
 870 *		actually gets written.
 871 *
 872 * PARAMETER:
 873 *		tblk	-
 874 *
 875 * RETURN:	Errors from subroutines.
 876 */
 877static void txRelease(struct tblock * tblk)
 878{
 879	struct metapage *mp;
 880	lid_t lid;
 881	struct tlock *tlck;
 882
 883	TXN_LOCK();
 884
 885	for (lid = tblk->next; lid; lid = tlck->next) {
 886		tlck = lid_to_tlock(lid);
 887		if ((mp = tlck->mp) != NULL &&
 888		    (tlck->type & tlckBTROOT) == 0) {
 889			assert(mp->xflag & COMMIT_PAGE);
 890			mp->lid = 0;
 891		}
 892	}
 893
 894	/*
 895	 * wakeup transactions waiting on a page locked
 896	 * by the current transaction
 897	 */
 898	TXN_WAKEUP(&tblk->waitor);
 899
 900	TXN_UNLOCK();
 901}
 902
 903/*
 904 * NAME:	txUnlock()
 905 *
 906 * FUNCTION:	Initiates pageout of pages modified by tid in journalled
 907 *		objects and frees their lockwords.
 908 */
 909static void txUnlock(struct tblock * tblk)
 910{
 911	struct tlock *tlck;
 912	struct linelock *linelock;
 913	lid_t lid, next, llid, k;
 914	struct metapage *mp;
 915	struct jfs_log *log;
 916	int difft, diffp;
 917	unsigned long flags;
 918
 919	jfs_info("txUnlock: tblk = 0x%p", tblk);
 920	log = JFS_SBI(tblk->sb)->log;
 921
 922	/*
 923	 * mark page under tlock homeok (its log has been written):
 924	 */
 925	for (lid = tblk->next; lid; lid = next) {
 926		tlck = lid_to_tlock(lid);
 927		next = tlck->next;
 928
 929		jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck);
 930
 931		/* unbind page from tlock */
 932		if ((mp = tlck->mp) != NULL &&
 933		    (tlck->type & tlckBTROOT) == 0) {
 934			assert(mp->xflag & COMMIT_PAGE);
 935
 936			/* hold buffer
 937			 */
 938			hold_metapage(mp);
 939
 940			assert(mp->nohomeok > 0);
 941			_metapage_homeok(mp);
 942
 943			/* inherit younger/larger clsn */
 944			LOGSYNC_LOCK(log, flags);
 945			if (mp->clsn) {
 946				logdiff(difft, tblk->clsn, log);
 947				logdiff(diffp, mp->clsn, log);
 948				if (difft > diffp)
 949					mp->clsn = tblk->clsn;
 950			} else
 951				mp->clsn = tblk->clsn;
 952			LOGSYNC_UNLOCK(log, flags);
 953
 954			assert(!(tlck->flag & tlckFREEPAGE));
 955
 956			put_metapage(mp);
 957		}
 958
 959		/* insert tlock, and linelock(s) of the tlock if any,
 960		 * at head of freelist
 961		 */
 962		TXN_LOCK();
 963
 964		llid = ((struct linelock *) & tlck->lock)->next;
 965		while (llid) {
 966			linelock = (struct linelock *) lid_to_tlock(llid);
 967			k = linelock->next;
 968			txLockFree(llid);
 969			llid = k;
 970		}
 971		txLockFree(lid);
 972
 973		TXN_UNLOCK();
 974	}
 975	tblk->next = tblk->last = 0;
 976
 977	/*
 978	 * remove tblock from logsynclist
 979	 * (allocation map pages inherited lsn of tblk and
 980	 * has been inserted in logsync list at txUpdateMap())
 981	 */
 982	if (tblk->lsn) {
 983		LOGSYNC_LOCK(log, flags);
 984		log->count--;
 985		list_del(&tblk->synclist);
 986		LOGSYNC_UNLOCK(log, flags);
 987	}
 988}
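/*
 * Editorial note (not part of the original source): in the commit path
 * below, txCommit() drives these helpers in order -- txLog() writes the
 * log records, lmGroupCommit() writes the COMMIT record, txForce() and
 * txUpdateMap() run for COMMIT_FORCE transactions, then txRelease()
 * unbinds the pages, and txUnlock() runs last unless the transaction is
 * lazy, in which case txLazyCommit() performs txUpdateMap() and
 * txUnlock() later from the jfs_lazycommit thread.
 */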
 989
 990/*
 991 *	txMaplock()
 992 *
 993 * function: allocate a transaction lock for freed page/entry;
 994 *	for freed page, maplock is used as xtlock/dtlock type;
 995 */
 996struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 997{
 998	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 999	lid_t lid;
1000	struct tblock *tblk;
1001	struct tlock *tlck;
1002	struct maplock *maplock;
1003
1004	TXN_LOCK();
1005
1006	/*
1007	 * allocate a tlock
1008	 */
1009	lid = txLockAlloc();
1010	tlck = lid_to_tlock(lid);
1011
1012	/*
1013	 * initialize tlock
1014	 */
1015	tlck->tid = tid;
1016
1017	/* bind the tlock and the object */
1018	tlck->flag = tlckINODELOCK;
1019	if (S_ISDIR(ip->i_mode))
1020		tlck->flag |= tlckDIRECTORY;
1021	tlck->ip = ip;
1022	tlck->mp = NULL;
1023
1024	tlck->type = type;
1025
1026	/*
1027	 * enqueue transaction lock to transaction/inode
1028	 */
1029	/* insert the tlock at tail of transaction tlock list */
1030	if (tid) {
1031		tblk = tid_to_tblock(tid);
1032		if (tblk->next)
1033			lid_to_tlock(tblk->last)->next = lid;
1034		else
1035			tblk->next = lid;
1036		tlck->next = 0;
1037		tblk->last = lid;
1038	}
1039	/* anonymous transaction:
1040	 * insert the tlock at head of inode anonymous tlock list
1041	 */
1042	else {
1043		tlck->next = jfs_ip->atlhead;
1044		jfs_ip->atlhead = lid;
1045		if (tlck->next == 0) {
1046			/* This inode's first anonymous transaction */
1047			jfs_ip->atltail = lid;
1048			list_add_tail(&jfs_ip->anon_inode_list,
1049				      &TxAnchor.anon_list);
1050		}
1051	}
1052
1053	TXN_UNLOCK();
1054
1055	/* initialize type dependent area for maplock */
1056	maplock = (struct maplock *) & tlck->lock;
1057	maplock->next = 0;
1058	maplock->maxcnt = 0;
1059	maplock->index = 0;
1060
1061	return tlck;
1062}
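/*
 * Editorial sketch (not part of the original source): a typical use of
 * the tlock returned by txMaplock() -- the caller casts tlck->lock to a
 * pxd_lock and records the extent to be freed, much as txEA() below does
 * for an old EA extent.  "oldea" is a placeholder for any dxd_t that
 * describes an on-disk extent.
 */
#if 0
static void example_maplock_free(tid_t tid, struct inode *ip, dxd_t *oldea)
{
	struct tlock *tlck = txMaplock(tid, ip, tlckMAP);
	struct pxd_lock *pxdlock = (struct pxd_lock *) &tlck->lock;

	pxdlock->flag = mlckFREEPXD;			/* free, not alloc */
	PXDaddress(&pxdlock->pxd, addressDXD(oldea));	/* extent address */
	PXDlength(&pxdlock->pxd, lengthDXD(oldea));	/* extent length */
	pxdlock->index = 1;				/* one pxd recorded */
}
#endif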
1063
1064/*
1065 *	txLinelock()
1066 *
1067 * function: allocate a transaction lock for log vector list
1068 */
1069struct linelock *txLinelock(struct linelock * tlock)
1070{
1071	lid_t lid;
1072	struct tlock *tlck;
1073	struct linelock *linelock;
1074
1075	TXN_LOCK();
1076
1077	/* allocate a TxLock structure */
1078	lid = txLockAlloc();
1079	tlck = lid_to_tlock(lid);
1080
1081	TXN_UNLOCK();
1082
1083	/* initialize linelock */
1084	linelock = (struct linelock *) tlck;
1085	linelock->next = 0;
1086	linelock->flag = tlckLINELOCK;
1087	linelock->maxcnt = TLOCKLONG;
1088	linelock->index = 0;
1089	if (tlck->flag & tlckDIRECTORY)
1090		linelock->flag |= tlckDIRECTORY;
1091
1092	/* append linelock after tlock */
1093	linelock->next = tlock->next;
1094	tlock->next = lid;
1095
1096	return linelock;
1097}
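/*
 * Editorial note (not part of the original source): the linelock
 * allocated here is spliced into the lid chain immediately after the
 * tlock (or linelock) passed in, so the overflow log-vector entries for
 * one tlock form a short linked list of linelocks; txUnlock() later
 * walks that list and returns each entry with txLockFree().
 */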
1098
1099/*
1100 *		transaction commit management
1101 *		-----------------------------
1102 */
1103
1104/*
1105 * NAME:	txCommit()
1106 *
1107 * FUNCTION:	commit the changes to the objects specified in
1108 *		clist.  For journalled segments only the
1109 *		changes of the caller are committed, ie by tid.
1110 *		for non-journalled segments the data are flushed to
1111 *		disk and then the change to the disk inode and indirect
1112 *		blocks committed (so blocks newly allocated to the
1113 *		segment will be made a part of the segment atomically).
1114 *
1115 *		all of the segments specified in clist must be in
1116 *		one file system. no more than 6 segments are needed
1117 *		to handle all unix svcs.
1118 *
1119 *		if the i_nlink field (i.e. disk inode link count)
1120 *		is zero, and the type of inode is a regular file or
1121 *		directory, or symbolic link, the inode is truncated
1122 *		to zero length. the truncation is committed but the
1123 *		VM resources are unaffected until it is closed (see
1124 *		iput and iclose).
1125 *
1126 * PARAMETER:
1127 *
1128 * RETURN:
1129 *
1130 * serialization:
1131 *		on entry the inode lock on each segment is assumed
1132 *		to be held.
1133 *
1134 * i/o error:
1135 */
1136int txCommit(tid_t tid,		/* transaction identifier */
1137	     int nip,		/* number of inodes to commit */
1138	     struct inode **iplist,	/* list of inode to commit */
1139	     int flag)
1140{
1141	int rc = 0;
1142	struct commit cd;
1143	struct jfs_log *log;
1144	struct tblock *tblk;
1145	struct lrd *lrd;
1146	struct inode *ip;
1147	struct jfs_inode_info *jfs_ip;
1148	int k, n;
1149	ino_t top;
1150	struct super_block *sb;
1151
1152	jfs_info("txCommit, tid = %d, flag = %d", tid, flag);
1153	/* is read-only file system ? */
1154	if (isReadOnly(iplist[0])) {
1155		rc = -EROFS;
1156		goto TheEnd;
1157	}
1158
1159	sb = cd.sb = iplist[0]->i_sb;
1160	cd.tid = tid;
1161
1162	if (tid == 0)
1163		tid = txBegin(sb, 0);
1164	tblk = tid_to_tblock(tid);
1165
1166	/*
1167	 * initialize commit structure
1168	 */
1169	log = JFS_SBI(sb)->log;
1170	cd.log = log;
1171
1172	/* initialize log record descriptor in commit */
1173	lrd = &cd.lrd;
1174	lrd->logtid = cpu_to_le32(tblk->logtid);
1175	lrd->backchain = 0;
1176
1177	tblk->xflag |= flag;
1178
1179	if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
1180		tblk->xflag |= COMMIT_LAZY;
1181	/*
1182	 *	prepare non-journaled objects for commit
1183	 *
1184	 * flush data pages of non-journaled file
1185	 * to prevent the file getting non-initialized disk blocks
1186	 * in case of crash.
1187	 * (new blocks - )
1188	 */
1189	cd.iplist = iplist;
1190	cd.nip = nip;
1191
1192	/*
1193	 *	acquire transaction lock on (on-disk) inodes
1194	 *
1195	 * update on-disk inode from in-memory inode
1196	 * acquiring transaction locks for AFTER records
1197	 * on the on-disk inode of file object
1198	 *
1199	 * sort the inodes array by inode number in descending order
1200	 * to prevent deadlock when acquiring transaction lock
1201	 * of on-disk inodes on multiple on-disk inode pages by
1202	 * multiple concurrent transactions
1203	 */
1204	for (k = 0; k < cd.nip; k++) {
1205		top = (cd.iplist[k])->i_ino;
1206		for (n = k + 1; n < cd.nip; n++) {
1207			ip = cd.iplist[n];
1208			if (ip->i_ino > top) {
1209				top = ip->i_ino;
1210				cd.iplist[n] = cd.iplist[k];
1211				cd.iplist[k] = ip;
1212			}
1213		}
1214
1215		ip = cd.iplist[k];
1216		jfs_ip = JFS_IP(ip);
1217
1218		/*
1219		 * BUGBUG - This code has temporarily been removed.  The
1220		 * intent is to ensure that any file data is written before
1221		 * the metadata is committed to the journal.  This prevents
1222		 * uninitialized data from appearing in a file after the
1223		 * journal has been replayed.  (The uninitialized data
1224		 * could be sensitive data removed by another user.)
1225		 *
1226		 * The problem now is that we are holding the IWRITELOCK
1227		 * on the inode, and calling filemap_fdatawrite on an
1228		 * unmapped page will cause a deadlock in jfs_get_block.
1229		 *
1230		 * The long term solution is to pare down the use of
1231		 * IWRITELOCK.  We are currently holding it too long.
1232		 * We could also be smarter about which data pages need
1233		 * to be written before the transaction is committed and
1234		 * when we don't need to worry about it at all.
1235		 *
1236		 * if ((!S_ISDIR(ip->i_mode))
1237		 *    && (tblk->flag & COMMIT_DELETE) == 0)
1238		 *	filemap_write_and_wait(ip->i_mapping);
1239		 */
1240
1241		/*
1242		 * Mark inode as not dirty.  It will still be on the dirty
1243		 * inode list, but we'll know not to commit it again unless
1244		 * it gets marked dirty again
1245		 */
1246		clear_cflag(COMMIT_Dirty, ip);
1247
1248		/* inherit anonymous tlock(s) of inode */
1249		if (jfs_ip->atlhead) {
1250			lid_to_tlock(jfs_ip->atltail)->next = tblk->next;
1251			tblk->next = jfs_ip->atlhead;
1252			if (!tblk->last)
1253				tblk->last = jfs_ip->atltail;
1254			jfs_ip->atlhead = jfs_ip->atltail = 0;
1255			TXN_LOCK();
1256			list_del_init(&jfs_ip->anon_inode_list);
1257			TXN_UNLOCK();
1258		}
1259
1260		/*
1261		 * acquire transaction lock on on-disk inode page
1262		 * (become first tlock of the tblk's tlock list)
1263		 */
1264		if (((rc = diWrite(tid, ip))))
1265			goto out;
1266	}
1267
1268	/*
1269	 *	write log records from transaction locks
1270	 *
1271	 * txUpdateMap() resets XAD_NEW in XAD.
1272	 */
1273	if ((rc = txLog(log, tblk, &cd)))
1274		goto TheEnd;
1275
1276	/*
1277	 * Ensure that inode isn't reused before
1278	 * lazy commit thread finishes processing
1279	 */
1280	if (tblk->xflag & COMMIT_DELETE) {
1281		ihold(tblk->u.ip);
1282		/*
1283		 * Avoid a rare deadlock
1284		 *
1285		 * If the inode is locked, we may be blocked in
1286		 * jfs_commit_inode.  If so, we don't want the
1287		 * lazy_commit thread doing the last iput() on the inode
1288		 * since that may block on the locked inode.  Instead,
1289		 * commit the transaction synchronously, so the last iput
1290		 * will be done by the calling thread (or later)
1291		 */
1292		/*
1293		 * I believe this code is no longer needed.  Splitting I_LOCK
1294		 * into two bits, I_NEW and I_SYNC should prevent this
1295		 * deadlock as well.  But since I don't have a JFS testload
1296		 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
1297		 * Joern
1298		 */
1299		if (tblk->u.ip->i_state & I_SYNC)
1300			tblk->xflag &= ~COMMIT_LAZY;
1301	}
1302
1303	ASSERT((!(tblk->xflag & COMMIT_DELETE)) ||
1304	       ((tblk->u.ip->i_nlink == 0) &&
1305		!test_cflag(COMMIT_Nolink, tblk->u.ip)));
1306
1307	/*
1308	 *	write COMMIT log record
1309	 */
1310	lrd->type = cpu_to_le16(LOG_COMMIT);
1311	lrd->length = 0;
1312	lmLog(log, tblk, lrd, NULL);
1313
1314	lmGroupCommit(log, tblk);
1315
1316	/*
1317	 *	- transaction is now committed -
1318	 */
1319
1320	/*
1321	 * force pages in careful update
1322	 * (imap addressing structure update)
1323	 */
1324	if (flag & COMMIT_FORCE)
1325		txForce(tblk);
1326
1327	/*
1328	 *	update allocation map.
1329	 *
1330	 * update inode allocation map and inode:
1331	 * free pager lock on memory object of inode if any.
1332	 * update block allocation map.
1333	 *
1334	 * txUpdateMap() resets XAD_NEW in XAD.
1335	 */
1336	if (tblk->xflag & COMMIT_FORCE)
1337		txUpdateMap(tblk);
1338
1339	/*
1340	 *	free transaction locks and pageout/free pages
1341	 */
1342	txRelease(tblk);
1343
1344	if ((tblk->flag & tblkGC_LAZY) == 0)
1345		txUnlock(tblk);
1346
1347
1348	/*
1349	 *	reset in-memory object state
1350	 */
1351	for (k = 0; k < cd.nip; k++) {
1352		ip = cd.iplist[k];
1353		jfs_ip = JFS_IP(ip);
1354
1355		/*
1356		 * reset in-memory inode state
1357		 */
1358		jfs_ip->bxflag = 0;
1359		jfs_ip->blid = 0;
1360	}
1361
1362      out:
1363	if (rc != 0)
1364		txAbort(tid, 1);
1365
1366      TheEnd:
1367	jfs_info("txCommit: tid = %d, returning %d", tid, rc);
1368	return rc;
1369}
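/*
 * Editorial sketch (not part of the original source): the usual caller
 * pattern around txCommit(), mirroring what txQuiesce() and jfs_sync()
 * do later in this file.  The helper name is illustrative only.
 */
#if 0
static void example_commit_one_inode(struct inode *ip)
{
	tid_t tid;

	tid = txBegin(ip->i_sb, COMMIT_INODE);	/* start a transaction */
	mutex_lock(&JFS_IP(ip)->commit_mutex);

	/* commit a single inode; flag 0 allows a lazy group commit,
	 * COMMIT_FORCE/COMMIT_SYNC would make it synchronous
	 */
	txCommit(tid, 1, &ip, 0);

	txEnd(tid);				/* release the tblock */
	mutex_unlock(&JFS_IP(ip)->commit_mutex);
}
#endif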
1370
1371/*
1372 * NAME:	txLog()
1373 *
1374 * FUNCTION:	Writes AFTER log records for all lines modified
1375 *		by tid for segments specified by inodes in comdata.
1376 *		Code assumes only WRITELOCKS are recorded in lockwords.
1377 *
1378 * PARAMETERS:
1379 *
1380 * RETURN :
1381 */
1382static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
1383{
1384	int rc = 0;
1385	struct inode *ip;
1386	lid_t lid;
1387	struct tlock *tlck;
1388	struct lrd *lrd = &cd->lrd;
1389
1390	/*
1391	 * write log record(s) for each tlock of transaction,
1392	 */
1393	for (lid = tblk->next; lid; lid = tlck->next) {
1394		tlck = lid_to_tlock(lid);
1395
1396		tlck->flag |= tlckLOG;
1397
1398		/* initialize lrd common */
1399		ip = tlck->ip;
1400		lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate);
1401		lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset);
1402		lrd->log.redopage.inode = cpu_to_le32(ip->i_ino);
1403
1404		/* write log record of page from the tlock */
1405		switch (tlck->type & tlckTYPE) {
1406		case tlckXTREE:
1407			xtLog(log, tblk, lrd, tlck);
1408			break;
1409
1410		case tlckDTREE:
1411			dtLog(log, tblk, lrd, tlck);
1412			break;
1413
1414		case tlckINODE:
1415			diLog(log, tblk, lrd, tlck, cd);
1416			break;
1417
1418		case tlckMAP:
1419			mapLog(log, tblk, lrd, tlck);
1420			break;
1421
1422		case tlckDATA:
1423			dataLog(log, tblk, lrd, tlck);
1424			break;
1425
1426		default:
1427			jfs_err("UFO tlock:0x%p", tlck);
1428		}
1429	}
1430
1431	return rc;
1432}
1433
1434/*
1435 *	diLog()
1436 *
1437 * function:	log inode tlock and format maplock to update bmap;
1438 */
1439static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1440		 struct tlock * tlck, struct commit * cd)
1441{
1442	int rc = 0;
1443	struct metapage *mp;
1444	pxd_t *pxd;
1445	struct pxd_lock *pxdlock;
1446
1447	mp = tlck->mp;
1448
1449	/* initialize as REDOPAGE record format */
1450	lrd->log.redopage.type = cpu_to_le16(LOG_INODE);
1451	lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE);
1452
1453	pxd = &lrd->log.redopage.pxd;
1454
1455	/*
1456	 *	inode after image
1457	 */
1458	if (tlck->type & tlckENTRY) {
1459		/* log after-image for logredo(): */
1460		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1461		PXDaddress(pxd, mp->index);
1462		PXDlength(pxd,
1463			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1464		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1465
1466		/* mark page as homeward bound */
1467		tlck->flag |= tlckWRITEPAGE;
1468	} else if (tlck->type & tlckFREE) {
1469		/*
1470		 *	free inode extent
1471		 *
1472		 * (pages of the freed inode extent have been invalidated and
1473		 * a maplock for free of the extent has been formatted at
1474		 * txLock() time);
1475		 *
1476		 * the tlock had been acquired on the inode allocation map page
1477		 * (iag) that specifies the freed extent, even though the map
1478		 * page is not itself logged, to prevent pageout of the map
1479		 * page before the log;
1480		 */
1481
1482		/* log LOG_NOREDOINOEXT of the freed inode extent for
1483		 * logredo() to start NoRedoPage filters, and to update
1484		 * imap and bmap for free of the extent;
1485		 */
1486		lrd->type = cpu_to_le16(LOG_NOREDOINOEXT);
1487		/*
1488		 * For the LOG_NOREDOINOEXT record, we need
1489		 * to pass the IAG number and inode extent
1490		 * index (within that IAG) from which the
1491		 * extent is being released.  These have been
1492		 * passed to us in the iplist[1] and iplist[2].
1493		 */
1494		lrd->log.noredoinoext.iagnum =
1495		    cpu_to_le32((u32) (size_t) cd->iplist[1]);
1496		lrd->log.noredoinoext.inoext_idx =
1497		    cpu_to_le32((u32) (size_t) cd->iplist[2]);
1498
1499		pxdlock = (struct pxd_lock *) & tlck->lock;
1500		*pxd = pxdlock->pxd;
1501		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1502
1503		/* update bmap */
1504		tlck->flag |= tlckUPDATEMAP;
1505
1506		/* mark page as homeward bound */
1507		tlck->flag |= tlckWRITEPAGE;
1508	} else
1509		jfs_err("diLog: UFO type tlck:0x%p", tlck);
1510#ifdef  _JFS_WIP
1511	/*
1512	 *	alloc/free external EA extent
1513	 *
1514	 * a maplock for txUpdateMap() to update bPWMAP for alloc/free
1515	 * of the extent has been formatted at txLock() time;
1516	 */
1517	else {
1518		assert(tlck->type & tlckEA);
1519
1520		/* log LOG_UPDATEMAP for logredo() to update bmap for
1521		 * alloc of new (and free of old) external EA extent;
1522		 */
1523		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1524		pxdlock = (struct pxd_lock *) & tlck->lock;
1525		nlock = pxdlock->index;
1526		for (i = 0; i < nlock; i++, pxdlock++) {
1527			if (pxdlock->flag & mlckALLOCPXD)
1528				lrd->log.updatemap.type =
1529				    cpu_to_le16(LOG_ALLOCPXD);
1530			else
1531				lrd->log.updatemap.type =
1532				    cpu_to_le16(LOG_FREEPXD);
1533			lrd->log.updatemap.nxd = cpu_to_le16(1);
1534			lrd->log.updatemap.pxd = pxdlock->pxd;
1535			lrd->backchain =
1536			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1537		}
1538
1539		/* update bmap */
1540		tlck->flag |= tlckUPDATEMAP;
1541	}
1542#endif				/* _JFS_WIP */
1543
1544	return rc;
1545}
1546
1547/*
1548 *	dataLog()
1549 *
1550 * function:	log data tlock
1551 */
1552static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1553	    struct tlock * tlck)
1554{
1555	struct metapage *mp;
1556	pxd_t *pxd;
1557
1558	mp = tlck->mp;
1559
1560	/* initialize as REDOPAGE record format */
1561	lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
1562	lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);
1563
1564	pxd = &lrd->log.redopage.pxd;
1565
1566	/* log after-image for logredo(): */
1567	lrd->type = cpu_to_le16(LOG_REDOPAGE);
1568
1569	if (jfs_dirtable_inline(tlck->ip)) {
1570		/*
1571		 * The table has been truncated; we must have deleted
1572		 * the last entry, so don't bother logging this
1573		 */
1574		mp->lid = 0;
1575		grab_metapage(mp);
1576		metapage_homeok(mp);
1577		discard_metapage(mp);
1578		tlck->mp = NULL;
1579		return 0;
1580	}
1581
1582	PXDaddress(pxd, mp->index);
1583	PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);
1584
1585	lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1586
1587	/* mark page as homeward bound */
1588	tlck->flag |= tlckWRITEPAGE;
1589
1590	return 0;
1591}
1592
1593/*
1594 *	dtLog()
1595 *
1596 * function:	log dtree tlock and format maplock to update bmap;
1597 */
1598static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1599	   struct tlock * tlck)
1600{
1601	struct metapage *mp;
1602	struct pxd_lock *pxdlock;
1603	pxd_t *pxd;
1604
1605	mp = tlck->mp;
1606
1607	/* initialize as REDOPAGE/NOREDOPAGE record format */
1608	lrd->log.redopage.type = cpu_to_le16(LOG_DTREE);
1609	lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE);
1610
1611	pxd = &lrd->log.redopage.pxd;
1612
1613	if (tlck->type & tlckBTROOT)
1614		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1615
1616	/*
1617	 *	page extension via relocation: entry insertion;
1618	 *	page extension in-place: entry insertion;
1619	 *	new right page from page split, reinitialized in-line
1620	 *	root from root page split: entry insertion;
1621	 */
1622	if (tlck->type & (tlckNEW | tlckEXTEND)) {
1623		/* log after-image of the new page for logredo():
1624		 * mark log (LOG_NEW) for logredo() to initialize
1625		 * freelist and update bmap for alloc of the new page;
1626		 */
1627		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1628		if (tlck->type & tlckEXTEND)
1629			lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
1630		else
1631			lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
1632		PXDaddress(pxd, mp->index);
1633		PXDlength(pxd,
1634			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1635		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1636
1637		/* format a maplock for txUpdateMap() to update bPMAP for
1638		 * alloc of the new page;
1639		 */
1640		if (tlck->type & tlckBTROOT)
1641			return;
1642		tlck->flag |= tlckUPDATEMAP;
1643		pxdlock = (struct pxd_lock *) & tlck->lock;
1644		pxdlock->flag = mlckALLOCPXD;
1645		pxdlock->pxd = *pxd;
1646
1647		pxdlock->index = 1;
1648
1649		/* mark page as homeward bound */
1650		tlck->flag |= tlckWRITEPAGE;
1651		return;
1652	}
1653
1654	/*
1655	 *	entry insertion/deletion,
1656	 *	sibling page link update (old right page before split);
1657	 */
1658	if (tlck->type & (tlckENTRY | tlckRELINK)) {
1659		/* log after-image for logredo(): */
1660		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1661		PXDaddress(pxd, mp->index);
1662		PXDlength(pxd,
1663			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1664		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1665
1666		/* mark page as homeward bound */
1667		tlck->flag |= tlckWRITEPAGE;
1668		return;
1669	}
1670
1671	/*
1672	 *	page deletion: page has been invalidated
1673	 *	page relocation: source extent
1674	 *
1675	 *	a maplock for free of the page has been formatted
1676	 *	at txLock() time);
1677	 */
1678	if (tlck->type & (tlckFREE | tlckRELOCATE)) {
1679		/* log LOG_NOREDOPAGE of the deleted page for logredo()
1680		 * to start NoRedoPage filter and to update bmap for free
1681		 * of the deleted page
1682		 */
1683		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1684		pxdlock = (struct pxd_lock *) & tlck->lock;
1685		*pxd = pxdlock->pxd;
1686		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1687
1688		/* a maplock for txUpdateMap() for free of the page
1689		 * has been formatted at txLock() time;
1690		 */
1691		tlck->flag |= tlckUPDATEMAP;
1692	}
1693	return;
1694}
1695
1696/*
1697 *	xtLog()
1698 *
1699 * function:	log xtree tlock and format maplock to update bmap;
1700 */
1701static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1702	   struct tlock * tlck)
1703{
1704	struct inode *ip;
1705	struct metapage *mp;
1706	xtpage_t *p;
1707	struct xtlock *xtlck;
1708	struct maplock *maplock;
1709	struct xdlistlock *xadlock;
1710	struct pxd_lock *pxdlock;
1711	pxd_t *page_pxd;
1712	int next, lwm, hwm;
1713
1714	ip = tlck->ip;
1715	mp = tlck->mp;
1716
1717	/* initialize as REDOPAGE/NOREDOPAGE record format */
1718	lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
1719	lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);
1720
1721	page_pxd = &lrd->log.redopage.pxd;
1722
1723	if (tlck->type & tlckBTROOT) {
1724		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1725		p = &JFS_IP(ip)->i_xtroot;
1726		if (S_ISDIR(ip->i_mode))
1727			lrd->log.redopage.type |=
1728			    cpu_to_le16(LOG_DIR_XTREE);
1729	} else
1730		p = (xtpage_t *) mp->data;
1731	next = le16_to_cpu(p->header.nextindex);
1732
1733	xtlck = (struct xtlock *) & tlck->lock;
1734
1735	maplock = (struct maplock *) & tlck->lock;
1736	xadlock = (struct xdlistlock *) maplock;
1737
1738	/*
1739	 *	entry insertion/extension;
1740	 *	sibling page link update (old right page before split);
1741	 */
1742	if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
1743		/* log after-image for logredo():
1744		 * logredo() will update bmap for alloc of new/extended
1745		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1746		 * after-image of XADlist;
1747		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1748		 * applying the after-image to the meta-data page.
1749		 */
1750		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1751		PXDaddress(page_pxd, mp->index);
1752		PXDlength(page_pxd,
1753			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1754		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1755
1756		/* format a maplock for txUpdateMap() to update bPMAP
1757		 * for alloc of new/extended extents of XAD[lwm:next)
1758		 * from the page itself;
1759		 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
1760		 */
1761		lwm = xtlck->lwm.offset;
1762		if (lwm == 0)
1763			lwm = XTPAGEMAXSLOT;
1764
1765		if (lwm == next)
1766			goto out;
1767		if (lwm > next) {
1768			jfs_err("xtLog: lwm > next\n");
1769			goto out;
1770		}
1771		tlck->flag |= tlckUPDATEMAP;
1772		xadlock->flag = mlckALLOCXADLIST;
1773		xadlock->count = next - lwm;
1774		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1775			int i;
1776			pxd_t *pxd;
1777			/*
1778			 * Lazy commit may allow xtree to be modified before
1779			 * txUpdateMap runs.  Copy xad into linelock to
1780			 * preserve correct data.
1781			 *
1782			 * We can fit twice as many pxds as xads in the lock
1783			 */
1784			xadlock->flag = mlckALLOCPXDLIST;
1785			pxd = xadlock->xdlist = &xtlck->pxdlock;
1786			for (i = 0; i < xadlock->count; i++) {
1787				PXDaddress(pxd, addressXAD(&p->xad[lwm + i]));
1788				PXDlength(pxd, lengthXAD(&p->xad[lwm + i]));
1789				p->xad[lwm + i].flag &=
1790				    ~(XAD_NEW | XAD_EXTENDED);
1791				pxd++;
1792			}
1793		} else {
1794			/*
1795			 * xdlist will point into the inode's xtree, ensure
1796			 * that transaction is not committed lazily.
1797			 */
1798			xadlock->flag = mlckALLOCXADLIST;
1799			xadlock->xdlist = &p->xad[lwm];
1800			tblk->xflag &= ~COMMIT_LAZY;
1801		}
1802		jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d "
1803			 "count:%d", tlck->ip, mp, tlck, lwm, xadlock->count);
1804
1805		maplock->index = 1;
1806
1807	      out:
1808		/* mark page as homeward bound */
1809		tlck->flag |= tlckWRITEPAGE;
1810
1811		return;
1812	}
1813
1814	/*
1815	 *	page deletion: file deletion/truncation (ref. xtTruncate())
1816	 *
1817	 * (page will be invalidated after log is written and bmap
1818	 * is updated from the page);
1819	 */
1820	if (tlck->type & tlckFREE) {
1821		/* LOG_NOREDOPAGE log for NoRedoPage filter:
1822		 * if page free from file delete, NoRedoFile filter from
1823		 * inode image of zero link count will subsume NoRedoPage
1824		 * filters for each page;
1825		 * if page free from file truncation, write NoRedoPage
1826		 * filter;
1827		 *
1828		 * update of block allocation map for the page itself:
1829		 * if page free from deletion and truncation, LOG_UPDATEMAP
1830		 * log for the page itself is generated from processing
1831		 * its parent page xad entries;
1832		 */
1833		/* if page free from file truncation, log LOG_NOREDOPAGE
1834		 * of the deleted page for logredo() to start NoRedoPage
1835		 * filter for the page;
1836		 */
1837		if (tblk->xflag & COMMIT_TRUNCATE) {
1838			/* write NOREDOPAGE for the page */
1839			lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1840			PXDaddress(page_pxd, mp->index);
1841			PXDlength(page_pxd,
1842				  mp->logical_size >> tblk->sb->
1843				  s_blocksize_bits);
1844			lrd->backchain =
1845			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1846
1847			if (tlck->type & tlckBTROOT) {
1848				/* Empty xtree must be logged */
1849				lrd->type = cpu_to_le16(LOG_REDOPAGE);
1850				lrd->backchain =
1851				    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1852			}
1853		}
1854
1855		/* init LOG_UPDATEMAP of the freed extents
1856		 * XAD[XTENTRYSTART:hwm) from the deleted page itself
1857		 * for logredo() to update bmap;
1858		 */
1859		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1860		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST);
1861		xtlck = (struct xtlock *) & tlck->lock;
1862		hwm = xtlck->hwm.offset;
1863		lrd->log.updatemap.nxd =
1864		    cpu_to_le16(hwm - XTENTRYSTART + 1);
1865		/* reformat linelock for lmLog() */
1866		xtlck->header.offset = XTENTRYSTART;
1867		xtlck->header.length = hwm - XTENTRYSTART + 1;
1868		xtlck->index = 1;
1869		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1870
1871		/* format a maplock for txUpdateMap() to update bmap
1872		 * to free extents of XAD[XTENTRYSTART:hwm) from the
1873		 * deleted page itself;
1874		 */
1875		tlck->flag |= tlckUPDATEMAP;
1876		xadlock->count = hwm - XTENTRYSTART + 1;
1877		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1878			int i;
1879			pxd_t *pxd;
1880			/*
1881			 * Lazy commit may allow xtree to be modified before
1882			 * txUpdateMap runs.  Copy xad into linelock to
1883			 * preserve correct data.
1884			 *
1885			 * We can fit twice as many pxds as xads in the lock
1886			 */
1887			xadlock->flag = mlckFREEPXDLIST;
1888			pxd = xadlock->xdlist = &xtlck->pxdlock;
1889			for (i = 0; i < xadlock->count; i++) {
1890				PXDaddress(pxd,
1891					addressXAD(&p->xad[XTENTRYSTART + i]));
1892				PXDlength(pxd,
1893					lengthXAD(&p->xad[XTENTRYSTART + i]));
1894				pxd++;
1895			}
1896		} else {
1897			/*
1898			 * xdlist will point into the inode's xtree, ensure
1899			 * that transaction is not committed lazily.
1900			 */
1901			xadlock->flag = mlckFREEXADLIST;
1902			xadlock->xdlist = &p->xad[XTENTRYSTART];
1903			tblk->xflag &= ~COMMIT_LAZY;
1904		}
1905		jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2",
1906			 tlck->ip, mp, xadlock->count);
1907
1908		maplock->index = 1;
1909
1910		/* mark page as invalid */
1911		if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode))
1912		    && !(tlck->type & tlckBTROOT))
1913			tlck->flag |= tlckFREEPAGE;
1914		/*
1915		   else (tblk->xflag & COMMIT_PMAP)
1916		   ? release the page;
1917		 */
1918		return;
1919	}
1920
1921	/*
1922	 *	page/entry truncation: file truncation (ref. xtTruncate())
1923	 *
1924	 *	|----------+------+------+---------------|
1925	 *		   |      |      |
1926	 *		   |      |     hwm - hwm before truncation
1927	 *		   |     next - truncation point
1928	 *		  lwm - lwm before truncation
1929	 * header ?
1930	 */
1931	if (tlck->type & tlckTRUNCATE) {
1932		/* This odd declaration suppresses a bogus gcc warning */
1933		pxd_t pxd = pxd;	/* truncated extent of xad */
1934		int twm;
1935
1936		/*
1937		 * For truncation the entire linelock may be used, so it would
1938		 * be difficult to store xad list in linelock itself.
1939		 * Therefore, we'll just force transaction to be committed
1940		 * synchronously, so that xtree pages won't be changed before
1941		 * txUpdateMap runs.
1942		 */
1943		tblk->xflag &= ~COMMIT_LAZY;
1944		lwm = xtlck->lwm.offset;
1945		if (lwm == 0)
1946			lwm = XTPAGEMAXSLOT;
1947		hwm = xtlck->hwm.offset;
1948		twm = xtlck->twm.offset;
1949
1950		/*
1951		 *	write log records
1952		 */
1953		/* log after-image for logredo():
1954		 *
1955		 * logredo() will update bmap for alloc of new/extended
1956		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1957		 * after-image of XADlist;
1958		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1959		 * applying the after-image to the meta-data page.
1960		 */
1961		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1962		PXDaddress(page_pxd, mp->index);
1963		PXDlength(page_pxd,
1964			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1965		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1966
1967		/*
1968		 * truncate entry XAD[twm == next - 1]:
1969		 */
1970		if (twm == next - 1) {
1971			/* init LOG_UPDATEMAP for logredo() to update bmap for
1972			 * free of truncated delta extent of the truncated
1973			 * entry XAD[next - 1]:
1974			 * (xtlck->pxdlock = truncated delta extent);
1975			 */
1976			pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
1977			/* assert(pxdlock->type & tlckTRUNCATE); */
1978			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1979			lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
1980			lrd->log.updatemap.nxd = cpu_to_le16(1);
1981			lrd->log.updatemap.pxd = pxdlock->pxd;
1982			pxd = pxdlock->pxd;	/* save to format maplock */
1983			lrd->backchain =
1984			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1985		}
1986
1987		/*
1988		 * free entries XAD[next:hwm]:
1989		 */
1990		if (hwm >= next) {
1991			/* init LOG_UPDATEMAP of the freed extents
1992			 * XAD[next:hwm] from the deleted page itself
1993			 * for logredo() to update bmap;
1994			 */
1995			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1996			lrd->log.updatemap.type =
1997			    cpu_to_le16(LOG_FREEXADLIST);
1998			xtlck = (struct xtlock *) & tlck->lock;
1999			hwm = xtlck->hwm.offset;
2000			lrd->log.updatemap.nxd =
2001			    cpu_to_le16(hwm - next + 1);
2002			/* reformat linelock for lmLog() */
2003			xtlck->header.offset = next;
2004			xtlck->header.length = hwm - next + 1;
2005			xtlck->index = 1;
2006			lrd->backchain =
2007			    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
2008		}
2009
2010		/*
2011		 *	format maplock(s) for txUpdateMap() to update bmap
2012		 */
2013		maplock->index = 0;
2014
2015		/*
2016		 * allocate entries XAD[lwm:next):
2017		 */
2018		if (lwm < next) {
2019			/* format a maplock for txUpdateMap() to update bPMAP
2020			 * for alloc of new/extended extents of XAD[lwm:next)
2021			 * from the page itself;
2022			 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
2023			 */
2024			tlck->flag |= tlckUPDATEMAP;
2025			xadlock->flag = mlckALLOCXADLIST;
2026			xadlock->count = next - lwm;
2027			xadlock->xdlist = &p->xad[lwm];
2028
2029			jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d "
2030				 "lwm:%d next:%d",
2031				 tlck->ip, mp, xadlock->count, lwm, next);
2032			maplock->index++;
2033			xadlock++;
2034		}
2035
2036		/*
2037		 * truncate entry XAD[twm == next - 1]:
2038		 */
2039		if (twm == next - 1) {
2040			/* format a maplock for txUpdateMap() to update bmap
2041			 * to free truncated delta extent of the truncated
2042			 * entry XAD[next - 1];
2043			 * (xtlck->pxdlock = truncated delta extent);
2044			 */
2045			tlck->flag |= tlckUPDATEMAP;
2046			pxdlock = (struct pxd_lock *) xadlock;
2047			pxdlock->flag = mlckFREEPXD;
2048			pxdlock->count = 1;
2049			pxdlock->pxd = pxd;
2050
2051			jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d "
2052				 "hwm:%d", ip, mp, pxdlock->count, hwm);
2053			maplock->index++;
2054			xadlock++;
2055		}
2056
2057		/*
2058		 * free entries XAD[next:hwm]:
2059		 */
2060		if (hwm >= next) {
2061			/* format a maplock for txUpdateMap() to update bmap
2062			 * to free extents of XAD[next:hwm] from the deleted
2063			 * page itself;
2064			 */
2065			tlck->flag |= tlckUPDATEMAP;
2066			xadlock->flag = mlckFREEXADLIST;
2067			xadlock->count = hwm - next + 1;
2068			xadlock->xdlist = &p->xad[next];
2069
2070			jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d "
2071				 "next:%d hwm:%d",
2072				 tlck->ip, mp, xadlock->count, next, hwm);
2073			maplock->index++;
2074		}
2075
2076		/* mark page as homeward bound */
2077		tlck->flag |= tlckWRITEPAGE;
2078	}
2079	return;
2080}
2081
2082/*
2083 *	mapLog()
2084 *
2085 * function:	log from maplock of freed data extents;
2086 */
2087static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2088		   struct tlock * tlck)
2089{
2090	struct pxd_lock *pxdlock;
2091	int i, nlock;
2092	pxd_t *pxd;
2093
2094	/*
2095	 *	page relocation: free the source page extent
2096	 *
2097	 * a maplock for txUpdateMap() for free of the page
2098	 * has been formatted at txLock() time saving the src
2099	 * relocated page address;
2100	 */
2101	if (tlck->type & tlckRELOCATE) {
2102		/* log LOG_NOREDOPAGE of the old relocated page
2103		 * for logredo() to start NoRedoPage filter;
2104		 */
2105		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
2106		pxdlock = (struct pxd_lock *) & tlck->lock;
2107		pxd = &lrd->log.redopage.pxd;
2108		*pxd = pxdlock->pxd;
2109		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2110
2111		/* (N.B. currently, logredo() does NOT update bmap
2112		 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE);
2113		 * if page free from relocation, LOG_UPDATEMAP log is
2114		 * specifically generated now for logredo()
2115		 * to update bmap for free of src relocated page;
2116		 * (new flag LOG_RELOCATE may be introduced which will
2117		 * inform logredo() to start NORedoPage filter and also
2118		 * update block allocation map at the same time, thus
2119		 * avoiding an extra log write);
2120		 */
2121		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2122		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
2123		lrd->log.updatemap.nxd = cpu_to_le16(1);
2124		lrd->log.updatemap.pxd = pxdlock->pxd;
2125		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2126
2127		/* a maplock for txUpdateMap() for free of the page
2128		 * has been formatted at txLock() time;
2129		 */
2130		tlck->flag |= tlckUPDATEMAP;
2131		return;
2132	}
2133	/*
2134
2135	 * Otherwise it's not a relocate request
2136	 *
2137	 */
2138	else {
2139		/* log LOG_UPDATEMAP for logredo() to update bmap for
2140		 * free of truncated/relocated delta extent of the data;
2141		 * e.g.: external EA extent, relocated/truncated extent
2142		 * from xtTailgate();
2143		 */
2144		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2145		pxdlock = (struct pxd_lock *) & tlck->lock;
2146		nlock = pxdlock->index;
2147		for (i = 0; i < nlock; i++, pxdlock++) {
2148			if (pxdlock->flag & mlckALLOCPXD)
2149				lrd->log.updatemap.type =
2150				    cpu_to_le16(LOG_ALLOCPXD);
2151			else
2152				lrd->log.updatemap.type =
2153				    cpu_to_le16(LOG_FREEPXD);
2154			lrd->log.updatemap.nxd = cpu_to_le16(1);
2155			lrd->log.updatemap.pxd = pxdlock->pxd;
2156			lrd->backchain =
2157			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2158			jfs_info("mapLog: xaddr:0x%lx xlen:0x%x",
2159				 (ulong) addressPXD(&pxdlock->pxd),
2160				 lengthPXD(&pxdlock->pxd));
2161		}
2162
2163		/* update bmap */
2164		tlck->flag |= tlckUPDATEMAP;
2165	}
2166}
2167
2168/*
2169 *	txEA()
2170 *
2171 * function:	acquire maplock for EA/ACL extents or
2172 *		set COMMIT_INLINE flag;
2173 */
2174void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
2175{
2176	struct tlock *tlck = NULL;
2177	struct pxd_lock *maplock = NULL, *pxdlock = NULL;
2178
2179	/*
2180	 * format maplock for alloc of new EA extent
2181	 */
2182	if (newea) {
2183		/* Since the newea could be a completely zeroed entry we need to
2184		 * check for the two flags which indicate we should actually
2185		 * commit new EA data
2186		 */
2187		if (newea->flag & DXD_EXTENT) {
2188			tlck = txMaplock(tid, ip, tlckMAP);
2189			maplock = (struct pxd_lock *) & tlck->lock;
2190			pxdlock = (struct pxd_lock *) maplock;
2191			pxdlock->flag = mlckALLOCPXD;
2192			PXDaddress(&pxdlock->pxd, addressDXD(newea));
2193			PXDlength(&pxdlock->pxd, lengthDXD(newea));
2194			pxdlock++;
2195			maplock->index = 1;
2196		} else if (newea->flag & DXD_INLINE) {
2197			tlck = NULL;
2198
2199			set_cflag(COMMIT_Inlineea, ip);
2200		}
2201	}
2202
2203	/*
2204	 * format maplock for free of old EA extent
2205	 */
2206	if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) {
2207		if (tlck == NULL) {
2208			tlck = txMaplock(tid, ip, tlckMAP);
2209			maplock = (struct pxd_lock *) & tlck->lock;
2210			pxdlock = (struct pxd_lock *) maplock;
2211			maplock->index = 0;
2212		}
2213		pxdlock->flag = mlckFREEPXD;
2214		PXDaddress(&pxdlock->pxd, addressDXD(oldea));
2215		PXDlength(&pxdlock->pxd, lengthDXD(oldea));
2216		maplock->index++;
2217	}
2218}
2219
2220/*
2221 *	txForce()
2222 *
2223 * function: synchronously write pages locked by transaction
2224 *	     after txLog() but before txUpdateMap();
2225 */
2226static void txForce(struct tblock * tblk)
2227{
2228	struct tlock *tlck;
2229	lid_t lid, next;
2230	struct metapage *mp;
2231
2232	/*
2233	 * reverse the order of transaction tlocks in
2234	 * careful update order of address index pages
2235	 * (right to left, bottom up)
2236	 */
2237	tlck = lid_to_tlock(tblk->next);
2238	lid = tlck->next;
2239	tlck->next = 0;
2240	while (lid) {
2241		tlck = lid_to_tlock(lid);
2242		next = tlck->next;
2243		tlck->next = tblk->next;
2244		tblk->next = lid;
2245		lid = next;
2246	}
2247
2248	/*
2249	 * synchronously write the page, and
2250	 * hold the page for txUpdateMap();
2251	 */
2252	for (lid = tblk->next; lid; lid = next) {
2253		tlck = lid_to_tlock(lid);
2254		next = tlck->next;
2255
2256		if ((mp = tlck->mp) != NULL &&
2257		    (tlck->type & tlckBTROOT) == 0) {
2258			assert(mp->xflag & COMMIT_PAGE);
2259
2260			if (tlck->flag & tlckWRITEPAGE) {
2261				tlck->flag &= ~tlckWRITEPAGE;
2262
2263				/* do not release page to freelist */
2264				force_metapage(mp);
2265#if 0
2266				/*
2267				 * The "right" thing to do here is to
2268				 * synchronously write the metadata.
2269				 * With the current implementation this
2270				 * is hard since write_metapage requires
2271				 * us to kunmap & remap the page.  If we
2272				 * have tlocks pointing into the metadata
2273				 * pages, we don't want to do this.  I think
2274				 * we can get by with synchronously writing
2275				 * the pages when they are released.
2276				 */
2277				assert(mp->nohomeok);
2278				set_bit(META_dirty, &mp->flag);
2279				set_bit(META_sync, &mp->flag);
2280#endif
2281			}
2282		}
2283	}
2284}
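/*
 * Editorial sketch (not part of the original source): the loop at the
 * top of txForce() is an in-place reversal of the tblock's singly
 * linked tlock list; the same technique on an ordinary pointer-linked
 * list, for clarity:
 */
#if 0
struct example_node {
	struct example_node *next;
};

static struct example_node *example_reverse(struct example_node *head)
{
	struct example_node *prev = NULL;

	while (head) {
		struct example_node *next = head->next;

		head->next = prev;	/* point current node backwards */
		prev = head;		/* advance the reversed prefix */
		head = next;		/* move along the original list */
	}
	return prev;			/* old tail is the new head */
}
#endif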
2285
2286/*
2287 *	txUpdateMap()
2288 *
2289 * function:	update persistent allocation map (and working map
2290 *		if appropriate);
2291 *
2292 * parameter:
2293 */
2294static void txUpdateMap(struct tblock * tblk)
2295{
2296	struct inode *ip;
2297	struct inode *ipimap;
2298	lid_t lid;
2299	struct tlock *tlck;
2300	struct maplock *maplock;
2301	struct pxd_lock pxdlock;
2302	int maptype;
2303	int k, nlock;
2304	struct metapage *mp = NULL;
2305
2306	ipimap = JFS_SBI(tblk->sb)->ipimap;
2307
2308	maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP;
2309
2310
2311	/*
2312	 *	update block allocation map
2313	 *
2314	 * update allocation state in pmap (and wmap) and
2315	 * update lsn of the pmap page;
2316	 */
2317	/*
2318	 * scan each tlock/page of transaction for block allocation/free:
2319	 *
2320	 * for each tlock/page of transaction, update map.
2321	 *  ? are there tlock for pmap and pwmap at the same time ?
2322	 */
2323	for (lid = tblk->next; lid; lid = tlck->next) {
2324		tlck = lid_to_tlock(lid);
2325
2326		if ((tlck->flag & tlckUPDATEMAP) == 0)
2327			continue;
2328
2329		if (tlck->flag & tlckFREEPAGE) {
2330			/*
2331			 * Another thread may attempt to reuse freed space
2332			 * immediately, so we want to get rid of the metapage
2333			 * before anyone else has a chance to get it.
2334			 * Lock metapage, update maps, then invalidate
2335			 * the metapage.
2336			 */
2337			mp = tlck->mp;
2338			ASSERT(mp->xflag & COMMIT_PAGE);
2339			grab_metapage(mp);
2340		}
2341
2342		/*
2343		 * extent list:
2344		 * . in-line PXD list:
2345		 * . out-of-line XAD list:
2346		 */
2347		maplock = (struct maplock *) & tlck->lock;
2348		nlock = maplock->index;
2349
2350		for (k = 0; k < nlock; k++, maplock++) {
2351			/*
2352			 * allocate blocks in persistent map:
2353			 *
2354			 * blocks have been allocated from wmap at alloc time;
2355			 */
2356			if (maplock->flag & mlckALLOC) {
2357				txAllocPMap(ipimap, maplock, tblk);
2358			}
2359			/*
2360			 * free blocks in persistent and working map:
2361			 * blocks will be freed in pmap and then in wmap;
2362			 *
2363			 * ? tblock specifies the PMAP/PWMAP based upon
2364			 * transaction
2365			 *
2366			 * free blocks in persistent map:
2367			 * blocks will be freed from wmap at last reference
2368			 * release of the object for regular files;
2369			 *
2370			 * Always free blocks from both persistent & working
2371			 * maps for directories
2372			 */
2373			else {	/* (maplock->flag & mlckFREE) */
2374
2375				if (tlck->flag & tlckDIRECTORY)
2376					txFreeMap(ipimap, maplock,
2377						  tblk, COMMIT_PWMAP);
2378				else
2379					txFreeMap(ipimap, maplock,
2380						  tblk, maptype);
2381			}
2382		}
2383		if (tlck->flag & tlckFREEPAGE) {
2384			if (!(tblk->flag & tblkGC_LAZY)) {
2385				/* This is equivalent to txRelease */
2386				ASSERT(mp->lid == lid);
2387				tlck->mp->lid = 0;
2388			}
2389			assert(mp->nohomeok == 1);
2390			metapage_homeok(mp);
2391			discard_metapage(mp);
2392			tlck->mp = NULL;
2393		}
2394	}
2395	/*
2396	 *	update inode allocation map
2397	 *
2398	 * update allocation state in pmap and
2399	 * update lsn of the pmap page;
2400	 * update in-memory inode flag/state
2401	 *
2402	 * unlock mapper/write lock
2403	 */
2404	if (tblk->xflag & COMMIT_CREATE) {
2405		diUpdatePMap(ipimap, tblk->ino, false, tblk);
2406		/* update persistent block allocation map
2407		 * for the allocation of inode extent;
2408		 */
2409		pxdlock.flag = mlckALLOCPXD;
2410		pxdlock.pxd = tblk->u.ixpxd;
2411		pxdlock.index = 1;
2412		txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk);
2413	} else if (tblk->xflag & COMMIT_DELETE) {
2414		ip = tblk->u.ip;
2415		diUpdatePMap(ipimap, ip->i_ino, true, tblk);
2416		iput(ip);
2417	}
2418}
2419
2420/*
2421 *	txAllocPMap()
2422 *
2423 * function: allocate from persistent map;
2424 *
2425 * parameter:
2426 *	ipbmap	-
2427 *	malock	-
2428 *		xad list:
2429 *		pxd:
2430 *
2431 *	maptype -
2432 *		allocate from persistent map;
2433 *		free from persistent map;
2434 *		(e.g., tmp file - free from working map at release
2435 *		 of last reference);
2436 *		free from persistent and working map;
2437 *
2438 *	lsn	- log sequence number;
2439 */
2440static void txAllocPMap(struct inode *ip, struct maplock * maplock,
2441			struct tblock * tblk)
2442{
2443	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2444	struct xdlistlock *xadlistlock;
2445	xad_t *xad;
2446	s64 xaddr;
2447	int xlen;
2448	struct pxd_lock *pxdlock;
2449	struct xdlistlock *pxdlistlock;
2450	pxd_t *pxd;
2451	int n;
2452
2453	/*
2454	 * allocate from persistent map;
2455	 */
2456	if (maplock->flag & mlckALLOCXADLIST) {
2457		xadlistlock = (struct xdlistlock *) maplock;
2458		xad = xadlistlock->xdlist;
2459		for (n = 0; n < xadlistlock->count; n++, xad++) {
2460			if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
2461				xaddr = addressXAD(xad);
2462				xlen = lengthXAD(xad);
2463				dbUpdatePMap(ipbmap, false, xaddr,
2464					     (s64) xlen, tblk);
2465				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
2466				jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2467					 (ulong) xaddr, xlen);
2468			}
2469		}
2470	} else if (maplock->flag & mlckALLOCPXD) {
2471		pxdlock = (struct pxd_lock *) maplock;
2472		xaddr = addressPXD(&pxdlock->pxd);
2473		xlen = lengthPXD(&pxdlock->pxd);
2474		dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk);
2475		jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen);
2476	} else {		/* (maplock->flag & mlckALLOCPXDLIST) */
2477
2478		pxdlistlock = (struct xdlistlock *) maplock;
2479		pxd = pxdlistlock->xdlist;
2480		for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2481			xaddr = addressPXD(pxd);
2482			xlen = lengthPXD(pxd);
2483			dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen,
2484				     tblk);
2485			jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2486				 (ulong) xaddr, xlen);
2487		}
2488	}
2489}
2490
2491/*
2492 *	txFreeMap()
2493 *
2494 * function:	free from persistent and/or working map;
2495 *
2496 * todo: optimization
2497 */
2498void txFreeMap(struct inode *ip,
2499	       struct maplock * maplock, struct tblock * tblk, int maptype)
2500{
2501	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2502	struct xdlistlock *xadlistlock;
2503	xad_t *xad;
2504	s64 xaddr;
2505	int xlen;
2506	struct pxd_lock *pxdlock;
2507	struct xdlistlock *pxdlistlock;
2508	pxd_t *pxd;
2509	int n;
2510
2511	jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x",
2512		 tblk, maplock, maptype);
2513
2514	/*
2515	 * free from persistent map;
2516	 */
2517	if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) {
2518		if (maplock->flag & mlckFREEXADLIST) {
2519			xadlistlock = (struct xdlistlock *) maplock;
2520			xad = xadlistlock->xdlist;
2521			for (n = 0; n < xadlistlock->count; n++, xad++) {
2522				if (!(xad->flag & XAD_NEW)) {
2523					xaddr = addressXAD(xad);
2524					xlen = lengthXAD(xad);
2525					dbUpdatePMap(ipbmap, true, xaddr,
2526						     (s64) xlen, tblk);
2527					jfs_info("freePMap: xaddr:0x%lx "
2528						 "xlen:%d",
2529						 (ulong) xaddr, xlen);
2530				}
2531			}
2532		} else if (maplock->flag & mlckFREEPXD) {
2533			pxdlock = (struct pxd_lock *) maplock;
2534			xaddr = addressPXD(&pxdlock->pxd);
2535			xlen = lengthPXD(&pxdlock->pxd);
2536			dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen,
2537				     tblk);
2538			jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2539				 (ulong) xaddr, xlen);
2540		} else {	/* (maplock->flag & mlckALLOCPXDLIST) */
2541
2542			pxdlistlock = (struct xdlistlock *) maplock;
2543			pxd = pxdlistlock->xdlist;
2544			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2545				xaddr = addressPXD(pxd);
2546				xlen = lengthPXD(pxd);
2547				dbUpdatePMap(ipbmap, true, xaddr,
2548					     (s64) xlen, tblk);
2549				jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2550					 (ulong) xaddr, xlen);
2551			}
2552		}
2553	}
2554
2555	/*
2556	 * free from working map;
2557	 */
2558	if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) {
2559		if (maplock->flag & mlckFREEXADLIST) {
2560			xadlistlock = (struct xdlistlock *) maplock;
2561			xad = xadlistlock->xdlist;
2562			for (n = 0; n < xadlistlock->count; n++, xad++) {
2563				xaddr = addressXAD(xad);
2564				xlen = lengthXAD(xad);
2565				dbFree(ip, xaddr, (s64) xlen);
2566				xad->flag = 0;
2567				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2568					 (ulong) xaddr, xlen);
2569			}
2570		} else if (maplock->flag & mlckFREEPXD) {
2571			pxdlock = (struct pxd_lock *) maplock;
2572			xaddr = addressPXD(&pxdlock->pxd);
2573			xlen = lengthPXD(&pxdlock->pxd);
2574			dbFree(ip, xaddr, (s64) xlen);
2575			jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2576				 (ulong) xaddr, xlen);
2577		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
2578
2579			pxdlistlock = (struct xdlistlock *) maplock;
2580			pxd = pxdlistlock->xdlist;
2581			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2582				xaddr = addressPXD(pxd);
2583				xlen = lengthPXD(pxd);
2584				dbFree(ip, xaddr, (s64) xlen);
2585				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2586					 (ulong) xaddr, xlen);
2587			}
2588		}
2589	}
2590}
2591
2592/*
2593 *	txFreelock()
2594 *
2595 * function:	remove tlock from inode anonymous locklist
2596 */
2597void txFreelock(struct inode *ip)
2598{
2599	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2600	struct tlock *xtlck, *tlck;
2601	lid_t xlid = 0, lid;
2602
2603	if (!jfs_ip->atlhead)
2604		return;
2605
2606	TXN_LOCK();
2607	xtlck = (struct tlock *) &jfs_ip->atlhead;
2608
2609	while ((lid = xtlck->next) != 0) {
2610		tlck = lid_to_tlock(lid);
2611		if (tlck->flag & tlckFREELOCK) {
2612			xtlck->next = tlck->next;
2613			txLockFree(lid);
2614		} else {
2615			xtlck = tlck;
2616			xlid = lid;
2617		}
2618	}
2619
2620	if (jfs_ip->atlhead)
2621		jfs_ip->atltail = xlid;
2622	else {
2623		jfs_ip->atltail = 0;
2624		/*
2625		 * If inode was on anon_list, remove it
2626		 */
2627		list_del_init(&jfs_ip->anon_inode_list);
2628	}
2629	TXN_UNLOCK();
2630}
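/*
 * Editorial note (not part of the original source): txFreelock() casts
 * &jfs_ip->atlhead to a struct tlock pointer so the walk can unlink
 * freed entries through xtlck->next without special-casing the list
 * head; this appears to rely on the tlock's 'next' field lining up with
 * atlhead at that address.
 */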
2631
2632/*
2633 *	txAbort()
2634 *
2635 * function: abort tx before commit;
2636 *
2637 * frees line-locks and segment locks for all
2638 * segments in comdata structure.
2639 * Optionally sets state of file-system to FM_DIRTY in super-block.
2640 * log age of page-frames in memory for which the caller holds
2641 * tlocks is reset to 0 (to avoid logwrap).
2642 */
2643void txAbort(tid_t tid, int dirty)
2644{
2645	lid_t lid, next;
2646	struct metapage *mp;
2647	struct tblock *tblk = tid_to_tblock(tid);
2648	struct tlock *tlck;
2649
2650	/*
2651	 * free tlocks of the transaction
2652	 */
2653	for (lid = tblk->next; lid; lid = next) {
2654		tlck = lid_to_tlock(lid);
2655		next = tlck->next;
2656		mp = tlck->mp;
2657		JFS_IP(tlck->ip)->xtlid = 0;
2658
2659		if (mp) {
2660			mp->lid = 0;
2661
2662			/*
2663			 * reset lsn of page to avoid logwrap:
2664			 *
2665			 * (page may have been previously committed by another
2666			 * transaction(s) but has not been paged, i.e.,
2667			 * it may be on logsync list even though it has not
2668			 * been logged for the current tx.)
2669			 */
2670			if (mp->xflag & COMMIT_PAGE && mp->lsn)
2671				LogSyncRelease(mp);
2672		}
2673		/* insert tlock at head of freelist */
2674		TXN_LOCK();
2675		txLockFree(lid);
2676		TXN_UNLOCK();
2677	}
2678
2679	/* caller will free the transaction block */
2680
2681	tblk->next = tblk->last = 0;
2682
2683	/*
2684	 * mark filesystem dirty
2685	 */
2686	if (dirty)
2687		jfs_error(tblk->sb, "txAbort");
2688
2689	return;
2690}
2691
2692/*
2693 *	txLazyCommit(void)
2694 *
2695 *	All transactions except those changing ipimap (COMMIT_FORCE) are
2696 *	processed by this routine.  This ensures that the inode and block
2697 *	allocation maps are updated in order.  For synchronous transactions,
2698 *	let the user thread finish processing after txUpdateMap() is called.
2699 */
2700static void txLazyCommit(struct tblock * tblk)
2701{
2702	struct jfs_log *log;
2703
2704	while (((tblk->flag & tblkGC_READY) == 0) &&
2705	       ((tblk->flag & tblkGC_UNLOCKED) == 0)) {
2706		/* We must have gotten ahead of the user thread
2707		 */
2708		jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk);
2709		yield();
2710	}
2711
2712	jfs_info("txLazyCommit: processing tblk 0x%p", tblk);
2713
2714	txUpdateMap(tblk);
2715
2716	log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
2717
2718	spin_lock_irq(&log->gclock);	// LOGGC_LOCK
2719
2720	tblk->flag |= tblkGC_COMMITTED;
2721
2722	if (tblk->flag & tblkGC_READY)
2723		log->gcrtc--;
2724
2725	wake_up_all(&tblk->gcwait);	// LOGGC_WAKEUP
2726
2727	/*
2728	 * Can't release log->gclock until we've tested tblk->flag
2729	 */
2730	if (tblk->flag & tblkGC_LAZY) {
2731		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
2732		txUnlock(tblk);
2733		tblk->flag &= ~tblkGC_LAZY;
2734		txEnd(tblk - TxBlock);	/* Convert back to tid */
2735	} else
2736		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
2737
2738	jfs_info("txLazyCommit: done: tblk = 0x%p", tblk);
2739}
2740
2741/*
2742 *	jfs_lazycommit(void)
2743 *
2744 *	To be run as a kernel daemon.  If lbmIODone is called in an interrupt
2745 *	context, or where blocking is not wanted, this routine will process
2746 *	committed transactions from the unlock queue.
2747 */
2748int jfs_lazycommit(void *arg)
2749{
2750	int WorkDone;
2751	struct tblock *tblk;
2752	unsigned long flags;
2753	struct jfs_sb_info *sbi;
2754
 
2755	do {
2756		LAZY_LOCK(flags);
2757		jfs_commit_thread_waking = 0;	/* OK to wake another thread */
2758		while (!list_empty(&TxAnchor.unlock_queue)) {
2759			WorkDone = 0;
2760			list_for_each_entry(tblk, &TxAnchor.unlock_queue,
2761					    cqueue) {
2762
2763				sbi = JFS_SBI(tblk->sb);
2764				/*
2765				 * For each volume, the transactions must be
2766				 * handled in order.  If another commit thread
2767				 * is handling a tblk for this superblock,
2768				 * skip it
2769				 */
2770				if (sbi->commit_state & IN_LAZYCOMMIT)
2771					continue;
2772
2773				sbi->commit_state |= IN_LAZYCOMMIT;
2774				WorkDone = 1;
2775
2776				/*
2777				 * Remove transaction from queue
2778				 */
2779				list_del(&tblk->cqueue);
2780
2781				LAZY_UNLOCK(flags);
2782				txLazyCommit(tblk);
2783				LAZY_LOCK(flags);
2784
2785				sbi->commit_state &= ~IN_LAZYCOMMIT;
2786				/*
2787				 * Don't continue in the for loop.  (We can't
2788				 * anyway, it's unsafe!)  We want to go back to
2789				 * the beginning of the list.
2790				 */
2791				break;
2792			}
2793
2794			/* If there was nothing to do, don't continue */
2795			if (!WorkDone)
2796				break;
2797		}
2798		/* In case a wakeup came while all threads were active */
2799		jfs_commit_thread_waking = 0;
2800
2801		if (freezing(current)) {
2802			LAZY_UNLOCK(flags);
2803			try_to_freeze();
2804		} else {
2805			DECLARE_WAITQUEUE(wq, current);
2806
2807			add_wait_queue(&jfs_commit_thread_wait, &wq);
2808			set_current_state(TASK_INTERRUPTIBLE);
2809			LAZY_UNLOCK(flags);
2810			schedule();
2811			__set_current_state(TASK_RUNNING);
2812			remove_wait_queue(&jfs_commit_thread_wait, &wq);
2813		}
2814	} while (!kthread_should_stop());
2815
2816	if (!list_empty(&TxAnchor.unlock_queue))
2817		jfs_err("jfs_lazycommit being killed w/pending transactions!");
2818	else
2819		jfs_info("jfs_lazycommit being killed\n");
2820	return 0;
2821}
2822
2823void txLazyUnlock(struct tblock * tblk)
2824{
2825	unsigned long flags;
2826
2827	LAZY_LOCK(flags);
2828
2829	list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue);
2830	/*
2831	 * Don't wake up a commit thread if there is already one servicing
2832	 * this superblock, or if the last one we woke up hasn't started yet.
2833	 */
2834	if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) &&
2835	    !jfs_commit_thread_waking) {
2836		jfs_commit_thread_waking = 1;
2837		wake_up(&jfs_commit_thread_wait);
2838	}
2839	LAZY_UNLOCK(flags);
2840}
2841
2842static void LogSyncRelease(struct metapage * mp)
2843{
2844	struct jfs_log *log = mp->log;
2845
2846	assert(mp->nohomeok);
2847	assert(log);
2848	metapage_homeok(mp);
2849}
2850
2851/*
2852 *	txQuiesce
2853 *
2854 *	Block all new transactions and push anonymous transactions to
2855 *	completion
2856 *
2857 *	This does almost the same thing as jfs_sync below.  We don't
2858 *	worry about deadlocking when jfs_tlocks_low is set, since we would
2859 *	expect jfs_sync to get us out of that jam.
2860 */
2861void txQuiesce(struct super_block *sb)
2862{
2863	struct inode *ip;
2864	struct jfs_inode_info *jfs_ip;
2865	struct jfs_log *log = JFS_SBI(sb)->log;
2866	tid_t tid;
2867
2868	set_bit(log_QUIESCE, &log->flag);
2869
2870	TXN_LOCK();
2871restart:
2872	while (!list_empty(&TxAnchor.anon_list)) {
2873		jfs_ip = list_entry(TxAnchor.anon_list.next,
2874				    struct jfs_inode_info,
2875				    anon_inode_list);
2876		ip = &jfs_ip->vfs_inode;
2877
2878		/*
2879		 * inode will be removed from anonymous list
2880		 * when it is committed
2881		 */
2882		TXN_UNLOCK();
2883		tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
2884		mutex_lock(&jfs_ip->commit_mutex);
2885		txCommit(tid, 1, &ip, 0);
2886		txEnd(tid);
2887		mutex_unlock(&jfs_ip->commit_mutex);
2888		/*
2889		 * Yield periodically; this loop could otherwise run
2890		 * for a long time without blocking.
2891		 */
2892		cond_resched();
2893		TXN_LOCK();
2894	}
2895
2896	/*
2897	 * If jfs_sync is running in parallel, there could be some inodes
2898	 * on anon_list2.  Let's check.
2899	 */
2900	if (!list_empty(&TxAnchor.anon_list2)) {
2901		list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2902		INIT_LIST_HEAD(&TxAnchor.anon_list2);
2903		goto restart;
2904	}
2905	TXN_UNLOCK();
2906
2907	/*
2908	 * We may need to kick off the group commit
2909	 */
2910	jfs_flush_journal(log, 0);
2911}
2912
2913/*
2914 * txResume()
2915 *
2916 * Allows transactions to start again following txQuiesce
2917 */
2918void txResume(struct super_block *sb)
2919{
2920	struct jfs_log *log = JFS_SBI(sb)->log;
2921
2922	clear_bit(log_QUIESCE, &log->flag);
2923	TXN_WAKEUP(&log->syncwait);
2924}
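
/*
 * Minimal sketch of the intended pairing: quiesce the transaction manager,
 * do work that needs the volume stable, then resume.  The helper names and
 * the snapshot step below are placeholders, not JFS interfaces.
 */
#if 0
static int example_with_quiesced_volume(struct super_block *sb)
{
	int rc;

	txQuiesce(sb);			/* block new tids, flush anon txns */
	rc = example_snapshot_volume(sb);	/* placeholder for real work */
	txResume(sb);			/* allow transactions to start again */
	return rc;
}
#endif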
2925
2926/*
2927 *	jfs_sync(void)
2928 *
2929 *	To be run as a kernel daemon.  This is awakened when tlocks run low.
2930 *	We write any inodes that hold anonymous tlocks so that those tlocks
2931 *	become available.
2932 */
2933int jfs_sync(void *arg)
2934{
2935	struct inode *ip;
2936	struct jfs_inode_info *jfs_ip;
2937	tid_t tid;
2938
2939	do {
2940		/*
2941		 * write each inode on the anonymous inode list
2942		 */
2943		TXN_LOCK();
2944		while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) {
2945			jfs_ip = list_entry(TxAnchor.anon_list.next,
2946					    struct jfs_inode_info,
2947					    anon_inode_list);
2948			ip = &jfs_ip->vfs_inode;
2949
2950			if (!igrab(ip)) {
2951				/*
2952				 * Inode is being freed
2953				 */
2954				list_del_init(&jfs_ip->anon_inode_list);
2955			} else if (mutex_trylock(&jfs_ip->commit_mutex)) {
2956				/*
2957				 * inode will be removed from anonymous list
2958				 * when it is committed
2959				 */
2960				TXN_UNLOCK();
2961				tid = txBegin(ip->i_sb, COMMIT_INODE);
2962				txCommit(tid, 1, &ip, 0);
2963				txEnd(tid);
2964				mutex_unlock(&jfs_ip->commit_mutex);
2965
2966				iput(ip);
2967				/*
2968				 * Yield periodically; this loop could
2969				 * otherwise run for a long time without blocking.
2970				 */
2971				cond_resched();
2972				TXN_LOCK();
2973			} else {
2974				/* We can't get the commit mutex.  It may
2975				 * be held by a thread waiting for tlocks,
2976				 * so don't block here.  Park the inode on
2977				 * anon_list2 and put it back on anon_list later.
2978				 */
2979
2980				/* Take off anon_list */
2981				list_del(&jfs_ip->anon_inode_list);
2982
2983				/* Put on anon_list2 */
2984				list_add(&jfs_ip->anon_inode_list,
2985					 &TxAnchor.anon_list2);
2986
2987				TXN_UNLOCK();
2988				iput(ip);
2989				TXN_LOCK();
2990			}
2991		}
2992		/* Add anon_list2 back to anon_list */
2993		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2994
2995		if (freezing(current)) {
2996			TXN_UNLOCK();
2997			try_to_freeze();
2998		} else {
2999			set_current_state(TASK_INTERRUPTIBLE);
3000			TXN_UNLOCK();
3001			schedule();
3002			__set_current_state(TASK_RUNNING);
3003		}
3004	} while (!kthread_should_stop());
3005
3006	jfs_info("jfs_sync being killed");
3007	return 0;
3008}
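
/*
 * Sketch of how this daemon gets kicked: because jfs_sync() sleeps with
 * set_current_state(TASK_INTERRUPTIBLE) + schedule(), a plain
 * wake_up_process() on its task is enough to make it rescan anon_list.
 * The task pointer name below is an assumption for illustration only.
 */
#if 0
extern struct task_struct *example_sync_task;	/* saved from kthread_run() */

static void example_kick_jfs_sync(void)
{
	jfs_tlocks_low = 1;		/* signal that tlocks are scarce */
	wake_up_process(example_sync_task);
}
#endif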
3009
3010#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
3011static int jfs_txanchor_proc_show(struct seq_file *m, void *v)
3012{
3013	char *freewait;
3014	char *freelockwait;
3015	char *lowlockwait;
3016
3017	freewait =
3018	    waitqueue_active(&TxAnchor.freewait) ? "active" : "empty";
3019	freelockwait =
3020	    waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty";
3021	lowlockwait =
3022	    waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty";
3023
3024	seq_printf(m,
3025		       "JFS TxAnchor\n"
3026		       "============\n"
3027		       "freetid = %d\n"
3028		       "freewait = %s\n"
3029		       "freelock = %d\n"
3030		       "freelockwait = %s\n"
3031		       "lowlockwait = %s\n"
3032		       "tlocksInUse = %d\n"
3033		       "jfs_tlocks_low = %d\n"
3034		       "unlock_queue is %sempty\n",
3035		       TxAnchor.freetid,
3036		       freewait,
3037		       TxAnchor.freelock,
3038		       freelockwait,
3039		       lowlockwait,
3040		       TxAnchor.tlocksInUse,
3041		       jfs_tlocks_low,
3042		       list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
3043	return 0;
3044}
3045
3046static int jfs_txanchor_proc_open(struct inode *inode, struct file *file)
3047{
3048	return single_open(file, jfs_txanchor_proc_show, NULL);
3049}
3050
3051const struct file_operations jfs_txanchor_proc_fops = {
3052	.owner		= THIS_MODULE,
3053	.open		= jfs_txanchor_proc_open,
3054	.read		= seq_read,
3055	.llseek		= seq_lseek,
3056	.release	= single_release,
3057};
3058#endif
3059
3060#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
3061static int jfs_txstats_proc_show(struct seq_file *m, void *v)
3062{
3063	seq_printf(m,
3064		       "JFS TxStats\n"
3065		       "===========\n"
3066		       "calls to txBegin = %d\n"
3067		       "txBegin blocked by sync barrier = %d\n"
3068		       "txBegin blocked by tlocks low = %d\n"
3069		       "txBegin blocked by no free tid = %d\n"
3070		       "calls to txBeginAnon = %d\n"
3071		       "txBeginAnon blocked by sync barrier = %d\n"
3072		       "txBeginAnon blocked by tlocks low = %d\n"
3073		       "calls to txLockAlloc = %d\n"
3074		       "txLockAlloc blocked by no free lock = %d\n",
3075		       TxStat.txBegin,
3076		       TxStat.txBegin_barrier,
3077		       TxStat.txBegin_lockslow,
3078		       TxStat.txBegin_freetid,
3079		       TxStat.txBeginAnon,
3080		       TxStat.txBeginAnon_barrier,
3081		       TxStat.txBeginAnon_lockslow,
3082		       TxStat.txLockAlloc,
3083		       TxStat.txLockAlloc_freelock);
3084	return 0;
3085}
3086
3087static int jfs_txstats_proc_open(struct inode *inode, struct file *file)
3088{
3089	return single_open(file, jfs_txstats_proc_show, NULL);
3090}
3091
3092const struct file_operations jfs_txstats_proc_fops = {
3093	.owner		= THIS_MODULE,
3094	.open		= jfs_txstats_proc_open,
3095	.read		= seq_read,
3096	.llseek		= seq_lseek,
3097	.release	= single_release,
3098};
3099#endif
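
/*
 * Sketch of how these entries might be published under procfs.  It assumes
 * a kernel where proc_create() still takes a struct file_operations pointer,
 * as the fops definitions above imply; the directory and entry names follow
 * the usual /proc/fs/jfs layout but are shown here only for illustration.
 */
#if 0
static struct proc_dir_entry *example_jfs_proc_dir;

static void example_register_jfs_proc(void)
{
	example_jfs_proc_dir = proc_mkdir("fs/jfs", NULL);
	if (!example_jfs_proc_dir)
		return;
	proc_create("TxAnchor", 0, example_jfs_proc_dir,
		    &jfs_txanchor_proc_fops);
	proc_create("TxStats", 0, example_jfs_proc_dir,
		    &jfs_txstats_proc_fops);
}
#endif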