   1/*
   2 *   Copyright (C) International Business Machines Corp., 2000-2005
   3 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   4 *
   5 *   This program is free software;  you can redistribute it and/or modify
   6 *   it under the terms of the GNU General Public License as published by
   7 *   the Free Software Foundation; either version 2 of the License, or
   8 *   (at your option) any later version.
   9 *
  10 *   This program is distributed in the hope that it will be useful,
  11 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  13 *   the GNU General Public License for more details.
  14 *
  15 *   You should have received a copy of the GNU General Public License
  16 *   along with this program;  if not, write to the Free Software
  17 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18 */
  19
  20/*
  21 *	jfs_txnmgr.c: transaction manager
  22 *
  23 * notes:
  24 * transaction starts with txBegin() and ends with txCommit()
  25 * or txAbort().
  26 *
  27 * tlock is acquired at the time of update;
  28 * (obviate scan at commit time for xtree and dtree)
  29 * tlock and mp point to each other;
  30 * (no hashlist for mp -> tlock).
  31 *
  32 * special cases:
  33 * tlock on in-memory inode:
  34 * in-place tlock in the in-memory inode itself;
  35 * converted to page lock by iWrite() at commit time.
  36 *
  37 * tlock during write()/mmap() under anonymous transaction (tid = 0):
  38 * transferred (?) to transaction at commit time.
  39 *
  40 * use the page itself to update allocation maps
  41 * (obviate intermediate replication of allocation/deallocation data)
  42 * hold on to mp+lock thru update of maps
  43 */
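/*
 * Illustrative sketch of how callers elsewhere in JFS typically drive this
 * interface; "sb", "ip" and "mp" are placeholder names:
 *
 *	tid = txBegin(sb, 0);				- allocate a tid/tblock
 *	tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
 *	... modify the metapage covered by the tlock ...
 *	rc = txCommit(tid, 1, &ip, 0);			- write log records, commit
 *	txEnd(tid);					- return tblock to freelist
 */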
  44
  45#include <linux/fs.h>
  46#include <linux/vmalloc.h>
  47#include <linux/completion.h>
  48#include <linux/freezer.h>
  49#include <linux/module.h>
  50#include <linux/moduleparam.h>
  51#include <linux/kthread.h>
  52#include <linux/seq_file.h>
  53#include "jfs_incore.h"
  54#include "jfs_inode.h"
  55#include "jfs_filsys.h"
  56#include "jfs_metapage.h"
  57#include "jfs_dinode.h"
  58#include "jfs_imap.h"
  59#include "jfs_dmap.h"
  60#include "jfs_superblock.h"
  61#include "jfs_debug.h"
  62
  63/*
  64 *	transaction management structures
  65 */
  66static struct {
  67	int freetid;		/* index of a free tid structure */
  68	int freelock;		/* index first free lock word */
  69	wait_queue_head_t freewait;	/* eventlist of free tblock */
  70	wait_queue_head_t freelockwait;	/* eventlist of free tlock */
  71	wait_queue_head_t lowlockwait;	/* eventlist of ample tlocks */
  72	int tlocksInUse;	/* Number of tlocks in use */
  73	spinlock_t LazyLock;	/* synchronize sync_queue & unlock_queue */
  74/*	struct tblock *sync_queue; * Transactions waiting for data sync */
  75	struct list_head unlock_queue;	/* Txns waiting to be released */
  76	struct list_head anon_list;	/* inodes having anonymous txns */
  77	struct list_head anon_list2;	/* inodes having anonymous txns
  78					   that couldn't be sync'ed */
  79} TxAnchor;
  80
  81int jfs_tlocks_low;		/* Indicates low number of available tlocks */
  82
  83#ifdef CONFIG_JFS_STATISTICS
  84static struct {
  85	uint txBegin;
  86	uint txBegin_barrier;
  87	uint txBegin_lockslow;
  88	uint txBegin_freetid;
  89	uint txBeginAnon;
  90	uint txBeginAnon_barrier;
  91	uint txBeginAnon_lockslow;
  92	uint txLockAlloc;
  93	uint txLockAlloc_freelock;
  94} TxStat;
  95#endif
  96
  97static int nTxBlock = -1;	/* number of transaction blocks */
  98module_param(nTxBlock, int, 0);
  99MODULE_PARM_DESC(nTxBlock,
 100		 "Number of transaction blocks (max:65536)");
 101
 102static int nTxLock = -1;	/* number of transaction locks */
 103module_param(nTxLock, int, 0);
 104MODULE_PARM_DESC(nTxLock,
 105		 "Number of transaction locks (max:65536)");
 106
 107struct tblock *TxBlock;	/* transaction block table */
 108static int TxLockLWM;	/* Low water mark for number of txLocks used */
 109static int TxLockHWM;	/* High water mark for number of txLocks used */
 110static int TxLockVHWM;	/* Very High water mark */
 111struct tlock *TxLock;	/* transaction lock table */
 112
 113/*
 114 *	transaction management lock
 115 */
 116static DEFINE_SPINLOCK(jfsTxnLock);
 117
 118#define TXN_LOCK()		spin_lock(&jfsTxnLock)
 119#define TXN_UNLOCK()		spin_unlock(&jfsTxnLock)
 120
 121#define LAZY_LOCK_INIT()	spin_lock_init(&TxAnchor.LazyLock);
 122#define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
 123#define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)
 124
 125static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
 126static int jfs_commit_thread_waking;
 127
 128/*
 129 * Retry logic exists outside these macros to protect against spurious wakeups.
 130 */
 131static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
 132{
 133	DECLARE_WAITQUEUE(wait, current);
 134
 135	add_wait_queue(event, &wait);
 136	set_current_state(TASK_UNINTERRUPTIBLE);
 137	TXN_UNLOCK();
 138	io_schedule();
 139	remove_wait_queue(event, &wait);
 140}
 141
 142#define TXN_SLEEP(event)\
 143{\
 144	TXN_SLEEP_DROP_LOCK(event);\
 145	TXN_LOCK();\
 146}
 147
 148#define TXN_WAKEUP(event) wake_up_all(event)
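/*
 * Illustrative use: callers supply the retry loop themselves, as in
 * txLockAlloc() below:
 *
 *	while (!(lid = TxAnchor.freelock))
 *		TXN_SLEEP(&TxAnchor.freelockwait);
 */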
 149
 150/*
 151 *	statistics
 152 */
 153static struct {
 154	tid_t maxtid;		/* 4: biggest tid ever used */
 155	lid_t maxlid;		/* 4: biggest lid ever used */
 156	int ntid;		/* 4: # of transactions performed */
 157	int nlid;		/* 4: # of tlocks acquired */
 158	int waitlock;		/* 4: # of tlock wait */
 159} stattx;
 160
 161/*
 162 * forward references
 163 */
 164static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 165		struct tlock * tlck, struct commit * cd);
 166static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 167		struct tlock * tlck);
 168static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 169		struct tlock * tlck);
 170static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 171		struct tlock * tlck);
 172static void txAllocPMap(struct inode *ip, struct maplock * maplock,
 173		struct tblock * tblk);
 174static void txForce(struct tblock * tblk);
 175static int txLog(struct jfs_log * log, struct tblock * tblk,
 176		struct commit * cd);
 177static void txUpdateMap(struct tblock * tblk);
 178static void txRelease(struct tblock * tblk);
 179static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 180	   struct tlock * tlck);
 181static void LogSyncRelease(struct metapage * mp);
 182
 183/*
 184 *		transaction block/lock management
 185 *		---------------------------------
 186 */
 187
 188/*
 189 * Get a transaction lock from the free list.  If the number in use is
 190 * greater than the high water mark, wake up the sync daemon.  This should
 191 * free some anonymous transaction locks.  (TXN_LOCK must be held.)
 192 */
 193static lid_t txLockAlloc(void)
 194{
 195	lid_t lid;
 196
 197	INCREMENT(TxStat.txLockAlloc);
 198	if (!TxAnchor.freelock) {
 199		INCREMENT(TxStat.txLockAlloc_freelock);
 200	}
 201
 202	while (!(lid = TxAnchor.freelock))
 203		TXN_SLEEP(&TxAnchor.freelockwait);
 204	TxAnchor.freelock = TxLock[lid].next;
 205	HIGHWATERMARK(stattx.maxlid, lid);
 206	if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) {
 207		jfs_info("txLockAlloc tlocks low");
 208		jfs_tlocks_low = 1;
 209		wake_up_process(jfsSyncThread);
 210	}
 211
 212	return lid;
 213}
 214
 215static void txLockFree(lid_t lid)
 216{
 217	TxLock[lid].tid = 0;
 218	TxLock[lid].next = TxAnchor.freelock;
 219	TxAnchor.freelock = lid;
 220	TxAnchor.tlocksInUse--;
 221	if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) {
 222		jfs_info("txLockFree jfs_tlocks_low no more");
 223		jfs_tlocks_low = 0;
 224		TXN_WAKEUP(&TxAnchor.lowlockwait);
 225	}
 226	TXN_WAKEUP(&TxAnchor.freelockwait);
 227}
 228
 229/*
 230 * NAME:	txInit()
 231 *
 232 * FUNCTION:	initialize transaction management structures
 233 *
 234 * RETURN:
 235 *
 236 * serialization: single thread at jfs_init()
 237 */
 238int txInit(void)
 239{
 240	int k, size;
 241	struct sysinfo si;
 242
 243	/* Set defaults for nTxLock and nTxBlock if unset */
 244
 245	if (nTxLock == -1) {
 246		if (nTxBlock == -1) {
 247			/* Base default on memory size */
 248			si_meminfo(&si);
 249			if (si.totalram > (256 * 1024)) /* 1 GB */
 250				nTxLock = 64 * 1024;
 251			else
 252				nTxLock = si.totalram >> 2;
 253		} else if (nTxBlock > (8 * 1024))
 254			nTxLock = 64 * 1024;
 255		else
 256			nTxLock = nTxBlock << 3;
 257	}
 258	if (nTxBlock == -1)
 259		nTxBlock = nTxLock >> 3;
 260
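	/*
	 * Worked example (illustrative, assuming 4K pages): a 512 MB machine
	 * has si.totalram == 131072 pages, below the 256 * 1024 (1 GB)
	 * threshold, so nTxLock = 131072 >> 2 = 32768 and
	 * nTxBlock = 32768 >> 3 = 4096.
	 */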
 261	/* Verify tunable parameters */
 262	if (nTxBlock < 16)
 263		nTxBlock = 16;	/* No one should set it this low */
 264	if (nTxBlock > 65536)
 265		nTxBlock = 65536;
 266	if (nTxLock < 256)
 267		nTxLock = 256;	/* No one should set it this low */
 268	if (nTxLock > 65536)
 269		nTxLock = 65536;
 270
 271	printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n",
 272	       nTxBlock, nTxLock);
 273	/*
 274	 * initialize transaction block (tblock) table
 275	 *
 276	 * transaction id (tid) = tblock index
 277	 * tid = 0 is reserved.
 278	 */
 279	TxLockLWM = (nTxLock * 4) / 10;
 280	TxLockHWM = (nTxLock * 7) / 10;
 281	TxLockVHWM = (nTxLock * 8) / 10;
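	/*
	 * For example, at the 65536 maximum for nTxLock these come out to
	 * TxLockLWM = 26214, TxLockHWM = 45875 and TxLockVHWM = 52428.
	 */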
 282
 283	size = sizeof(struct tblock) * nTxBlock;
 284	TxBlock = vmalloc(size);
 285	if (TxBlock == NULL)
 286		return -ENOMEM;
 287
 288	for (k = 1; k < nTxBlock - 1; k++) {
 289		TxBlock[k].next = k + 1;
 290		init_waitqueue_head(&TxBlock[k].gcwait);
 291		init_waitqueue_head(&TxBlock[k].waitor);
 292	}
 293	TxBlock[k].next = 0;
 294	init_waitqueue_head(&TxBlock[k].gcwait);
 295	init_waitqueue_head(&TxBlock[k].waitor);
 296
 297	TxAnchor.freetid = 1;
 298	init_waitqueue_head(&TxAnchor.freewait);
 299
 300	stattx.maxtid = 1;	/* statistics */
 301
 302	/*
 303	 * initialize transaction lock (tlock) table
 304	 *
 305	 * transaction lock id = tlock index
 306	 * tlock id = 0 is reserved.
 307	 */
 308	size = sizeof(struct tlock) * nTxLock;
 309	TxLock = vmalloc(size);
 310	if (TxLock == NULL) {
 311		vfree(TxBlock);
 312		return -ENOMEM;
 313	}
 314
 315	/* initialize tlock table */
 316	for (k = 1; k < nTxLock - 1; k++)
 317		TxLock[k].next = k + 1;
 318	TxLock[k].next = 0;
 319	init_waitqueue_head(&TxAnchor.freelockwait);
 320	init_waitqueue_head(&TxAnchor.lowlockwait);
 321
 322	TxAnchor.freelock = 1;
 323	TxAnchor.tlocksInUse = 0;
 324	INIT_LIST_HEAD(&TxAnchor.anon_list);
 325	INIT_LIST_HEAD(&TxAnchor.anon_list2);
 326
 327	LAZY_LOCK_INIT();
 328	INIT_LIST_HEAD(&TxAnchor.unlock_queue);
 329
 330	stattx.maxlid = 1;	/* statistics */
 331
 332	return 0;
 333}
 334
 335/*
 336 * NAME:	txExit()
 337 *
 338 * FUNCTION:	clean up when module is unloaded
 339 */
 340void txExit(void)
 341{
 342	vfree(TxLock);
 343	TxLock = NULL;
 344	vfree(TxBlock);
 345	TxBlock = NULL;
 346}
 347
 348/*
 349 * NAME:	txBegin()
 350 *
 351 * FUNCTION:	start a transaction.
 352 *
 353 * PARAMETER:	sb	- superblock
 354 *		flag	- force for nested tx;
 355 *
 356 * RETURN:	tid	- transaction id
 357 *
 358 * note: the force flag allows a tx to be started for a nested tx,
 359 * to prevent deadlock on the logsync barrier;
 360 */
 361tid_t txBegin(struct super_block *sb, int flag)
 362{
 363	tid_t t;
 364	struct tblock *tblk;
 365	struct jfs_log *log;
 366
 367	jfs_info("txBegin: flag = 0x%x", flag);
 368	log = JFS_SBI(sb)->log;
 369
 
 
 
 
 
 370	TXN_LOCK();
 371
 372	INCREMENT(TxStat.txBegin);
 373
 374      retry:
 375	if (!(flag & COMMIT_FORCE)) {
 376		/*
 377		 * synchronize with logsync barrier
 378		 */
 379		if (test_bit(log_SYNCBARRIER, &log->flag) ||
 380		    test_bit(log_QUIESCE, &log->flag)) {
 381			INCREMENT(TxStat.txBegin_barrier);
 382			TXN_SLEEP(&log->syncwait);
 383			goto retry;
 384		}
 385	}
 386	if (flag == 0) {
 387		/*
 388		 * Don't begin transaction if we're getting starved for tlocks
 389		 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
 390		 * free tlocks)
 391		 */
 392		if (TxAnchor.tlocksInUse > TxLockVHWM) {
 393			INCREMENT(TxStat.txBegin_lockslow);
 394			TXN_SLEEP(&TxAnchor.lowlockwait);
 395			goto retry;
 396		}
 397	}
 398
 399	/*
 400	 * allocate transaction id/block
 401	 */
 402	if ((t = TxAnchor.freetid) == 0) {
 403		jfs_info("txBegin: waiting for free tid");
 404		INCREMENT(TxStat.txBegin_freetid);
 405		TXN_SLEEP(&TxAnchor.freewait);
 406		goto retry;
 407	}
 408
 409	tblk = tid_to_tblock(t);
 410
 411	if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) {
 412		/* Don't let a non-forced transaction take the last tblk */
 413		jfs_info("txBegin: waiting for free tid");
 414		INCREMENT(TxStat.txBegin_freetid);
 415		TXN_SLEEP(&TxAnchor.freewait);
 416		goto retry;
 417	}
 418
 419	TxAnchor.freetid = tblk->next;
 420
 421	/*
 422	 * initialize transaction
 423	 */
 424
 425	/*
 426	 * We can't zero the whole thing or we screw up another thread being
 427	 * awakened after sleeping on tblk->waitor
 428	 *
 429	 * memset(tblk, 0, sizeof(struct tblock));
 430	 */
 431	tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0;
 432
 433	tblk->sb = sb;
 434	++log->logtid;
 435	tblk->logtid = log->logtid;
 436
 437	++log->active;
 438
 439	HIGHWATERMARK(stattx.maxtid, t);	/* statistics */
 440	INCREMENT(stattx.ntid);	/* statistics */
 441
 442	TXN_UNLOCK();
 443
 444	jfs_info("txBegin: returning tid = %d", t);
 445
 446	return t;
 447}
 448
 449/*
 450 * NAME:	txBeginAnon()
 451 *
 452 * FUNCTION:	start an anonymous transaction.
 453 *		Blocks if logsync or available tlocks are low to prevent
 454 *		anonymous tlocks from depleting supply.
 455 *
 456 * PARAMETER:	sb	- superblock
 457 *
 458 * RETURN:	none
 459 */
 460void txBeginAnon(struct super_block *sb)
 461{
 462	struct jfs_log *log;
 463
 464	log = JFS_SBI(sb)->log;
 465
 466	TXN_LOCK();
 467	INCREMENT(TxStat.txBeginAnon);
 468
 469      retry:
 470	/*
 471	 * synchronize with logsync barrier
 472	 */
 473	if (test_bit(log_SYNCBARRIER, &log->flag) ||
 474	    test_bit(log_QUIESCE, &log->flag)) {
 475		INCREMENT(TxStat.txBeginAnon_barrier);
 476		TXN_SLEEP(&log->syncwait);
 477		goto retry;
 478	}
 479
 480	/*
 481	 * Don't begin transaction if we're getting starved for tlocks
 482	 */
 483	if (TxAnchor.tlocksInUse > TxLockVHWM) {
 484		INCREMENT(TxStat.txBeginAnon_lockslow);
 485		TXN_SLEEP(&TxAnchor.lowlockwait);
 486		goto retry;
 487	}
 488	TXN_UNLOCK();
 489}
 490
 491/*
 492 *	txEnd()
 493 *
 494 * function: free specified transaction block.
 495 *
 496 *	logsync barrier processing:
 497 *
 498 * serialization:
 499 */
 500void txEnd(tid_t tid)
 501{
 502	struct tblock *tblk = tid_to_tblock(tid);
 503	struct jfs_log *log;
 504
 505	jfs_info("txEnd: tid = %d", tid);
 506	TXN_LOCK();
 507
 508	/*
 509	 * wakeup transactions waiting on the page locked
 510	 * by the current transaction
 511	 */
 512	TXN_WAKEUP(&tblk->waitor);
 513
 514	log = JFS_SBI(tblk->sb)->log;
 515
 516	/*
 517	 * Lazy commit thread can't free this guy until we mark it UNLOCKED,
 518	 * otherwise, we would be left with a transaction that may have been
 519	 * reused.
 520	 *
 521	 * Lazy commit thread will turn off tblkGC_LAZY before calling this
 522	 * routine.
 523	 */
 524	if (tblk->flag & tblkGC_LAZY) {
 525		jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk);
 526		TXN_UNLOCK();
 527
 528		spin_lock_irq(&log->gclock);	// LOGGC_LOCK
 529		tblk->flag |= tblkGC_UNLOCKED;
 530		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
 531		return;
 532	}
 533
 534	jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk);
 535
 536	assert(tblk->next == 0);
 537
 538	/*
 539	 * insert tblock back on freelist
 540	 */
 541	tblk->next = TxAnchor.freetid;
 542	TxAnchor.freetid = tid;
 543
 544	/*
 545	 * mark the tblock not active
 546	 */
 547	if (--log->active == 0) {
 548		clear_bit(log_FLUSH, &log->flag);
 549
 550		/*
 551		 * synchronize with logsync barrier
 552		 */
 553		if (test_bit(log_SYNCBARRIER, &log->flag)) {
 554			TXN_UNLOCK();
 555
 556			/* write dirty metadata & forward log syncpt */
 557			jfs_syncpt(log, 1);
 558
 559			jfs_info("log barrier off: 0x%x", log->lsn);
 560
 561			/* enable new transactions start */
 562			clear_bit(log_SYNCBARRIER, &log->flag);
 563
 564			/* wakeup all waitors for logsync barrier */
 565			TXN_WAKEUP(&log->syncwait);
 566
 567			goto wakeup;
 568		}
 569	}
 570
 571	TXN_UNLOCK();
 572wakeup:
 573	/*
 574	 * wakeup all waitors for a free tblock
 575	 */
 576	TXN_WAKEUP(&TxAnchor.freewait);
 577}
 578
 579/*
 580 *	txLock()
 581 *
 582 * function: acquire a transaction lock on the specified <mp>
 583 *
 584 * parameter:
 585 *
 586 * return:	transaction lock id
 587 *
 588 * serialization:
 589 */
 590struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 591		     int type)
 592{
 593	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 594	int dir_xtree = 0;
 595	lid_t lid;
 596	tid_t xtid;
 597	struct tlock *tlck;
 598	struct xtlock *xtlck;
 599	struct linelock *linelock;
 600	xtpage_t *p;
 601	struct tblock *tblk;
 602
 603	TXN_LOCK();
 604
 605	if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) &&
 606	    !(mp->xflag & COMMIT_PAGE)) {
 607		/*
 608		 * Directory inode is special.  It can have both an xtree tlock
 609		 * and a dtree tlock associated with it.
 610		 */
 611		dir_xtree = 1;
 612		lid = jfs_ip->xtlid;
 613	} else
 614		lid = mp->lid;
 615
 616	/* is page not locked by a transaction ? */
 617	if (lid == 0)
 618		goto allocateLock;
 619
 620	jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid);
 621
 622	/* is page locked by the requester transaction ? */
 623	tlck = lid_to_tlock(lid);
 624	if ((xtid = tlck->tid) == tid) {
 625		TXN_UNLOCK();
 626		goto grantLock;
 627	}
 628
 629	/*
 630	 * is page locked by anonymous transaction/lock ?
 631	 *
 632	 * (page update without transaction (i.e., file write) is
 633	 * locked under anonymous transaction tid = 0:
 634	 * anonymous tlocks maintained on anonymous tlock list of
 635	 * the inode of the page and available to all anonymous
 636	 * transactions until txCommit() time at which point
 637	 * they are transferred to the transaction tlock list of
 638	 * the committing transaction of the inode)
 639	 */
 640	if (xtid == 0) {
 641		tlck->tid = tid;
 642		TXN_UNLOCK();
 643		tblk = tid_to_tblock(tid);
 644		/*
 645		 * The order of the tlocks in the transaction is important
 646		 * (during truncate, child xtree pages must be freed before
 647		 * parent's tlocks change the working map).
 648		 * Take tlock off anonymous list and add to tail of
 649		 * transaction list
 650		 *
 651		 * Note:  We really need to get rid of the tid & lid and
 652		 * use list_head's.  This code is getting UGLY!
 653		 */
 654		if (jfs_ip->atlhead == lid) {
 655			if (jfs_ip->atltail == lid) {
 656				/* only anonymous txn.
 657				 * Remove from anon_list
 658				 */
 659				TXN_LOCK();
 660				list_del_init(&jfs_ip->anon_inode_list);
 661				TXN_UNLOCK();
 662			}
 663			jfs_ip->atlhead = tlck->next;
 664		} else {
 665			lid_t last;
 666			for (last = jfs_ip->atlhead;
 667			     lid_to_tlock(last)->next != lid;
 668			     last = lid_to_tlock(last)->next) {
 669				assert(last);
 670			}
 671			lid_to_tlock(last)->next = tlck->next;
 672			if (jfs_ip->atltail == lid)
 673				jfs_ip->atltail = last;
 674		}
 675
 676		/* insert the tlock at tail of transaction tlock list */
 677
 678		if (tblk->next)
 679			lid_to_tlock(tblk->last)->next = lid;
 680		else
 681			tblk->next = lid;
 682		tlck->next = 0;
 683		tblk->last = lid;
 684
 685		goto grantLock;
 686	}
 687
 688	goto waitLock;
 689
 690	/*
 691	 * allocate a tlock
 692	 */
 693      allocateLock:
 694	lid = txLockAlloc();
 695	tlck = lid_to_tlock(lid);
 696
 697	/*
 698	 * initialize tlock
 699	 */
 700	tlck->tid = tid;
 701
 702	TXN_UNLOCK();
 703
 704	/* mark tlock for meta-data page */
 705	if (mp->xflag & COMMIT_PAGE) {
 706
 707		tlck->flag = tlckPAGELOCK;
 708
 709		/* mark the page dirty and nohomeok */
 710		metapage_nohomeok(mp);
 711
 712		jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
 713			 mp, mp->nohomeok, tid, tlck);
 714
 715		/* if anonymous transaction, and buffer is on the group
 716		 * commit synclist, mark inode to show this.  This will
 717		 * prevent the buffer from being marked nohomeok for too
 718		 * long a time.
 719		 */
 720		if ((tid == 0) && mp->lsn)
 721			set_cflag(COMMIT_Synclist, ip);
 722	}
 723	/* mark tlock for in-memory inode */
 724	else
 725		tlck->flag = tlckINODELOCK;
 726
 727	if (S_ISDIR(ip->i_mode))
 728		tlck->flag |= tlckDIRECTORY;
 729
 730	tlck->type = 0;
 731
 732	/* bind the tlock and the page */
 733	tlck->ip = ip;
 734	tlck->mp = mp;
 735	if (dir_xtree)
 736		jfs_ip->xtlid = lid;
 737	else
 738		mp->lid = lid;
 739
 740	/*
 741	 * enqueue transaction lock to transaction/inode
 742	 */
 743	/* insert the tlock at tail of transaction tlock list */
 744	if (tid) {
 745		tblk = tid_to_tblock(tid);
 746		if (tblk->next)
 747			lid_to_tlock(tblk->last)->next = lid;
 748		else
 749			tblk->next = lid;
 750		tlck->next = 0;
 751		tblk->last = lid;
 752	}
 753	/* anonymous transaction:
 754	 * insert the tlock at head of inode anonymous tlock list
 755	 */
 756	else {
 757		tlck->next = jfs_ip->atlhead;
 758		jfs_ip->atlhead = lid;
 759		if (tlck->next == 0) {
 760			/* This inode's first anonymous transaction */
 761			jfs_ip->atltail = lid;
 762			TXN_LOCK();
 763			list_add_tail(&jfs_ip->anon_inode_list,
 764				      &TxAnchor.anon_list);
 765			TXN_UNLOCK();
 766		}
 767	}
 768
 769	/* initialize type dependent area for linelock */
 770	linelock = (struct linelock *) & tlck->lock;
 771	linelock->next = 0;
 772	linelock->flag = tlckLINELOCK;
 773	linelock->maxcnt = TLOCKSHORT;
 774	linelock->index = 0;
 775
 776	switch (type & tlckTYPE) {
 777	case tlckDTREE:
 778		linelock->l2linesize = L2DTSLOTSIZE;
 779		break;
 780
 781	case tlckXTREE:
 782		linelock->l2linesize = L2XTSLOTSIZE;
 783
 784		xtlck = (struct xtlock *) linelock;
 785		xtlck->header.offset = 0;
 786		xtlck->header.length = 2;
 787
 788		if (type & tlckNEW) {
 789			xtlck->lwm.offset = XTENTRYSTART;
 790		} else {
 791			if (mp->xflag & COMMIT_PAGE)
 792				p = (xtpage_t *) mp->data;
 793			else
 794				p = &jfs_ip->i_xtroot;
 795			xtlck->lwm.offset =
 796			    le16_to_cpu(p->header.nextindex);
 797		}
 798		xtlck->lwm.length = 0;	/* ! */
 799		xtlck->twm.offset = 0;
 800		xtlck->hwm.offset = 0;
 801
 802		xtlck->index = 2;
 803		break;
 804
 805	case tlckINODE:
 806		linelock->l2linesize = L2INODESLOTSIZE;
 807		break;
 808
 809	case tlckDATA:
 810		linelock->l2linesize = L2DATASLOTSIZE;
 811		break;
 812
 813	default:
 814		jfs_err("UFO tlock:0x%p", tlck);
 815	}
 816
 817	/*
 818	 * update tlock vector
 819	 */
 820      grantLock:
 821	tlck->type |= type;
 822
 823	return tlck;
 824
 825	/*
 826	 * page is being locked by another transaction:
 827	 */
 828      waitLock:
 829	/* Only locks on ipimap or ipaimap should reach here */
 830	/* assert(jfs_ip->fileset == AGGREGATE_I); */
 831	if (jfs_ip->fileset != AGGREGATE_I) {
 832		printk(KERN_ERR "txLock: trying to lock locked page!");
 833		print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
 834			       ip, sizeof(*ip), 0);
 835		print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
 836			       mp, sizeof(*mp), 0);
 837		print_hex_dump(KERN_ERR, "Locker's tblock: ",
 838			       DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
 839			       sizeof(struct tblock), 0);
 840		print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
 841			       tlck, sizeof(*tlck), 0);
 842		BUG();
 843	}
 844	INCREMENT(stattx.waitlock);	/* statistics */
 845	TXN_UNLOCK();
 846	release_metapage(mp);
 847	TXN_LOCK();
 848	xtid = tlck->tid;	/* reacquire after dropping TXN_LOCK */
 849
 850	jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
 851		 tid, xtid, lid);
 852
 853	/* Recheck everything since dropping TXN_LOCK */
 854	if (xtid && (tlck->mp == mp) && (mp->lid == lid))
 855		TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
 856	else
 857		TXN_UNLOCK();
 858	jfs_info("txLock: awakened     tid = %d, lid = %d", tid, lid);
 859
 860	return NULL;
 861}
 862
 863/*
 864 * NAME:	txRelease()
 865 *
 866 * FUNCTION:	Release buffers associated with transaction locks, but don't
 867 *		mark homeok yet.  This allows other transactions to modify
 868 *		buffers, but won't let them go to disk until the commit record
 869 *		actually gets written.
 870 *
 871 * PARAMETER:
 872 *		tblk	-
 873 *
 874 * RETURN:	Errors from subroutines.
 875 */
 876static void txRelease(struct tblock * tblk)
 877{
 878	struct metapage *mp;
 879	lid_t lid;
 880	struct tlock *tlck;
 881
 882	TXN_LOCK();
 883
 884	for (lid = tblk->next; lid; lid = tlck->next) {
 885		tlck = lid_to_tlock(lid);
 886		if ((mp = tlck->mp) != NULL &&
 887		    (tlck->type & tlckBTROOT) == 0) {
 888			assert(mp->xflag & COMMIT_PAGE);
 889			mp->lid = 0;
 890		}
 891	}
 892
 893	/*
 894	 * wakeup transactions waiting on a page locked
 895	 * by the current transaction
 896	 */
 897	TXN_WAKEUP(&tblk->waitor);
 898
 899	TXN_UNLOCK();
 900}
 901
 902/*
 903 * NAME:	txUnlock()
 904 *
 905 * FUNCTION:	Initiates pageout of pages modified by tid in journalled
 906 *		objects and frees their lockwords.
 907 */
 908static void txUnlock(struct tblock * tblk)
 909{
 910	struct tlock *tlck;
 911	struct linelock *linelock;
 912	lid_t lid, next, llid, k;
 913	struct metapage *mp;
 914	struct jfs_log *log;
 915	int difft, diffp;
 916	unsigned long flags;
 917
 918	jfs_info("txUnlock: tblk = 0x%p", tblk);
 919	log = JFS_SBI(tblk->sb)->log;
 920
 921	/*
 922	 * mark page under tlock homeok (its log has been written):
 923	 */
 924	for (lid = tblk->next; lid; lid = next) {
 925		tlck = lid_to_tlock(lid);
 926		next = tlck->next;
 927
 928		jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck);
 929
 930		/* unbind page from tlock */
 931		if ((mp = tlck->mp) != NULL &&
 932		    (tlck->type & tlckBTROOT) == 0) {
 933			assert(mp->xflag & COMMIT_PAGE);
 934
 935			/* hold buffer
 936			 */
 937			hold_metapage(mp);
 938
 939			assert(mp->nohomeok > 0);
 940			_metapage_homeok(mp);
 941
 942			/* inherit younger/larger clsn */
 943			LOGSYNC_LOCK(log, flags);
 944			if (mp->clsn) {
 945				logdiff(difft, tblk->clsn, log);
 946				logdiff(diffp, mp->clsn, log);
 947				if (difft > diffp)
 948					mp->clsn = tblk->clsn;
 949			} else
 950				mp->clsn = tblk->clsn;
 951			LOGSYNC_UNLOCK(log, flags);
 952
 953			assert(!(tlck->flag & tlckFREEPAGE));
 954
 955			put_metapage(mp);
 956		}
 957
 958		/* insert tlock, and linelock(s) of the tlock if any,
 959		 * at head of freelist
 960		 */
 961		TXN_LOCK();
 962
 963		llid = ((struct linelock *) & tlck->lock)->next;
 964		while (llid) {
 965			linelock = (struct linelock *) lid_to_tlock(llid);
 966			k = linelock->next;
 967			txLockFree(llid);
 968			llid = k;
 969		}
 970		txLockFree(lid);
 971
 972		TXN_UNLOCK();
 973	}
 974	tblk->next = tblk->last = 0;
 975
 976	/*
 977	 * remove tblock from logsynclist
 978	 * (allocation map pages inherited the lsn of tblk and
 979	 * have been inserted into the logsync list at txUpdateMap())
 980	 */
 981	if (tblk->lsn) {
 982		LOGSYNC_LOCK(log, flags);
 983		log->count--;
 984		list_del(&tblk->synclist);
 985		LOGSYNC_UNLOCK(log, flags);
 986	}
 987}
 988
 989/*
 990 *	txMaplock()
 991 *
 992 * function: allocate a transaction lock for freed page/entry;
 993 *	for freed page, maplock is used as xtlock/dtlock type;
 994 */
 995struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 996{
 997	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 998	lid_t lid;
 999	struct tblock *tblk;
1000	struct tlock *tlck;
1001	struct maplock *maplock;
1002
1003	TXN_LOCK();
1004
1005	/*
1006	 * allocate a tlock
1007	 */
1008	lid = txLockAlloc();
1009	tlck = lid_to_tlock(lid);
1010
1011	/*
1012	 * initialize tlock
1013	 */
1014	tlck->tid = tid;
1015
1016	/* bind the tlock and the object */
1017	tlck->flag = tlckINODELOCK;
1018	if (S_ISDIR(ip->i_mode))
1019		tlck->flag |= tlckDIRECTORY;
1020	tlck->ip = ip;
1021	tlck->mp = NULL;
1022
1023	tlck->type = type;
1024
1025	/*
1026	 * enqueue transaction lock to transaction/inode
1027	 */
1028	/* insert the tlock at tail of transaction tlock list */
1029	if (tid) {
1030		tblk = tid_to_tblock(tid);
1031		if (tblk->next)
1032			lid_to_tlock(tblk->last)->next = lid;
1033		else
1034			tblk->next = lid;
1035		tlck->next = 0;
1036		tblk->last = lid;
1037	}
1038	/* anonymous transaction:
1039	 * insert the tlock at head of inode anonymous tlock list
1040	 */
1041	else {
1042		tlck->next = jfs_ip->atlhead;
1043		jfs_ip->atlhead = lid;
1044		if (tlck->next == 0) {
1045			/* This inode's first anonymous transaction */
1046			jfs_ip->atltail = lid;
1047			list_add_tail(&jfs_ip->anon_inode_list,
1048				      &TxAnchor.anon_list);
1049		}
1050	}
1051
1052	TXN_UNLOCK();
1053
1054	/* initialize type dependent area for maplock */
1055	maplock = (struct maplock *) & tlck->lock;
1056	maplock->next = 0;
1057	maplock->maxcnt = 0;
1058	maplock->index = 0;
1059
1060	return tlck;
1061}
1062
1063/*
1064 *	txLinelock()
1065 *
1066 * function: allocate a transaction lock for log vector list
1067 */
1068struct linelock *txLinelock(struct linelock * tlock)
1069{
1070	lid_t lid;
1071	struct tlock *tlck;
1072	struct linelock *linelock;
1073
1074	TXN_LOCK();
1075
1076	/* allocate a TxLock structure */
1077	lid = txLockAlloc();
1078	tlck = lid_to_tlock(lid);
1079
1080	TXN_UNLOCK();
1081
1082	/* initialize linelock */
1083	linelock = (struct linelock *) tlck;
1084	linelock->next = 0;
1085	linelock->flag = tlckLINELOCK;
1086	linelock->maxcnt = TLOCKLONG;
1087	linelock->index = 0;
1088	if (tlck->flag & tlckDIRECTORY)
1089		linelock->flag |= tlckDIRECTORY;
1090
1091	/* append linelock after tlock */
1092	linelock->next = tlock->next;
1093	tlock->next = lid;
1094
1095	return linelock;
1096}
1097
1098/*
1099 *		transaction commit management
1100 *		-----------------------------
1101 */
1102
1103/*
1104 * NAME:	txCommit()
1105 *
1106 * FUNCTION:	commit the changes to the objects specified in
1107 *		clist.  For journalled segments only the
1108 *		changes of the caller are committed, ie by tid.
1109 *		for non-journalled segments the data are flushed to
1110 *		disk and then the change to the disk inode and indirect
1111 *		blocks committed (so blocks newly allocated to the
1112 *		segment will be made a part of the segment atomically).
1113 *
1114 *		all of the segments specified in clist must be in
1115 *		one file system. no more than 6 segments are needed
1116 *		to handle all unix svcs.
1117 *
1118 *		if the i_nlink field (i.e. disk inode link count)
1119 *		is zero, and the type of inode is a regular file or
1120 *		directory, or symbolic link, the inode is truncated
1121 *		to zero length. the truncation is committed but the
1122 *		VM resources are unaffected until it is closed (see
1123 *		iput and iclose).
1124 *
1125 * PARAMETER:
1126 *
1127 * RETURN:
1128 *
1129 * serialization:
1130 *		on entry the inode lock on each segment is assumed
1131 *		to be held.
1132 *
1133 * i/o error:
1134 */
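/*
 * Illustrative note: when the caller passes neither COMMIT_FORCE nor
 * COMMIT_SYNC in flag, the transaction is marked COMMIT_LAZY below and the
 * final unlock/cleanup is deferred to the lazy (group) commit machinery.
 */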
1135int txCommit(tid_t tid,		/* transaction identifier */
1136	     int nip,		/* number of inodes to commit */
1137	     struct inode **iplist,	/* list of inode to commit */
1138	     int flag)
1139{
1140	int rc = 0;
1141	struct commit cd;
1142	struct jfs_log *log;
1143	struct tblock *tblk;
1144	struct lrd *lrd;
1145	struct inode *ip;
1146	struct jfs_inode_info *jfs_ip;
1147	int k, n;
1148	ino_t top;
1149	struct super_block *sb;
1150
1151	jfs_info("txCommit, tid = %d, flag = %d", tid, flag);
1152	/* is read-only file system ? */
1153	if (isReadOnly(iplist[0])) {
1154		rc = -EROFS;
1155		goto TheEnd;
1156	}
1157
1158	sb = cd.sb = iplist[0]->i_sb;
1159	cd.tid = tid;
1160
1161	if (tid == 0)
1162		tid = txBegin(sb, 0);
1163	tblk = tid_to_tblock(tid);
1164
1165	/*
1166	 * initialize commit structure
1167	 */
1168	log = JFS_SBI(sb)->log;
1169	cd.log = log;
1170
1171	/* initialize log record descriptor in commit */
1172	lrd = &cd.lrd;
1173	lrd->logtid = cpu_to_le32(tblk->logtid);
1174	lrd->backchain = 0;
1175
1176	tblk->xflag |= flag;
1177
1178	if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
1179		tblk->xflag |= COMMIT_LAZY;
1180	/*
1181	 *	prepare non-journaled objects for commit
1182	 *
1183	 * flush data pages of non-journaled file
1184	 * to prevent the file getting non-initialized disk blocks
1185	 * in case of crash.
1186	 * (new blocks - )
1187	 */
1188	cd.iplist = iplist;
1189	cd.nip = nip;
1190
1191	/*
1192	 *	acquire transaction lock on (on-disk) inodes
1193	 *
1194	 * update on-disk inode from in-memory inode
1195	 * acquiring transaction locks for AFTER records
1196	 * on the on-disk inode of file object
1197	 *
1198	 * sort the inodes array by inode number in descending order
1199	 * to prevent deadlock when acquiring transaction lock
1200	 * of on-disk inodes on multiple on-disk inode pages by
1201	 * multiple concurrent transactions
1202	 */
1203	for (k = 0; k < cd.nip; k++) {
1204		top = (cd.iplist[k])->i_ino;
1205		for (n = k + 1; n < cd.nip; n++) {
1206			ip = cd.iplist[n];
1207			if (ip->i_ino > top) {
1208				top = ip->i_ino;
1209				cd.iplist[n] = cd.iplist[k];
1210				cd.iplist[k] = ip;
1211			}
1212		}
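		/*
		 * (The inner loop above is one selection-sort pass: it moves
		 * the largest remaining i_ino into slot k, so iplist ends up
		 * in descending inode-number order, as described above.)
		 */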
1213
1214		ip = cd.iplist[k];
1215		jfs_ip = JFS_IP(ip);
1216
1217		/*
1218		 * BUGBUG - This code has temporarily been removed.  The
1219		 * intent is to ensure that any file data is written before
1220		 * the metadata is committed to the journal.  This prevents
1221		 * uninitialized data from appearing in a file after the
1222		 * journal has been replayed.  (The uninitialized data
1223		 * could be sensitive data removed by another user.)
1224		 *
1225		 * The problem now is that we are holding the IWRITELOCK
1226		 * on the inode, and calling filemap_fdatawrite on an
1227		 * unmapped page will cause a deadlock in jfs_get_block.
1228		 *
1229		 * The long term solution is to pare down the use of
1230		 * IWRITELOCK.  We are currently holding it too long.
1231		 * We could also be smarter about which data pages need
1232		 * to be written before the transaction is committed and
1233		 * when we don't need to worry about it at all.
1234		 *
1235		 * if ((!S_ISDIR(ip->i_mode))
1236		 *    && (tblk->flag & COMMIT_DELETE) == 0)
1237		 *	filemap_write_and_wait(ip->i_mapping);
1238		 */
1239
1240		/*
1241		 * Mark inode as not dirty.  It will still be on the dirty
1242		 * inode list, but we'll know not to commit it again unless
1243		 * it gets marked dirty again
1244		 */
1245		clear_cflag(COMMIT_Dirty, ip);
1246
1247		/* inherit anonymous tlock(s) of inode */
1248		if (jfs_ip->atlhead) {
1249			lid_to_tlock(jfs_ip->atltail)->next = tblk->next;
1250			tblk->next = jfs_ip->atlhead;
1251			if (!tblk->last)
1252				tblk->last = jfs_ip->atltail;
1253			jfs_ip->atlhead = jfs_ip->atltail = 0;
1254			TXN_LOCK();
1255			list_del_init(&jfs_ip->anon_inode_list);
1256			TXN_UNLOCK();
1257		}
1258
1259		/*
1260		 * acquire transaction lock on on-disk inode page
1261		 * (become first tlock of the tblk's tlock list)
1262		 */
1263		if (((rc = diWrite(tid, ip))))
1264			goto out;
1265	}
1266
1267	/*
1268	 *	write log records from transaction locks
1269	 *
1270	 * txUpdateMap() resets XAD_NEW in XAD.
1271	 */
1272	if ((rc = txLog(log, tblk, &cd)))
1273		goto TheEnd;
1274
1275	/*
1276	 * Ensure that inode isn't reused before
1277	 * lazy commit thread finishes processing
1278	 */
1279	if (tblk->xflag & COMMIT_DELETE) {
1280		ihold(tblk->u.ip);
1281		/*
1282		 * Avoid a rare deadlock
1283		 *
1284		 * If the inode is locked, we may be blocked in
1285		 * jfs_commit_inode.  If so, we don't want the
1286		 * lazy_commit thread doing the last iput() on the inode
1287		 * since that may block on the locked inode.  Instead,
1288		 * commit the transaction synchronously, so the last iput
1289		 * will be done by the calling thread (or later)
1290		 */
1291		/*
1292		 * I believe this code is no longer needed.  Splitting I_LOCK
1293		 * into two bits, I_NEW and I_SYNC should prevent this
1294		 * deadlock as well.  But since I don't have a JFS testload
1295		 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
1296		 * Joern
1297		 */
1298		if (tblk->u.ip->i_state & I_SYNC)
1299			tblk->xflag &= ~COMMIT_LAZY;
1300	}
1301
1302	ASSERT((!(tblk->xflag & COMMIT_DELETE)) ||
1303	       ((tblk->u.ip->i_nlink == 0) &&
1304		!test_cflag(COMMIT_Nolink, tblk->u.ip)));
1305
1306	/*
1307	 *	write COMMIT log record
1308	 */
1309	lrd->type = cpu_to_le16(LOG_COMMIT);
1310	lrd->length = 0;
1311	lmLog(log, tblk, lrd, NULL);
1312
1313	lmGroupCommit(log, tblk);
1314
1315	/*
1316	 *	- transaction is now committed -
1317	 */
1318
1319	/*
1320	 * force pages in careful update
1321	 * (imap addressing structure update)
1322	 */
1323	if (flag & COMMIT_FORCE)
1324		txForce(tblk);
1325
1326	/*
1327	 *	update allocation map.
1328	 *
1329	 * update inode allocation map and inode:
1330	 * free pager lock on memory object of inode if any.
1331	 * update block allocation map.
1332	 *
1333	 * txUpdateMap() resets XAD_NEW in XAD.
1334	 */
1335	if (tblk->xflag & COMMIT_FORCE)
1336		txUpdateMap(tblk);
1337
1338	/*
1339	 *	free transaction locks and pageout/free pages
1340	 */
1341	txRelease(tblk);
1342
1343	if ((tblk->flag & tblkGC_LAZY) == 0)
1344		txUnlock(tblk);
1345
1346
1347	/*
1348	 *	reset in-memory object state
1349	 */
1350	for (k = 0; k < cd.nip; k++) {
1351		ip = cd.iplist[k];
1352		jfs_ip = JFS_IP(ip);
1353
1354		/*
1355		 * reset in-memory inode state
1356		 */
1357		jfs_ip->bxflag = 0;
1358		jfs_ip->blid = 0;
1359	}
1360
1361      out:
1362	if (rc != 0)
1363		txAbort(tid, 1);
1364
1365      TheEnd:
1366	jfs_info("txCommit: tid = %d, returning %d", tid, rc);
1367	return rc;
1368}
1369
1370/*
1371 * NAME:	txLog()
1372 *
1373 * FUNCTION:	Writes AFTER log records for all lines modified
1374 *		by tid for segments specified by inodes in comdata.
1375 *		Code assumes only WRITELOCKS are recorded in lockwords.
1376 *
1377 * PARAMETERS:
1378 *
1379 * RETURN :
1380 */
1381static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
1382{
1383	int rc = 0;
1384	struct inode *ip;
1385	lid_t lid;
1386	struct tlock *tlck;
1387	struct lrd *lrd = &cd->lrd;
1388
1389	/*
1390	 * write log record(s) for each tlock of transaction,
1391	 */
1392	for (lid = tblk->next; lid; lid = tlck->next) {
1393		tlck = lid_to_tlock(lid);
1394
1395		tlck->flag |= tlckLOG;
1396
1397		/* initialize lrd common */
1398		ip = tlck->ip;
1399		lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate);
1400		lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset);
1401		lrd->log.redopage.inode = cpu_to_le32(ip->i_ino);
1402
1403		/* write log record of page from the tlock */
1404		switch (tlck->type & tlckTYPE) {
1405		case tlckXTREE:
1406			xtLog(log, tblk, lrd, tlck);
1407			break;
1408
1409		case tlckDTREE:
1410			dtLog(log, tblk, lrd, tlck);
1411			break;
1412
1413		case tlckINODE:
1414			diLog(log, tblk, lrd, tlck, cd);
1415			break;
1416
1417		case tlckMAP:
1418			mapLog(log, tblk, lrd, tlck);
1419			break;
1420
1421		case tlckDATA:
1422			dataLog(log, tblk, lrd, tlck);
1423			break;
1424
1425		default:
1426			jfs_err("UFO tlock:0x%p", tlck);
1427		}
1428	}
1429
1430	return rc;
1431}
1432
1433/*
1434 *	diLog()
1435 *
1436 * function:	log inode tlock and format maplock to update bmap;
1437 */
1438static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1439		 struct tlock * tlck, struct commit * cd)
1440{
1441	int rc = 0;
1442	struct metapage *mp;
1443	pxd_t *pxd;
1444	struct pxd_lock *pxdlock;
1445
1446	mp = tlck->mp;
1447
1448	/* initialize as REDOPAGE record format */
1449	lrd->log.redopage.type = cpu_to_le16(LOG_INODE);
1450	lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE);
1451
1452	pxd = &lrd->log.redopage.pxd;
1453
1454	/*
1455	 *	inode after image
1456	 */
1457	if (tlck->type & tlckENTRY) {
1458		/* log after-image for logredo(): */
1459		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1460		PXDaddress(pxd, mp->index);
1461		PXDlength(pxd,
1462			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1463		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1464
1465		/* mark page as homeward bound */
1466		tlck->flag |= tlckWRITEPAGE;
1467	} else if (tlck->type & tlckFREE) {
1468		/*
1469		 *	free inode extent
1470		 *
1471		 * (pages of the freed inode extent have been invalidated and
1472		 * a maplock for free of the extent has been formatted at
1473		 * txLock() time);
1474		 *
1475		 * the tlock had been acquired on the inode allocation map page
1476		 * (iag) that specifies the freed extent, even though the map
1477		 * page is not itself logged, to prevent pageout of the map
1478		 * page before the log;
1479		 */
1480
1481		/* log LOG_NOREDOINOEXT of the freed inode extent for
1482		 * logredo() to start NoRedoPage filters, and to update
1483		 * imap and bmap for free of the extent;
1484		 */
1485		lrd->type = cpu_to_le16(LOG_NOREDOINOEXT);
1486		/*
1487		 * For the LOG_NOREDOINOEXT record, we need
1488		 * to pass the IAG number and inode extent
1489		 * index (within that IAG) from which the
1490		 * extent is being released.  These have been
1491		 * passed to us in iplist[1] and iplist[2].
1492		 */
1493		lrd->log.noredoinoext.iagnum =
1494		    cpu_to_le32((u32) (size_t) cd->iplist[1]);
1495		lrd->log.noredoinoext.inoext_idx =
1496		    cpu_to_le32((u32) (size_t) cd->iplist[2]);
1497
1498		pxdlock = (struct pxd_lock *) & tlck->lock;
1499		*pxd = pxdlock->pxd;
1500		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1501
1502		/* update bmap */
1503		tlck->flag |= tlckUPDATEMAP;
1504
1505		/* mark page as homeward bound */
1506		tlck->flag |= tlckWRITEPAGE;
1507	} else
1508		jfs_err("diLog: UFO type tlck:0x%p", tlck);
1509#ifdef  _JFS_WIP
1510	/*
1511	 *	alloc/free external EA extent
1512	 *
1513	 * a maplock for txUpdateMap() to update bPWMAP for alloc/free
1514	 * of the extent has been formatted at txLock() time;
1515	 */
1516	else {
1517		assert(tlck->type & tlckEA);
1518
1519		/* log LOG_UPDATEMAP for logredo() to update bmap for
1520		 * alloc of new (and free of old) external EA extent;
1521		 */
1522		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1523		pxdlock = (struct pxd_lock *) & tlck->lock;
1524		nlock = pxdlock->index;
1525		for (i = 0; i < nlock; i++, pxdlock++) {
1526			if (pxdlock->flag & mlckALLOCPXD)
1527				lrd->log.updatemap.type =
1528				    cpu_to_le16(LOG_ALLOCPXD);
1529			else
1530				lrd->log.updatemap.type =
1531				    cpu_to_le16(LOG_FREEPXD);
1532			lrd->log.updatemap.nxd = cpu_to_le16(1);
1533			lrd->log.updatemap.pxd = pxdlock->pxd;
1534			lrd->backchain =
1535			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1536		}
1537
1538		/* update bmap */
1539		tlck->flag |= tlckUPDATEMAP;
1540	}
1541#endif				/* _JFS_WIP */
1542
1543	return rc;
1544}
1545
1546/*
1547 *	dataLog()
1548 *
1549 * function:	log data tlock
1550 */
1551static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1552	    struct tlock * tlck)
1553{
1554	struct metapage *mp;
1555	pxd_t *pxd;
1556
1557	mp = tlck->mp;
1558
1559	/* initialize as REDOPAGE record format */
1560	lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
1561	lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);
1562
1563	pxd = &lrd->log.redopage.pxd;
1564
1565	/* log after-image for logredo(): */
1566	lrd->type = cpu_to_le16(LOG_REDOPAGE);
1567
1568	if (jfs_dirtable_inline(tlck->ip)) {
1569		/*
1570		 * The table has been truncated; we must have deleted
1571		 * the last entry, so don't bother logging this
1572		 */
1573		mp->lid = 0;
1574		grab_metapage(mp);
1575		metapage_homeok(mp);
1576		discard_metapage(mp);
1577		tlck->mp = NULL;
1578		return 0;
1579	}
1580
1581	PXDaddress(pxd, mp->index);
1582	PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);
1583
1584	lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1585
1586	/* mark page as homeward bound */
1587	tlck->flag |= tlckWRITEPAGE;
1588
1589	return 0;
1590}
1591
1592/*
1593 *	dtLog()
1594 *
1595 * function:	log dtree tlock and format maplock to update bmap;
1596 */
1597static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1598	   struct tlock * tlck)
1599{
1600	struct metapage *mp;
1601	struct pxd_lock *pxdlock;
1602	pxd_t *pxd;
1603
1604	mp = tlck->mp;
1605
1606	/* initialize as REDOPAGE/NOREDOPAGE record format */
1607	lrd->log.redopage.type = cpu_to_le16(LOG_DTREE);
1608	lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE);
1609
1610	pxd = &lrd->log.redopage.pxd;
1611
1612	if (tlck->type & tlckBTROOT)
1613		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1614
1615	/*
1616	 *	page extension via relocation: entry insertion;
1617	 *	page extension in-place: entry insertion;
1618	 *	new right page from page split, reinitialized in-line
1619	 *	root from root page split: entry insertion;
1620	 */
1621	if (tlck->type & (tlckNEW | tlckEXTEND)) {
1622		/* log after-image of the new page for logredo():
1623		 * mark log (LOG_NEW) for logredo() to initialize
1624		 * freelist and update bmap for alloc of the new page;
1625		 */
1626		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1627		if (tlck->type & tlckEXTEND)
1628			lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
1629		else
1630			lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
1631		PXDaddress(pxd, mp->index);
1632		PXDlength(pxd,
1633			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1634		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1635
1636		/* format a maplock for txUpdateMap() to update bPMAP for
1637		 * alloc of the new page;
1638		 */
1639		if (tlck->type & tlckBTROOT)
1640			return;
1641		tlck->flag |= tlckUPDATEMAP;
1642		pxdlock = (struct pxd_lock *) & tlck->lock;
1643		pxdlock->flag = mlckALLOCPXD;
1644		pxdlock->pxd = *pxd;
1645
1646		pxdlock->index = 1;
1647
1648		/* mark page as homeward bound */
1649		tlck->flag |= tlckWRITEPAGE;
1650		return;
1651	}
1652
1653	/*
1654	 *	entry insertion/deletion,
1655	 *	sibling page link update (old right page before split);
1656	 */
1657	if (tlck->type & (tlckENTRY | tlckRELINK)) {
1658		/* log after-image for logredo(): */
1659		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1660		PXDaddress(pxd, mp->index);
1661		PXDlength(pxd,
1662			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1663		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1664
1665		/* mark page as homeward bound */
1666		tlck->flag |= tlckWRITEPAGE;
1667		return;
1668	}
1669
1670	/*
1671	 *	page deletion: page has been invalidated
1672	 *	page relocation: source extent
1673	 *
1674	 *	a maplock for free of the page has been formatted
1675	 *	at txLock() time);
1676	 */
1677	if (tlck->type & (tlckFREE | tlckRELOCATE)) {
1678		/* log LOG_NOREDOPAGE of the deleted page for logredo()
1679		 * to start NoRedoPage filter and to update bmap for free
1680		 * of the deleted page
1681		 */
1682		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1683		pxdlock = (struct pxd_lock *) & tlck->lock;
1684		*pxd = pxdlock->pxd;
1685		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1686
1687		/* a maplock for txUpdateMap() for free of the page
1688		 * has been formatted at txLock() time;
1689		 */
1690		tlck->flag |= tlckUPDATEMAP;
1691	}
1692	return;
1693}
1694
1695/*
1696 *	xtLog()
1697 *
1698 * function:	log xtree tlock and format maplock to update bmap;
1699 */
1700static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1701	   struct tlock * tlck)
1702{
1703	struct inode *ip;
1704	struct metapage *mp;
1705	xtpage_t *p;
1706	struct xtlock *xtlck;
1707	struct maplock *maplock;
1708	struct xdlistlock *xadlock;
1709	struct pxd_lock *pxdlock;
1710	pxd_t *page_pxd;
1711	int next, lwm, hwm;
1712
1713	ip = tlck->ip;
1714	mp = tlck->mp;
1715
1716	/* initialize as REDOPAGE/NOREDOPAGE record format */
1717	lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
1718	lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);
1719
1720	page_pxd = &lrd->log.redopage.pxd;
1721
1722	if (tlck->type & tlckBTROOT) {
1723		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1724		p = &JFS_IP(ip)->i_xtroot;
1725		if (S_ISDIR(ip->i_mode))
1726			lrd->log.redopage.type |=
1727			    cpu_to_le16(LOG_DIR_XTREE);
1728	} else
1729		p = (xtpage_t *) mp->data;
1730	next = le16_to_cpu(p->header.nextindex);
1731
1732	xtlck = (struct xtlock *) & tlck->lock;
1733
1734	maplock = (struct maplock *) & tlck->lock;
1735	xadlock = (struct xdlistlock *) maplock;
1736
1737	/*
1738	 *	entry insertion/extension;
1739	 *	sibling page link update (old right page before split);
1740	 */
1741	if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
1742		/* log after-image for logredo():
1743		 * logredo() will update bmap for alloc of new/extended
1744		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1745		 * after-image of XADlist;
1746		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1747		 * applying the after-image to the meta-data page.
1748		 */
1749		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1750		PXDaddress(page_pxd, mp->index);
1751		PXDlength(page_pxd,
1752			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1753		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1754
1755		/* format a maplock for txUpdateMap() to update bPMAP
1756		 * for alloc of new/extended extents of XAD[lwm:next)
1757		 * from the page itself;
1758		 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
1759		 */
1760		lwm = xtlck->lwm.offset;
1761		if (lwm == 0)
1762			lwm = XTPAGEMAXSLOT;
1763
1764		if (lwm == next)
1765			goto out;
1766		if (lwm > next) {
1767			jfs_err("xtLog: lwm > next");
1768			goto out;
1769		}
1770		tlck->flag |= tlckUPDATEMAP;
1771		xadlock->flag = mlckALLOCXADLIST;
1772		xadlock->count = next - lwm;
1773		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1774			int i;
1775			pxd_t *pxd;
1776			/*
1777			 * Lazy commit may allow xtree to be modified before
1778			 * txUpdateMap runs.  Copy xad into linelock to
1779			 * preserve correct data.
1780			 *
1781			 * We can fit twice as many pxd's as xads in the lock
1782			 */
1783			xadlock->flag = mlckALLOCPXDLIST;
1784			pxd = xadlock->xdlist = &xtlck->pxdlock;
1785			for (i = 0; i < xadlock->count; i++) {
1786				PXDaddress(pxd, addressXAD(&p->xad[lwm + i]));
1787				PXDlength(pxd, lengthXAD(&p->xad[lwm + i]));
1788				p->xad[lwm + i].flag &=
1789				    ~(XAD_NEW | XAD_EXTENDED);
1790				pxd++;
1791			}
1792		} else {
1793			/*
1794			 * xdlist will point into the inode's xtree, so ensure
1795			 * that the transaction is not committed lazily.
1796			 */
1797			xadlock->flag = mlckALLOCXADLIST;
1798			xadlock->xdlist = &p->xad[lwm];
1799			tblk->xflag &= ~COMMIT_LAZY;
1800		}
1801		jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d count:%d",
1802			 tlck->ip, mp, tlck, lwm, xadlock->count);
1803
1804		maplock->index = 1;
1805
1806	      out:
1807		/* mark page as homeward bound */
1808		tlck->flag |= tlckWRITEPAGE;
1809
1810		return;
1811	}
1812
1813	/*
1814	 *	page deletion: file deletion/truncation (ref. xtTruncate())
1815	 *
1816	 * (page will be invalidated after log is written and bmap
1817	 * is updated from the page);
1818	 */
1819	if (tlck->type & tlckFREE) {
1820		/* LOG_NOREDOPAGE log for NoRedoPage filter:
1821		 * if page free from file delete, NoRedoFile filter from
1822		 * inode image of zero link count will subsume NoRedoPage
1823		 * filters for each page;
1824		 * if page free from file truncation, write NoRedoPage
1825		 * filter;
1826		 *
1827		 * update of block allocation map for the page itself:
1828		 * if page free from deletion and truncation, LOG_UPDATEMAP
1829		 * log for the page itself is generated from processing
1830		 * its parent page xad entries;
1831		 */
1832		/* if page free from file truncation, log LOG_NOREDOPAGE
1833		 * of the deleted page for logredo() to start NoRedoPage
1834		 * filter for the page;
1835		 */
1836		if (tblk->xflag & COMMIT_TRUNCATE) {
1837			/* write NOREDOPAGE for the page */
1838			lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1839			PXDaddress(page_pxd, mp->index);
1840			PXDlength(page_pxd,
1841				  mp->logical_size >> tblk->sb->
1842				  s_blocksize_bits);
1843			lrd->backchain =
1844			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1845
1846			if (tlck->type & tlckBTROOT) {
1847				/* Empty xtree must be logged */
1848				lrd->type = cpu_to_le16(LOG_REDOPAGE);
1849				lrd->backchain =
1850				    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1851			}
1852		}
1853
1854		/* init LOG_UPDATEMAP of the freed extents
1855		 * XAD[XTENTRYSTART:hwm) from the deleted page itself
1856		 * for logredo() to update bmap;
1857		 */
1858		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1859		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST);
1860		xtlck = (struct xtlock *) & tlck->lock;
1861		hwm = xtlck->hwm.offset;
1862		lrd->log.updatemap.nxd =
1863		    cpu_to_le16(hwm - XTENTRYSTART + 1);
1864		/* reformat linelock for lmLog() */
1865		xtlck->header.offset = XTENTRYSTART;
1866		xtlck->header.length = hwm - XTENTRYSTART + 1;
1867		xtlck->index = 1;
1868		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1869
1870		/* format a maplock for txUpdateMap() to update bmap
1871		 * to free extents of XAD[XTENTRYSTART:hwm) from the
1872		 * deleted page itself;
1873		 */
1874		tlck->flag |= tlckUPDATEMAP;
1875		xadlock->count = hwm - XTENTRYSTART + 1;
1876		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1877			int i;
1878			pxd_t *pxd;
1879			/*
1880			 * Lazy commit may allow xtree to be modified before
1881			 * txUpdateMap runs.  Copy xad into linelock to
1882			 * preserve correct data.
1883			 *
1884			 * We can fit twice as many pxd's as xads in the lock
1885			 */
1886			xadlock->flag = mlckFREEPXDLIST;
1887			pxd = xadlock->xdlist = &xtlck->pxdlock;
1888			for (i = 0; i < xadlock->count; i++) {
1889				PXDaddress(pxd,
1890					addressXAD(&p->xad[XTENTRYSTART + i]));
1891				PXDlength(pxd,
1892					lengthXAD(&p->xad[XTENTRYSTART + i]));
1893				pxd++;
1894			}
1895		} else {
1896			/*
1897			 * xdlist will point into the inode's xtree, so ensure
1898			 * that the transaction is not committed lazily.
1899			 */
1900			xadlock->flag = mlckFREEXADLIST;
1901			xadlock->xdlist = &p->xad[XTENTRYSTART];
1902			tblk->xflag &= ~COMMIT_LAZY;
1903		}
1904		jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2",
1905			 tlck->ip, mp, xadlock->count);
1906
1907		maplock->index = 1;
1908
1909		/* mark page as invalid */
1910		if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode))
1911		    && !(tlck->type & tlckBTROOT))
1912			tlck->flag |= tlckFREEPAGE;
1913		/*
1914		   else (tblk->xflag & COMMIT_PMAP)
1915		   ? release the page;
1916		 */
1917		return;
1918	}
1919
1920	/*
1921	 *	page/entry truncation: file truncation (ref. xtTruncate())
1922	 *
1923	 *	|----------+------+------+---------------|
1924	 *		   |      |      |
1925	 *		   |      |     hwm - hwm before truncation
1926	 *		   |     next - truncation point
1927	 *		  lwm - lwm before truncation
1928	 * header ?
1929	 */
1930	if (tlck->type & tlckTRUNCATE) {
1931		/* This odd declaration suppresses a bogus gcc warning */
1932		pxd_t pxd = pxd;	/* truncated extent of xad */
1933		int twm;
1934
1935		/*
1936		 * For truncation the entire linelock may be used, so it would
1937		 * be difficult to store the xad list in the linelock itself.
1938		 * Therefore, we'll just force transaction to be committed
1939		 * synchronously, so that xtree pages won't be changed before
1940		 * txUpdateMap runs.
1941		 */
1942		tblk->xflag &= ~COMMIT_LAZY;
1943		lwm = xtlck->lwm.offset;
1944		if (lwm == 0)
1945			lwm = XTPAGEMAXSLOT;
1946		hwm = xtlck->hwm.offset;
1947		twm = xtlck->twm.offset;
1948
1949		/*
1950		 *	write log records
1951		 */
1952		/* log after-image for logredo():
1953		 *
1954		 * logredo() will update bmap for alloc of new/extended
1955		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1956		 * after-image of XADlist;
1957		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1958		 * applying the after-image to the meta-data page.
1959		 */
1960		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1961		PXDaddress(page_pxd, mp->index);
1962		PXDlength(page_pxd,
1963			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1964		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1965
1966		/*
1967		 * truncate entry XAD[twm == next - 1]:
1968		 */
1969		if (twm == next - 1) {
1970			/* init LOG_UPDATEMAP for logredo() to update bmap for
1971			 * free of truncated delta extent of the truncated
1972			 * entry XAD[next - 1]:
1973			 * (xtlck->pxdlock = truncated delta extent);
1974			 */
1975			pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
1976			/* assert(pxdlock->type & tlckTRUNCATE); */
1977			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1978			lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
1979			lrd->log.updatemap.nxd = cpu_to_le16(1);
1980			lrd->log.updatemap.pxd = pxdlock->pxd;
1981			pxd = pxdlock->pxd;	/* save to format maplock */
1982			lrd->backchain =
1983			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1984		}
1985
1986		/*
1987		 * free entries XAD[next:hwm]:
1988		 */
1989		if (hwm >= next) {
1990			/* init LOG_UPDATEMAP of the freed extents
1991			 * XAD[next:hwm] from the deleted page itself
1992			 * for logredo() to update bmap;
1993			 */
1994			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1995			lrd->log.updatemap.type =
1996			    cpu_to_le16(LOG_FREEXADLIST);
1997			xtlck = (struct xtlock *) & tlck->lock;
1998			hwm = xtlck->hwm.offset;
1999			lrd->log.updatemap.nxd =
2000			    cpu_to_le16(hwm - next + 1);
2001			/* reformat linelock for lmLog() */
2002			xtlck->header.offset = next;
2003			xtlck->header.length = hwm - next + 1;
2004			xtlck->index = 1;
2005			lrd->backchain =
2006			    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
2007		}
2008
2009		/*
2010		 *	format maplock(s) for txUpdateMap() to update bmap
2011		 */
2012		maplock->index = 0;
2013
2014		/*
2015		 * allocate entries XAD[lwm:next):
2016		 */
2017		if (lwm < next) {
2018			/* format a maplock for txUpdateMap() to update bPMAP
2019			 * for alloc of new/extended extents of XAD[lwm:next)
2020			 * from the page itself;
2021			 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
2022			 */
2023			tlck->flag |= tlckUPDATEMAP;
2024			xadlock->flag = mlckALLOCXADLIST;
2025			xadlock->count = next - lwm;
2026			xadlock->xdlist = &p->xad[lwm];
2027
2028			jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d lwm:%d next:%d",
2029				 tlck->ip, mp, xadlock->count, lwm, next);
2030			maplock->index++;
2031			xadlock++;
2032		}
2033
2034		/*
2035		 * truncate entry XAD[twm == next - 1]:
2036		 */
2037		if (twm == next - 1) {
2038			/* format a maplock for txUpdateMap() to update bmap
2039			 * to free truncated delta extent of the truncated
2040			 * entry XAD[next - 1];
2041			 * (xtlck->pxdlock = truncated delta extent);
2042			 */
2043			tlck->flag |= tlckUPDATEMAP;
2044			pxdlock = (struct pxd_lock *) xadlock;
2045			pxdlock->flag = mlckFREEPXD;
2046			pxdlock->count = 1;
2047			pxdlock->pxd = pxd;
2048
2049			jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d hwm:%d",
2050				 ip, mp, pxdlock->count, hwm);
2051			maplock->index++;
2052			xadlock++;
2053		}
2054
2055		/*
2056		 * free entries XAD[next:hwm]:
2057		 */
2058		if (hwm >= next) {
2059			/* format a maplock for txUpdateMap() to update bmap
2060			 * to free extents of XAD[next:hwm] from the deleted
2061			 * page itself;
2062			 */
2063			tlck->flag |= tlckUPDATEMAP;
2064			xadlock->flag = mlckFREEXADLIST;
2065			xadlock->count = hwm - next + 1;
2066			xadlock->xdlist = &p->xad[next];
2067
2068			jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d next:%d hwm:%d",
2069				 tlck->ip, mp, xadlock->count, next, hwm);
2070			maplock->index++;
2071		}
2072
2073		/* mark page as homeward bound */
2074		tlck->flag |= tlckWRITEPAGE;
2075	}
2076	return;
2077}
2078
2079/*
2080 *	mapLog()
2081 *
2082 * function:	log from maplock of freed data extents;
2083 */
2084static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2085		   struct tlock * tlck)
2086{
2087	struct pxd_lock *pxdlock;
2088	int i, nlock;
2089	pxd_t *pxd;
2090
2091	/*
2092	 *	page relocation: free the source page extent
2093	 *
2094	 * a maplock for txUpdateMap() for free of the page
2095	 * has been formatted at txLock() time saving the src
2096	 * relocated page address;
2097	 */
2098	if (tlck->type & tlckRELOCATE) {
2099		/* log LOG_NOREDOPAGE of the old relocated page
2100		 * for logredo() to start NoRedoPage filter;
2101		 */
2102		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
2103		pxdlock = (struct pxd_lock *) & tlck->lock;
2104		pxd = &lrd->log.redopage.pxd;
2105		*pxd = pxdlock->pxd;
2106		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2107
2108		/* (N.B. currently, logredo() does NOT update bmap
2109		 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE);
2110		 * if page free from relocation, LOG_UPDATEMAP log is
2111		 * specifically generated now for logredo()
2112		 * to update bmap for free of src relocated page;
2113		 * (new flag LOG_RELOCATE may be introduced which will
2114		 * inform logredo() to start NORedoPage filter and also
2115		 * update block allocation map at the same time, thus
2116		 * avoiding an extra log write);
2117		 */
2118		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2119		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
2120		lrd->log.updatemap.nxd = cpu_to_le16(1);
2121		lrd->log.updatemap.pxd = pxdlock->pxd;
2122		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2123
2124		/* a maplock for txUpdateMap() for free of the page
2125		 * has been formatted at txLock() time;
2126		 */
2127		tlck->flag |= tlckUPDATEMAP;
2128		return;
2129	}
2130	/*
2131	 *
2132	 * Otherwise it's not a relocate request
2133	 *
2134	 */
2135	else {
2136		/* log LOG_UPDATEMAP for logredo() to update bmap for
2137		 * free of truncated/relocated delta extent of the data;
2138		 * e.g.: external EA extent, relocated/truncated extent
2139		 * from xtTailgate();
2140		 */
2141		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2142		pxdlock = (struct pxd_lock *) & tlck->lock;
2143		nlock = pxdlock->index;
2144		for (i = 0; i < nlock; i++, pxdlock++) {
2145			if (pxdlock->flag & mlckALLOCPXD)
2146				lrd->log.updatemap.type =
2147				    cpu_to_le16(LOG_ALLOCPXD);
2148			else
2149				lrd->log.updatemap.type =
2150				    cpu_to_le16(LOG_FREEPXD);
2151			lrd->log.updatemap.nxd = cpu_to_le16(1);
2152			lrd->log.updatemap.pxd = pxdlock->pxd;
2153			lrd->backchain =
2154			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2155			jfs_info("mapLog: xaddr:0x%lx xlen:0x%x",
2156				 (ulong) addressPXD(&pxdlock->pxd),
2157				 lengthPXD(&pxdlock->pxd));
2158		}
2159
2160		/* update bmap */
2161		tlck->flag |= tlckUPDATEMAP;
2162	}
2163}
2164
2165/*
2166 *	txEA()
2167 *
2168 * function:	acquire maplock for EA/ACL extents or
2169 *		set COMMIT_INLINE flag;
2170 */
2171void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
2172{
2173	struct tlock *tlck = NULL;
2174	struct pxd_lock *maplock = NULL, *pxdlock = NULL;
2175
2176	/*
2177	 * format maplock for alloc of new EA extent
2178	 */
2179	if (newea) {
2180		/* Since the newea could be a completely zeroed entry we need to
2181		 * check for the two flags which indicate we should actually
2182		 * commit new EA data
2183		 */
2184		if (newea->flag & DXD_EXTENT) {
2185			tlck = txMaplock(tid, ip, tlckMAP);
2186			maplock = (struct pxd_lock *) & tlck->lock;
2187			pxdlock = (struct pxd_lock *) maplock;
2188			pxdlock->flag = mlckALLOCPXD;
2189			PXDaddress(&pxdlock->pxd, addressDXD(newea));
2190			PXDlength(&pxdlock->pxd, lengthDXD(newea));
2191			pxdlock++;
2192			maplock->index = 1;
2193		} else if (newea->flag & DXD_INLINE) {
2194			tlck = NULL;
2195
2196			set_cflag(COMMIT_Inlineea, ip);
2197		}
2198	}
2199
2200	/*
2201	 * format maplock for free of old EA extent
2202	 */
2203	if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) {
2204		if (tlck == NULL) {
2205			tlck = txMaplock(tid, ip, tlckMAP);
2206			maplock = (struct pxd_lock *) & tlck->lock;
2207			pxdlock = (struct pxd_lock *) maplock;
2208			maplock->index = 0;
2209		}
2210		pxdlock->flag = mlckFREEPXD;
2211		PXDaddress(&pxdlock->pxd, addressDXD(oldea));
2212		PXDlength(&pxdlock->pxd, lengthDXD(oldea));
2213		maplock->index++;
2214	}
2215}
2216
2217/*
2218 *	txForce()
2219 *
2220 * function: synchronously write pages locked by transaction
2221 *	     after txLog() but before txUpdateMap();
2222 */
2223static void txForce(struct tblock * tblk)
2224{
2225	struct tlock *tlck;
2226	lid_t lid, next;
2227	struct metapage *mp;
2228
2229	/*
2230	 * reverse the order of transaction tlocks in
2231	 * careful update order of address index pages
2232	 * (right to left, bottom up)
2233	 */
2234	tlck = lid_to_tlock(tblk->next);
2235	lid = tlck->next;
2236	tlck->next = 0;
2237	while (lid) {
2238		tlck = lid_to_tlock(lid);
2239		next = tlck->next;
2240		tlck->next = tblk->next;
2241		tblk->next = lid;
2242		lid = next;
2243	}
2244
2245	/*
2246	 * synchronously write the page, and
2247	 * hold the page for txUpdateMap();
2248	 */
2249	for (lid = tblk->next; lid; lid = next) {
2250		tlck = lid_to_tlock(lid);
2251		next = tlck->next;
2252
2253		if ((mp = tlck->mp) != NULL &&
2254		    (tlck->type & tlckBTROOT) == 0) {
2255			assert(mp->xflag & COMMIT_PAGE);
2256
2257			if (tlck->flag & tlckWRITEPAGE) {
2258				tlck->flag &= ~tlckWRITEPAGE;
2259
2260				/* do not release page to freelist */
2261				force_metapage(mp);
2262#if 0
2263				/*
2264				 * The "right" thing to do here is to
2265				 * synchronously write the metadata.
2266				 * With the current implementation this
2267				 * is hard since write_metapage requires
2268				 * us to kunmap & remap the page.  If we
2269				 * have tlocks pointing into the metadata
2270				 * pages, we don't want to do this.  I think
2271				 * we can get by with synchronously writing
2272				 * the pages when they are released.
2273				 */
2274				assert(mp->nohomeok);
2275				set_bit(META_dirty, &mp->flag);
2276				set_bit(META_sync, &mp->flag);
2277#endif
2278			}
2279		}
2280	}
2281}
2282
2283/*
2284 *	txUpdateMap()
2285 *
2286 * function:	update persistent allocation map (and working map
2287 *		if appropriate);
2288 *
2289 * parameter:
2290 */
2291static void txUpdateMap(struct tblock * tblk)
2292{
2293	struct inode *ip;
2294	struct inode *ipimap;
2295	lid_t lid;
2296	struct tlock *tlck;
2297	struct maplock *maplock;
2298	struct pxd_lock pxdlock;
2299	int maptype;
2300	int k, nlock;
2301	struct metapage *mp = NULL;
2302
2303	ipimap = JFS_SBI(tblk->sb)->ipimap;
2304
2305	maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP;
2306
2307
2308	/*
2309	 *	update block allocation map
2310	 *
2311	 * update allocation state in pmap (and wmap) and
2312	 * update lsn of the pmap page;
2313	 */
2314	/*
2315	 * scan each tlock/page of transaction for block allocation/free:
2316	 *
2317	 * for each tlock/page of transaction, update map.
2318	 *  ? are there tlock for pmap and pwmap at the same time ?
2319	 */
2320	for (lid = tblk->next; lid; lid = tlck->next) {
2321		tlck = lid_to_tlock(lid);
2322
2323		if ((tlck->flag & tlckUPDATEMAP) == 0)
2324			continue;
2325
2326		if (tlck->flag & tlckFREEPAGE) {
2327			/*
2328			 * Another thread may attempt to reuse freed space
2329			 * immediately, so we want to get rid of the metapage
2330			 * before anyone else has a chance to get it.
2331			 * Lock metapage, update maps, then invalidate
2332			 * the metapage.
2333			 */
2334			mp = tlck->mp;
2335			ASSERT(mp->xflag & COMMIT_PAGE);
2336			grab_metapage(mp);
2337		}
2338
2339		/*
2340		 * extent list:
2341		 * . in-line PXD list:
2342		 * . out-of-line XAD list:
2343		 */
2344		maplock = (struct maplock *) & tlck->lock;
2345		nlock = maplock->index;
2346
2347		for (k = 0; k < nlock; k++, maplock++) {
2348			/*
2349			 * allocate blocks in persistent map:
2350			 *
2351			 * blocks have been allocated from wmap at alloc time;
2352			 */
2353			if (maplock->flag & mlckALLOC) {
2354				txAllocPMap(ipimap, maplock, tblk);
2355			}
2356			/*
2357			 * free blocks in persistent and working map:
2358			 * blocks will be freed in pmap and then in wmap;
2359			 *
2360			 * ? tblock specifies the PMAP/PWMAP based upon
2361			 * transaction
2362			 *
2363			 * free blocks in persistent map:
2364			 * blocks will be freed from wmap at last reference
2365			 * release of the object for regular files;
2366			 *
2367			 * Always free blocks from both persistent & working
2368			 * maps for directories
2369			 */
2370			else {	/* (maplock->flag & mlckFREE) */
2371
2372				if (tlck->flag & tlckDIRECTORY)
2373					txFreeMap(ipimap, maplock,
2374						  tblk, COMMIT_PWMAP);
2375				else
2376					txFreeMap(ipimap, maplock,
2377						  tblk, maptype);
2378			}
2379		}
2380		if (tlck->flag & tlckFREEPAGE) {
2381			if (!(tblk->flag & tblkGC_LAZY)) {
2382				/* This is equivalent to txRelease */
2383				ASSERT(mp->lid == lid);
2384				tlck->mp->lid = 0;
2385			}
2386			assert(mp->nohomeok == 1);
2387			metapage_homeok(mp);
2388			discard_metapage(mp);
2389			tlck->mp = NULL;
2390		}
2391	}
2392	/*
2393	 *	update inode allocation map
2394	 *
2395	 * update allocation state in pmap and
2396	 * update lsn of the pmap page;
2397	 * update in-memory inode flag/state
2398	 *
2399	 * unlock mapper/write lock
2400	 */
2401	if (tblk->xflag & COMMIT_CREATE) {
2402		diUpdatePMap(ipimap, tblk->ino, false, tblk);
2403		/* update persistent block allocation map
2404		 * for the allocation of inode extent;
2405		 */
2406		pxdlock.flag = mlckALLOCPXD;
2407		pxdlock.pxd = tblk->u.ixpxd;
2408		pxdlock.index = 1;
2409		txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk);
2410	} else if (tblk->xflag & COMMIT_DELETE) {
2411		ip = tblk->u.ip;
2412		diUpdatePMap(ipimap, ip->i_ino, true, tblk);
2413		iput(ip);
2414	}
2415}
2416
2417/*
2418 *	txAllocPMap()
2419 *
2420 * function: allocate from persistent map;
2421 *
2422 * parameter:
2423 *	ipbmap	-
2424 *	maplock	-
2425 *		xad list:
2426 *		pxd:
2427 *
2428 *	maptype -
2429 *		allocate from persistent map;
2430 *		free from persistent map;
2431 *		(e.g., tmp file - free from working map at release
2432 *		 of last reference);
2433 *		free from persistent and working map;
2434 *
2435 *	lsn	- log sequence number;
2436 */
2437static void txAllocPMap(struct inode *ip, struct maplock * maplock,
2438			struct tblock * tblk)
2439{
2440	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2441	struct xdlistlock *xadlistlock;
2442	xad_t *xad;
2443	s64 xaddr;
2444	int xlen;
2445	struct pxd_lock *pxdlock;
2446	struct xdlistlock *pxdlistlock;
2447	pxd_t *pxd;
2448	int n;
2449
2450	/*
2451	 * allocate from persistent map;
2452	 */
2453	if (maplock->flag & mlckALLOCXADLIST) {
2454		xadlistlock = (struct xdlistlock *) maplock;
2455		xad = xadlistlock->xdlist;
2456		for (n = 0; n < xadlistlock->count; n++, xad++) {
2457			if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
2458				xaddr = addressXAD(xad);
2459				xlen = lengthXAD(xad);
2460				dbUpdatePMap(ipbmap, false, xaddr,
2461					     (s64) xlen, tblk);
2462				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
2463				jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2464					 (ulong) xaddr, xlen);
2465			}
2466		}
2467	} else if (maplock->flag & mlckALLOCPXD) {
2468		pxdlock = (struct pxd_lock *) maplock;
2469		xaddr = addressPXD(&pxdlock->pxd);
2470		xlen = lengthPXD(&pxdlock->pxd);
2471		dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk);
2472		jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen);
2473	} else {		/* (maplock->flag & mlckALLOCPXDLIST) */
2474
2475		pxdlistlock = (struct xdlistlock *) maplock;
2476		pxd = pxdlistlock->xdlist;
2477		for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2478			xaddr = addressPXD(pxd);
2479			xlen = lengthPXD(pxd);
2480			dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen,
2481				     tblk);
2482			jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2483				 (ulong) xaddr, xlen);
2484		}
2485	}
2486}
2487
2488/*
2489 *	txFreeMap()
2490 *
2491 * function:	free from persistent and/or working map;
2492 *
2493 * todo: optimization
2494 */
2495void txFreeMap(struct inode *ip,
2496	       struct maplock * maplock, struct tblock * tblk, int maptype)
2497{
2498	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2499	struct xdlistlock *xadlistlock;
2500	xad_t *xad;
2501	s64 xaddr;
2502	int xlen;
2503	struct pxd_lock *pxdlock;
2504	struct xdlistlock *pxdlistlock;
2505	pxd_t *pxd;
2506	int n;
2507
2508	jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x",
2509		 tblk, maplock, maptype);
2510
2511	/*
2512	 * free from persistent map;
2513	 */
2514	if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) {
2515		if (maplock->flag & mlckFREEXADLIST) {
2516			xadlistlock = (struct xdlistlock *) maplock;
2517			xad = xadlistlock->xdlist;
2518			for (n = 0; n < xadlistlock->count; n++, xad++) {
2519				if (!(xad->flag & XAD_NEW)) {
2520					xaddr = addressXAD(xad);
2521					xlen = lengthXAD(xad);
2522					dbUpdatePMap(ipbmap, true, xaddr,
2523						     (s64) xlen, tblk);
2524					jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2525						 (ulong) xaddr, xlen);
2526				}
2527			}
2528		} else if (maplock->flag & mlckFREEPXD) {
2529			pxdlock = (struct pxd_lock *) maplock;
2530			xaddr = addressPXD(&pxdlock->pxd);
2531			xlen = lengthPXD(&pxdlock->pxd);
2532			dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen,
2533				     tblk);
2534			jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2535				 (ulong) xaddr, xlen);
2536		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
2537
2538			pxdlistlock = (struct xdlistlock *) maplock;
2539			pxd = pxdlistlock->xdlist;
2540			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2541				xaddr = addressPXD(pxd);
2542				xlen = lengthPXD(pxd);
2543				dbUpdatePMap(ipbmap, true, xaddr,
2544					     (s64) xlen, tblk);
2545				jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2546					 (ulong) xaddr, xlen);
2547			}
2548		}
2549	}
2550
2551	/*
2552	 * free from working map;
2553	 */
2554	if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) {
2555		if (maplock->flag & mlckFREEXADLIST) {
2556			xadlistlock = (struct xdlistlock *) maplock;
2557			xad = xadlistlock->xdlist;
2558			for (n = 0; n < xadlistlock->count; n++, xad++) {
2559				xaddr = addressXAD(xad);
2560				xlen = lengthXAD(xad);
2561				dbFree(ip, xaddr, (s64) xlen);
2562				xad->flag = 0;
2563				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2564					 (ulong) xaddr, xlen);
2565			}
2566		} else if (maplock->flag & mlckFREEPXD) {
2567			pxdlock = (struct pxd_lock *) maplock;
2568			xaddr = addressPXD(&pxdlock->pxd);
2569			xlen = lengthPXD(&pxdlock->pxd);
2570			dbFree(ip, xaddr, (s64) xlen);
2571			jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2572				 (ulong) xaddr, xlen);
2573		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
2574
2575			pxdlistlock = (struct xdlistlock *) maplock;
2576			pxd = pxdlistlock->xdlist;
2577			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2578				xaddr = addressPXD(pxd);
2579				xlen = lengthPXD(pxd);
2580				dbFree(ip, xaddr, (s64) xlen);
2581				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2582					 (ulong) xaddr, xlen);
2583			}
2584		}
2585	}
2586}
2587
2588/*
2589 *	txFreelock()
2590 *
2591 * function:	remove tlock from inode anonymous locklist
2592 */
2593void txFreelock(struct inode *ip)
2594{
2595	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2596	struct tlock *xtlck, *tlck;
2597	lid_t xlid = 0, lid;
2598
2599	if (!jfs_ip->atlhead)
2600		return;
2601
2602	TXN_LOCK();
2603	xtlck = (struct tlock *) &jfs_ip->atlhead;
2604
2605	while ((lid = xtlck->next) != 0) {
2606		tlck = lid_to_tlock(lid);
2607		if (tlck->flag & tlckFREELOCK) {
2608			xtlck->next = tlck->next;
2609			txLockFree(lid);
2610		} else {
2611			xtlck = tlck;
2612			xlid = lid;
2613		}
2614	}
2615
2616	if (jfs_ip->atlhead)
2617		jfs_ip->atltail = xlid;
2618	else {
2619		jfs_ip->atltail = 0;
2620		/*
2621		 * If inode was on anon_list, remove it
2622		 */
2623		list_del_init(&jfs_ip->anon_inode_list);
2624	}
2625	TXN_UNLOCK();
2626}
2627
2628/*
2629 *	txAbort()
2630 *
2631 * function: abort tx before commit;
2632 *
2633 * frees line-locks and segment locks for all
2634 * segments in comdata structure.
2635 * Optionally sets state of file-system to FM_DIRTY in super-block.
2636 * log age of page-frames in memory for which caller has
2637 * are reset to 0 (to avoid logwarap).
2638 */
2639void txAbort(tid_t tid, int dirty)
2640{
2641	lid_t lid, next;
2642	struct metapage *mp;
2643	struct tblock *tblk = tid_to_tblock(tid);
2644	struct tlock *tlck;
2645
2646	/*
2647	 * free tlocks of the transaction
2648	 */
2649	for (lid = tblk->next; lid; lid = next) {
2650		tlck = lid_to_tlock(lid);
2651		next = tlck->next;
2652		mp = tlck->mp;
2653		JFS_IP(tlck->ip)->xtlid = 0;
2654
2655		if (mp) {
2656			mp->lid = 0;
2657
2658			/*
2659			 * reset lsn of page to avoid logwarap:
2660			 *
2661			 * (page may have been previously committed by another
2662			 * transaction(s) but has not been paged, i.e.,
2663			 * it may be on logsync list even though it has not
2664			 * been logged for the current tx.)
2665			 */
2666			if (mp->xflag & COMMIT_PAGE && mp->lsn)
2667				LogSyncRelease(mp);
2668		}
2669		/* insert tlock at head of freelist */
2670		TXN_LOCK();
2671		txLockFree(lid);
2672		TXN_UNLOCK();
2673	}
2674
2675	/* caller will free the transaction block */
2676
2677	tblk->next = tblk->last = 0;
2678
2679	/*
2680	 * mark filesystem dirty
2681	 */
2682	if (dirty)
2683		jfs_error(tblk->sb, "\n");
2684
2685	return;
2686}
2687
2688/*
2689 *	txLazyCommit(void)
2690 *
2691 *	All transactions except those changing ipimap (COMMIT_FORCE) are
2692 *	processed by this routine.  This ensures that the inode and block
2693 *	allocation maps are updated in order.  For synchronous transactions,
2694 *	let the user thread finish processing after txUpdateMap() is called.
2695 */
2696static void txLazyCommit(struct tblock * tblk)
2697{
2698	struct jfs_log *log;
2699
2700	while (((tblk->flag & tblkGC_READY) == 0) &&
2701	       ((tblk->flag & tblkGC_UNLOCKED) == 0)) {
2702		/* We must have gotten ahead of the user thread
2703		 */
2704		jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk);
2705		yield();
2706	}
2707
2708	jfs_info("txLazyCommit: processing tblk 0x%p", tblk);
2709
2710	txUpdateMap(tblk);
2711
2712	log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
2713
2714	spin_lock_irq(&log->gclock);	// LOGGC_LOCK
2715
2716	tblk->flag |= tblkGC_COMMITTED;
2717
2718	if (tblk->flag & tblkGC_READY)
2719		log->gcrtc--;
2720
2721	wake_up_all(&tblk->gcwait);	// LOGGC_WAKEUP
2722
2723	/*
2724	 * Can't release log->gclock until we've tested tblk->flag
2725	 */
2726	if (tblk->flag & tblkGC_LAZY) {
2727		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
2728		txUnlock(tblk);
2729		tblk->flag &= ~tblkGC_LAZY;
2730		txEnd(tblk - TxBlock);	/* Convert back to tid */
2731	} else
2732		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
2733
2734	jfs_info("txLazyCommit: done: tblk = 0x%p", tblk);
2735}
2736
2737/*
2738 *	jfs_lazycommit(void)
2739 *
2740 *	To be run as a kernel daemon.  Because lbmIODone may be called in an
2741 *	interrupt context, or anywhere blocking is not wanted, this routine
2742 *	processes committed transactions from the unlock queue on its behalf.
2743 */
2744int jfs_lazycommit(void *arg)
2745{
2746	int WorkDone;
2747	struct tblock *tblk;
2748	unsigned long flags;
2749	struct jfs_sb_info *sbi;
2750
 
2751	do {
2752		LAZY_LOCK(flags);
2753		jfs_commit_thread_waking = 0;	/* OK to wake another thread */
2754		while (!list_empty(&TxAnchor.unlock_queue)) {
2755			WorkDone = 0;
2756			list_for_each_entry(tblk, &TxAnchor.unlock_queue,
2757					    cqueue) {
2758
2759				sbi = JFS_SBI(tblk->sb);
2760				/*
2761				 * For each volume, the transactions must be
2762				 * handled in order.  If another commit thread
2763				 * is handling a tblk for this superblock,
2764				 * skip it
2765				 */
2766				if (sbi->commit_state & IN_LAZYCOMMIT)
2767					continue;
2768
2769				sbi->commit_state |= IN_LAZYCOMMIT;
2770				WorkDone = 1;
2771
2772				/*
2773				 * Remove transaction from queue
2774				 */
2775				list_del(&tblk->cqueue);
2776
2777				LAZY_UNLOCK(flags);
2778				txLazyCommit(tblk);
2779				LAZY_LOCK(flags);
2780
2781				sbi->commit_state &= ~IN_LAZYCOMMIT;
2782				/*
2783				 * Don't continue in the for loop.  (We can't
2784				 * anyway, it's unsafe!)  We want to go back to
2785				 * the beginning of the list.
2786				 */
2787				break;
2788			}
2789
2790			/* If there was nothing to do, don't continue */
2791			if (!WorkDone)
2792				break;
2793		}
2794		/* In case a wakeup came while all threads were active */
2795		jfs_commit_thread_waking = 0;
2796
2797		if (freezing(current)) {
2798			LAZY_UNLOCK(flags);
2799			try_to_freeze();
2800		} else {
2801			DECLARE_WAITQUEUE(wq, current);
2802
2803			add_wait_queue(&jfs_commit_thread_wait, &wq);
2804			set_current_state(TASK_INTERRUPTIBLE);
2805			LAZY_UNLOCK(flags);
2806			schedule();
2807			remove_wait_queue(&jfs_commit_thread_wait, &wq);
2808		}
2809	} while (!kthread_should_stop());
2810
2811	if (!list_empty(&TxAnchor.unlock_queue))
2812		jfs_err("jfs_lazycommit being killed w/pending transactions!");
2813	else
2814		jfs_info("jfs_lazycommit being killed");
2815	return 0;
2816}
2817
2818void txLazyUnlock(struct tblock * tblk)
2819{
2820	unsigned long flags;
2821
2822	LAZY_LOCK(flags);
2823
2824	list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue);
2825	/*
2826	 * Don't wake up a commit thread if there is already one servicing
2827	 * this superblock, or if the last one we woke up hasn't started yet.
2828	 */
2829	if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) &&
2830	    !jfs_commit_thread_waking) {
2831		jfs_commit_thread_waking = 1;
2832		wake_up(&jfs_commit_thread_wait);
2833	}
2834	LAZY_UNLOCK(flags);
2835}
2836
2837static void LogSyncRelease(struct metapage * mp)
2838{
2839	struct jfs_log *log = mp->log;
2840
2841	assert(mp->nohomeok);
2842	assert(log);
2843	metapage_homeok(mp);
2844}
2845
2846/*
2847 *	txQuiesce
2848 *
2849 *	Block all new transactions and push anonymous transactions to
2850 *	completion
2851 *
2852 *	This does almost the same thing as jfs_sync below.  We don't
2853 *	worry about deadlocking when jfs_tlocks_low is set, since we would
2854 *	expect jfs_sync to get us out of that jam.
2855 */
2856void txQuiesce(struct super_block *sb)
2857{
2858	struct inode *ip;
2859	struct jfs_inode_info *jfs_ip;
2860	struct jfs_log *log = JFS_SBI(sb)->log;
2861	tid_t tid;
2862
2863	set_bit(log_QUIESCE, &log->flag);
2864
2865	TXN_LOCK();
2866restart:
2867	while (!list_empty(&TxAnchor.anon_list)) {
2868		jfs_ip = list_entry(TxAnchor.anon_list.next,
2869				    struct jfs_inode_info,
2870				    anon_inode_list);
2871		ip = &jfs_ip->vfs_inode;
2872
2873		/*
2874		 * inode will be removed from anonymous list
2875		 * when it is committed
2876		 */
2877		TXN_UNLOCK();
2878		tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
2879		mutex_lock(&jfs_ip->commit_mutex);
2880		txCommit(tid, 1, &ip, 0);
2881		txEnd(tid);
2882		mutex_unlock(&jfs_ip->commit_mutex);
2883		/*
2884		 * Just to be safe.  I don't know how
2885		 * long we can run without blocking
2886		 */
2887		cond_resched();
2888		TXN_LOCK();
2889	}
2890
2891	/*
2892	 * If jfs_sync is running in parallel, there could be some inodes
2893	 * on anon_list2.  Let's check.
2894	 */
2895	if (!list_empty(&TxAnchor.anon_list2)) {
2896		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2897		goto restart;
2898	}
2899	TXN_UNLOCK();
2900
2901	/*
2902	 * We may need to kick off the group commit
2903	 */
2904	jfs_flush_journal(log, 0);
2905}
2906
2907/*
2908 * txResume()
2909 *
2910 * Allows transactions to start again following txQuiesce
2911 */
2912void txResume(struct super_block *sb)
2913{
2914	struct jfs_log *log = JFS_SBI(sb)->log;
2915
2916	clear_bit(log_QUIESCE, &log->flag);
2917	TXN_WAKEUP(&log->syncwait);
2918}
2919
2920/*
2921 *	jfs_sync(void)
2922 *
2923 *	To be run as a kernel daemon.  This is awakened when tlocks run low.
2924 *	We write any inodes that have anonymous tlocks so they will become
2925 *	available.
2926 */
2927int jfs_sync(void *arg)
2928{
2929	struct inode *ip;
2930	struct jfs_inode_info *jfs_ip;
2931	tid_t tid;
2932
 
2933	do {
2934		/*
2935		 * write each inode on the anonymous inode list
2936		 */
2937		TXN_LOCK();
2938		while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) {
2939			jfs_ip = list_entry(TxAnchor.anon_list.next,
2940					    struct jfs_inode_info,
2941					    anon_inode_list);
2942			ip = &jfs_ip->vfs_inode;
2943
2944			if (! igrab(ip)) {
2945				/*
2946				 * Inode is being freed
2947				 */
2948				list_del_init(&jfs_ip->anon_inode_list);
2949			} else if (mutex_trylock(&jfs_ip->commit_mutex)) {
2950				/*
2951				 * inode will be removed from anonymous list
2952				 * when it is committed
2953				 */
2954				TXN_UNLOCK();
2955				tid = txBegin(ip->i_sb, COMMIT_INODE);
2956				txCommit(tid, 1, &ip, 0);
2957				txEnd(tid);
2958				mutex_unlock(&jfs_ip->commit_mutex);
2959
2960				iput(ip);
2961				/*
2962				 * Just to be safe.  I don't know how
2963				 * long we can run without blocking
2964				 */
2965				cond_resched();
2966				TXN_LOCK();
2967			} else {
2968				/* We can't get the commit mutex.  It may
2969				 * be held by a thread waiting for tlock's
2970				 * so let's not block here.  Save it to
2971				 * put back on the anon_list.
2972				 */
2973
2974				/* Move from anon_list to anon_list2 */
2975				list_move(&jfs_ip->anon_inode_list,
2976					  &TxAnchor.anon_list2);
2977
2978				TXN_UNLOCK();
2979				iput(ip);
2980				TXN_LOCK();
2981			}
2982		}
2983		/* Add anon_list2 back to anon_list */
2984		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2985
2986		if (freezing(current)) {
2987			TXN_UNLOCK();
2988			try_to_freeze();
2989		} else {
2990			set_current_state(TASK_INTERRUPTIBLE);
2991			TXN_UNLOCK();
2992			schedule();
2993		}
2994	} while (!kthread_should_stop());
2995
2996	jfs_info("jfs_sync being killed");
2997	return 0;
2998}
2999
3000#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
3001static int jfs_txanchor_proc_show(struct seq_file *m, void *v)
3002{
3003	char *freewait;
3004	char *freelockwait;
3005	char *lowlockwait;
3006
3007	freewait =
3008	    waitqueue_active(&TxAnchor.freewait) ? "active" : "empty";
3009	freelockwait =
3010	    waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty";
3011	lowlockwait =
3012	    waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty";
3013
3014	seq_printf(m,
3015		       "JFS TxAnchor\n"
3016		       "============\n"
3017		       "freetid = %d\n"
3018		       "freewait = %s\n"
3019		       "freelock = %d\n"
3020		       "freelockwait = %s\n"
3021		       "lowlockwait = %s\n"
3022		       "tlocksInUse = %d\n"
3023		       "jfs_tlocks_low = %d\n"
3024		       "unlock_queue is %sempty\n",
3025		       TxAnchor.freetid,
3026		       freewait,
3027		       TxAnchor.freelock,
3028		       freelockwait,
3029		       lowlockwait,
3030		       TxAnchor.tlocksInUse,
3031		       jfs_tlocks_low,
3032		       list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
3033	return 0;
3034}
3035
3036static int jfs_txanchor_proc_open(struct inode *inode, struct file *file)
3037{
3038	return single_open(file, jfs_txanchor_proc_show, NULL);
3039}
3040
3041const struct file_operations jfs_txanchor_proc_fops = {
3042	.open		= jfs_txanchor_proc_open,
3043	.read		= seq_read,
3044	.llseek		= seq_lseek,
3045	.release	= single_release,
3046};
3047#endif
3048
3049#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
3050static int jfs_txstats_proc_show(struct seq_file *m, void *v)
3051{
3052	seq_printf(m,
3053		       "JFS TxStats\n"
3054		       "===========\n"
3055		       "calls to txBegin = %d\n"
3056		       "txBegin blocked by sync barrier = %d\n"
3057		       "txBegin blocked by tlocks low = %d\n"
3058		       "txBegin blocked by no free tid = %d\n"
3059		       "calls to txBeginAnon = %d\n"
3060		       "txBeginAnon blocked by sync barrier = %d\n"
3061		       "txBeginAnon blocked by tlocks low = %d\n"
3062		       "calls to txLockAlloc = %d\n"
3063		       "tLockAlloc blocked by no free lock = %d\n",
3064		       TxStat.txBegin,
3065		       TxStat.txBegin_barrier,
3066		       TxStat.txBegin_lockslow,
3067		       TxStat.txBegin_freetid,
3068		       TxStat.txBeginAnon,
3069		       TxStat.txBeginAnon_barrier,
3070		       TxStat.txBeginAnon_lockslow,
3071		       TxStat.txLockAlloc,
3072		       TxStat.txLockAlloc_freelock);
3073	return 0;
3074}
3075
3076static int jfs_txstats_proc_open(struct inode *inode, struct file *file)
3077{
3078	return single_open(file, jfs_txstats_proc_show, NULL);
3079}
3080
3081const struct file_operations jfs_txstats_proc_fops = {
3082	.open		= jfs_txstats_proc_open,
3083	.read		= seq_read,
3084	.llseek		= seq_lseek,
3085	.release	= single_release,
3086};
3087#endif
v6.8
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *   Copyright (C) International Business Machines Corp., 2000-2005
   4 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   5 */
   6
   7/*
   8 *	jfs_txnmgr.c: transaction manager
   9 *
  10 * notes:
  11 * transaction starts with txBegin() and ends with txCommit()
  12 * or txAbort().
  13 *
  14 * tlock is acquired at the time of update;
  15 * (obviate scan at commit time for xtree and dtree)
  16 * tlock and mp points to each other;
  17 * (no hashlist for mp -> tlock).
  18 *
  19 * special cases:
  20 * tlock on in-memory inode:
  21 * in-place tlock in the in-memory inode itself;
  22 * converted to page lock by iWrite() at commit time.
  23 *
  24 * tlock during write()/mmap() under anonymous transaction (tid = 0):
  25 * transferred (?) to transaction at commit time.
  26 *
  27 * use the page itself to update allocation maps
  28 * (obviate intermediate replication of allocation/deallocation data)
  29 * hold on to mp+lock thru update of maps
  30 */
  31
  32#include <linux/fs.h>
  33#include <linux/vmalloc.h>
  34#include <linux/completion.h>
  35#include <linux/freezer.h>
  36#include <linux/module.h>
  37#include <linux/moduleparam.h>
  38#include <linux/kthread.h>
  39#include <linux/seq_file.h>
  40#include "jfs_incore.h"
  41#include "jfs_inode.h"
  42#include "jfs_filsys.h"
  43#include "jfs_metapage.h"
  44#include "jfs_dinode.h"
  45#include "jfs_imap.h"
  46#include "jfs_dmap.h"
  47#include "jfs_superblock.h"
  48#include "jfs_debug.h"
  49
  50/*
  51 *	transaction management structures
  52 */
  53static struct {
  54	int freetid;		/* index of a free tid structure */
  55	int freelock;		/* index first free lock word */
  56	wait_queue_head_t freewait;	/* eventlist of free tblock */
  57	wait_queue_head_t freelockwait;	/* eventlist of free tlock */
  58	wait_queue_head_t lowlockwait;	/* eventlist of ample tlocks */
  59	int tlocksInUse;	/* Number of tlocks in use */
  60	spinlock_t LazyLock;	/* synchronize sync_queue & unlock_queue */
  61/*	struct tblock *sync_queue; * Transactions waiting for data sync */
  62	struct list_head unlock_queue;	/* Txns waiting to be released */
  63	struct list_head anon_list;	/* inodes having anonymous txns */
  64	struct list_head anon_list2;	/* inodes having anonymous txns
  65					   that couldn't be sync'ed */
  66} TxAnchor;
  67
  68int jfs_tlocks_low;		/* Indicates low number of available tlocks */
  69
  70#ifdef CONFIG_JFS_STATISTICS
  71static struct {
  72	uint txBegin;
  73	uint txBegin_barrier;
  74	uint txBegin_lockslow;
  75	uint txBegin_freetid;
  76	uint txBeginAnon;
  77	uint txBeginAnon_barrier;
  78	uint txBeginAnon_lockslow;
  79	uint txLockAlloc;
  80	uint txLockAlloc_freelock;
  81} TxStat;
  82#endif
  83
  84static int nTxBlock = -1;	/* number of transaction blocks */
  85module_param(nTxBlock, int, 0);
  86MODULE_PARM_DESC(nTxBlock,
  87		 "Number of transaction blocks (max:65536)");
  88
  89static int nTxLock = -1;	/* number of transaction locks */
  90module_param(nTxLock, int, 0);
  91MODULE_PARM_DESC(nTxLock,
  92		 "Number of transaction locks (max:65536)");
  93
  94struct tblock *TxBlock;	/* transaction block table */
  95static int TxLockLWM;	/* Low water mark for number of txLocks used */
  96static int TxLockHWM;	/* High water mark for number of txLocks used */
  97static int TxLockVHWM;	/* Very High water mark */
  98struct tlock *TxLock;	/* transaction lock table */
  99
 100/*
 101 *	transaction management lock
 102 */
 103static DEFINE_SPINLOCK(jfsTxnLock);
 104
 105#define TXN_LOCK()		spin_lock(&jfsTxnLock)
 106#define TXN_UNLOCK()		spin_unlock(&jfsTxnLock)
 107
 108#define LAZY_LOCK_INIT()	spin_lock_init(&TxAnchor.LazyLock)
 109#define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
 110#define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)
 111
 112static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
 113static int jfs_commit_thread_waking;
 114
 115/*
 116 * Retry logic exists outside these macros to protect from spurious wakeups.
 117 */
 118static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
 119{
 120	DECLARE_WAITQUEUE(wait, current);
 121
 122	add_wait_queue(event, &wait);
 123	set_current_state(TASK_UNINTERRUPTIBLE);
 124	TXN_UNLOCK();
 125	io_schedule();
 126	remove_wait_queue(event, &wait);
 127}
 128
 129#define TXN_SLEEP(event)\
 130{\
 131	TXN_SLEEP_DROP_LOCK(event);\
 132	TXN_LOCK();\
 133}
 134
 135#define TXN_WAKEUP(event) wake_up_all(event)
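
/*
 * Illustrative sketch (not part of the JFS driver): the caller-side
 * retry the comment above refers to.  TXN_SLEEP() may return on a
 * spurious or stale wakeup, so callers such as txLockAlloc() below
 * re-test their condition in a while loop; the user-space analogue is
 * the canonical condition-variable wait loop.  Build with -pthread.
 */
#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int resource_free;			/* condition being waited on */

static void wait_for_resource(void)
{
	pthread_mutex_lock(&lock);
	while (!resource_free)			/* re-test after every wakeup */
		pthread_cond_wait(&cond, &lock);
	resource_free = 0;			/* claim it */
	pthread_mutex_unlock(&lock);
}

static void release_resource(void)
{
	pthread_mutex_lock(&lock);
	resource_free = 1;
	pthread_cond_broadcast(&cond);		/* analogue of TXN_WAKEUP() */
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	release_resource();
	wait_for_resource();
	return 0;
}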
 136
 137/*
 138 *	statistics
 139 */
 140static struct {
 141	tid_t maxtid;		/* 4: biggest tid ever used */
 142	lid_t maxlid;		/* 4: biggest lid ever used */
 143	int ntid;		/* 4: # of transactions performed */
 144	int nlid;		/* 4: # of tlocks acquired */
 145	int waitlock;		/* 4: # of tlock wait */
 146} stattx;
 147
 148/*
 149 * forward references
 150 */
 151static void diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd,
 152		struct tlock *tlck, struct commit *cd);
 153static void dataLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd,
 154		struct tlock *tlck);
 155static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 156		struct tlock * tlck);
 157static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 158		struct tlock * tlck);
 159static void txAllocPMap(struct inode *ip, struct maplock * maplock,
 160		struct tblock * tblk);
 161static void txForce(struct tblock * tblk);
 162static void txLog(struct jfs_log *log, struct tblock *tblk,
 163		struct commit *cd);
 164static void txUpdateMap(struct tblock * tblk);
 165static void txRelease(struct tblock * tblk);
 166static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 167	   struct tlock * tlck);
 168static void LogSyncRelease(struct metapage * mp);
 169
 170/*
 171 *		transaction block/lock management
 172 *		---------------------------------
 173 */
 174
 175/*
 176 * Get a transaction lock from the free list.  If the number in use is
 177 * greater than the high water mark, wake up the sync daemon.  This should
 178 * free some anonymous transaction locks.  (TXN_LOCK must be held.)
 179 */
 180static lid_t txLockAlloc(void)
 181{
 182	lid_t lid;
 183
 184	INCREMENT(TxStat.txLockAlloc);
 185	if (!TxAnchor.freelock) {
 186		INCREMENT(TxStat.txLockAlloc_freelock);
 187	}
 188
 189	while (!(lid = TxAnchor.freelock))
 190		TXN_SLEEP(&TxAnchor.freelockwait);
 191	TxAnchor.freelock = TxLock[lid].next;
 192	HIGHWATERMARK(stattx.maxlid, lid);
 193	if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) {
 194		jfs_info("txLockAlloc tlocks low");
 195		jfs_tlocks_low = 1;
 196		wake_up_process(jfsSyncThread);
 197	}
 198
 199	return lid;
 200}
 201
 202static void txLockFree(lid_t lid)
 203{
 204	TxLock[lid].tid = 0;
 205	TxLock[lid].next = TxAnchor.freelock;
 206	TxAnchor.freelock = lid;
 207	TxAnchor.tlocksInUse--;
 208	if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) {
 209		jfs_info("txLockFree jfs_tlocks_low no more");
 210		jfs_tlocks_low = 0;
 211		TXN_WAKEUP(&TxAnchor.lowlockwait);
 212	}
 213	TXN_WAKEUP(&TxAnchor.freelockwait);
 214}
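
/*
 * Illustrative sketch (not part of the JFS driver): the hysteresis that
 * txLockAlloc()/txLockFree() implement around the tlock pool.  The
 * "low" flag is raised once usage crosses the high-water mark and only
 * cleared again below the low-water mark, so the sync daemon is not
 * woken repeatedly while usage hovers near a single threshold.  The
 * marks here are made-up constants; in txInit() below they are 40% and
 * 70% of nTxLock.
 */
#include <assert.h>

#define LWM	40
#define HWM	70

static int in_use, low_flag;

static void alloc_one(void)
{
	if (++in_use > HWM)
		low_flag = 1;		/* would wake the sync daemon */
}

static void free_one(void)
{
	if (--in_use < LWM)
		low_flag = 0;
}

int main(void)
{
	int i;

	for (i = 0; i < HWM + 1; i++)
		alloc_one();
	assert(low_flag);		/* crossed the high-water mark */
	free_one();
	assert(low_flag);		/* still flagged between the marks */
	while (in_use >= LWM)
		free_one();
	assert(!low_flag);		/* cleared below the low-water mark */
	return 0;
}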
 215
 216/*
 217 * NAME:	txInit()
 218 *
 219 * FUNCTION:	initialize transaction management structures
 220 *
 221 * RETURN:
 222 *
 223 * serialization: single thread at jfs_init()
 224 */
 225int txInit(void)
 226{
 227	int k, size;
 228	struct sysinfo si;
 229
 230	/* Set defaults for nTxLock and nTxBlock if unset */
 231
 232	if (nTxLock == -1) {
 233		if (nTxBlock == -1) {
 234			/* Base default on memory size */
 235			si_meminfo(&si);
 236			if (si.totalram > (256 * 1024)) /* 1 GB */
 237				nTxLock = 64 * 1024;
 238			else
 239				nTxLock = si.totalram >> 2;
 240		} else if (nTxBlock > (8 * 1024))
 241			nTxLock = 64 * 1024;
 242		else
 243			nTxLock = nTxBlock << 3;
 244	}
 245	if (nTxBlock == -1)
 246		nTxBlock = nTxLock >> 3;
 247
 248	/* Verify tunable parameters */
 249	if (nTxBlock < 16)
 250		nTxBlock = 16;	/* No one should set it this low */
 251	if (nTxBlock > 65536)
 252		nTxBlock = 65536;
 253	if (nTxLock < 256)
 254		nTxLock = 256;	/* No one should set it this low */
 255	if (nTxLock > 65536)
 256		nTxLock = 65536;
 257
 258	printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n",
 259	       nTxBlock, nTxLock);
 260	/*
 261	 * initialize transaction block (tblock) table
 262	 *
 263	 * transaction id (tid) = tblock index
 264	 * tid = 0 is reserved.
 265	 */
 266	TxLockLWM = (nTxLock * 4) / 10;
 267	TxLockHWM = (nTxLock * 7) / 10;
 268	TxLockVHWM = (nTxLock * 8) / 10;
 269
 270	size = sizeof(struct tblock) * nTxBlock;
 271	TxBlock = vmalloc(size);
 272	if (TxBlock == NULL)
 273		return -ENOMEM;
 274
 275	for (k = 1; k < nTxBlock - 1; k++) {
 276		TxBlock[k].next = k + 1;
 277		init_waitqueue_head(&TxBlock[k].gcwait);
 278		init_waitqueue_head(&TxBlock[k].waitor);
 279	}
 280	TxBlock[k].next = 0;
 281	init_waitqueue_head(&TxBlock[k].gcwait);
 282	init_waitqueue_head(&TxBlock[k].waitor);
 283
 284	TxAnchor.freetid = 1;
 285	init_waitqueue_head(&TxAnchor.freewait);
 286
 287	stattx.maxtid = 1;	/* statistics */
 288
 289	/*
 290	 * initialize transaction lock (tlock) table
 291	 *
 292	 * transaction lock id = tlock index
 293	 * tlock id = 0 is reserved.
 294	 */
 295	size = sizeof(struct tlock) * nTxLock;
 296	TxLock = vmalloc(size);
 297	if (TxLock == NULL) {
 298		vfree(TxBlock);
 299		return -ENOMEM;
 300	}
 301
 302	/* initialize tlock table */
 303	for (k = 1; k < nTxLock - 1; k++)
 304		TxLock[k].next = k + 1;
 305	TxLock[k].next = 0;
 306	init_waitqueue_head(&TxAnchor.freelockwait);
 307	init_waitqueue_head(&TxAnchor.lowlockwait);
 308
 309	TxAnchor.freelock = 1;
 310	TxAnchor.tlocksInUse = 0;
 311	INIT_LIST_HEAD(&TxAnchor.anon_list);
 312	INIT_LIST_HEAD(&TxAnchor.anon_list2);
 313
 314	LAZY_LOCK_INIT();
 315	INIT_LIST_HEAD(&TxAnchor.unlock_queue);
 316
 317	stattx.maxlid = 1;	/* statistics */
 318
 319	return 0;
 320}
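
/*
 * Illustrative sketch (not part of the JFS driver): the default sizing
 * arithmetic above, worked for a machine with 512 MiB of RAM and 4 KiB
 * pages (si.totalram = 131072 pages, an assumption for this example).
 * That is below the 256*1024-page (~1 GiB) threshold, so
 * nTxLock = 131072 >> 2 = 32768 and nTxBlock = 32768 >> 3 = 4096; the
 * tlock watermarks follow as 40/70/80 percent of nTxLock.
 */
#include <stdio.h>

int main(void)
{
	long totalram = 131072;		/* pages, assuming 4 KiB each */
	int ntxlock, ntxblock;

	if (totalram > 256 * 1024)
		ntxlock = 64 * 1024;
	else
		ntxlock = totalram >> 2;
	ntxblock = ntxlock >> 3;

	printf("nTxLock=%d nTxBlock=%d LWM=%d HWM=%d VHWM=%d\n",
	       ntxlock, ntxblock,
	       (ntxlock * 4) / 10, (ntxlock * 7) / 10, (ntxlock * 8) / 10);
	/* nTxLock=32768 nTxBlock=4096 LWM=13107 HWM=22937 VHWM=26214 */
	return 0;
}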
 321
 322/*
 323 * NAME:	txExit()
 324 *
 325 * FUNCTION:	clean up when module is unloaded
 326 */
 327void txExit(void)
 328{
 329	vfree(TxLock);
 330	TxLock = NULL;
 331	vfree(TxBlock);
 332	TxBlock = NULL;
 333}
 334
 335/*
 336 * NAME:	txBegin()
 337 *
 338 * FUNCTION:	start a transaction.
 339 *
 340 * PARAMETER:	sb	- superblock
 341 *		flag	- force for nested tx;
 342 *
 343 * RETURN:	tid	- transaction id
 344 *
  345 * note: the force flag allows starting a tx for a nested tx
 346 * to prevent deadlock on logsync barrier;
 347 */
 348tid_t txBegin(struct super_block *sb, int flag)
 349{
 350	tid_t t;
 351	struct tblock *tblk;
 352	struct jfs_log *log;
 353
 354	jfs_info("txBegin: flag = 0x%x", flag);
 355	log = JFS_SBI(sb)->log;
 356
 357	if (!log) {
 358		jfs_error(sb, "read-only filesystem\n");
 359		return 0;
 360	}
 361
 362	TXN_LOCK();
 363
 364	INCREMENT(TxStat.txBegin);
 365
 366      retry:
 367	if (!(flag & COMMIT_FORCE)) {
 368		/*
 369		 * synchronize with logsync barrier
 370		 */
 371		if (test_bit(log_SYNCBARRIER, &log->flag) ||
 372		    test_bit(log_QUIESCE, &log->flag)) {
 373			INCREMENT(TxStat.txBegin_barrier);
 374			TXN_SLEEP(&log->syncwait);
 375			goto retry;
 376		}
 377	}
 378	if (flag == 0) {
 379		/*
 380		 * Don't begin transaction if we're getting starved for tlocks
 381		 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
 382		 * free tlocks)
 383		 */
 384		if (TxAnchor.tlocksInUse > TxLockVHWM) {
 385			INCREMENT(TxStat.txBegin_lockslow);
 386			TXN_SLEEP(&TxAnchor.lowlockwait);
 387			goto retry;
 388		}
 389	}
 390
 391	/*
 392	 * allocate transaction id/block
 393	 */
 394	if ((t = TxAnchor.freetid) == 0) {
 395		jfs_info("txBegin: waiting for free tid");
 396		INCREMENT(TxStat.txBegin_freetid);
 397		TXN_SLEEP(&TxAnchor.freewait);
 398		goto retry;
 399	}
 400
 401	tblk = tid_to_tblock(t);
 402
 403	if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) {
 404		/* Don't let a non-forced transaction take the last tblk */
 405		jfs_info("txBegin: waiting for free tid");
 406		INCREMENT(TxStat.txBegin_freetid);
 407		TXN_SLEEP(&TxAnchor.freewait);
 408		goto retry;
 409	}
 410
 411	TxAnchor.freetid = tblk->next;
 412
 413	/*
 414	 * initialize transaction
 415	 */
 416
 417	/*
 418	 * We can't zero the whole thing or we screw up another thread being
 419	 * awakened after sleeping on tblk->waitor
 420	 *
 421	 * memset(tblk, 0, sizeof(struct tblock));
 422	 */
 423	tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0;
 424
 425	tblk->sb = sb;
 426	++log->logtid;
 427	tblk->logtid = log->logtid;
 428
 429	++log->active;
 430
 431	HIGHWATERMARK(stattx.maxtid, t);	/* statistics */
 432	INCREMENT(stattx.ntid);	/* statistics */
 433
 434	TXN_UNLOCK();
 435
 436	jfs_info("txBegin: returning tid = %d", t);
 437
 438	return t;
 439}
 440
 441/*
 442 * NAME:	txBeginAnon()
 443 *
 444 * FUNCTION:	start an anonymous transaction.
 445 *		Blocks if logsync or available tlocks are low to prevent
 446 *		anonymous tlocks from depleting supply.
 447 *
 448 * PARAMETER:	sb	- superblock
 449 *
 450 * RETURN:	none
 451 */
 452void txBeginAnon(struct super_block *sb)
 453{
 454	struct jfs_log *log;
 455
 456	log = JFS_SBI(sb)->log;
 457
 458	TXN_LOCK();
 459	INCREMENT(TxStat.txBeginAnon);
 460
 461      retry:
 462	/*
 463	 * synchronize with logsync barrier
 464	 */
 465	if (test_bit(log_SYNCBARRIER, &log->flag) ||
 466	    test_bit(log_QUIESCE, &log->flag)) {
 467		INCREMENT(TxStat.txBeginAnon_barrier);
 468		TXN_SLEEP(&log->syncwait);
 469		goto retry;
 470	}
 471
 472	/*
 473	 * Don't begin transaction if we're getting starved for tlocks
 474	 */
 475	if (TxAnchor.tlocksInUse > TxLockVHWM) {
 476		INCREMENT(TxStat.txBeginAnon_lockslow);
 477		TXN_SLEEP(&TxAnchor.lowlockwait);
 478		goto retry;
 479	}
 480	TXN_UNLOCK();
 481}
 482
 483/*
 484 *	txEnd()
 485 *
 486 * function: free specified transaction block.
 487 *
 488 *	logsync barrier processing:
 489 *
 490 * serialization:
 491 */
 492void txEnd(tid_t tid)
 493{
 494	struct tblock *tblk = tid_to_tblock(tid);
 495	struct jfs_log *log;
 496
 497	jfs_info("txEnd: tid = %d", tid);
 498	TXN_LOCK();
 499
 500	/*
 501	 * wakeup transactions waiting on the page locked
 502	 * by the current transaction
 503	 */
 504	TXN_WAKEUP(&tblk->waitor);
 505
 506	log = JFS_SBI(tblk->sb)->log;
 507
 508	/*
 509	 * Lazy commit thread can't free this guy until we mark it UNLOCKED,
 510	 * otherwise, we would be left with a transaction that may have been
 511	 * reused.
 512	 *
 513	 * Lazy commit thread will turn off tblkGC_LAZY before calling this
 514	 * routine.
 515	 */
 516	if (tblk->flag & tblkGC_LAZY) {
 517		jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk);
 518		TXN_UNLOCK();
 519
 520		spin_lock_irq(&log->gclock);	// LOGGC_LOCK
 521		tblk->flag |= tblkGC_UNLOCKED;
 522		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
 523		return;
 524	}
 525
 526	jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk);
 527
 528	assert(tblk->next == 0);
 529
 530	/*
 531	 * insert tblock back on freelist
 532	 */
 533	tblk->next = TxAnchor.freetid;
 534	TxAnchor.freetid = tid;
 535
 536	/*
 537	 * mark the tblock not active
 538	 */
 539	if (--log->active == 0) {
 540		clear_bit(log_FLUSH, &log->flag);
 541
 542		/*
 543		 * synchronize with logsync barrier
 544		 */
 545		if (test_bit(log_SYNCBARRIER, &log->flag)) {
 546			TXN_UNLOCK();
 547
 548			/* write dirty metadata & forward log syncpt */
 549			jfs_syncpt(log, 1);
 550
 551			jfs_info("log barrier off: 0x%x", log->lsn);
 552
 553			/* enable new transactions start */
 554			clear_bit(log_SYNCBARRIER, &log->flag);
 555
  556			/* wake up all waiters for logsync barrier */
 557			TXN_WAKEUP(&log->syncwait);
 558
 559			goto wakeup;
 560		}
 561	}
 562
 563	TXN_UNLOCK();
 564wakeup:
 565	/*
  566	 * wake up all waiters for a free tblock
 567	 */
 568	TXN_WAKEUP(&TxAnchor.freewait);
 569}
 570
 571/*
 572 *	txLock()
 573 *
 574 * function: acquire a transaction lock on the specified <mp>
 575 *
 576 * parameter:
 577 *
 578 * return:	transaction lock id
 579 *
 580 * serialization:
 581 */
 582struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 583		     int type)
 584{
 585	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 586	int dir_xtree = 0;
 587	lid_t lid;
 588	tid_t xtid;
 589	struct tlock *tlck;
 590	struct xtlock *xtlck;
 591	struct linelock *linelock;
 592	xtpage_t *p;
 593	struct tblock *tblk;
 594
 595	TXN_LOCK();
 596
 597	if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) &&
 598	    !(mp->xflag & COMMIT_PAGE)) {
 599		/*
 600		 * Directory inode is special.  It can have both an xtree tlock
 601		 * and a dtree tlock associated with it.
 602		 */
 603		dir_xtree = 1;
 604		lid = jfs_ip->xtlid;
 605	} else
 606		lid = mp->lid;
 607
 608	/* is page not locked by a transaction ? */
 609	if (lid == 0)
 610		goto allocateLock;
 611
 612	jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid);
 613
 614	/* is page locked by the requester transaction ? */
 615	tlck = lid_to_tlock(lid);
 616	if ((xtid = tlck->tid) == tid) {
 617		TXN_UNLOCK();
 618		goto grantLock;
 619	}
 620
 621	/*
 622	 * is page locked by anonymous transaction/lock ?
 623	 *
 624	 * (page update without transaction (i.e., file write) is
 625	 * locked under anonymous transaction tid = 0:
 626	 * anonymous tlocks maintained on anonymous tlock list of
 627	 * the inode of the page and available to all anonymous
 628	 * transactions until txCommit() time at which point
 629	 * they are transferred to the transaction tlock list of
 630	 * the committing transaction of the inode)
 631	 */
 632	if (xtid == 0) {
 633		tlck->tid = tid;
 634		TXN_UNLOCK();
 635		tblk = tid_to_tblock(tid);
 636		/*
 637		 * The order of the tlocks in the transaction is important
 638		 * (during truncate, child xtree pages must be freed before
 639		 * parent's tlocks change the working map).
 640		 * Take tlock off anonymous list and add to tail of
 641		 * transaction list
 642		 *
 643		 * Note:  We really need to get rid of the tid & lid and
 644		 * use list_head's.  This code is getting UGLY!
 645		 */
 646		if (jfs_ip->atlhead == lid) {
 647			if (jfs_ip->atltail == lid) {
 648				/* only anonymous txn.
 649				 * Remove from anon_list
 650				 */
 651				TXN_LOCK();
 652				list_del_init(&jfs_ip->anon_inode_list);
 653				TXN_UNLOCK();
 654			}
 655			jfs_ip->atlhead = tlck->next;
 656		} else {
 657			lid_t last;
 658			for (last = jfs_ip->atlhead;
 659			     lid_to_tlock(last)->next != lid;
 660			     last = lid_to_tlock(last)->next) {
 661				assert(last);
 662			}
 663			lid_to_tlock(last)->next = tlck->next;
 664			if (jfs_ip->atltail == lid)
 665				jfs_ip->atltail = last;
 666		}
 667
 668		/* insert the tlock at tail of transaction tlock list */
 669
 670		if (tblk->next)
 671			lid_to_tlock(tblk->last)->next = lid;
 672		else
 673			tblk->next = lid;
 674		tlck->next = 0;
 675		tblk->last = lid;
 676
 677		goto grantLock;
 678	}
 679
 680	goto waitLock;
 681
 682	/*
 683	 * allocate a tlock
 684	 */
 685      allocateLock:
 686	lid = txLockAlloc();
 687	tlck = lid_to_tlock(lid);
 688
 689	/*
 690	 * initialize tlock
 691	 */
 692	tlck->tid = tid;
 693
 694	TXN_UNLOCK();
 695
 696	/* mark tlock for meta-data page */
 697	if (mp->xflag & COMMIT_PAGE) {
 698
 699		tlck->flag = tlckPAGELOCK;
 700
 701		/* mark the page dirty and nohomeok */
 702		metapage_nohomeok(mp);
 703
 704		jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
 705			 mp, mp->nohomeok, tid, tlck);
 706
 707		/* if anonymous transaction, and buffer is on the group
 708		 * commit synclist, mark inode to show this.  This will
 709		 * prevent the buffer from being marked nohomeok for too
 710		 * long a time.
 711		 */
 712		if ((tid == 0) && mp->lsn)
 713			set_cflag(COMMIT_Synclist, ip);
 714	}
 715	/* mark tlock for in-memory inode */
 716	else
 717		tlck->flag = tlckINODELOCK;
 718
 719	if (S_ISDIR(ip->i_mode))
 720		tlck->flag |= tlckDIRECTORY;
 721
 722	tlck->type = 0;
 723
 724	/* bind the tlock and the page */
 725	tlck->ip = ip;
 726	tlck->mp = mp;
 727	if (dir_xtree)
 728		jfs_ip->xtlid = lid;
 729	else
 730		mp->lid = lid;
 731
 732	/*
 733	 * enqueue transaction lock to transaction/inode
 734	 */
 735	/* insert the tlock at tail of transaction tlock list */
 736	if (tid) {
 737		tblk = tid_to_tblock(tid);
 738		if (tblk->next)
 739			lid_to_tlock(tblk->last)->next = lid;
 740		else
 741			tblk->next = lid;
 742		tlck->next = 0;
 743		tblk->last = lid;
 744	}
 745	/* anonymous transaction:
 746	 * insert the tlock at head of inode anonymous tlock list
 747	 */
 748	else {
 749		tlck->next = jfs_ip->atlhead;
 750		jfs_ip->atlhead = lid;
 751		if (tlck->next == 0) {
 752			/* This inode's first anonymous transaction */
 753			jfs_ip->atltail = lid;
 754			TXN_LOCK();
 755			list_add_tail(&jfs_ip->anon_inode_list,
 756				      &TxAnchor.anon_list);
 757			TXN_UNLOCK();
 758		}
 759	}
 760
 761	/* initialize type dependent area for linelock */
 762	linelock = (struct linelock *) & tlck->lock;
 763	linelock->next = 0;
 764	linelock->flag = tlckLINELOCK;
 765	linelock->maxcnt = TLOCKSHORT;
 766	linelock->index = 0;
 767
 768	switch (type & tlckTYPE) {
 769	case tlckDTREE:
 770		linelock->l2linesize = L2DTSLOTSIZE;
 771		break;
 772
 773	case tlckXTREE:
 774		linelock->l2linesize = L2XTSLOTSIZE;
 775
 776		xtlck = (struct xtlock *) linelock;
 777		xtlck->header.offset = 0;
 778		xtlck->header.length = 2;
 779
 780		if (type & tlckNEW) {
 781			xtlck->lwm.offset = XTENTRYSTART;
 782		} else {
 783			if (mp->xflag & COMMIT_PAGE)
 784				p = (xtpage_t *) mp->data;
 785			else
 786				p = (xtpage_t *) &jfs_ip->i_xtroot;
 787			xtlck->lwm.offset =
 788			    le16_to_cpu(p->header.nextindex);
 789		}
 790		xtlck->lwm.length = 0;	/* ! */
 791		xtlck->twm.offset = 0;
 792		xtlck->hwm.offset = 0;
 793
 794		xtlck->index = 2;
 795		break;
 796
 797	case tlckINODE:
 798		linelock->l2linesize = L2INODESLOTSIZE;
 799		break;
 800
 801	case tlckDATA:
 802		linelock->l2linesize = L2DATASLOTSIZE;
 803		break;
 804
 805	default:
 806		jfs_err("UFO tlock:0x%p", tlck);
 807	}
 808
 809	/*
 810	 * update tlock vector
 811	 */
 812      grantLock:
 813	tlck->type |= type;
 814
 815	return tlck;
 816
 817	/*
 818	 * page is being locked by another transaction:
 819	 */
 820      waitLock:
 821	/* Only locks on ipimap or ipaimap should reach here */
 822	/* assert(jfs_ip->fileset == AGGREGATE_I); */
 823	if (jfs_ip->fileset != AGGREGATE_I) {
 824		printk(KERN_ERR "txLock: trying to lock locked page!");
 825		print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
 826			       ip, sizeof(*ip), 0);
 827		print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
 828			       mp, sizeof(*mp), 0);
 829		print_hex_dump(KERN_ERR, "Locker's tblock: ",
 830			       DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
 831			       sizeof(struct tblock), 0);
 832		print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
 833			       tlck, sizeof(*tlck), 0);
 834		BUG();
 835	}
 836	INCREMENT(stattx.waitlock);	/* statistics */
 837	TXN_UNLOCK();
 838	release_metapage(mp);
 839	TXN_LOCK();
 840	xtid = tlck->tid;	/* reacquire after dropping TXN_LOCK */
 841
 842	jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
 843		 tid, xtid, lid);
 844
 845	/* Recheck everything since dropping TXN_LOCK */
 846	if (xtid && (tlck->mp == mp) && (mp->lid == lid))
 847		TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
 848	else
 849		TXN_UNLOCK();
 850	jfs_info("txLock: awakened     tid = %d, lid = %d", tid, lid);
 851
 852	return NULL;
 853}
 854
 855/*
 856 * NAME:	txRelease()
 857 *
 858 * FUNCTION:	Release buffers associated with transaction locks, but don't
  859 *		mark homeok yet.  This allows other transactions to modify
 860 *		buffers, but won't let them go to disk until commit record
 861 *		actually gets written.
 862 *
 863 * PARAMETER:
 864 *		tblk	-
 865 *
 866 * RETURN:	Errors from subroutines.
 867 */
 868static void txRelease(struct tblock * tblk)
 869{
 870	struct metapage *mp;
 871	lid_t lid;
 872	struct tlock *tlck;
 873
 874	TXN_LOCK();
 875
 876	for (lid = tblk->next; lid; lid = tlck->next) {
 877		tlck = lid_to_tlock(lid);
 878		if ((mp = tlck->mp) != NULL &&
 879		    (tlck->type & tlckBTROOT) == 0) {
 880			assert(mp->xflag & COMMIT_PAGE);
 881			mp->lid = 0;
 882		}
 883	}
 884
 885	/*
 886	 * wakeup transactions waiting on a page locked
 887	 * by the current transaction
 888	 */
 889	TXN_WAKEUP(&tblk->waitor);
 890
 891	TXN_UNLOCK();
 892}
 893
 894/*
 895 * NAME:	txUnlock()
 896 *
 897 * FUNCTION:	Initiates pageout of pages modified by tid in journalled
 898 *		objects and frees their lockwords.
 899 */
 900static void txUnlock(struct tblock * tblk)
 901{
 902	struct tlock *tlck;
 903	struct linelock *linelock;
 904	lid_t lid, next, llid, k;
 905	struct metapage *mp;
 906	struct jfs_log *log;
 907	int difft, diffp;
 908	unsigned long flags;
 909
 910	jfs_info("txUnlock: tblk = 0x%p", tblk);
 911	log = JFS_SBI(tblk->sb)->log;
 912
 913	/*
 914	 * mark page under tlock homeok (its log has been written):
 915	 */
 916	for (lid = tblk->next; lid; lid = next) {
 917		tlck = lid_to_tlock(lid);
 918		next = tlck->next;
 919
 920		jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck);
 921
 922		/* unbind page from tlock */
 923		if ((mp = tlck->mp) != NULL &&
 924		    (tlck->type & tlckBTROOT) == 0) {
 925			assert(mp->xflag & COMMIT_PAGE);
 926
 927			/* hold buffer
 928			 */
 929			hold_metapage(mp);
 930
 931			assert(mp->nohomeok > 0);
 932			_metapage_homeok(mp);
 933
 934			/* inherit younger/larger clsn */
 935			LOGSYNC_LOCK(log, flags);
 936			if (mp->clsn) {
 937				logdiff(difft, tblk->clsn, log);
 938				logdiff(diffp, mp->clsn, log);
 939				if (difft > diffp)
 940					mp->clsn = tblk->clsn;
 941			} else
 942				mp->clsn = tblk->clsn;
 943			LOGSYNC_UNLOCK(log, flags);
 944
 945			assert(!(tlck->flag & tlckFREEPAGE));
 946
 947			put_metapage(mp);
 948		}
 949
 950		/* insert tlock, and linelock(s) of the tlock if any,
 951		 * at head of freelist
 952		 */
 953		TXN_LOCK();
 954
 955		llid = ((struct linelock *) & tlck->lock)->next;
 956		while (llid) {
 957			linelock = (struct linelock *) lid_to_tlock(llid);
 958			k = linelock->next;
 959			txLockFree(llid);
 960			llid = k;
 961		}
 962		txLockFree(lid);
 963
 964		TXN_UNLOCK();
 965	}
 966	tblk->next = tblk->last = 0;
 967
 968	/*
 969	 * remove tblock from logsynclist
 970	 * (allocation map pages inherited the lsn of tblk and
 971	 * have been inserted in the logsync list at txUpdateMap())
 972	 */
 973	if (tblk->lsn) {
 974		LOGSYNC_LOCK(log, flags);
 975		log->count--;
 976		list_del(&tblk->synclist);
 977		LOGSYNC_UNLOCK(log, flags);
 978	}
 979}
 980
 981/*
 982 *	txMaplock()
 983 *
 984 * function: allocate a transaction lock for freed page/entry;
 985 *	for freed page, maplock is used as xtlock/dtlock type;
 986 */
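/*
 * A minimal usage sketch (modeled on txEA() below; xaddr/xlen are
 * placeholders): the caller formats the returned tlock's lock area as
 * a pxd_lock describing the extent being freed or allocated -
 *
 *	tlck = txMaplock(tid, ip, tlckMAP);
 *	pxdlock = (struct pxd_lock *) &tlck->lock;
 *	pxdlock->flag = mlckFREEPXD;
 *	PXDaddress(&pxdlock->pxd, xaddr);
 *	PXDlength(&pxdlock->pxd, xlen);
 *	pxdlock->index = 1;
 */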
 987struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 988{
 989	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 990	lid_t lid;
 991	struct tblock *tblk;
 992	struct tlock *tlck;
 993	struct maplock *maplock;
 994
 995	TXN_LOCK();
 996
 997	/*
 998	 * allocate a tlock
 999	 */
1000	lid = txLockAlloc();
1001	tlck = lid_to_tlock(lid);
1002
1003	/*
1004	 * initialize tlock
1005	 */
1006	tlck->tid = tid;
1007
1008	/* bind the tlock and the object */
1009	tlck->flag = tlckINODELOCK;
1010	if (S_ISDIR(ip->i_mode))
1011		tlck->flag |= tlckDIRECTORY;
1012	tlck->ip = ip;
1013	tlck->mp = NULL;
1014
1015	tlck->type = type;
1016
1017	/*
1018	 * enqueue transaction lock to transaction/inode
1019	 */
1020	/* insert the tlock at tail of transaction tlock list */
1021	if (tid) {
1022		tblk = tid_to_tblock(tid);
1023		if (tblk->next)
1024			lid_to_tlock(tblk->last)->next = lid;
1025		else
1026			tblk->next = lid;
1027		tlck->next = 0;
1028		tblk->last = lid;
1029	}
1030	/* anonymous transaction:
1031	 * insert the tlock at head of inode anonymous tlock list
1032	 */
1033	else {
1034		tlck->next = jfs_ip->atlhead;
1035		jfs_ip->atlhead = lid;
1036		if (tlck->next == 0) {
1037			/* This inode's first anonymous transaction */
1038			jfs_ip->atltail = lid;
1039			list_add_tail(&jfs_ip->anon_inode_list,
1040				      &TxAnchor.anon_list);
1041		}
1042	}
1043
1044	TXN_UNLOCK();
1045
1046	/* initialize type dependent area for maplock */
1047	maplock = (struct maplock *) & tlck->lock;
1048	maplock->next = 0;
1049	maplock->maxcnt = 0;
1050	maplock->index = 0;
1051
1052	return tlck;
1053}
1054
1055/*
1056 *	txLinelock()
1057 *
1058 * function: allocate a transaction lock for log vector list
1059 */
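/*
 * The new linelock is spliced in after the linelock passed by the
 * caller (see the append at the end of the function), presumably once
 * a tlock's in-line linelock area has filled up; the callers live
 * outside this file.
 */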
1060struct linelock *txLinelock(struct linelock * tlock)
1061{
1062	lid_t lid;
1063	struct tlock *tlck;
1064	struct linelock *linelock;
1065
1066	TXN_LOCK();
1067
1068	/* allocate a TxLock structure */
1069	lid = txLockAlloc();
1070	tlck = lid_to_tlock(lid);
1071
1072	TXN_UNLOCK();
1073
1074	/* initialize linelock */
1075	linelock = (struct linelock *) tlck;
1076	linelock->next = 0;
1077	linelock->flag = tlckLINELOCK;
1078	linelock->maxcnt = TLOCKLONG;
1079	linelock->index = 0;
1080	if (tlck->flag & tlckDIRECTORY)
1081		linelock->flag |= tlckDIRECTORY;
1082
1083	/* append linelock after tlock */
1084	linelock->next = tlock->next;
1085	tlock->next = lid;
1086
1087	return linelock;
1088}
1089
1090/*
1091 *		transaction commit management
1092 *		-----------------------------
1093 */
1094
1095/*
1096 * NAME:	txCommit()
1097 *
1098 * FUNCTION:	commit the changes to the objects specified in
1099 *		clist.  For journalled segments only the
1100 *		changes of the caller are committed, i.e., by tid.
1101 *		for non-journalled segments the data are flushed to
1102 *		disk and then the change to the disk inode and indirect
1103 *		blocks committed (so blocks newly allocated to the
1104 *		segment will be made a part of the segment atomically).
1105 *
1106 *		all of the segments specified in clist must be in
1107 *		one file system. no more than 6 segments are needed
1108 *		to handle all unix svcs.
1109 *
1110 *		if the i_nlink field (i.e. disk inode link count)
1111 *		is zero, and the type of inode is a regular file or
1112 *		directory, or symbolic link, the inode is truncated
1113 *		to zero length. the truncation is committed but the
1114 *		VM resources are unaffected until it is closed (see
1115 *		iput and iclose).
1116 *
1117 * PARAMETER:
1118 *
1119 * RETURN:
1120 *
1121 * serialization:
1122 *		on entry the inode lock on each segment is assumed
1123 *		to be held.
1124 *
1125 * i/o error:
1126 */
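/*
 * Typical caller sequence (a sketch modeled on the in-file users
 * txQuiesce() and jfs_sync(); the flag value and locking shown are
 * illustrative, not the only valid combination):
 *
 *	tid = txBegin(ip->i_sb, COMMIT_INODE);
 *	mutex_lock(&JFS_IP(ip)->commit_mutex);
 *	txCommit(tid, 1, &ip, 0);
 *	txEnd(tid);
 *	mutex_unlock(&JFS_IP(ip)->commit_mutex);
 */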
1127int txCommit(tid_t tid,		/* transaction identifier */
1128	     int nip,		/* number of inodes to commit */
1129	     struct inode **iplist,	/* list of inode to commit */
1130	     int flag)
1131{
1132	int rc = 0;
1133	struct commit cd;
1134	struct jfs_log *log;
1135	struct tblock *tblk;
1136	struct lrd *lrd;
1137	struct inode *ip;
1138	struct jfs_inode_info *jfs_ip;
1139	int k, n;
1140	ino_t top;
1141	struct super_block *sb;
1142
1143	jfs_info("txCommit, tid = %d, flag = %d", tid, flag);
1144	/* is read-only file system ? */
1145	if (isReadOnly(iplist[0])) {
1146		rc = -EROFS;
1147		goto TheEnd;
1148	}
1149
1150	sb = cd.sb = iplist[0]->i_sb;
1151	cd.tid = tid;
1152
1153	if (tid == 0)
1154		tid = txBegin(sb, 0);
1155	tblk = tid_to_tblock(tid);
1156
1157	/*
1158	 * initialize commit structure
1159	 */
1160	log = JFS_SBI(sb)->log;
1161	cd.log = log;
1162
1163	/* initialize log record descriptor in commit */
1164	lrd = &cd.lrd;
1165	lrd->logtid = cpu_to_le32(tblk->logtid);
1166	lrd->backchain = 0;
1167
1168	tblk->xflag |= flag;
1169
1170	if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
1171		tblk->xflag |= COMMIT_LAZY;
1172	/*
1173	 *	prepare non-journaled objects for commit
1174	 *
1175	 * flush data pages of non-journaled file
1176	 * to prevent the file from getting uninitialized disk blocks
1177	 * in case of crash.
1178	 * (new blocks - )
1179	 */
1180	cd.iplist = iplist;
1181	cd.nip = nip;
1182
1183	/*
1184	 *	acquire transaction lock on (on-disk) inodes
1185	 *
1186	 * update on-disk inode from in-memory inode
1187	 * acquiring transaction locks for AFTER records
1188	 * on the on-disk inode of file object
1189	 *
1190	 * sort the inodes array by inode number in descending order
1191	 * to prevent deadlock when acquiring transaction lock
1192	 * of on-disk inodes on multiple on-disk inode pages by
1193	 * multiple concurrent transactions
1194	 */
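	/* selection pass: for each slot k, pull the largest remaining
	 * inode number forward so that iplist[] ends up in descending
	 * i_ino order
	 */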
1195	for (k = 0; k < cd.nip; k++) {
1196		top = (cd.iplist[k])->i_ino;
1197		for (n = k + 1; n < cd.nip; n++) {
1198			ip = cd.iplist[n];
1199			if (ip->i_ino > top) {
1200				top = ip->i_ino;
1201				cd.iplist[n] = cd.iplist[k];
1202				cd.iplist[k] = ip;
1203			}
1204		}
1205
1206		ip = cd.iplist[k];
1207		jfs_ip = JFS_IP(ip);
1208
1209		/*
1210		 * BUGBUG - This code has temporarily been removed.  The
1211		 * intent is to ensure that any file data is written before
1212		 * the metadata is committed to the journal.  This prevents
1213		 * uninitialized data from appearing in a file after the
1214		 * journal has been replayed.  (The uninitialized data
1215		 * could be sensitive data removed by another user.)
1216		 *
1217		 * The problem now is that we are holding the IWRITELOCK
1218		 * on the inode, and calling filemap_fdatawrite on an
1219		 * unmapped page will cause a deadlock in jfs_get_block.
1220		 *
1221		 * The long term solution is to pare down the use of
1222		 * IWRITELOCK.  We are currently holding it too long.
1223		 * We could also be smarter about which data pages need
1224		 * to be written before the transaction is committed and
1225		 * when we don't need to worry about it at all.
1226		 *
1227		 * if ((!S_ISDIR(ip->i_mode))
1228		 *    && (tblk->flag & COMMIT_DELETE) == 0)
1229		 *	filemap_write_and_wait(ip->i_mapping);
1230		 */
1231
1232		/*
1233		 * Mark inode as not dirty.  It will still be on the dirty
1234		 * inode list, but we'll know not to commit it again unless
1235		 * it gets marked dirty again
1236		 */
1237		clear_cflag(COMMIT_Dirty, ip);
1238
1239		/* inherit anonymous tlock(s) of inode */
1240		if (jfs_ip->atlhead) {
1241			lid_to_tlock(jfs_ip->atltail)->next = tblk->next;
1242			tblk->next = jfs_ip->atlhead;
1243			if (!tblk->last)
1244				tblk->last = jfs_ip->atltail;
1245			jfs_ip->atlhead = jfs_ip->atltail = 0;
1246			TXN_LOCK();
1247			list_del_init(&jfs_ip->anon_inode_list);
1248			TXN_UNLOCK();
1249		}
1250
1251		/*
1252		 * acquire transaction lock on on-disk inode page
1253		 * (become first tlock of the tblk's tlock list)
1254		 */
1255		if (((rc = diWrite(tid, ip))))
1256			goto out;
1257	}
1258
1259	/*
1260	 *	write log records from transaction locks
1261	 *
1262	 * txUpdateMap() resets XAD_NEW in XAD.
1263	 */
1264	txLog(log, tblk, &cd);
1265
1266	/*
1267	 * Ensure that inode isn't reused before
1268	 * lazy commit thread finishes processing
1269	 */
1270	if (tblk->xflag & COMMIT_DELETE) {
1271		ihold(tblk->u.ip);
1272		/*
1273		 * Avoid a rare deadlock
1274		 *
1275		 * If the inode is locked, we may be blocked in
1276		 * jfs_commit_inode.  If so, we don't want the
1277		 * lazy_commit thread doing the last iput() on the inode
1278		 * since that may block on the locked inode.  Instead,
1279		 * commit the transaction synchronously, so the last iput
1280		 * will be done by the calling thread (or later)
1281		 */
1282		/*
1283		 * I believe this code is no longer needed.  Splitting I_LOCK
1284		 * into two bits, I_NEW and I_SYNC should prevent this
1285		 * deadlock as well.  But since I don't have a JFS testload
1286		 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
1287		 * Joern
1288		 */
1289		if (tblk->u.ip->i_state & I_SYNC)
1290			tblk->xflag &= ~COMMIT_LAZY;
1291	}
1292
1293	ASSERT((!(tblk->xflag & COMMIT_DELETE)) ||
1294	       ((tblk->u.ip->i_nlink == 0) &&
1295		!test_cflag(COMMIT_Nolink, tblk->u.ip)));
1296
1297	/*
1298	 *	write COMMIT log record
1299	 */
1300	lrd->type = cpu_to_le16(LOG_COMMIT);
1301	lrd->length = 0;
1302	lmLog(log, tblk, lrd, NULL);
1303
1304	lmGroupCommit(log, tblk);
1305
1306	/*
1307	 *	- transaction is now committed -
1308	 */
1309
1310	/*
1311	 * force pages in careful update
1312	 * (imap addressing structure update)
1313	 */
1314	if (flag & COMMIT_FORCE)
1315		txForce(tblk);
1316
1317	/*
1318	 *	update allocation map.
1319	 *
1320	 * update inode allocation map and inode:
1321	 * free pager lock on memory object of inode if any.
1322	 * update block allocation map.
1323	 *
1324	 * txUpdateMap() resets XAD_NEW in XAD.
1325	 */
1326	if (tblk->xflag & COMMIT_FORCE)
1327		txUpdateMap(tblk);
1328
1329	/*
1330	 *	free transaction locks and pageout/free pages
1331	 */
1332	txRelease(tblk);
1333
1334	if ((tblk->flag & tblkGC_LAZY) == 0)
1335		txUnlock(tblk);
1336
1337
1338	/*
1339	 *	reset in-memory object state
1340	 */
1341	for (k = 0; k < cd.nip; k++) {
1342		ip = cd.iplist[k];
1343		jfs_ip = JFS_IP(ip);
1344
1345		/*
1346		 * reset in-memory inode state
1347		 */
1348		jfs_ip->bxflag = 0;
1349		jfs_ip->blid = 0;
1350	}
1351
1352      out:
1353	if (rc != 0)
1354		txAbort(tid, 1);
1355
1356      TheEnd:
1357	jfs_info("txCommit: tid = %d, returning %d", tid, rc);
1358	return rc;
1359}
1360
1361/*
1362 * NAME:	txLog()
1363 *
1364 * FUNCTION:	Writes AFTER log records for all lines modified
1365 *		by tid for segments specified by inodes in comdata.
1366 *		Code assumes only WRITELOCKS are recorded in lockwords.
1367 *
1368 * PARAMETERS:
1369 *
1370 * RETURN :
1371 */
1372static void txLog(struct jfs_log *log, struct tblock *tblk, struct commit *cd)
1373{
1374	struct inode *ip;
1375	lid_t lid;
1376	struct tlock *tlck;
1377	struct lrd *lrd = &cd->lrd;
1378
1379	/*
1380	 * write log record(s) for each tlock of transaction,
1381	 */
1382	for (lid = tblk->next; lid; lid = tlck->next) {
1383		tlck = lid_to_tlock(lid);
1384
1385		tlck->flag |= tlckLOG;
1386
1387		/* initialize lrd common */
1388		ip = tlck->ip;
1389		lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate);
1390		lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset);
1391		lrd->log.redopage.inode = cpu_to_le32(ip->i_ino);
1392
1393		/* write log record of page from the tlock */
1394		switch (tlck->type & tlckTYPE) {
1395		case tlckXTREE:
1396			xtLog(log, tblk, lrd, tlck);
1397			break;
1398
1399		case tlckDTREE:
1400			dtLog(log, tblk, lrd, tlck);
1401			break;
1402
1403		case tlckINODE:
1404			diLog(log, tblk, lrd, tlck, cd);
1405			break;
1406
1407		case tlckMAP:
1408			mapLog(log, tblk, lrd, tlck);
1409			break;
1410
1411		case tlckDATA:
1412			dataLog(log, tblk, lrd, tlck);
1413			break;
1414
1415		default:
1416			jfs_err("UFO tlock:0x%p", tlck);
1417		}
1418	}
1419
1420	return;
1421}
1422
1423/*
1424 *	diLog()
1425 *
1426 * function:	log inode tlock and format maplock to update bmap;
1427 */
1428static void diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd,
1429		 struct tlock *tlck, struct commit *cd)
1430{
1431	struct metapage *mp;
1432	pxd_t *pxd;
1433	struct pxd_lock *pxdlock;
1434
1435	mp = tlck->mp;
1436
1437	/* initialize as REDOPAGE record format */
1438	lrd->log.redopage.type = cpu_to_le16(LOG_INODE);
1439	lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE);
1440
1441	pxd = &lrd->log.redopage.pxd;
1442
1443	/*
1444	 *	inode after image
1445	 */
1446	if (tlck->type & tlckENTRY) {
1447		/* log after-image for logredo(): */
1448		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1449		PXDaddress(pxd, mp->index);
1450		PXDlength(pxd,
1451			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1452		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1453
1454		/* mark page as homeward bound */
1455		tlck->flag |= tlckWRITEPAGE;
1456	} else if (tlck->type & tlckFREE) {
1457		/*
1458		 *	free inode extent
1459		 *
1460		 * (pages of the freed inode extent have been invalidated and
1461		 * a maplock for free of the extent has been formatted at
1462		 * txLock() time);
1463		 *
1464		 * the tlock had been acquired on the inode allocation map page
1465		 * (iag) that specifies the freed extent, even though the map
1466		 * page is not itself logged, to prevent pageout of the map
1467		 * page before the log;
1468		 */
1469
1470		/* log LOG_NOREDOINOEXT of the freed inode extent for
1471		 * logredo() to start NoRedoPage filters, and to update
1472		 * imap and bmap for free of the extent;
1473		 */
1474		lrd->type = cpu_to_le16(LOG_NOREDOINOEXT);
1475		/*
1476		 * For the LOG_NOREDOINOEXT record, we need
1477		 * to pass the IAG number and inode extent
1478		 * index (within that IAG) from which the
1479		 * extent is being released.  These have been
1480		 * passed to us in the iplist[1] and iplist[2].
1481		 */
1482		lrd->log.noredoinoext.iagnum =
1483		    cpu_to_le32((u32) (size_t) cd->iplist[1]);
1484		lrd->log.noredoinoext.inoext_idx =
1485		    cpu_to_le32((u32) (size_t) cd->iplist[2]);
1486
1487		pxdlock = (struct pxd_lock *) & tlck->lock;
1488		*pxd = pxdlock->pxd;
1489		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1490
1491		/* update bmap */
1492		tlck->flag |= tlckUPDATEMAP;
1493
1494		/* mark page as homeward bound */
1495		tlck->flag |= tlckWRITEPAGE;
1496	} else
1497		jfs_err("diLog: UFO type tlck:0x%p", tlck);
1498	return;
1499}
1500
1501/*
1502 *	dataLog()
1503 *
1504 * function:	log data tlock
1505 */
1506static void dataLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd,
1507	    struct tlock *tlck)
1508{
1509	struct metapage *mp;
1510	pxd_t *pxd;
1511
1512	mp = tlck->mp;
1513
1514	/* initialize as REDOPAGE record format */
1515	lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
1516	lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);
1517
1518	pxd = &lrd->log.redopage.pxd;
1519
1520	/* log after-image for logredo(): */
1521	lrd->type = cpu_to_le16(LOG_REDOPAGE);
1522
1523	if (jfs_dirtable_inline(tlck->ip)) {
1524		/*
1525		 * The table has been truncated; we must have deleted
1526		 * the last entry, so don't bother logging this
1527		 */
1528		mp->lid = 0;
1529		grab_metapage(mp);
1530		metapage_homeok(mp);
1531		discard_metapage(mp);
1532		tlck->mp = NULL;
1533		return;
1534	}
1535
1536	PXDaddress(pxd, mp->index);
1537	PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);
1538
1539	lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1540
1541	/* mark page as homeward bound */
1542	tlck->flag |= tlckWRITEPAGE;
1543
1544	return;
1545}
1546
1547/*
1548 *	dtLog()
1549 *
1550 * function:	log dtree tlock and format maplock to update bmap;
1551 */
1552static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1553	   struct tlock * tlck)
1554{
1555	struct metapage *mp;
1556	struct pxd_lock *pxdlock;
1557	pxd_t *pxd;
1558
1559	mp = tlck->mp;
1560
1561	/* initialize as REDOPAGE/NOREDOPAGE record format */
1562	lrd->log.redopage.type = cpu_to_le16(LOG_DTREE);
1563	lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE);
1564
1565	pxd = &lrd->log.redopage.pxd;
1566
1567	if (tlck->type & tlckBTROOT)
1568		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1569
1570	/*
1571	 *	page extension via relocation: entry insertion;
1572	 *	page extension in-place: entry insertion;
1573	 *	new right page from page split, reinitialized in-line
1574	 *	root from root page split: entry insertion;
1575	 */
1576	if (tlck->type & (tlckNEW | tlckEXTEND)) {
1577		/* log after-image of the new page for logredo():
1578		 * mark log (LOG_NEW) for logredo() to initialize
1579		 * freelist and update bmap for alloc of the new page;
1580		 */
1581		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1582		if (tlck->type & tlckEXTEND)
1583			lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
1584		else
1585			lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
1586		PXDaddress(pxd, mp->index);
1587		PXDlength(pxd,
1588			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1589		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1590
1591		/* format a maplock for txUpdateMap() to update bPMAP for
1592		 * alloc of the new page;
1593		 */
1594		if (tlck->type & tlckBTROOT)
1595			return;
1596		tlck->flag |= tlckUPDATEMAP;
1597		pxdlock = (struct pxd_lock *) & tlck->lock;
1598		pxdlock->flag = mlckALLOCPXD;
1599		pxdlock->pxd = *pxd;
1600
1601		pxdlock->index = 1;
1602
1603		/* mark page as homeward bound */
1604		tlck->flag |= tlckWRITEPAGE;
1605		return;
1606	}
1607
1608	/*
1609	 *	entry insertion/deletion,
1610	 *	sibling page link update (old right page before split);
1611	 */
1612	if (tlck->type & (tlckENTRY | tlckRELINK)) {
1613		/* log after-image for logredo(): */
1614		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1615		PXDaddress(pxd, mp->index);
1616		PXDlength(pxd,
1617			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1618		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1619
1620		/* mark page as homeward bound */
1621		tlck->flag |= tlckWRITEPAGE;
1622		return;
1623	}
1624
1625	/*
1626	 *	page deletion: page has been invalidated
1627	 *	page relocation: source extent
1628	 *
1629	 *	a maplock for free of the page has been formatted
1630	 *	at txLock() time);
1631	 */
1632	if (tlck->type & (tlckFREE | tlckRELOCATE)) {
1633		/* log LOG_NOREDOPAGE of the deleted page for logredo()
1634		 * to start NoRedoPage filter and to update bmap for free
1635		 * of the deleted page
1636		 */
1637		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1638		pxdlock = (struct pxd_lock *) & tlck->lock;
1639		*pxd = pxdlock->pxd;
1640		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1641
1642		/* a maplock for txUpdateMap() for free of the page
1643		 * has been formatted at txLock() time;
1644		 */
1645		tlck->flag |= tlckUPDATEMAP;
1646	}
1647	return;
1648}
1649
1650/*
1651 *	xtLog()
1652 *
1653 * function:	log xtree tlock and format maplock to update bmap;
1654 */
1655static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1656	   struct tlock * tlck)
1657{
1658	struct inode *ip;
1659	struct metapage *mp;
1660	xtpage_t *p;
1661	struct xtlock *xtlck;
1662	struct maplock *maplock;
1663	struct xdlistlock *xadlock;
1664	struct pxd_lock *pxdlock;
1665	pxd_t *page_pxd;
1666	int next, lwm, hwm;
1667
1668	ip = tlck->ip;
1669	mp = tlck->mp;
1670
1671	/* initialize as REDOPAGE/NOREDOPAGE record format */
1672	lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
1673	lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);
1674
1675	page_pxd = &lrd->log.redopage.pxd;
1676
1677	if (tlck->type & tlckBTROOT) {
1678		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1679		p = (xtpage_t *) &JFS_IP(ip)->i_xtroot;
1680		if (S_ISDIR(ip->i_mode))
1681			lrd->log.redopage.type |=
1682			    cpu_to_le16(LOG_DIR_XTREE);
1683	} else
1684		p = (xtpage_t *) mp->data;
1685	next = le16_to_cpu(p->header.nextindex);
1686
1687	xtlck = (struct xtlock *) & tlck->lock;
1688
1689	maplock = (struct maplock *) & tlck->lock;
1690	xadlock = (struct xdlistlock *) maplock;
1691
1692	/*
1693	 *	entry insertion/extension;
1694	 *	sibling page link update (old right page before split);
1695	 */
1696	if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
1697		/* log after-image for logredo():
1698		 * logredo() will update bmap for alloc of new/extended
1699		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1700		 * after-image of XADlist;
1701		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1702		 * applying the after-image to the meta-data page.
1703		 */
1704		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1705		PXDaddress(page_pxd, mp->index);
1706		PXDlength(page_pxd,
1707			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1708		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1709
1710		/* format a maplock for txUpdateMap() to update bPMAP
1711		 * for alloc of new/extended extents of XAD[lwm:next)
1712		 * from the page itself;
1713		 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
1714		 */
1715		lwm = xtlck->lwm.offset;
1716		if (lwm == 0)
1717			lwm = XTPAGEMAXSLOT;
1718
1719		if (lwm == next)
1720			goto out;
1721		if (lwm > next) {
1722			jfs_err("xtLog: lwm > next");
1723			goto out;
1724		}
1725		tlck->flag |= tlckUPDATEMAP;
1726		xadlock->flag = mlckALLOCXADLIST;
1727		xadlock->count = next - lwm;
1728		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1729			int i;
1730			pxd_t *pxd;
1731			/*
1732			 * Lazy commit may allow xtree to be modified before
1733			 * txUpdateMap runs.  Copy xad into linelock to
1734			 * preserve correct data.
1735			 *
1736			 * We can fit twice as many pxds as xads in the lock
1737			 */
1738			xadlock->flag = mlckALLOCPXDLIST;
1739			pxd = xadlock->xdlist = &xtlck->pxdlock;
1740			for (i = 0; i < xadlock->count; i++) {
1741				PXDaddress(pxd, addressXAD(&p->xad[lwm + i]));
1742				PXDlength(pxd, lengthXAD(&p->xad[lwm + i]));
1743				p->xad[lwm + i].flag &=
1744				    ~(XAD_NEW | XAD_EXTENDED);
1745				pxd++;
1746			}
1747		} else {
1748			/*
1749			 * xdlist will point into the inode's xtree, so ensure
1750			 * that the transaction is not committed lazily.
1751			 */
1752			xadlock->flag = mlckALLOCXADLIST;
1753			xadlock->xdlist = &p->xad[lwm];
1754			tblk->xflag &= ~COMMIT_LAZY;
1755		}
1756		jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d count:%d",
1757			 tlck->ip, mp, tlck, lwm, xadlock->count);
1758
1759		maplock->index = 1;
1760
1761	      out:
1762		/* mark page as homeward bound */
1763		tlck->flag |= tlckWRITEPAGE;
1764
1765		return;
1766	}
1767
1768	/*
1769	 *	page deletion: file deletion/truncation (ref. xtTruncate())
1770	 *
1771	 * (page will be invalidated after log is written and bmap
1772	 * is updated from the page);
1773	 */
1774	if (tlck->type & tlckFREE) {
1775		/* LOG_NOREDOPAGE log for NoRedoPage filter:
1776		 * if page free from file delete, NoRedoFile filter from
1777		 * inode image of zero link count will subsume NoRedoPage
1778		 * filters for each page;
1779		 * if page free from file truncation, write NoRedoPage
1780		 * filter;
1781		 *
1782		 * update of block allocation map for the page itself:
1783		 * if page free from deletion and truncation, LOG_UPDATEMAP
1784		 * log for the page itself is generated from processing
1785		 * its parent page xad entries;
1786		 */
1787		/* if page free from file truncation, log LOG_NOREDOPAGE
1788		 * of the deleted page for logredo() to start NoRedoPage
1789		 * filter for the page;
1790		 */
1791		if (tblk->xflag & COMMIT_TRUNCATE) {
1792			/* write NOREDOPAGE for the page */
1793			lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1794			PXDaddress(page_pxd, mp->index);
1795			PXDlength(page_pxd,
1796				  mp->logical_size >> tblk->sb->
1797				  s_blocksize_bits);
1798			lrd->backchain =
1799			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1800
1801			if (tlck->type & tlckBTROOT) {
1802				/* Empty xtree must be logged */
1803				lrd->type = cpu_to_le16(LOG_REDOPAGE);
1804				lrd->backchain =
1805				    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1806			}
1807		}
1808
1809		/* init LOG_UPDATEMAP of the freed extents
1810		 * XAD[XTENTRYSTART:hwm) from the deleted page itself
1811		 * for logredo() to update bmap;
1812		 */
1813		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1814		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST);
1815		xtlck = (struct xtlock *) & tlck->lock;
1816		hwm = xtlck->hwm.offset;
1817		lrd->log.updatemap.nxd =
1818		    cpu_to_le16(hwm - XTENTRYSTART + 1);
1819		/* reformat linelock for lmLog() */
1820		xtlck->header.offset = XTENTRYSTART;
1821		xtlck->header.length = hwm - XTENTRYSTART + 1;
1822		xtlck->index = 1;
1823		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1824
1825		/* format a maplock for txUpdateMap() to update bmap
1826		 * to free extents of XAD[XTENTRYSTART:hwm) from the
1827		 * deleted page itself;
1828		 */
1829		tlck->flag |= tlckUPDATEMAP;
1830		xadlock->count = hwm - XTENTRYSTART + 1;
1831		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1832			int i;
1833			pxd_t *pxd;
1834			/*
1835			 * Lazy commit may allow xtree to be modified before
1836			 * txUpdateMap runs.  Copy xad into linelock to
1837			 * preserve correct data.
1838			 *
1839			 * We can fit twice as many pxds as xads in the lock
1840			 */
1841			xadlock->flag = mlckFREEPXDLIST;
1842			pxd = xadlock->xdlist = &xtlck->pxdlock;
1843			for (i = 0; i < xadlock->count; i++) {
1844				PXDaddress(pxd,
1845					addressXAD(&p->xad[XTENTRYSTART + i]));
1846				PXDlength(pxd,
1847					lengthXAD(&p->xad[XTENTRYSTART + i]));
1848				pxd++;
1849			}
1850		} else {
1851			/*
1852			 * xdlist will point into the inode's xtree, so ensure
1853			 * that the transaction is not committed lazily.
1854			 */
1855			xadlock->flag = mlckFREEXADLIST;
1856			xadlock->xdlist = &p->xad[XTENTRYSTART];
1857			tblk->xflag &= ~COMMIT_LAZY;
1858		}
1859		jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2",
1860			 tlck->ip, mp, xadlock->count);
1861
1862		maplock->index = 1;
1863
1864		/* mark page as invalid */
1865		if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode))
1866		    && !(tlck->type & tlckBTROOT))
1867			tlck->flag |= tlckFREEPAGE;
1868		/*
1869		   else (tblk->xflag & COMMIT_PMAP)
1870		   ? release the page;
1871		 */
1872		return;
1873	}
1874
1875	/*
1876	 *	page/entry truncation: file truncation (ref. xtTruncate())
1877	 *
1878	 *	|----------+------+------+---------------|
1879	 *		   |      |      |
1880	 *		   |      |     hwm - hwm before truncation
1881	 *		   |     next - truncation point
1882	 *		  lwm - lwm before truncation
1883	 * header ?
1884	 */
1885	if (tlck->type & tlckTRUNCATE) {
1886		pxd_t pxd;	/* truncated extent of xad */
1887		int twm;
1888
1889		/*
1890		 * For truncation the entire linelock may be used, so it would
1891		 * be difficult to store xad list in linelock itself.
1892		 * Therefore, we'll just force transaction to be committed
1893		 * synchronously, so that xtree pages won't be changed before
1894		 * txUpdateMap runs.
1895		 */
1896		tblk->xflag &= ~COMMIT_LAZY;
1897		lwm = xtlck->lwm.offset;
1898		if (lwm == 0)
1899			lwm = XTPAGEMAXSLOT;
1900		hwm = xtlck->hwm.offset;
1901		twm = xtlck->twm.offset;
1902
1903		/*
1904		 *	write log records
1905		 */
1906		/* log after-image for logredo():
1907		 *
1908		 * logredo() will update bmap for alloc of new/extended
1909		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1910		 * after-image of XADlist;
1911		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1912		 * applying the after-image to the meta-data page.
1913		 */
1914		lrd->type = cpu_to_le16(LOG_REDOPAGE);
1915		PXDaddress(page_pxd, mp->index);
1916		PXDlength(page_pxd,
1917			  mp->logical_size >> tblk->sb->s_blocksize_bits);
1918		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1919
1920		/*
1921		 * truncate entry XAD[twm == next - 1]:
1922		 */
1923		if (twm == next - 1) {
1924			/* init LOG_UPDATEMAP for logredo() to update bmap for
1925			 * free of truncated delta extent of the truncated
1926			 * entry XAD[next - 1]:
1927			 * (xtlck->pxdlock = truncated delta extent);
1928			 */
1929			pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
1930			/* assert(pxdlock->type & tlckTRUNCATE); */
1931			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1932			lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
1933			lrd->log.updatemap.nxd = cpu_to_le16(1);
1934			lrd->log.updatemap.pxd = pxdlock->pxd;
1935			pxd = pxdlock->pxd;	/* save to format maplock */
1936			lrd->backchain =
1937			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1938		}
1939
1940		/*
1941		 * free entries XAD[next:hwm]:
1942		 */
1943		if (hwm >= next) {
1944			/* init LOG_UPDATEMAP of the freed extents
1945			 * XAD[next:hwm] from the deleted page itself
1946			 * for logredo() to update bmap;
1947			 */
1948			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1949			lrd->log.updatemap.type =
1950			    cpu_to_le16(LOG_FREEXADLIST);
1951			xtlck = (struct xtlock *) & tlck->lock;
1952			hwm = xtlck->hwm.offset;
1953			lrd->log.updatemap.nxd =
1954			    cpu_to_le16(hwm - next + 1);
1955			/* reformat linelock for lmLog() */
1956			xtlck->header.offset = next;
1957			xtlck->header.length = hwm - next + 1;
1958			xtlck->index = 1;
1959			lrd->backchain =
1960			    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1961		}
1962
1963		/*
1964		 *	format maplock(s) for txUpdateMap() to update bmap
1965		 */
1966		maplock->index = 0;
1967
1968		/*
1969		 * allocate entries XAD[lwm:next):
1970		 */
1971		if (lwm < next) {
1972			/* format a maplock for txUpdateMap() to update bPMAP
1973			 * for alloc of new/extended extents of XAD[lwm:next)
1974			 * from the page itself;
1975			 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
1976			 */
1977			tlck->flag |= tlckUPDATEMAP;
1978			xadlock->flag = mlckALLOCXADLIST;
1979			xadlock->count = next - lwm;
1980			xadlock->xdlist = &p->xad[lwm];
1981
1982			jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d lwm:%d next:%d",
1983				 tlck->ip, mp, xadlock->count, lwm, next);
1984			maplock->index++;
1985			xadlock++;
1986		}
1987
1988		/*
1989		 * truncate entry XAD[twm == next - 1]:
1990		 */
1991		if (twm == next - 1) {
1992			/* format a maplock for txUpdateMap() to update bmap
1993			 * to free truncated delta extent of the truncated
1994			 * entry XAD[next - 1];
1995			 * (xtlck->pxdlock = truncated delta extent);
1996			 */
1997			tlck->flag |= tlckUPDATEMAP;
1998			pxdlock = (struct pxd_lock *) xadlock;
1999			pxdlock->flag = mlckFREEPXD;
2000			pxdlock->count = 1;
2001			pxdlock->pxd = pxd;
2002
2003			jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d hwm:%d",
2004				 ip, mp, pxdlock->count, hwm);
2005			maplock->index++;
2006			xadlock++;
2007		}
2008
2009		/*
2010		 * free entries XAD[next:hwm]:
2011		 */
2012		if (hwm >= next) {
2013			/* format a maplock for txUpdateMap() to update bmap
2014			 * to free extents of XAD[next:hwm] from the deleted
2015			 * page itself;
2016			 */
2017			tlck->flag |= tlckUPDATEMAP;
2018			xadlock->flag = mlckFREEXADLIST;
2019			xadlock->count = hwm - next + 1;
2020			xadlock->xdlist = &p->xad[next];
2021
2022			jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d next:%d hwm:%d",
2023				 tlck->ip, mp, xadlock->count, next, hwm);
2024			maplock->index++;
2025		}
2026
2027		/* mark page as homeward bound */
2028		tlck->flag |= tlckWRITEPAGE;
2029	}
2030	return;
2031}
2032
2033/*
2034 *	mapLog()
2035 *
2036 * function:	log from maplock of freed data extents;
2037 */
2038static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2039		   struct tlock * tlck)
2040{
2041	struct pxd_lock *pxdlock;
2042	int i, nlock;
2043	pxd_t *pxd;
2044
2045	/*
2046	 *	page relocation: free the source page extent
2047	 *
2048	 * a maplock for txUpdateMap() for free of the page
2049	 * has been formatted at txLock() time saving the src
2050	 * relocated page address;
2051	 */
2052	if (tlck->type & tlckRELOCATE) {
2053		/* log LOG_NOREDOPAGE of the old relocated page
2054		 * for logredo() to start NoRedoPage filter;
2055		 */
2056		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
2057		pxdlock = (struct pxd_lock *) & tlck->lock;
2058		pxd = &lrd->log.redopage.pxd;
2059		*pxd = pxdlock->pxd;
2060		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2061
2062		/* (N.B. currently, logredo() does NOT update bmap
2063		 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE);
2064		 * if page free from relocation, LOG_UPDATEMAP log is
2065		 * specifically generated now for logredo()
2066		 * to update bmap for free of src relocated page;
2067		 * (new flag LOG_RELOCATE may be introduced which will
2068		 * inform logredo() to start NORedoPage filter and also
2069		 * update block allocation map at the same time, thus
2070		 * avoiding an extra log write);
2071		 */
2072		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2073		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
2074		lrd->log.updatemap.nxd = cpu_to_le16(1);
2075		lrd->log.updatemap.pxd = pxdlock->pxd;
2076		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2077
2078		/* a maplock for txUpdateMap() for free of the page
2079		 * has been formatted at txLock() time;
2080		 */
2081		tlck->flag |= tlckUPDATEMAP;
2082		return;
2083	}
2084	/*
2085	 *
2086	 * Otherwise it's not a relocate request
2087	 *
2088	 */
2089	else {
2090		/* log LOG_UPDATEMAP for logredo() to update bmap for
2091		 * free of truncated/relocated delta extent of the data;
2092		 * e.g.: external EA extent, relocated/truncated extent
2093		 * from xtTailgate();
2094		 */
2095		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2096		pxdlock = (struct pxd_lock *) & tlck->lock;
2097		nlock = pxdlock->index;
2098		for (i = 0; i < nlock; i++, pxdlock++) {
2099			if (pxdlock->flag & mlckALLOCPXD)
2100				lrd->log.updatemap.type =
2101				    cpu_to_le16(LOG_ALLOCPXD);
2102			else
2103				lrd->log.updatemap.type =
2104				    cpu_to_le16(LOG_FREEPXD);
2105			lrd->log.updatemap.nxd = cpu_to_le16(1);
2106			lrd->log.updatemap.pxd = pxdlock->pxd;
2107			lrd->backchain =
2108			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2109			jfs_info("mapLog: xaddr:0x%lx xlen:0x%x",
2110				 (ulong) addressPXD(&pxdlock->pxd),
2111				 lengthPXD(&pxdlock->pxd));
2112		}
2113
2114		/* update bmap */
2115		tlck->flag |= tlckUPDATEMAP;
2116	}
2117}
2118
2119/*
2120 *	txEA()
2121 *
2122 * function:	acquire maplock for EA/ACL extents or
2123 *		set COMMIT_INLINE flag;
2124 */
2125void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
2126{
2127	struct tlock *tlck = NULL;
2128	struct pxd_lock *maplock = NULL, *pxdlock = NULL;
2129
2130	/*
2131	 * format maplock for alloc of new EA extent
2132	 */
2133	if (newea) {
2134		/* Since the newea could be a completely zeroed entry we need to
2135		 * check for the two flags which indicate we should actually
2136		 * commit new EA data
2137		 */
2138		if (newea->flag & DXD_EXTENT) {
2139			tlck = txMaplock(tid, ip, tlckMAP);
2140			maplock = (struct pxd_lock *) & tlck->lock;
2141			pxdlock = (struct pxd_lock *) maplock;
2142			pxdlock->flag = mlckALLOCPXD;
2143			PXDaddress(&pxdlock->pxd, addressDXD(newea));
2144			PXDlength(&pxdlock->pxd, lengthDXD(newea));
2145			pxdlock++;
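			/* advance to the next pxd_lock slot; if the old
			 * EA extent is also freed below, it occupies this
			 * slot and bumps maplock->index
			 */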
2146			maplock->index = 1;
2147		} else if (newea->flag & DXD_INLINE) {
2148			tlck = NULL;
2149
2150			set_cflag(COMMIT_Inlineea, ip);
2151		}
2152	}
2153
2154	/*
2155	 * format maplock for free of old EA extent
2156	 */
2157	if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) {
2158		if (tlck == NULL) {
2159			tlck = txMaplock(tid, ip, tlckMAP);
2160			maplock = (struct pxd_lock *) & tlck->lock;
2161			pxdlock = (struct pxd_lock *) maplock;
2162			maplock->index = 0;
2163		}
2164		pxdlock->flag = mlckFREEPXD;
2165		PXDaddress(&pxdlock->pxd, addressDXD(oldea));
2166		PXDlength(&pxdlock->pxd, lengthDXD(oldea));
2167		maplock->index++;
2168	}
2169}
2170
2171/*
2172 *	txForce()
2173 *
2174 * function: synchronously write pages locked by transaction
2175 *	     after txLog() but before txUpdateMap();
2176 */
2177static void txForce(struct tblock * tblk)
2178{
2179	struct tlock *tlck;
2180	lid_t lid, next;
2181	struct metapage *mp;
2182
2183	/*
2184	 * reverse the order of transaction tlocks in
2185	 * careful update order of address index pages
2186	 * (right to left, bottom up)
2187	 */
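	/* in-place reversal of the singly-linked tlock list: pop each
	 * remaining lid off the old list and push it at tblk->next, so
	 * the original first tlock becomes the tail
	 */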
2188	tlck = lid_to_tlock(tblk->next);
2189	lid = tlck->next;
2190	tlck->next = 0;
2191	while (lid) {
2192		tlck = lid_to_tlock(lid);
2193		next = tlck->next;
2194		tlck->next = tblk->next;
2195		tblk->next = lid;
2196		lid = next;
2197	}
2198
2199	/*
2200	 * synchronously write the page, and
2201	 * hold the page for txUpdateMap();
2202	 */
2203	for (lid = tblk->next; lid; lid = next) {
2204		tlck = lid_to_tlock(lid);
2205		next = tlck->next;
2206
2207		if ((mp = tlck->mp) != NULL &&
2208		    (tlck->type & tlckBTROOT) == 0) {
2209			assert(mp->xflag & COMMIT_PAGE);
2210
2211			if (tlck->flag & tlckWRITEPAGE) {
2212				tlck->flag &= ~tlckWRITEPAGE;
2213
2214				/* do not release page to freelist */
2215				force_metapage(mp);
2216#if 0
2217				/*
2218				 * The "right" thing to do here is to
2219				 * synchronously write the metadata.
2220				 * With the current implementation this
2221				 * is hard since write_metapage requires
2222				 * us to kunmap & remap the page.  If we
2223				 * have tlocks pointing into the metadata
2224				 * pages, we don't want to do this.  I think
2225				 * we can get by with synchronously writing
2226				 * the pages when they are released.
2227				 */
2228				assert(mp->nohomeok);
2229				set_bit(META_dirty, &mp->flag);
2230				set_bit(META_sync, &mp->flag);
2231#endif
2232			}
2233		}
2234	}
2235}
2236
2237/*
2238 *	txUpdateMap()
2239 *
2240 * function:	update persistent allocation map (and working map
2241 *		if appropriate);
2242 *
2243 * parameter:
2244 */
2245static void txUpdateMap(struct tblock * tblk)
2246{
2247	struct inode *ip;
2248	struct inode *ipimap;
2249	lid_t lid;
2250	struct tlock *tlck;
2251	struct maplock *maplock;
2252	struct pxd_lock pxdlock;
2253	int maptype;
2254	int k, nlock;
2255	struct metapage *mp = NULL;
2256
2257	ipimap = JFS_SBI(tblk->sb)->ipimap;
2258
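	/* COMMIT_PMAP transactions update only the persistent map here;
	 * the working map is handled later (at last reference release,
	 * see the mlckFREE comment below).  Otherwise both persistent
	 * and working maps are updated (COMMIT_PWMAP).
	 */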
2259	maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP;
2260
2261
2262	/*
2263	 *	update block allocation map
2264	 *
2265	 * update allocation state in pmap (and wmap) and
2266	 * update lsn of the pmap page;
2267	 */
2268	/*
2269	 * scan each tlock/page of transaction for block allocation/free:
2270	 *
2271	 * for each tlock/page of transaction, update map.
2272	 *  ? are there tlock for pmap and pwmap at the same time ?
2273	 */
2274	for (lid = tblk->next; lid; lid = tlck->next) {
2275		tlck = lid_to_tlock(lid);
2276
2277		if ((tlck->flag & tlckUPDATEMAP) == 0)
2278			continue;
2279
2280		if (tlck->flag & tlckFREEPAGE) {
2281			/*
2282			 * Another thread may attempt to reuse freed space
2283			 * immediately, so we want to get rid of the metapage
2284			 * before anyone else has a chance to get it.
2285			 * Lock metapage, update maps, then invalidate
2286			 * the metapage.
2287			 */
2288			mp = tlck->mp;
2289			ASSERT(mp->xflag & COMMIT_PAGE);
2290			grab_metapage(mp);
2291		}
2292
2293		/*
2294		 * extent list:
2295		 * . in-line PXD list:
2296		 * . out-of-line XAD list:
2297		 */
2298		maplock = (struct maplock *) & tlck->lock;
2299		nlock = maplock->index;
2300
2301		for (k = 0; k < nlock; k++, maplock++) {
2302			/*
2303			 * allocate blocks in persistent map:
2304			 *
2305			 * blocks have been allocated from wmap at alloc time;
2306			 */
2307			if (maplock->flag & mlckALLOC) {
2308				txAllocPMap(ipimap, maplock, tblk);
2309			}
2310			/*
2311			 * free blocks in persistent and working map:
2312			 * blocks will be freed in pmap and then in wmap;
2313			 *
2314			 * ? tblock specifies the PMAP/PWMAP based upon
2315			 * transaction
2316			 *
2317			 * free blocks in persistent map:
2318			 * blocks will be freed from wmap at last reference
2319			 * release of the object for regular files;
2320			 *
2321			 * Always free blocks from both persistent & working
2322			 * maps for directories
2323			 */
2324			else {	/* (maplock->flag & mlckFREE) */
2325
2326				if (tlck->flag & tlckDIRECTORY)
2327					txFreeMap(ipimap, maplock,
2328						  tblk, COMMIT_PWMAP);
2329				else
2330					txFreeMap(ipimap, maplock,
2331						  tblk, maptype);
2332			}
2333		}
2334		if (tlck->flag & tlckFREEPAGE) {
2335			if (!(tblk->flag & tblkGC_LAZY)) {
2336				/* This is equivalent to txRelease */
2337				ASSERT(mp->lid == lid);
2338				tlck->mp->lid = 0;
2339			}
2340			assert(mp->nohomeok == 1);
2341			metapage_homeok(mp);
2342			discard_metapage(mp);
2343			tlck->mp = NULL;
2344		}
2345	}
2346	/*
2347	 *	update inode allocation map
2348	 *
2349	 * update allocation state in pmap and
2350	 * update lsn of the pmap page;
2351	 * update in-memory inode flag/state
2352	 *
2353	 * unlock mapper/write lock
2354	 */
2355	if (tblk->xflag & COMMIT_CREATE) {
2356		diUpdatePMap(ipimap, tblk->ino, false, tblk);
2357		/* update persistent block allocation map
2358		 * for the allocation of inode extent;
2359		 */
2360		pxdlock.flag = mlckALLOCPXD;
2361		pxdlock.pxd = tblk->u.ixpxd;
2362		pxdlock.index = 1;
2363		txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk);
2364	} else if (tblk->xflag & COMMIT_DELETE) {
2365		ip = tblk->u.ip;
2366		diUpdatePMap(ipimap, ip->i_ino, true, tblk);
2367		iput(ip);
2368	}
2369}
2370
2371/*
2372 *	txAllocPMap()
2373 *
2374 * function: allocate from persistent map;
2375 *
2376 * parameter:
2377 *	ipbmap	-
2378 *	malock	-
2379 *		xad list:
2380 *		pxd:
2381 *
2382 *	maptype -
2383 *		allocate from persistent map;
2384 *		free from persistent map;
2385 *		(e.g., tmp file - free from working map at release
2386 *		 of last reference);
2387 *		free from persistent and working map;
2388 *
2389 *	lsn	- log sequence number;
2390 */
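/*
 * A minimal usage sketch, mirroring the COMMIT_CREATE path in
 * txUpdateMap() above: format a single-extent pxd_lock and pass it in
 * as the maplock -
 *
 *	struct pxd_lock pxdlock;
 *
 *	pxdlock.flag = mlckALLOCPXD;
 *	pxdlock.pxd = tblk->u.ixpxd;
 *	pxdlock.index = 1;
 *	txAllocPMap(ipimap, (struct maplock *) &pxdlock, tblk);
 */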
2391static void txAllocPMap(struct inode *ip, struct maplock * maplock,
2392			struct tblock * tblk)
2393{
2394	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2395	struct xdlistlock *xadlistlock;
2396	xad_t *xad;
2397	s64 xaddr;
2398	int xlen;
2399	struct pxd_lock *pxdlock;
2400	struct xdlistlock *pxdlistlock;
2401	pxd_t *pxd;
2402	int n;
2403
2404	/*
2405	 * allocate from persistent map;
2406	 */
2407	if (maplock->flag & mlckALLOCXADLIST) {
2408		xadlistlock = (struct xdlistlock *) maplock;
2409		xad = xadlistlock->xdlist;
2410		for (n = 0; n < xadlistlock->count; n++, xad++) {
2411			if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
2412				xaddr = addressXAD(xad);
2413				xlen = lengthXAD(xad);
2414				dbUpdatePMap(ipbmap, false, xaddr,
2415					     (s64) xlen, tblk);
2416				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
2417				jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2418					 (ulong) xaddr, xlen);
2419			}
2420		}
2421	} else if (maplock->flag & mlckALLOCPXD) {
2422		pxdlock = (struct pxd_lock *) maplock;
2423		xaddr = addressPXD(&pxdlock->pxd);
2424		xlen = lengthPXD(&pxdlock->pxd);
2425		dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk);
2426		jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen);
2427	} else {		/* (maplock->flag & mlckALLOCPXDLIST) */
2428
2429		pxdlistlock = (struct xdlistlock *) maplock;
2430		pxd = pxdlistlock->xdlist;
2431		for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2432			xaddr = addressPXD(pxd);
2433			xlen = lengthPXD(pxd);
2434			dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen,
2435				     tblk);
2436			jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2437				 (ulong) xaddr, xlen);
2438		}
2439	}
2440}
2441
2442/*
2443 *	txFreeMap()
2444 *
2445 * function:	free from persistent and/or working map;
2446 *
2447 * todo: optimization
2448 */
2449void txFreeMap(struct inode *ip,
2450	       struct maplock * maplock, struct tblock * tblk, int maptype)
2451{
2452	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2453	struct xdlistlock *xadlistlock;
2454	xad_t *xad;
2455	s64 xaddr;
2456	int xlen;
2457	struct pxd_lock *pxdlock;
2458	struct xdlistlock *pxdlistlock;
2459	pxd_t *pxd;
2460	int n;
2461
2462	jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x",
2463		 tblk, maplock, maptype);
2464
2465	/*
2466	 * free from persistent map;
2467	 */
2468	if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) {
2469		if (maplock->flag & mlckFREEXADLIST) {
2470			xadlistlock = (struct xdlistlock *) maplock;
2471			xad = xadlistlock->xdlist;
2472			for (n = 0; n < xadlistlock->count; n++, xad++) {
2473				if (!(xad->flag & XAD_NEW)) {
2474					xaddr = addressXAD(xad);
2475					xlen = lengthXAD(xad);
2476					dbUpdatePMap(ipbmap, true, xaddr,
2477						     (s64) xlen, tblk);
2478					jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2479						 (ulong) xaddr, xlen);
2480				}
2481			}
2482		} else if (maplock->flag & mlckFREEPXD) {
2483			pxdlock = (struct pxd_lock *) maplock;
2484			xaddr = addressPXD(&pxdlock->pxd);
2485			xlen = lengthPXD(&pxdlock->pxd);
2486			dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen,
2487				     tblk);
2488			jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2489				 (ulong) xaddr, xlen);
2490		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
2491
2492			pxdlistlock = (struct xdlistlock *) maplock;
2493			pxd = pxdlistlock->xdlist;
2494			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2495				xaddr = addressPXD(pxd);
2496				xlen = lengthPXD(pxd);
2497				dbUpdatePMap(ipbmap, true, xaddr,
2498					     (s64) xlen, tblk);
2499				jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2500					 (ulong) xaddr, xlen);
2501			}
2502		}
2503	}
2504
2505	/*
2506	 * free from working map;
2507	 */
2508	if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) {
2509		if (maplock->flag & mlckFREEXADLIST) {
2510			xadlistlock = (struct xdlistlock *) maplock;
2511			xad = xadlistlock->xdlist;
2512			for (n = 0; n < xadlistlock->count; n++, xad++) {
2513				xaddr = addressXAD(xad);
2514				xlen = lengthXAD(xad);
2515				dbFree(ip, xaddr, (s64) xlen);
2516				xad->flag = 0;
2517				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2518					 (ulong) xaddr, xlen);
2519			}
2520		} else if (maplock->flag & mlckFREEPXD) {
2521			pxdlock = (struct pxd_lock *) maplock;
2522			xaddr = addressPXD(&pxdlock->pxd);
2523			xlen = lengthPXD(&pxdlock->pxd);
2524			dbFree(ip, xaddr, (s64) xlen);
2525			jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2526				 (ulong) xaddr, xlen);
2527		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
2528
2529			pxdlistlock = (struct xdlistlock *) maplock;
2530			pxd = pxdlistlock->xdlist;
2531			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2532				xaddr = addressPXD(pxd);
2533				xlen = lengthPXD(pxd);
2534				dbFree(ip, xaddr, (s64) xlen);
2535				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2536					 (ulong) xaddr, xlen);
2537			}
2538		}
2539	}
2540}
2541
2542/*
2543 *	txFreelock()
2544 *
2545 * function:	remove tlock from inode anonymous locklist
2546 */
2547void txFreelock(struct inode *ip)
2548{
2549	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2550	struct tlock *xtlck, *tlck;
2551	lid_t xlid = 0, lid;
2552
2553	if (!jfs_ip->atlhead)
2554		return;
2555
2556	TXN_LOCK();
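	/* treat &atlhead as a dummy tlock so that entries can be unlinked
	 * below without special-casing the list head; this works because
	 * 'next' is the first member of struct tlock
	 */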
2557	xtlck = (struct tlock *) &jfs_ip->atlhead;
2558
2559	while ((lid = xtlck->next) != 0) {
2560		tlck = lid_to_tlock(lid);
2561		if (tlck->flag & tlckFREELOCK) {
2562			xtlck->next = tlck->next;
2563			txLockFree(lid);
2564		} else {
2565			xtlck = tlck;
2566			xlid = lid;
2567		}
2568	}
2569
2570	if (jfs_ip->atlhead)
2571		jfs_ip->atltail = xlid;
2572	else {
2573		jfs_ip->atltail = 0;
2574		/*
2575		 * If inode was on anon_list, remove it
2576		 */
2577		list_del_init(&jfs_ip->anon_inode_list);
2578	}
2579	TXN_UNLOCK();
2580}
2581
2582/*
2583 *	txAbort()
2584 *
2585 * function: abort tx before commit;
2586 *
2587 * frees line-locks and segment locks for all
2588 * segments in comdata structure.
2589 * Optionally sets state of file-system to FM_DIRTY in super-block.
2590 * The log age of in-memory page frames held by the caller
2591 * is reset to 0 (to avoid log wrap).
2592 */
2593void txAbort(tid_t tid, int dirty)
2594{
2595	lid_t lid, next;
2596	struct metapage *mp;
2597	struct tblock *tblk = tid_to_tblock(tid);
2598	struct tlock *tlck;
2599
2600	/*
2601	 * free tlocks of the transaction
2602	 */
2603	for (lid = tblk->next; lid; lid = next) {
2604		tlck = lid_to_tlock(lid);
2605		next = tlck->next;
2606		mp = tlck->mp;
2607		JFS_IP(tlck->ip)->xtlid = 0;
2608
2609		if (mp) {
2610			mp->lid = 0;
2611
2612			/*
2613			 * reset lsn of page to avoid log wrap:
2614			 *
2615			 * (page may have been previously committed by another
2616			 * transaction(s) but has not been paged, i.e.,
2617			 * it may be on logsync list even though it has not
2618			 * been logged for the current tx.)
2619			 */
2620			if (mp->xflag & COMMIT_PAGE && mp->lsn)
2621				LogSyncRelease(mp);
2622		}
2623		/* insert tlock at head of freelist */
2624		TXN_LOCK();
2625		txLockFree(lid);
2626		TXN_UNLOCK();
2627	}
2628
2629	/* caller will free the transaction block */
2630
2631	tblk->next = tblk->last = 0;
2632
2633	/*
2634	 * mark filesystem dirty
2635	 */
2636	if (dirty)
2637		jfs_error(tblk->sb, "\n");
2638
2639	return;
2640}
2641
2642/*
2643 *	txLazyCommit(void)
2644 *
2645 *	All transactions except those changing ipimap (COMMIT_FORCE) are
2646 *	processed by this routine.  This ensures that the inode and block
2647 *	allocation maps are updated in order.  For synchronous transactions,
2648 *	let the user thread finish processing after txUpdateMap() is called.
2649 */
2650static void txLazyCommit(struct tblock * tblk)
2651{
2652	struct jfs_log *log;
2653
2654	while (((tblk->flag & tblkGC_READY) == 0) &&
2655	       ((tblk->flag & tblkGC_UNLOCKED) == 0)) {
2656		/* We must have gotten ahead of the user thread
2657		 */
2658		jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk);
2659		yield();
2660	}
2661
2662	jfs_info("txLazyCommit: processing tblk 0x%p", tblk);
2663
2664	txUpdateMap(tblk);
2665
2666	log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
2667
2668	spin_lock_irq(&log->gclock);	// LOGGC_LOCK
2669
2670	tblk->flag |= tblkGC_COMMITTED;
2671
2672	if (tblk->flag & tblkGC_READY)
2673		log->gcrtc--;
2674
2675	wake_up_all(&tblk->gcwait);	// LOGGC_WAKEUP
2676
2677	/*
2678	 * Can't release log->gclock until we've tested tblk->flag
2679	 */
2680	if (tblk->flag & tblkGC_LAZY) {
2681		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
2682		txUnlock(tblk);
2683		tblk->flag &= ~tblkGC_LAZY;
2684		txEnd(tblk - TxBlock);	/* Convert back to tid */
2685	} else
2686		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
2687
2688	jfs_info("txLazyCommit: done: tblk = 0x%p", tblk);
2689}
2690
2691/*
2692 *	jfs_lazycommit(void)
2693 *
2694 *	To be run as a kernel daemon.  If lbmIODone is called in an interrupt
2695 *	context, or where blocking is not wanted, this routine will process
2696 *	committed transactions from the unlock queue.
2697 */
2698int jfs_lazycommit(void *arg)
2699{
2700	int WorkDone;
2701	struct tblock *tblk;
2702	unsigned long flags;
2703	struct jfs_sb_info *sbi;
2704
2705	set_freezable();
2706	do {
2707		LAZY_LOCK(flags);
2708		jfs_commit_thread_waking = 0;	/* OK to wake another thread */
2709		while (!list_empty(&TxAnchor.unlock_queue)) {
2710			WorkDone = 0;
2711			list_for_each_entry(tblk, &TxAnchor.unlock_queue,
2712					    cqueue) {
2713
2714				sbi = JFS_SBI(tblk->sb);
2715				/*
2716				 * For each volume, the transactions must be
2717				 * handled in order.  If another commit thread
2718				 * is handling a tblk for this superblock,
2719				 * skip it
2720				 */
2721				if (sbi->commit_state & IN_LAZYCOMMIT)
2722					continue;
2723
2724				sbi->commit_state |= IN_LAZYCOMMIT;
2725				WorkDone = 1;
2726
2727				/*
2728				 * Remove transaction from queue
2729				 */
2730				list_del(&tblk->cqueue);
2731
2732				LAZY_UNLOCK(flags);
2733				txLazyCommit(tblk);
2734				LAZY_LOCK(flags);
2735
2736				sbi->commit_state &= ~IN_LAZYCOMMIT;
2737				/*
2738				 * Don't continue in the for loop.  (We can't
2739				 * anyway, it's unsafe!)  We want to go back to
2740				 * the beginning of the list.
2741				 */
2742				break;
2743			}
2744
2745			/* If there was nothing to do, don't continue */
2746			if (!WorkDone)
2747				break;
2748		}
2749		/* In case a wakeup came while all threads were active */
2750		jfs_commit_thread_waking = 0;
2751
2752		if (freezing(current)) {
2753			LAZY_UNLOCK(flags);
2754			try_to_freeze();
2755		} else {
2756			DECLARE_WAITQUEUE(wq, current);
2757
2758			add_wait_queue(&jfs_commit_thread_wait, &wq);
2759			set_current_state(TASK_INTERRUPTIBLE);
2760			LAZY_UNLOCK(flags);
2761			schedule();
2762			remove_wait_queue(&jfs_commit_thread_wait, &wq);
2763		}
2764	} while (!kthread_should_stop());
2765
2766	if (!list_empty(&TxAnchor.unlock_queue))
2767		jfs_err("jfs_lazycommit being killed w/pending transactions!");
2768	else
2769		jfs_info("jfs_lazycommit being killed");
2770	return 0;
2771}
2772
2773void txLazyUnlock(struct tblock * tblk)
2774{
2775	unsigned long flags;
2776
2777	LAZY_LOCK(flags);
2778
2779	list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue);
2780	/*
2781	 * Don't wake up a commit thread if there is already one servicing
2782	 * this superblock, or if the last one we woke up hasn't started yet.
2783	 */
2784	if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) &&
2785	    !jfs_commit_thread_waking) {
2786		jfs_commit_thread_waking = 1;
2787		wake_up(&jfs_commit_thread_wait);
2788	}
2789	LAZY_UNLOCK(flags);
2790}
2791
2792static void LogSyncRelease(struct metapage * mp)
2793{
2794	struct jfs_log *log = mp->log;
2795
2796	assert(mp->nohomeok);
2797	assert(log);
2798	metapage_homeok(mp);
2799}
2800
2801/*
2802 *	txQuiesce
2803 *
2804 *	Block all new transactions and push anonymous transactions to
2805 *	completion
2806 *
2807 *	This does almost the same thing as jfs_sync below.  We don't
2808 *	worry about deadlocking when jfs_tlocks_low is set, since we would
2809 *	expect jfs_sync to get us out of that jam.
2810 */
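/*
 * Intended pairing (a sketch; the callers live outside this file):
 *
 *	txQuiesce(sb);
 *	... no new transactions start while quiesced ...
 *	txResume(sb);
 */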
2811void txQuiesce(struct super_block *sb)
2812{
2813	struct inode *ip;
2814	struct jfs_inode_info *jfs_ip;
2815	struct jfs_log *log = JFS_SBI(sb)->log;
2816	tid_t tid;
2817
2818	set_bit(log_QUIESCE, &log->flag);
2819
2820	TXN_LOCK();
2821restart:
2822	while (!list_empty(&TxAnchor.anon_list)) {
2823		jfs_ip = list_entry(TxAnchor.anon_list.next,
2824				    struct jfs_inode_info,
2825				    anon_inode_list);
2826		ip = &jfs_ip->vfs_inode;
2827
2828		/*
2829		 * inode will be removed from anonymous list
2830		 * when it is committed
2831		 */
2832		TXN_UNLOCK();
2833		tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
2834		mutex_lock(&jfs_ip->commit_mutex);
2835		txCommit(tid, 1, &ip, 0);
2836		txEnd(tid);
2837		mutex_unlock(&jfs_ip->commit_mutex);
2838		/*
2839		 * Just to be safe.  I don't know how
2840		 * long we can run without blocking
2841		 */
2842		cond_resched();
2843		TXN_LOCK();
2844	}
2845
2846	/*
2847	 * If jfs_sync is running in parallel, there could be some inodes
2848	 * on anon_list2.  Let's check.
2849	 */
2850	if (!list_empty(&TxAnchor.anon_list2)) {
2851		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2852		goto restart;
2853	}
2854	TXN_UNLOCK();
2855
2856	/*
2857	 * We may need to kick off the group commit
2858	 */
2859	jfs_flush_journal(log, 0);
2860}
2861
2862/*
2863 * txResume()
2864 *
2865 * Allows transactions to start again following txQuiesce
2866 */
2867void txResume(struct super_block *sb)
2868{
2869	struct jfs_log *log = JFS_SBI(sb)->log;
2870
2871	clear_bit(log_QUIESCE, &log->flag);
2872	TXN_WAKEUP(&log->syncwait);
2873}
2874
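/*
 * Illustrative only: txQuiesce() and txResume() are meant to be used as a
 * pair around work that needs a quiescent volume (e.g. the freeze path in
 * jfs's super operations).  A minimal sketch of a hypothetical caller
 * follows; the names below are examples, not code from this file.
 */
#if 0
static int example_quiesce_volume(struct super_block *sb)
{
	txQuiesce(sb);		/* block new txns, flush anonymous ones */
	/* ... operate on the quiescent volume ... */
	txResume(sb);		/* let transactions start again */
	return 0;
}
#endif
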
2875/*
2876 *	jfs_sync(void)
2877 *
2878 *	To be run as a kernel daemon.  This is awakened when tlocks run low.
2879 *	We commit any inodes that have anonymous tlocks so that their tlocks
2880 *	become available again.
2881 */
2882int jfs_sync(void *arg)
2883{
2884	struct inode *ip;
2885	struct jfs_inode_info *jfs_ip;
2886	tid_t tid;
2887
2888	set_freezable();
2889	do {
2890		/*
2891		 * write each inode on the anonymous inode list
2892		 */
2893		TXN_LOCK();
2894		while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) {
2895			jfs_ip = list_entry(TxAnchor.anon_list.next,
2896					    struct jfs_inode_info,
2897					    anon_inode_list);
2898			ip = &jfs_ip->vfs_inode;
2899
2900			if (! igrab(ip)) {
2901				/*
2902				 * Inode is being freed
2903				 */
2904				list_del_init(&jfs_ip->anon_inode_list);
2905			} else if (mutex_trylock(&jfs_ip->commit_mutex)) {
2906				/*
2907				 * inode will be removed from anonymous list
2908				 * when it is committed
2909				 */
2910				TXN_UNLOCK();
2911				tid = txBegin(ip->i_sb, COMMIT_INODE);
2912				txCommit(tid, 1, &ip, 0);
2913				txEnd(tid);
2914				mutex_unlock(&jfs_ip->commit_mutex);
2915
2916				iput(ip);
2917				/*
2918				 * Just to be safe.  I don't know how
2919				 * long we can run without blocking
2920				 */
2921				cond_resched();
2922				TXN_LOCK();
2923			} else {
2924				/* We can't get the commit mutex.  It may
2925				 * be held by a thread waiting for tlock's
2926				 * be held by a thread waiting for tlocks,
2927				 * put back on the anon_list.
2928				 */
2929
2930				/* Move from anon_list to anon_list2 */
2931				list_move(&jfs_ip->anon_inode_list,
2932					  &TxAnchor.anon_list2);
2933
2934				TXN_UNLOCK();
2935				iput(ip);
2936				TXN_LOCK();
2937			}
2938		}
2939		/* Add anon_list2 back to anon_list */
2940		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2941
2942		if (freezing(current)) {
2943			TXN_UNLOCK();
2944			try_to_freeze();
2945		} else {
2946			set_current_state(TASK_INTERRUPTIBLE);
2947			TXN_UNLOCK();
2948			schedule();
2949		}
2950	} while (!kthread_should_stop());
2951
2952	jfs_info("jfs_sync being killed");
2953	return 0;
2954}
2955
2956#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
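/*
 *	jfs_txanchor_proc_show()
 *
 *	Dump the TxAnchor state (free lists, wait queues, tlock counts) for
 *	the debug entry under /proc/fs/jfs/; registration is done elsewhere
 *	(jfs_debug.c).
 */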
2957int jfs_txanchor_proc_show(struct seq_file *m, void *v)
2958{
2959	char *freewait;
2960	char *freelockwait;
2961	char *lowlockwait;
2962
2963	freewait =
2964	    waitqueue_active(&TxAnchor.freewait) ? "active" : "empty";
2965	freelockwait =
2966	    waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty";
2967	lowlockwait =
2968	    waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty";
2969
2970	seq_printf(m,
2971		       "JFS TxAnchor\n"
2972		       "============\n"
2973		       "freetid = %d\n"
2974		       "freewait = %s\n"
2975		       "freelock = %d\n"
2976		       "freelockwait = %s\n"
2977		       "lowlockwait = %s\n"
2978		       "tlocksInUse = %d\n"
2979		       "jfs_tlocks_low = %d\n"
2980		       "unlock_queue is %sempty\n",
2981		       TxAnchor.freetid,
2982		       freewait,
2983		       TxAnchor.freelock,
2984		       freelockwait,
2985		       lowlockwait,
2986		       TxAnchor.tlocksInUse,
2987		       jfs_tlocks_low,
2988		       list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
2989	return 0;
2990}
2991#endif
2992
2993#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
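/*
 *	jfs_txstats_proc_show()
 *
 *	Dump the TxStat counters (txBegin/txBeginAnon/txLockAlloc calls and
 *	the reasons they blocked) for the statistics entry in /proc.
 */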
2994int jfs_txstats_proc_show(struct seq_file *m, void *v)
2995{
2996	seq_printf(m,
2997		       "JFS TxStats\n"
2998		       "===========\n"
2999		       "calls to txBegin = %d\n"
3000		       "txBegin blocked by sync barrier = %d\n"
3001		       "txBegin blocked by tlocks low = %d\n"
3002		       "txBegin blocked by no free tid = %d\n"
3003		       "calls to txBeginAnon = %d\n"
3004		       "txBeginAnon blocked by sync barrier = %d\n"
3005		       "txBeginAnon blocked by tlocks low = %d\n"
3006		       "calls to txLockAlloc = %d\n"
3007		       "tLockAlloc blocked by no free lock = %d\n",
3008		       TxStat.txBegin,
3009		       TxStat.txBegin_barrier,
3010		       TxStat.txBegin_lockslow,
3011		       TxStat.txBegin_freetid,
3012		       TxStat.txBeginAnon,
3013		       TxStat.txBeginAnon_barrier,
3014		       TxStat.txBeginAnon_lockslow,
3015		       TxStat.txLockAlloc,
3016		       TxStat.txLockAlloc_freelock);
3017	return 0;
3018}
3019#endif