Linux Audio

Check our new training course

Loading...
v6.2
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *   Copyright (C) International Business Machines Corp., 2000-2004
   4 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   5 */
   6
   7/*
   8 *	jfs_logmgr.c: log manager
   9 *
  10 * for related information, see transaction manager (jfs_txnmgr.c), and
  11 * recovery manager (jfs_logredo.c).
  12 *
  13 * note: for detail, RTFS.
  14 *
  15 *	log buffer manager:
  16 * special purpose buffer manager supporting log i/o requirements.
  17 * per log serial pageout of logpage
  18 * queuing i/o requests and redrive i/o at iodone
  19 * maintain current logpage buffer
  20 * no caching since append only
  21 * appropriate jfs buffer cache buffers as needed
  22 *
  23 *	group commit:
  24 * transactions which wrote COMMIT records in the same in-memory
  25 * log page during the pageout of previous/current log page(s) are
  26 * committed together by the pageout of the page.
  27 *
  28 *	TBD lazy commit:
  29 * transactions are committed asynchronously when the log page
  30 * containing it COMMIT is paged out when it becomes full;
  31 *
  32 *	serialization:
  33 * . a per log lock serialize log write.
  34 * . a per log lock serialize group commit.
  35 * . a per log lock serialize log open/close;
  36 *
  37 *	TBD log integrity:
  38 * careful-write (ping-pong) of last logpage to recover from crash
  39 * in overwrite.
  40 * detection of split (out-of-order) write of physical sectors
  41 * of last logpage via timestamp at end of each sector
  42 * with its mirror data array at trailer).
  43 *
  44 *	alternatives:
  45 * lsn - 64-bit monotonically increasing integer vs
  46 * 32-bit lspn and page eor.
  47 */
  48
  49#include <linux/fs.h>
  50#include <linux/blkdev.h>
  51#include <linux/interrupt.h>
  52#include <linux/completion.h>
  53#include <linux/kthread.h>
  54#include <linux/buffer_head.h>		/* for sync_blockdev() */
  55#include <linux/bio.h>
  56#include <linux/freezer.h>
  57#include <linux/export.h>
  58#include <linux/delay.h>
  59#include <linux/mutex.h>
  60#include <linux/seq_file.h>
  61#include <linux/slab.h>
  62#include "jfs_incore.h"
  63#include "jfs_filsys.h"
  64#include "jfs_metapage.h"
  65#include "jfs_superblock.h"
  66#include "jfs_txnmgr.h"
  67#include "jfs_debug.h"
  68
  69
  70/*
  71 * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
  72 */
  73static struct lbuf *log_redrive_list;
  74static DEFINE_SPINLOCK(log_redrive_lock);
  75
  76
  77/*
  78 *	log read/write serialization (per log)
  79 */
  80#define LOG_LOCK_INIT(log)	mutex_init(&(log)->loglock)
  81#define LOG_LOCK(log)		mutex_lock(&((log)->loglock))
  82#define LOG_UNLOCK(log)		mutex_unlock(&((log)->loglock))
  83
  84
  85/*
  86 *	log group commit serialization (per log)
  87 */
  88
  89#define LOGGC_LOCK_INIT(log)	spin_lock_init(&(log)->gclock)
  90#define LOGGC_LOCK(log)		spin_lock_irq(&(log)->gclock)
  91#define LOGGC_UNLOCK(log)	spin_unlock_irq(&(log)->gclock)
  92#define LOGGC_WAKEUP(tblk)	wake_up_all(&(tblk)->gcwait)
  93
  94/*
  95 *	log sync serialization (per log)
  96 */
  97#define	LOGSYNC_DELTA(logsize)		min((logsize)/8, 128*LOGPSIZE)
  98#define	LOGSYNC_BARRIER(logsize)	((logsize)/4)
  99/*
 100#define	LOGSYNC_DELTA(logsize)		min((logsize)/4, 256*LOGPSIZE)
 101#define	LOGSYNC_BARRIER(logsize)	((logsize)/2)
 102*/
 103
 104
 105/*
 106 *	log buffer cache synchronization
 107 */
 108static DEFINE_SPINLOCK(jfsLCacheLock);
 109
 110#define	LCACHE_LOCK(flags)	spin_lock_irqsave(&jfsLCacheLock, flags)
 111#define	LCACHE_UNLOCK(flags)	spin_unlock_irqrestore(&jfsLCacheLock, flags)
 112
 113/*
 114 * See __SLEEP_COND in jfs_locks.h
 115 */
 116#define LCACHE_SLEEP_COND(wq, cond, flags)	\
 117do {						\
 118	if (cond)				\
 119		break;				\
 120	__SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
 121} while (0)
 122
 123#define	LCACHE_WAKEUP(event)	wake_up(event)
 124
 125
 126/*
 127 *	lbuf buffer cache (lCache) control
 128 */
 129/* log buffer manager pageout control (cumulative, inclusive) */
 130#define	lbmREAD		0x0001
 131#define	lbmWRITE	0x0002	/* enqueue at tail of write queue;
 132				 * init pageout if at head of queue;
 133				 */
 134#define	lbmRELEASE	0x0004	/* remove from write queue
 135				 * at completion of pageout;
 136				 * do not free/recycle it yet:
 137				 * caller will free it;
 138				 */
 139#define	lbmSYNC		0x0008	/* do not return to freelist
 140				 * when removed from write queue;
 141				 */
 142#define lbmFREE		0x0010	/* return to freelist
 143				 * at completion of pageout;
 144				 * the buffer may be recycled;
 145				 */
 146#define	lbmDONE		0x0020
 147#define	lbmERROR	0x0040
 148#define lbmGC		0x0080	/* lbmIODone to perform post-GC processing
 149				 * of log page
 150				 */
 151#define lbmDIRECT	0x0100
 152
 153/*
 154 * Global list of active external journals
 155 */
 156static LIST_HEAD(jfs_external_logs);
 157static struct jfs_log *dummy_log;
 158static DEFINE_MUTEX(jfs_log_mutex);
 159
 160/*
 161 * forward references
 162 */
 163static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
 164			 struct lrd * lrd, struct tlock * tlck);
 165
 166static int lmNextPage(struct jfs_log * log);
 167static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
 168			   int activate);
 169
 170static int open_inline_log(struct super_block *sb);
 171static int open_dummy_log(struct super_block *sb);
 172static int lbmLogInit(struct jfs_log * log);
 173static void lbmLogShutdown(struct jfs_log * log);
 174static struct lbuf *lbmAllocate(struct jfs_log * log, int);
 175static void lbmFree(struct lbuf * bp);
 176static void lbmfree(struct lbuf * bp);
 177static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
 178static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
 179static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
 180static int lbmIOWait(struct lbuf * bp, int flag);
 181static bio_end_io_t lbmIODone;
 182static void lbmStartIO(struct lbuf * bp);
 183static void lmGCwrite(struct jfs_log * log, int cant_block);
 184static int lmLogSync(struct jfs_log * log, int hard_sync);
 185
 186
 187
 188/*
 189 *	statistics
 190 */
 191#ifdef CONFIG_JFS_STATISTICS
 192static struct lmStat {
 193	uint commit;		/* # of commit */
 194	uint pagedone;		/* # of page written */
 195	uint submitted;		/* # of pages submitted */
 196	uint full_page;		/* # of full pages submitted */
 197	uint partial_page;	/* # of partial pages submitted */
 198} lmStat;
 199#endif
 200
 201static void write_special_inodes(struct jfs_log *log,
 202				 int (*writer)(struct address_space *))
 203{
 204	struct jfs_sb_info *sbi;
 205
 206	list_for_each_entry(sbi, &log->sb_list, log_list) {
 207		writer(sbi->ipbmap->i_mapping);
 208		writer(sbi->ipimap->i_mapping);
 209		writer(sbi->direct_inode->i_mapping);
 210	}
 211}
 212
/*
 * NAME:	lmLog()
 *
 * FUNCTION:	write a log record;
 *
 * PARAMETER:	log	- log to write the record to
 *		tblk	- transaction block (NULL for out-of-transaction
 *			  log writes)
 *		lrd	- log record descriptor to be written
 *		tlck	- tlock of the logged object (may be NULL)
 *
 * RETURN:	lsn - offset to the next log record to write (end-of-log);
 *		-1  - error;
 *
 * note: todo: log error handler
 */
int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	  struct tlock * tlck)
{
	int lsn;
	int diffp, difft;
	struct metapage *mp = NULL;
	unsigned long flags;

	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
		 log, tblk, lrd, tlck);

	/* serialize log writes on this log */
	LOG_LOCK(log);

	/* log by (out-of-transaction) JFS ? */
	if (tblk == NULL)
		goto writeRecord;

	/* log from page ? (btree-root or page-less tlocks carry no
	 * metapage whose recovery lsn needs maintaining)
	 */
	if (tlck == NULL ||
	    tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
		goto writeRecord;

	/*
	 *	initialize/update page/transaction recovery lsn
	 */
	lsn = log->lsn;

	LOGSYNC_LOCK(log, flags);

	/*
	 * initialize page lsn if first log write of the page
	 */
	if (mp->lsn == 0) {
		mp->log = log;
		mp->lsn = lsn;
		log->count++;

		/* insert page at tail of logsynclist */
		list_add_tail(&mp->synclist, &log->synclist);
	}

	/*
	 *	initialize/update lsn of tblock of the page
	 *
	 * transaction inherits oldest lsn of pages associated
	 * with allocation/deallocation of resources (their
	 * log records are used to reconstruct allocation map
	 * at recovery time: inode for inode allocation map,
	 * B+-tree index of extent descriptors for block
	 * allocation map);
	 * allocation map pages inherit transaction lsn at
	 * commit time to allow forwarding log syncpt past log
	 * records associated with allocation/deallocation of
	 * resources only after persistent map of these map pages
	 * have been updated and propagated to home.
	 */
	/*
	 * initialize transaction lsn:
	 */
	if (tblk->lsn == 0) {
		/* inherit lsn of its first page logged */
		tblk->lsn = mp->lsn;
		log->count++;

		/* insert tblock after the page on logsynclist */
		list_add(&tblk->synclist, &mp->synclist);
	}
	/*
	 * update transaction lsn:
	 */
	else {
		/* inherit oldest/smallest lsn of page */
		logdiff(diffp, mp->lsn, log);
		logdiff(difft, tblk->lsn, log);
		if (diffp < difft) {
			/* update tblock lsn with page lsn */
			tblk->lsn = mp->lsn;

			/* move tblock after page on logsynclist */
			list_move(&tblk->synclist, &mp->synclist);
		}
	}

	LOGSYNC_UNLOCK(log, flags);

	/*
	 *	write the log record
	 */
      writeRecord:
	lsn = lmWriteRecord(log, tblk, lrd, tlck);

	/*
	 * forward log syncpt if log reached next syncpt trigger
	 */
	logdiff(diffp, lsn, log);
	if (diffp >= log->nextsync)
		lsn = lmLogSync(log, 0);

	/* update end-of-log lsn */
	log->lsn = lsn;

	LOG_UNLOCK(log);

	/* return end-of-log address */
	return lsn;
}
 331
/*
 * NAME:	lmWriteRecord()
 *
 * FUNCTION:	move the log record to current log page
 *
 * PARAMETER:	log	- log to write to
 *		tblk	- transaction the record belongs to (may be NULL)
 *		lrd	- log record descriptor; lrd->length is filled in
 *			  here with the number of data bytes moved
 *		tlck	- tlock describing the data to log (NULL means
 *			  descriptor-only record, e.g. SYNCPT)
 *
 * RETURN:	end-of-log address
 *
 * serialization: LOG_LOCK() held on entry/exit
 */
static int
lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	      struct tlock * tlck)
{
	int lsn = 0;		/* end-of-log address */
	struct lbuf *bp;	/* dst log page buffer */
	struct logpage *lp;	/* dst log page */
	caddr_t dst;		/* destination address in log page */
	int dstoffset;		/* end-of-log offset in log page */
	int freespace;		/* free space in log page */
	caddr_t p;		/* src meta-data page */
	caddr_t src;
	int srclen;
	int nbytes;		/* number of bytes to move */
	int i;
	int len;		/* total data bytes moved, for lrd->length */
	struct linelock *linelock;
	struct lv *lv;
	struct lvd *lvd;
	int l2linesize;

	len = 0;

	/* retrieve destination log page to write */
	bp = (struct lbuf *) log->bp;
	lp = (struct logpage *) bp->l_ldata;
	dstoffset = log->eor;

	/* any log data to write ? */
	if (tlck == NULL)
		goto moveLrd;

	/*
	 *	move log record data
	 */
	/* retrieve source meta-data page to log */
	if (tlck->flag & tlckPAGELOCK) {
		p = (caddr_t) (tlck->mp->data);
		linelock = (struct linelock *) & tlck->lock;
	}
	/* retrieve source in-memory inode to log */
	else if (tlck->flag & tlckINODELOCK) {
		if (tlck->type & tlckDTREE)
			p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
		else
			p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
		linelock = (struct linelock *) & tlck->lock;
	}
	else {
		jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
		return 0;	/* Probably should trap */
	}
	l2linesize = linelock->l2linesize;

      moveData:
	ASSERT(linelock->index <= linelock->maxcnt);

	/* copy each log vector of this linelock into the log page,
	 * advancing to the next page whenever the current one fills
	 */
	lv = linelock->lv;
	for (i = 0; i < linelock->index; i++, lv++) {
		if (lv->length == 0)
			continue;

		/* is page full ? */
		if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
			/* page become full: move on to next page */
			lmNextPage(log);

			bp = log->bp;
			lp = (struct logpage *) bp->l_ldata;
			dstoffset = LOGPHDRSIZE;
		}

		/*
		 * move log vector data
		 */
		src = (u8 *) p + (lv->offset << l2linesize);
		srclen = lv->length << l2linesize;
		len += srclen;
		while (srclen > 0) {
			freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
			nbytes = min(freespace, srclen);
			dst = (caddr_t) lp + dstoffset;
			memcpy(dst, src, nbytes);
			dstoffset += nbytes;

			/* is page not full ? */
			if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
				break;

			/* page become full: move on to next page */
			lmNextPage(log);

			bp = (struct lbuf *) log->bp;
			lp = (struct logpage *) bp->l_ldata;
			dstoffset = LOGPHDRSIZE;

			srclen -= nbytes;
			src += nbytes;
		}

		/*
		 * move log vector descriptor
		 */
		len += 4;
		lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
		lvd->offset = cpu_to_le16(lv->offset);
		lvd->length = cpu_to_le16(lv->length);
		dstoffset += 4;
		jfs_info("lmWriteRecord: lv offset:%d length:%d",
			 lv->offset, lv->length);
	}

	/* linelocks may be chained; follow the chain and move the
	 * next linelock's vectors as part of the same record
	 */
	if ((i = linelock->next)) {
		linelock = (struct linelock *) lid_to_tlock(i);
		goto moveData;
	}

	/*
	 *	move log record descriptor
	 */
      moveLrd:
	lrd->length = cpu_to_le16(len);

	src = (caddr_t) lrd;
	srclen = LOGRDSIZE;

	while (srclen > 0) {
		freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
		nbytes = min(freespace, srclen);
		dst = (caddr_t) lp + dstoffset;
		memcpy(dst, src, nbytes);

		dstoffset += nbytes;
		srclen -= nbytes;

		/* are there more to move than freespace of page ? */
		if (srclen)
			goto pageFull;

		/*
		 * end of log record descriptor
		 */

		/* update last log record eor */
		log->eor = dstoffset;
		bp->l_eor = dstoffset;
		lsn = (log->page << L2LOGPSIZE) + dstoffset;

		if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
			tblk->clsn = lsn;
			jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
				 bp->l_eor);

			INCREMENT(lmStat.commit);	/* # of commit */

			/*
			 * enqueue tblock for group commit:
			 *
			 * enqueue tblock of non-trivial/synchronous COMMIT
			 * at tail of group commit queue
			 * (trivial/asynchronous COMMITs are ignored by
			 * group commit.)
			 */
			LOGGC_LOCK(log);

			/* init tblock gc state */
			tblk->flag = tblkGC_QUEUE;
			tblk->bp = log->bp;
			tblk->pn = log->page;
			tblk->eor = log->eor;

			/* enqueue transaction to commit queue */
			list_add_tail(&tblk->cqueue, &log->cqueue);

			LOGGC_UNLOCK(log);
		}

		jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
			le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);

		/* page not full ? */
		if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
			return lsn;

	      pageFull:
		/* page become full: move on to next page */
		lmNextPage(log);

		bp = (struct lbuf *) log->bp;
		lp = (struct logpage *) bp->l_ldata;
		dstoffset = LOGPHDRSIZE;
		src += nbytes;
	}

	return lsn;
}
 539
 540
/*
 * NAME:	lmNextPage()
 *
 * FUNCTION:	write current page and allocate next page.
 *
 * PARAMETER:	log
 *
 * RETURN:	0
 *
 * serialization: LOG_LOCK() held on entry/exit
 */
static int lmNextPage(struct jfs_log * log)
{
	struct logpage *lp;
	int lspn;		/* log sequence page number */
	int pn;			/* current page number */
	struct lbuf *bp;
	struct lbuf *nextbp;
	struct tblock *tblk;

	/* get current log page number and log sequence page number */
	pn = log->page;
	bp = log->bp;
	lp = (struct logpage *) bp->l_ldata;
	lspn = le32_to_cpu(lp->h.page);

	/* LOGGC_LOCK protects the commit queue and gc state flags */
	LOGGC_LOCK(log);

	/*
	 *	write or queue the full page at the tail of write queue
	 */
	/* get the tail tblk on commit queue */
	if (list_empty(&log->cqueue))
		tblk = NULL;
	else
		tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);

	/* every tblk who has COMMIT record on the current page,
	 * and has not been committed, must be on commit queue
	 * since tblk is queued at commit queueu at the time
	 * of writing its COMMIT record on the page before
	 * page becomes full (even though the tblk thread
	 * who wrote COMMIT record may have been suspended
	 * currently);
	 */

	/* is page bound with outstanding tail tblk ? */
	if (tblk && tblk->pn == pn) {
		/* mark tblk for end-of-page */
		tblk->flag |= tblkGC_EOP;

		if (log->cflag & logGC_PAGEOUT) {
			/* if page is not already on write queue,
			 * just enqueue (no lbmWRITE to prevent redrive)
			 * buffer to wqueue to ensure correct serial order
			 * of the pages since log pages will be added
			 * continuously
			 */
			if (bp->l_wqnext == NULL)
				lbmWrite(log, bp, 0, 0);
		} else {
			/*
			 * No current GC leader, initiate group commit
			 */
			log->cflag |= logGC_PAGEOUT;
			lmGCwrite(log, 0);
		}
	}
	/* page is not bound with outstanding tblk:
	 * init write or mark it to be redriven (lbmWRITE)
	 */
	else {
		/* finalize the page */
		bp->l_ceor = bp->l_eor;
		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
	}
	LOGGC_UNLOCK(log);

	/*
	 *	allocate/initialize next page
	 */
	/* if log wraps, the first data page of log is 2
	 * (0 never used, 1 is superblock).
	 */
	log->page = (pn == log->size - 1) ? 2 : pn + 1;
	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */

	/* allocate/initialize next log page buffer */
	nextbp = lbmAllocate(log, log->page);
	nextbp->l_eor = log->eor;
	log->bp = nextbp;

	/* initialize next log page */
	lp = (struct logpage *) nextbp->l_ldata;
	lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);

	return 0;
}
 641
 642
/*
 * NAME:	lmGroupCommit()
 *
 * FUNCTION:	group commit
 *	initiate pageout of the pages with COMMIT in the order of
 *	page number - redrive pageout of the page at the head of
 *	pageout queue until full page has been written.
 *
 * PARAMETER:	log	- log the transaction committed to
 *		tblk	- transaction block to be group-committed
 *
 * RETURN:	0 on success, -EIO if the group commit saw an I/O error
 *
 * NOTE:
 *	LOGGC_LOCK serializes log group commit queue, and
 *	transaction blocks on the commit queue.
 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 */
int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
{
	int rc = 0;

	LOGGC_LOCK(log);

	/* group committed already ? */
	if (tblk->flag & tblkGC_COMMITTED) {
		if (tblk->flag & tblkGC_ERROR)
			rc = -EIO;

		LOGGC_UNLOCK(log);
		return rc;
	}
	jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);

	if (tblk->xflag & COMMIT_LAZY)
		tblk->flag |= tblkGC_LAZY;

	/* become group leader only if no pageout is in progress,
	 * there is work queued, and this commit is synchronous (or
	 * a flush is pending / tlocks are running low)
	 */
	if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
	    (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
	     || jfs_tlocks_low)) {
		/*
		 * No pageout in progress
		 *
		 * start group commit as its group leader.
		 */
		log->cflag |= logGC_PAGEOUT;

		lmGCwrite(log, 0);
	}

	if (tblk->xflag & COMMIT_LAZY) {
		/*
		 * Lazy transactions can leave now
		 */
		LOGGC_UNLOCK(log);
		return 0;
	}

	/* lmGCwrite gives up LOGGC_LOCK, check again */

	if (tblk->flag & tblkGC_COMMITTED) {
		if (tblk->flag & tblkGC_ERROR)
			rc = -EIO;

		LOGGC_UNLOCK(log);
		return rc;
	}

	/* upcount transaction waiting for completion
	 */
	log->gcrtc++;
	tblk->flag |= tblkGC_READY;

	/* sleep until lmPostGC() marks this tblk committed; the lock
	 * is dropped while sleeping and reacquired on wakeup
	 */
	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));

	/* removed from commit queue */
	if (tblk->flag & tblkGC_ERROR)
		rc = -EIO;

	LOGGC_UNLOCK(log);
	return rc;
}
 723
/*
 * NAME:	lmGCwrite()
 *
 * FUNCTION:	group commit write
 *	initiate write of log page, building a group of all transactions
 *	with commit records on that page.
 *
 * PARAMETER:	log		- log to page out
 *		cant_write	- passed through to lbmWrite() as its
 *				  cant_block argument
 *
 * RETURN:	None
 *
 * NOTE:
 *	LOGGC_LOCK must be held by caller.
 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 */
static void lmGCwrite(struct jfs_log * log, int cant_write)
{
	struct lbuf *bp;
	struct logpage *lp;
	int gcpn;		/* group commit page number */
	struct tblock *tblk;
	struct tblock *xtblk = NULL;

	/*
	 * build the commit group of a log page
	 *
	 * scan commit queue and make a commit group of all
	 * transactions with COMMIT records on the same log page.
	 */
	/* get the head tblk on the commit queue */
	gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;

	list_for_each_entry(tblk, &log->cqueue, cqueue) {
		if (tblk->pn != gcpn)
			break;

		xtblk = tblk;

		/* state transition: (QUEUE, READY) -> COMMIT */
		tblk->flag |= tblkGC_COMMIT;
	}
	tblk = xtblk;		/* last tblk of the page */

	/*
	 * pageout to commit transactions on the log page.
	 */
	bp = (struct lbuf *) tblk->bp;
	lp = (struct logpage *) bp->l_ldata;
	/* is page already full ? */
	if (tblk->flag & tblkGC_EOP) {
		/* mark page to free at end of group commit of the page */
		tblk->flag &= ~tblkGC_EOP;
		tblk->flag |= tblkGC_FREE;
		bp->l_ceor = bp->l_eor;
		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
			 cant_write);
		INCREMENT(lmStat.full_page);
	}
	/* page is not yet full */
	else {
		bp->l_ceor = tblk->eor;	/* ? bp->l_ceor = bp->l_eor; */
		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
		INCREMENT(lmStat.partial_page);
	}
}
 789
/*
 * NAME:	lmPostGC()
 *
 * FUNCTION:	group commit post-processing
 *	Processes transactions after their commit records have been written
 *	to disk, redriving log I/O if necessary.
 *
 * PARAMETER:	bp	- log page buffer whose pageout just completed
 *
 * RETURN:	None
 *
 * NOTE:
 *	This routine is called a interrupt time by lbmIODone
 */
static void lmPostGC(struct lbuf * bp)
{
	unsigned long flags;
	struct jfs_log *log = bp->l_log;
	struct logpage *lp;
	struct tblock *tblk, *temp;

	//LOGGC_LOCK(log);
	/* open-coded LOGGC_LOCK: irqsave form since we may run in
	 * interrupt context (see NOTE above)
	 */
	spin_lock_irqsave(&log->gclock, flags);
	/*
	 * current pageout of group commit completed.
	 *
	 * remove/wakeup transactions from commit queue who were
	 * group committed with the current log page
	 */
	list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
		if (!(tblk->flag & tblkGC_COMMIT))
			break;
		/* if transaction was marked GC_COMMIT then
		 * it has been shipped in the current pageout
		 * and made it to disk - it is committed.
		 */

		if (bp->l_flag & lbmERROR)
			tblk->flag |= tblkGC_ERROR;

		/* remove it from the commit queue */
		list_del(&tblk->cqueue);
		tblk->flag &= ~tblkGC_QUEUE;

		if (tblk == log->flush_tblk) {
			/* we can stop flushing the log now */
			clear_bit(log_FLUSH, &log->flag);
			log->flush_tblk = NULL;
		}

		jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
			 tblk->flag);

		if (!(tblk->xflag & COMMIT_FORCE))
			/*
			 * Hand tblk over to lazy commit thread
			 */
			txLazyUnlock(tblk);
		else {
			/* state transition: COMMIT -> COMMITTED */
			tblk->flag |= tblkGC_COMMITTED;

			if (tblk->flag & tblkGC_READY)
				log->gcrtc--;

			/* wake the waiter in lmGroupCommit() */
			LOGGC_WAKEUP(tblk);
		}

		/* was page full before pageout ?
		 * (and this is the last tblk bound with the page)
		 */
		if (tblk->flag & tblkGC_FREE)
			lbmFree(bp);
		/* did page become full after pageout ?
		 * (and this is the last tblk bound with the page)
		 */
		else if (tblk->flag & tblkGC_EOP) {
			/* finalize the page */
			lp = (struct logpage *) bp->l_ldata;
			bp->l_ceor = bp->l_eor;
			lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
			jfs_info("lmPostGC: calling lbmWrite");
			lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
				 1);
		}

	}

	/* are there any transactions who have entered lnGroupCommit()
	 * (whose COMMITs are after that of the last log page written.
	 * They are waiting for new group commit (above at (SLEEP 1))
	 * or lazy transactions are on a full (queued) log page,
	 * select the latest ready transaction as new group leader and
	 * wake her up to lead her group.
	 *
	 * NOTE(review): tblk here is the loop cursor left over from the
	 * list walk above — presumably always valid when the cqueue is
	 * non-empty; confirm against list_for_each_entry_safe semantics.
	 */
	if ((!list_empty(&log->cqueue)) &&
	    ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
	     test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
		/*
		 * Call lmGCwrite with new group leader
		 */
		lmGCwrite(log, 1);

	/* no transaction are ready yet (transactions are only just
	 * queued (GC_QUEUE) and not entered for group commit yet).
	 * the first transaction entering group commit
	 * will elect herself as new group leader.
	 */
	else
		log->cflag &= ~logGC_PAGEOUT;

	//LOGGC_UNLOCK(log);
	spin_unlock_irqrestore(&log->gclock, flags);
	return;
}
 903
/*
 * NAME:	lmLogSync()
 *
 * FUNCTION:	write log SYNCPT record for specified log
 *	if new sync address is available
 *	(normally the case if sync() is executed by back-ground
 *	process).
 *	calculate new value of i_nextsync which determines when
 *	this code is called again.
 *
 * PARAMETERS:	log	- log structure
 *		hard_sync - 1 to force all metadata to be written
 *
 * RETURN:	lsn - end-of-log address after the (possible) SYNCPT write
 *
 * serialization: LOG_LOCK() held on entry/exit
 */
static int lmLogSync(struct jfs_log * log, int hard_sync)
{
	int logsize;
	int written;		/* written since last syncpt */
	int free;		/* free space left available */
	int delta;		/* additional delta to write normally */
	int more;		/* additional write granted */
	struct lrd lrd;
	int lsn;
	struct logsyncblk *lp;
	unsigned long flags;

	/* push dirty metapages out to disk */
	if (hard_sync)
		write_special_inodes(log, filemap_fdatawrite);
	else
		write_special_inodes(log, filemap_flush);

	/*
	 *	forward syncpt
	 */
	/* if last sync is same as last syncpt,
	 * invoke sync point forward processing to update sync.
	 */

	if (log->sync == log->syncpt) {
		LOGSYNC_LOCK(log, flags);
		if (list_empty(&log->synclist))
			/* nothing outstanding: sync advances to end of log */
			log->sync = log->lsn;
		else {
			/* oldest entry on the synclist bounds the new sync */
			lp = list_entry(log->synclist.next,
					struct logsyncblk, synclist);
			log->sync = lp->lsn;
		}
		LOGSYNC_UNLOCK(log, flags);

	}

	/* if sync is different from last syncpt,
	 * write a SYNCPT record with syncpt = sync.
	 * reset syncpt = sync
	 */
	if (log->sync != log->syncpt) {
		lrd.logtid = 0;
		lrd.backchain = 0;
		lrd.type = cpu_to_le16(LOG_SYNCPT);
		lrd.length = 0;
		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
		lsn = lmWriteRecord(log, NULL, &lrd, NULL);

		log->syncpt = log->sync;
	} else
		lsn = log->lsn;

	/*
	 *	setup next syncpt trigger (SWAG)
	 */
	logsize = log->logsize;

	logdiff(written, lsn, log);
	free = logsize - written;
	delta = LOGSYNC_DELTA(logsize);
	more = min(free / 2, delta);
	if (more < 2 * LOGPSIZE) {
		jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
		/*
		 *	log wrapping
		 *
		 * option 1 - panic ? No.!
		 * option 2 - shutdown file systems
		 *	      associated with log ?
		 * option 3 - extend log ?
		 * option 4 - second chance
		 *
		 * mark log wrapped, and continue.
		 * when all active transactions are completed,
		 * mark log valid for recovery.
		 * if crashed during invalid state, log state
		 * implies invalid log, forcing fsck().
		 */
		/* mark log state log wrap in log superblock */
		/* log->state = LOGWRAP; */

		/* reset sync point computation */
		log->syncpt = log->sync = lsn;
		log->nextsync = delta;
	} else
		/* next syncpt trigger = written + more */
		log->nextsync = written + more;

	/* if number of bytes written from last sync point is more
	 * than 1/4 of the log size, stop new transactions from
	 * starting until all current transactions are completed
	 * by setting syncbarrier flag.
	 */
	if (!test_bit(log_SYNCBARRIER, &log->flag) &&
	    (written > LOGSYNC_BARRIER(logsize)) && log->active) {
		set_bit(log_SYNCBARRIER, &log->flag);
		jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
			 log->syncpt);
		/*
		 * We may have to initiate group commit
		 */
		jfs_flush_journal(log, 0);
	}

	return lsn;
}
1029
1030/*
1031 * NAME:	jfs_syncpt
1032 *
1033 * FUNCTION:	write log SYNCPT record for specified log
1034 *
1035 * PARAMETERS:	log	  - log structure
1036 *		hard_sync - set to 1 to force metadata to be written
1037 */
1038void jfs_syncpt(struct jfs_log *log, int hard_sync)
1039{	LOG_LOCK(log);
1040	if (!test_bit(log_QUIESCE, &log->flag))
1041		lmLogSync(log, hard_sync);
1042	LOG_UNLOCK(log);
1043}
1044
/*
 * NAME:	lmLogOpen()
 *
 * FUNCTION:	open the log on first open;
 *	insert filesystem in the active list of the log.
 *
 * PARAMETER:	ipmnt	- file system mount inode
 *		iplog	- log inode (out)
 *
 * RETURN:	0 on success, negative errno on failure
 *
 * serialization: jfs_log_mutex protects the list of external logs
 */
int lmLogOpen(struct super_block *sb)
{
	int rc;
	struct block_device *bdev;
	struct jfs_log *log;
	struct jfs_sb_info *sbi = JFS_SBI(sb);

	/* no-integrity mounts share a single dummy log */
	if (sbi->flag & JFS_NOINTEGRITY)
		return open_dummy_log(sb);

	/* journal embedded in the filesystem volume itself */
	if (sbi->mntflag & JFS_INLINELOG)
		return open_inline_log(sb);

	mutex_lock(&jfs_log_mutex);
	/* reuse an already-open external journal on the same device */
	list_for_each_entry(log, &jfs_external_logs, journal_list) {
		if (log->bdev->bd_dev == sbi->logdev) {
			if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
				jfs_warn("wrong uuid on JFS journal");
				mutex_unlock(&jfs_log_mutex);
				return -EINVAL;
			}
			/*
			 * add file system to log active file system list
			 */
			if ((rc = lmLogFileSystem(log, sbi, 1))) {
				mutex_unlock(&jfs_log_mutex);
				return rc;
			}
			goto journal_found;
		}
	}

	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
		mutex_unlock(&jfs_log_mutex);
		return -ENOMEM;
	}
	INIT_LIST_HEAD(&log->sb_list);
	init_waitqueue_head(&log->syncwait);

	/*
	 *	external log as separate logical volume
	 *
	 * file systems to log may have n-to-1 relationship;
	 */

	/* open the journal device exclusively for read/write */
	bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
				 log);
	if (IS_ERR(bdev)) {
		rc = PTR_ERR(bdev);
		goto free;
	}

	log->bdev = bdev;
	uuid_copy(&log->uuid, &sbi->loguuid);

	/*
	 * initialize log:
	 */
	if ((rc = lmLogInit(log)))
		goto close;

	list_add(&log->journal_list, &jfs_external_logs);

	/*
	 * add file system to log active file system list
	 */
	if ((rc = lmLogFileSystem(log, sbi, 1)))
		goto shutdown;

journal_found:
	LOG_LOCK(log);
	list_add(&sbi->log_list, &log->sb_list);
	sbi->log = log;
	LOG_UNLOCK(log);

	mutex_unlock(&jfs_log_mutex);
	return 0;

	/*
	 *	unwind on error
	 */
      shutdown:		/* unwind lbmLogInit() */
	list_del(&log->journal_list);
	lbmLogShutdown(log);

      close:		/* close external log device */
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);

      free:		/* free log descriptor */
	mutex_unlock(&jfs_log_mutex);
	kfree(log);

	jfs_warn("lmLogOpen: exit(%d)", rc);
	return rc;
}
1153
1154static int open_inline_log(struct super_block *sb)
1155{
1156	struct jfs_log *log;
1157	int rc;
1158
1159	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1160		return -ENOMEM;
1161	INIT_LIST_HEAD(&log->sb_list);
1162	init_waitqueue_head(&log->syncwait);
1163
1164	set_bit(log_INLINELOG, &log->flag);
1165	log->bdev = sb->s_bdev;
1166	log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1167	log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1168	    (L2LOGPSIZE - sb->s_blocksize_bits);
1169	log->l2bsize = sb->s_blocksize_bits;
1170	ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1171
1172	/*
1173	 * initialize log.
1174	 */
1175	if ((rc = lmLogInit(log))) {
1176		kfree(log);
1177		jfs_warn("lmLogOpen: exit(%d)", rc);
1178		return rc;
1179	}
1180
1181	list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
1182	JFS_SBI(sb)->log = log;
1183
1184	return rc;
1185}
1186
1187static int open_dummy_log(struct super_block *sb)
1188{
1189	int rc;
1190
1191	mutex_lock(&jfs_log_mutex);
1192	if (!dummy_log) {
1193		dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
1194		if (!dummy_log) {
1195			mutex_unlock(&jfs_log_mutex);
1196			return -ENOMEM;
1197		}
1198		INIT_LIST_HEAD(&dummy_log->sb_list);
1199		init_waitqueue_head(&dummy_log->syncwait);
1200		dummy_log->no_integrity = 1;
1201		/* Make up some stuff */
1202		dummy_log->base = 0;
1203		dummy_log->size = 1024;
1204		rc = lmLogInit(dummy_log);
1205		if (rc) {
1206			kfree(dummy_log);
1207			dummy_log = NULL;
1208			mutex_unlock(&jfs_log_mutex);
1209			return rc;
1210		}
1211	}
1212
1213	LOG_LOCK(dummy_log);
1214	list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
1215	JFS_SBI(sb)->log = dummy_log;
1216	LOG_UNLOCK(dummy_log);
1217	mutex_unlock(&jfs_log_mutex);
1218
1219	return 0;
1220}
1221
1222/*
1223 * NAME:	lmLogInit()
1224 *
1225 * FUNCTION:	log initialization at first log open.
1226 *
1227 *	logredo() (or logformat()) should have been run previously.
1228 *	initialize the log from log superblock.
1229 *	set the log state in the superblock to LOGMOUNT and
1230 *	write SYNCPT log record.
1231 *
1232 * PARAMETER:	log	- log structure
1233 *
1234 * RETURN:	0	- if ok
1235 *		-EINVAL	- bad log magic number or superblock dirty
1236 *		error returned from logwait()
1237 *
1238 * serialization: single first open thread
1239 */
int lmLogInit(struct jfs_log * log)
{
	int rc = 0;
	struct lrd lrd;
	struct logsuper *logsuper;
	struct lbuf *bpsuper;
	struct lbuf *bp;
	struct logpage *lp;
	int lsn = 0;

	jfs_info("lmLogInit: log:0x%p", log);

	/* initialize the group commit serialization lock */
	LOGGC_LOCK_INIT(log);

	/* allocate/initialize the log write serialization lock */
	LOG_LOCK_INIT(log);

	/* initialize the log sync lock (guards log->synclist) */
	LOGSYNC_LOCK_INIT(log);

	INIT_LIST_HEAD(&log->synclist);

	INIT_LIST_HEAD(&log->cqueue);
	log->flush_tblk = NULL;

	log->count = 0;

	/*
	 * initialize log i/o
	 */
	if ((rc = lbmLogInit(log)))
		return rc;

	if (!test_bit(log_INLINELOG, &log->flag))
		log->l2bsize = L2LOGPSIZE;

	/* check for disabled journaling to disk */
	if (log->no_integrity) {
		/*
		 * Journal pages will still be filled.  When the time comes
		 * to actually do the I/O, the write is not done, and the
		 * endio routine is called directly.
		 */
		bp = lbmAllocate(log , 0);
		log->bp = bp;
		bp->l_pn = bp->l_eor = 0;
	} else {
		/*
		 * validate log superblock
		 */
		if ((rc = lbmRead(log, 1, &bpsuper)))
			goto errout10;

		logsuper = (struct logsuper *) bpsuper->l_ldata;

		if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
			jfs_warn("*** Log Format Error ! ***");
			rc = -EINVAL;
			goto errout20;
		}

		/* logredo() should have been run successfully. */
		if (logsuper->state != cpu_to_le32(LOGREDONE)) {
			jfs_warn("*** Log Is Dirty ! ***");
			rc = -EINVAL;
			goto errout20;
		}

		/* initialize log from log superblock */
		if (test_bit(log_INLINELOG,&log->flag)) {
			if (log->size != le32_to_cpu(logsuper->size)) {
				rc = -EINVAL;
				goto errout20;
			}
			jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
				 log, (unsigned long long)log->base, log->size);
		} else {
			if (!uuid_equal(&logsuper->uuid, &log->uuid)) {
				jfs_warn("wrong uuid on JFS log device");
				rc = -EINVAL;
				goto errout20;
			}
			log->size = le32_to_cpu(logsuper->size);
			log->l2bsize = le32_to_cpu(logsuper->l2bsize);
			jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
				 log, (unsigned long long)log->base, log->size);
		}

		/* logsuper->end encodes both the page number and the
		 * byte offset (eor) within that page */
		log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
		log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);

		/*
		 * initialize for log append write mode
		 */
		/* establish current/end-of-log page/buffer */
		if ((rc = lbmRead(log, log->page, &bp)))
			goto errout20;

		lp = (struct logpage *) bp->l_ldata;

		jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
			 le32_to_cpu(logsuper->end), log->page, log->eor,
			 le16_to_cpu(lp->h.eor));

		log->bp = bp;
		bp->l_pn = log->page;
		bp->l_eor = log->eor;

		/* if current page is full, move on to next page */
		if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
			lmNextPage(log);

		/*
		 * initialize log syncpoint
		 */
		/*
		 * write the first SYNCPT record with syncpoint = 0
		 * (i.e., log redo up to HERE !);
		 * remove current page from lbm write queue at end of pageout
		 * (to write log superblock update), but do not release to
		 * freelist;
		 */
		lrd.logtid = 0;
		lrd.backchain = 0;
		lrd.type = cpu_to_le16(LOG_SYNCPT);
		lrd.length = 0;
		lrd.log.syncpt.sync = 0;
		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
		bp = log->bp;
		bp->l_ceor = bp->l_eor;
		lp = (struct logpage *) bp->l_ldata;
		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
		lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
		if ((rc = lbmIOWait(bp, 0)))
			goto errout30;

		/*
		 * update/write superblock
		 */
		logsuper->state = cpu_to_le32(LOGMOUNT);
		log->serial = le32_to_cpu(logsuper->serial) + 1;
		logsuper->serial = cpu_to_le32(log->serial);
		lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
		if ((rc = lbmIOWait(bpsuper, lbmFREE)))
			goto errout30;
	}

	/* initialize logsync parameters */
	/* usable log size excludes page 0 (reserved) and page 1
	 * (log superblock) -- see the layout comment in lmLogFormat() */
	log->logsize = (log->size - 2) << L2LOGPSIZE;
	log->lsn = lsn;
	log->syncpt = lsn;
	log->sync = log->syncpt;
	log->nextsync = LOGSYNC_DELTA(log->logsize);

	jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
		 log->lsn, log->syncpt, log->sync);

	/*
	 * initialize for lazy/group commit
	 */
	log->clsn = lsn;

	return 0;

	/*
	 *	unwind on error
	 */
      errout30:		/* release log page */
	log->wqueue = NULL;
	bp->l_wqnext = NULL;
	lbmFree(bp);

      errout20:		/* release log superblock */
	lbmFree(bpsuper);

      errout10:		/* unwind lbmLogInit() */
	lbmLogShutdown(log);

	jfs_warn("lmLogInit: exit(%d)", rc);
	return rc;
}
1421
1422
1423/*
1424 * NAME:	lmLogClose()
1425 *
1426 * FUNCTION:	remove file system <ipmnt> from active list of log <iplog>
1427 *		and close it on last close.
1428 *
1429 * PARAMETER:	sb	- superblock
1430 *
1431 * RETURN:	errors from subroutines
1432 *
1433 * serialization:
1434 */
int lmLogClose(struct super_block *sb)
{
	struct jfs_sb_info *sbi = JFS_SBI(sb);
	struct jfs_log *log = sbi->log;
	struct block_device *bdev;
	int rc = 0;

	jfs_info("lmLogClose: log:0x%p", log);

	mutex_lock(&jfs_log_mutex);
	LOG_LOCK(log);
	/* detach this mount from the log's active list */
	list_del(&sbi->log_list);
	LOG_UNLOCK(log);
	sbi->log = NULL;

	/*
	 * We need to make sure all of the "written" metapages
	 * actually make it to disk
	 */
	sync_blockdev(sb->s_bdev);

	if (test_bit(log_INLINELOG, &log->flag)) {
		/*
		 *	in-line log in host file system
		 */
		rc = lmLogShutdown(log);
		kfree(log);
		goto out;
	}

	/* deactivate this file system in the log superblock's active list */
	if (!log->no_integrity)
		lmLogFileSystem(log, sbi, 0);

	/* other mounts still share this log; keep it open */
	if (!list_empty(&log->sb_list))
		goto out;

	/*
	 * TODO: ensure that the dummy_log is in a state to allow
	 * lbmLogShutdown to deallocate all the buffers and call
	 * kfree against dummy_log.  For now, leave dummy_log & its
	 * buffers in memory, and reuse if another no-integrity mount
	 * is requested.
	 */
	if (log->no_integrity)
		goto out;

	/*
	 *	external log as separate logical volume
	 */
	list_del(&log->journal_list);
	bdev = log->bdev;
	rc = lmLogShutdown(log);

	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);

	kfree(log);

      out:
	mutex_unlock(&jfs_log_mutex);
	jfs_info("lmLogClose: exit(%d)", rc);
	return rc;
}
1497
1498
1499/*
1500 * NAME:	jfs_flush_journal()
1501 *
1502 * FUNCTION:	initiate write of any outstanding transactions to the journal
1503 *		and optionally wait until they are all written to disk
1504 *
1505 *		wait == 0  flush until latest txn is committed, don't wait
1506 *		wait == 1  flush until latest txn is committed, wait
1507 *		wait > 1   flush until all txn's are complete, wait
1508 */
void jfs_flush_journal(struct jfs_log *log, int wait)
{
	int i;
	struct tblock *target = NULL;

	/* jfs_write_inode may call us during read-only mount */
	if (!log)
		return;

	jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);

	LOGGC_LOCK(log);

	if (!list_empty(&log->cqueue)) {
		/*
		 * This ensures that we will keep writing to the journal as long
		 * as there are unwritten commit records
		 */
		target = list_entry(log->cqueue.prev, struct tblock, cqueue);

		if (test_bit(log_FLUSH, &log->flag)) {
			/*
			 * We're already flushing.
			 * if flush_tblk is NULL, we are flushing everything,
			 * so leave it that way.  Otherwise, update it to the
			 * latest transaction
			 */
			if (log->flush_tblk)
				log->flush_tblk = target;
		} else {
			/* Only flush until latest transaction is committed */
			log->flush_tblk = target;
			set_bit(log_FLUSH, &log->flag);

			/*
			 * Initiate I/O on outstanding transactions
			 */
			if (!(log->cflag & logGC_PAGEOUT)) {
				log->cflag |= logGC_PAGEOUT;
				lmGCwrite(log, 0);
			}
		}
	}
	if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
		/* Flush until all activity complete */
		set_bit(log_FLUSH, &log->flag);
		log->flush_tblk = NULL;
	}

	/* block until group commit wakes us via target->gcwait */
	if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
		DECLARE_WAITQUEUE(__wait, current);

		add_wait_queue(&target->gcwait, &__wait);
		set_current_state(TASK_UNINTERRUPTIBLE);
		LOGGC_UNLOCK(log);
		schedule();
		LOGGC_LOCK(log);
		remove_wait_queue(&target->gcwait, &__wait);
	}
	LOGGC_UNLOCK(log);

	if (wait < 2)
		return;

	/* wait > 1: flush until all transactions are complete */
	write_special_inodes(log, filemap_fdatawrite);

	/*
	 * If there was recent activity, we may need to wait
	 * for the lazycommit thread to catch up
	 */
	if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
		for (i = 0; i < 200; i++) {	/* Too much? */
			msleep(250);
			write_special_inodes(log, filemap_fdatawrite);
			if (list_empty(&log->cqueue) &&
			    list_empty(&log->synclist))
				break;
		}
	}
	assert(list_empty(&log->cqueue));

#ifdef CONFIG_JFS_DEBUG
	if (!list_empty(&log->synclist)) {
		struct logsyncblk *lp;

		printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
		list_for_each_entry(lp, &log->synclist, synclist) {
			if (lp->xflag & COMMIT_PAGE) {
				struct metapage *mp = (struct metapage *)lp;
				print_hex_dump(KERN_ERR, "metapage: ",
					       DUMP_PREFIX_ADDRESS, 16, 4,
					       mp, sizeof(struct metapage), 0);
				print_hex_dump(KERN_ERR, "page: ",
					       DUMP_PREFIX_ADDRESS, 16,
					       sizeof(long), mp->page,
					       sizeof(struct page), 0);
			} else
				print_hex_dump(KERN_ERR, "tblock:",
					       DUMP_PREFIX_ADDRESS, 16, 4,
					       lp, sizeof(struct tblock), 0);
		}
	}
#else
	WARN_ON(!list_empty(&log->synclist));
#endif
	clear_bit(log_FLUSH, &log->flag);
}
1616
1617/*
1618 * NAME:	lmLogShutdown()
1619 *
1620 * FUNCTION:	log shutdown at last LogClose().
1621 *
1622 *		write log syncpt record.
1623 *		update super block to set redone flag to 0.
1624 *
1625 * PARAMETER:	log	- log inode
1626 *
1627 * RETURN:	0	- success
1628 *
1629 * serialization: single last close thread
1630 */
int lmLogShutdown(struct jfs_log * log)
{
	int rc;
	struct lrd lrd;
	int lsn;
	struct logsuper *logsuper;
	struct lbuf *bpsuper;
	struct lbuf *bp;
	struct logpage *lp;

	jfs_info("lmLogShutdown: log:0x%p", log);

	/* push all outstanding transactions to disk and wait */
	jfs_flush_journal(log, 2);

	/*
	 * write the last SYNCPT record with syncpoint = 0
	 * (i.e., log redo up to HERE !)
	 */
	lrd.logtid = 0;
	lrd.backchain = 0;
	lrd.type = cpu_to_le16(LOG_SYNCPT);
	lrd.length = 0;
	lrd.log.syncpt.sync = 0;

	lsn = lmWriteRecord(log, NULL, &lrd, NULL);
	bp = log->bp;
	lp = (struct logpage *) bp->l_ldata;
	lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
	/* synchronous pageout of the final log page; buffer is freed
	 * by lbmIOWait(..., lbmFREE) once the i/o completes */
	lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
	lbmIOWait(log->bp, lbmFREE);
	log->bp = NULL;

	/*
	 * synchronous update log superblock
	 * mark log state as shutdown cleanly
	 * (i.e., Log does not need to be replayed).
	 */
	if ((rc = lbmRead(log, 1, &bpsuper)))
		goto out;

	logsuper = (struct logsuper *) bpsuper->l_ldata;
	logsuper->state = cpu_to_le32(LOGREDONE);
	logsuper->end = cpu_to_le32(lsn);
	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
	rc = lbmIOWait(bpsuper, lbmFREE);

	jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
		 lsn, log->page, log->eor);

      out:
	/*
	 * shutdown per log i/o
	 */
	lbmLogShutdown(log);

	if (rc) {
		jfs_warn("lmLogShutdown: exit(%d)", rc);
	}
	return rc;
}
1691
1692
1693/*
1694 * NAME:	lmLogFileSystem()
1695 *
1696 * FUNCTION:	insert (<activate> = true)/remove (<activate> = false)
1697 *	file system into/from log active file system list.
1698 *
 * PARAMETER:	log	- pointer to logs inode.
1700 *		fsdev	- kdev_t of filesystem.
1701 *		serial	- pointer to returned log serial number
1702 *		activate - insert/remove device from active list.
1703 *
1704 * RETURN:	0	- success
1705 *		errors returned by vms_iowait().
1706 */
static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
			   int activate)
{
	int rc = 0;
	int i;
	struct logsuper *logsuper;
	struct lbuf *bpsuper;
	uuid_t *uuid = &sbi->uuid;

	/*
	 * insert/remove file system device to log active file system list.
	 */
	/* read the on-disk log superblock (page 1) */
	if ((rc = lbmRead(log, 1, &bpsuper)))
		return rc;

	logsuper = (struct logsuper *) bpsuper->l_ldata;
	if (activate) {
		/* claim the first unused slot in the active table */
		for (i = 0; i < MAX_ACTIVE; i++)
			if (uuid_is_null(&logsuper->active[i].uuid)) {
				uuid_copy(&logsuper->active[i].uuid, uuid);
				sbi->aggregate = i;
				break;
			}
		if (i == MAX_ACTIVE) {
			jfs_warn("Too many file systems sharing journal!");
			lbmFree(bpsuper);
			return -EMFILE;	/* Is there a better rc? */
		}
	} else {
		/* clear this file system's slot in the active table */
		for (i = 0; i < MAX_ACTIVE; i++)
			if (uuid_equal(&logsuper->active[i].uuid, uuid)) {
				uuid_copy(&logsuper->active[i].uuid,
					  &uuid_null);
				break;
			}
		if (i == MAX_ACTIVE) {
			jfs_warn("Somebody stomped on the journal!");
			lbmFree(bpsuper);
			return -EIO;
		}

	}

	/*
	 * synchronous write log superblock:
	 *
	 * write sidestream bypassing write queue:
	 * at file system mount, log super block is updated for
	 * activation of the file system before any log record
	 * (MOUNT record) of the file system, and at file system
	 * unmount, all meta data for the file system has been
	 * flushed before log super block is updated for deactivation
	 * of the file system.
	 */
	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
	rc = lbmIOWait(bpsuper, lbmFREE);

	return rc;
}
1766
1767/*
1768 *		log buffer manager (lbm)
1769 *		------------------------
1770 *
1771 * special purpose buffer manager supporting log i/o requirements.
1772 *
1773 * per log write queue:
1774 * log pageout occurs in serial order by fifo write queue and
 * restricting to a single i/o in progress at any one time.
1776 * a circular singly-linked list
1777 * (log->wrqueue points to the tail, and buffers are linked via
1778 * bp->wrqueue field), and
 * maintains log page in pageout or waiting for pageout in serial pageout.
1780 */
1781
1782/*
1783 *	lbmLogInit()
1784 *
1785 * initialize per log I/O setup at lmLogInit()
1786 */
static int lbmLogInit(struct jfs_log * log)
{				/* log inode */
	int i;
	struct lbuf *lbuf;

	jfs_info("lbmLogInit: log:0x%p", log);

	/* initialize current buffer cursor */
	log->bp = NULL;

	/* initialize log device write queue */
	log->wqueue = NULL;

	/*
	 * Each log has its own buffer pages allocated to it.  These are
	 * not managed by the page cache.  This ensures that a transaction
	 * writing to the log does not block trying to allocate a page from
	 * the page cache (for the log).  This would be bad, since page
	 * allocation waits on the kswapd thread that may be committing inodes
	 * which would cause log activity.  Was that clear?  I'm trying to
	 * avoid deadlock here.
	 */
	init_waitqueue_head(&log->free_wait);

	log->lbuf_free = NULL;

	/* carve whole pages into LOGPSIZE buffers until LOGPAGES exist */
	for (i = 0; i < LOGPAGES;) {
		char *buffer;
		uint offset;
		struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);

		if (!page)
			goto error;
		buffer = page_address(page);
		for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
			lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
			if (lbuf == NULL) {
				/* no lbuf owns the page yet; drop our ref */
				if (offset == 0)
					__free_page(page);
				goto error;
			}
			if (offset) /* we already have one reference */
				get_page(page);
			lbuf->l_offset = offset;
			lbuf->l_ldata = buffer + offset;
			lbuf->l_page = page;
			lbuf->l_log = log;
			init_waitqueue_head(&lbuf->l_ioevent);

			/* push onto the log's freelist */
			lbuf->l_freelist = log->lbuf_free;
			log->lbuf_free = lbuf;
			i++;
		}
	}

	return (0);

      error:
	/* frees every lbuf (and its page reference) on the freelist */
	lbmLogShutdown(log);
	return -ENOMEM;
}
1848
1849
1850/*
1851 *	lbmLogShutdown()
1852 *
1853 * finalize per log I/O setup at lmLogShutdown()
1854 */
1855static void lbmLogShutdown(struct jfs_log * log)
1856{
1857	struct lbuf *lbuf;
1858
1859	jfs_info("lbmLogShutdown: log:0x%p", log);
1860
1861	lbuf = log->lbuf_free;
1862	while (lbuf) {
1863		struct lbuf *next = lbuf->l_freelist;
1864		__free_page(lbuf->l_page);
1865		kfree(lbuf);
1866		lbuf = next;
1867	}
1868}
1869
1870
1871/*
1872 *	lbmAllocate()
1873 *
1874 * allocate an empty log buffer
1875 */
static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
{
	struct lbuf *bp;
	unsigned long flags;

	/*
	 * recycle from log buffer freelist if any
	 */
	LCACHE_LOCK(flags);
	/* sleeps (dropping the lock) until lbmfree() replenishes the list */
	LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
	log->lbuf_free = bp->l_freelist;
	LCACHE_UNLOCK(flags);

	bp->l_flag = 0;

	bp->l_wqnext = NULL;
	bp->l_freelist = NULL;

	bp->l_pn = pn;
	/* map log page number to device block address */
	bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
	bp->l_ceor = 0;

	return bp;
}
1900
1901
1902/*
1903 *	lbmFree()
1904 *
1905 * release a log buffer to freelist
1906 */
static void lbmFree(struct lbuf * bp)
{
	unsigned long flags;

	/* the freelist is protected by the log-cache lock */
	LCACHE_LOCK(flags);
	lbmfree(bp);
	LCACHE_UNLOCK(flags);
}
1917
1918static void lbmfree(struct lbuf * bp)
1919{
1920	struct jfs_log *log = bp->l_log;
1921
1922	assert(bp->l_wqnext == NULL);
1923
1924	/*
1925	 * return the buffer to head of freelist
1926	 */
1927	bp->l_freelist = log->lbuf_free;
1928	log->lbuf_free = bp;
1929
1930	wake_up(&log->free_wait);
1931	return;
1932}
1933
1934
1935/*
1936 * NAME:	lbmRedrive
1937 *
1938 * FUNCTION:	add a log buffer to the log redrive list
1939 *
1940 * PARAMETER:
1941 *	bp	- log buffer
1942 *
1943 * NOTES:
1944 *	Takes log_redrive_lock.
1945 */
static inline void lbmRedrive(struct lbuf *bp)
{
	unsigned long flags;

	/* push bp onto the global redrive list; irq-safe since lbmIODone()
	 * calls us from bio completion context */
	spin_lock_irqsave(&log_redrive_lock, flags);
	bp->l_redrive_next = log_redrive_list;
	log_redrive_list = bp;
	spin_unlock_irqrestore(&log_redrive_lock, flags);

	/* the jfsIO thread (jfsIOWait) performs the actual submission */
	wake_up_process(jfsIOthread);
}
1957
1958
1959/*
1960 *	lbmRead()
1961 */
static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
{
	struct bio *bio;
	struct lbuf *bp;

	/*
	 * allocate a log buffer
	 */
	*bpp = bp = lbmAllocate(log, pn);
	jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);

	bp->l_flag |= lbmREAD;

	bio = bio_alloc(log->bdev, 1, REQ_OP_READ, GFP_NOFS);

	/* l_blkno is in log blocks; convert to 512-byte sectors */
	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);


	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);

	bio->bi_end_io = lbmIODone;
	bio->bi_private = bp;

	/*check if journaling to disk has been disabled*/
	if (log->no_integrity) {
		/* fake an immediate successful completion */
		bio->bi_iter.bi_size = 0;
		lbmIODone(bio);
	} else {
		submit_bio(bio);
	}

	/* synchronous read: wait for lbmIODone() to clear lbmREAD */
	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));

	return 0;
}
1994
1995
1996/*
1997 *	lbmWrite()
1998 *
1999 * buffer at head of pageout queue stays after completion of
2000 * partial-page pageout and redriven by explicit initiation of
2001 * pageout by caller until full-page pageout is completed and
2002 * released.
2003 *
2004 * device driver i/o done redrives pageout of new buffer at
2005 * head of pageout queue when current buffer at head of pageout
2006 * queue is released at the completion of its full-page pageout.
2007 *
2008 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2009 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2010 */
static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
		     int cant_block)
{
	struct lbuf *tail;
	unsigned long flags;

	jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);

	/* map the logical block address to physical block address */
	bp->l_blkno =
	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));

	LCACHE_LOCK(flags);		/* disable+lock */

	/*
	 * initialize buffer for device driver
	 */
	bp->l_flag = flag;

	/*
	 *	insert bp at tail of write queue associated with log
	 *
	 * (request is either for bp already/currently at head of queue
	 * or new bp to be inserted at tail)
	 */
	tail = log->wqueue;

	/* is buffer not already on write queue ? */
	if (bp->l_wqnext == NULL) {
		/* insert at tail of wqueue */
		if (tail == NULL) {
			/* empty queue: bp becomes a self-linked singleton */
			log->wqueue = bp;
			bp->l_wqnext = bp;
		} else {
			/* circular list: new tail links back to the head */
			log->wqueue = bp;
			bp->l_wqnext = tail->l_wqnext;
			tail->l_wqnext = bp;
		}

		tail = bp;
	}

	/* is buffer at head of wqueue and for write ? */
	/* (tail->l_wqnext is the head of the circular queue) */
	if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
		LCACHE_UNLOCK(flags);	/* unlock+enable */
		return;
	}

	LCACHE_UNLOCK(flags);	/* unlock+enable */

	if (cant_block)
		/* defer submission to the jfsIO thread */
		lbmRedrive(bp);
	else if (flag & lbmSYNC)
		lbmStartIO(bp);
	else {
		/* drop LOGGC_LOCK (held by caller) around the submission */
		LOGGC_UNLOCK(log);
		lbmStartIO(bp);
		LOGGC_LOCK(log);
	}
}
2071
2072
2073/*
2074 *	lbmDirectWrite()
2075 *
2076 * initiate pageout bypassing write queue for sidestream
2077 * (e.g., log superblock) write;
2078 */
2079static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2080{
2081	jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2082		 bp, flag, bp->l_pn);
2083
2084	/*
2085	 * initialize buffer for device driver
2086	 */
2087	bp->l_flag = flag | lbmDIRECT;
2088
2089	/* map the logical block address to physical block address */
2090	bp->l_blkno =
2091	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2092
2093	/*
2094	 *	initiate pageout of the page
2095	 */
2096	lbmStartIO(bp);
2097}
2098
2099
2100/*
2101 * NAME:	lbmStartIO()
2102 *
2103 * FUNCTION:	Interface to DD strategy routine
2104 *
2105 * RETURN:	none
2106 *
2107 * serialization: LCACHE_LOCK() is NOT held during log i/o;
2108 */
static void lbmStartIO(struct lbuf * bp)
{
	struct bio *bio;
	struct jfs_log *log = bp->l_log;

	jfs_info("lbmStartIO");

	bio = bio_alloc(log->bdev, 1, REQ_OP_WRITE | REQ_SYNC, GFP_NOFS);
	/* l_blkno is in log blocks; convert to 512-byte sectors */
	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);


	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);

	bio->bi_end_io = lbmIODone;
	bio->bi_private = bp;


	/* check if journaling to disk has been disabled */
	if (log->no_integrity) {
		/* fake an immediate successful completion */
		bio->bi_iter.bi_size = 0;
		lbmIODone(bio);
	} else {
		submit_bio(bio);
		INCREMENT(lmStat.submitted);
	}
}
2133
2134
2135/*
2136 *	lbmIOWait()
2137 */
static int lbmIOWait(struct lbuf * bp, int flag)
{
	unsigned long flags;
	int rc = 0;

	jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);

	LCACHE_LOCK(flags);		/* disable+lock */

	/* sleep (dropping the lock) until lbmIODone() sets lbmDONE */
	LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);

	rc = (bp->l_flag & lbmERROR) ? -EIO : 0;

	/* optionally recycle the buffer now that the i/o is complete */
	if (flag & lbmFREE)
		lbmfree(bp);

	LCACHE_UNLOCK(flags);	/* unlock+enable */

	jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
	return rc;
}
2159
2160/*
2161 *	lbmIODone()
2162 *
2163 * executed at INTIODONE level
2164 */
static void lbmIODone(struct bio *bio)
{
	struct lbuf *bp = bio->bi_private;
	struct lbuf *nextbp, *tail;
	struct jfs_log *log;
	unsigned long flags;

	/*
	 * get back jfs buffer bound to the i/o buffer
	 */
	jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);

	LCACHE_LOCK(flags);		/* disable+lock */

	bp->l_flag |= lbmDONE;

	if (bio->bi_status) {
		bp->l_flag |= lbmERROR;

		jfs_err("lbmIODone: I/O error in JFS log");
	}

	/* the bio is done with; only the lbuf survives past here */
	bio_put(bio);

	/*
	 *	pagein completion
	 */
	if (bp->l_flag & lbmREAD) {
		bp->l_flag &= ~lbmREAD;

		LCACHE_UNLOCK(flags);	/* unlock+enable */

		/* wakeup I/O initiator */
		LCACHE_WAKEUP(&bp->l_ioevent);

		return;
	}

	/*
	 *	pageout completion
	 *
	 * the bp at the head of write queue has completed pageout.
	 *
	 * if single-commit/full-page pageout, remove the current buffer
	 * from head of pageout queue, and redrive pageout with
	 * the new buffer at head of pageout queue;
	 * otherwise, the partial-page pageout buffer stays at
	 * the head of pageout queue to be redriven for pageout
	 * by lmGroupCommit() until full-page pageout is completed.
	 */
	bp->l_flag &= ~lbmWRITE;
	INCREMENT(lmStat.pagedone);

	/* update committed lsn */
	log = bp->l_log;
	log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;

	/* sidestream write (e.g. log superblock): no queue bookkeeping */
	if (bp->l_flag & lbmDIRECT) {
		LCACHE_WAKEUP(&bp->l_ioevent);
		LCACHE_UNLOCK(flags);
		return;
	}

	tail = log->wqueue;

	/* single element queue */
	if (bp == tail) {
		/* remove head buffer of full-page pageout
		 * from log device write queue
		 */
		if (bp->l_flag & lbmRELEASE) {
			log->wqueue = NULL;
			bp->l_wqnext = NULL;
		}
	}
	/* multi element queue */
	else {
		/* remove head buffer of full-page pageout
		 * from log device write queue
		 */
		if (bp->l_flag & lbmRELEASE) {
			/* unlink bp: the circular list's tail now points
			 * at the new head */
			nextbp = tail->l_wqnext = bp->l_wqnext;
			bp->l_wqnext = NULL;

			/*
			 * redrive pageout of next page at head of write queue:
			 * redrive next page without any bound tblk
			 * (i.e., page w/o any COMMIT records), or
			 * first page of new group commit which has been
			 * queued after current page (subsequent pageout
			 * is performed synchronously, except page without
			 * any COMMITs) by lmGroupCommit() as indicated
			 * by lbmWRITE flag;
			 */
			if (nextbp->l_flag & lbmWRITE) {
				/*
				 * We can't do the I/O at interrupt time.
				 * The jfsIO thread can do it
				 */
				lbmRedrive(nextbp);
			}
		}
	}

	/*
	 *	synchronous pageout:
	 *
	 * buffer has not necessarily been removed from write queue
	 * (e.g., synchronous write of partial-page with COMMIT):
	 * leave buffer for i/o initiator to dispose
	 */
	if (bp->l_flag & lbmSYNC) {
		LCACHE_UNLOCK(flags);	/* unlock+enable */

		/* wakeup I/O initiator */
		LCACHE_WAKEUP(&bp->l_ioevent);
	}

	/*
	 *	Group Commit pageout:
	 */
	else if (bp->l_flag & lbmGC) {
		LCACHE_UNLOCK(flags);
		lmPostGC(bp);
	}

	/*
	 *	asynchronous pageout:
	 *
	 * buffer must have been removed from write queue:
	 * insert buffer at head of freelist where it can be recycled
	 */
	else {
		assert(bp->l_flag & lbmRELEASE);
		assert(bp->l_flag & lbmFREE);
		lbmfree(bp);

		LCACHE_UNLOCK(flags);	/* unlock+enable */
	}
}
2305
/*
 * jfsIOWait() - main loop of the jfsIO kernel thread:
 * drains the log redrive list queued by lbmRedrive(), issuing each
 * deferred write via lbmStartIO(); freezer-aware, exits on kthread_stop().
 */
int jfsIOWait(void *arg)
{
	struct lbuf *bp;

	do {
		spin_lock_irq(&log_redrive_lock);
		while ((bp = log_redrive_list)) {
			log_redrive_list = bp->l_redrive_next;
			bp->l_redrive_next = NULL;
			/* submit outside the spinlock; lbmStartIO may block */
			spin_unlock_irq(&log_redrive_lock);
			lbmStartIO(bp);
			spin_lock_irq(&log_redrive_lock);
		}

		if (freezing(current)) {
			spin_unlock_irq(&log_redrive_lock);
			try_to_freeze();
		} else {
			/* sleep until lbmRedrive() wakes us */
			set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock_irq(&log_redrive_lock);
			schedule();
		}
	} while (!kthread_should_stop());

	jfs_info("jfsIOWait being killed!");
	return 0;
}
2333
/*
 * NAME:	lmLogFormat()/jfs_logform()
 *
 * FUNCTION:	format file system log
 *
 * PARAMETERS:
 *	log	- volume log
 *	logAddress - start address of log space in FS block
 *	logSize	- length of log space in FS block;
 *
 * RETURN:	0	- success
 *		-EIO	- i/o error
 *
 * XXX: We're synchronously writing one page at a time.  This needs to
 *	be improved by writing multiple pages at once.
 */
int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
{
	int rc = -EIO;
	struct jfs_sb_info *sbi;
	struct logsuper *logsuper;
	struct logpage *lp;
	int lspn;		/* log sequence page number */
	struct lrd *lrd_ptr;
	int npages = 0;
	struct lbuf *bp;

	jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
		 (long long)logAddress, logSize);

	/* parameters of the first file system sharing the log */
	sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);

	/* allocate a log buffer */
	bp = lbmAllocate(log, 1);

	npages = logSize >> sbi->l2nbperpage;

	/*
	 *	log space:
	 *
	 * page 0 - reserved;
	 * page 1 - log superblock;
	 * page 2 - log data page: A SYNC log record is written
	 *	    into this page at logform time;
	 * pages 3-N - log data page: set to empty log data pages;
	 */
	/*
	 *	init log superblock: log page 1
	 */
	logsuper = (struct logsuper *) bp->l_ldata;

	logsuper->magic = cpu_to_le32(LOGMAGIC);
	logsuper->version = cpu_to_le32(LOGVERSION);
	logsuper->state = cpu_to_le32(LOGREDONE);
	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
	logsuper->size = cpu_to_le32(npages);
	logsuper->bsize = cpu_to_le32(sbi->bsize);
	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);

	/* write the superblock page synchronously */
	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	bp->l_blkno = logAddress + sbi->nbperpage;
	lbmStartIO(bp);
	if ((rc = lbmIOWait(bp, 0)))
		goto exit;

	/*
	 *	init pages 2 to npages-1 as log data pages:
	 *
	 * log page sequence number (lpsn) initialization:
	 *
	 * pn:   0     1     2     3                 n-1
	 *       +-----+-----+=====+=====+===.....===+=====+
	 * lspn:             N-1   0     1           N-2
	 *                   <--- N page circular file ---->
	 *
	 * the N (= npages-2) data pages of the log is maintained as
	 * a circular file for the log records;
	 * lpsn grows by 1 monotonically as each log page is written
	 * to the circular file of the log;
	 * and setLogpage() will not reset the page number even if
	 * the eor is equal to LOGPHDRSIZE. In order for binary search
	 * still work in find log end process, we have to simulate the
	 * log wrap situation at the log format time.
	 * The 1st log page written will have the highest lpsn. Then
	 * the succeeding log pages will have ascending order of
	 * the lspn starting from 0, ... (N-2)
	 */
	lp = (struct logpage *) bp->l_ldata;
	/*
	 * initialize 1st log page to be written: lpsn = N - 1,
	 * write a SYNCPT log record is written to this page
	 */
	lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);

	/* place a single SYNCPT record in the page's data area */
	lrd_ptr = (struct lrd *) &lp->data;
	lrd_ptr->logtid = 0;
	lrd_ptr->backchain = 0;
	lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
	lrd_ptr->length = 0;
	lrd_ptr->log.syncpt.sync = 0;

	bp->l_blkno += sbi->nbperpage;
	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	lbmStartIO(bp);
	if ((rc = lbmIOWait(bp, 0)))
		goto exit;

	/*
	 *	initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
	 */
	for (lspn = 0; lspn < npages - 3; lspn++) {
		/* reuse the same buffer: only lpsn/eor change per page */
		lp->h.page = lp->t.page = cpu_to_le32(lspn);
		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);

		bp->l_blkno += sbi->nbperpage;
		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
		lbmStartIO(bp);
		if ((rc = lbmIOWait(bp, 0)))
			goto exit;
	}

	rc = 0;
exit:
	/*
	 *	finalize log
	 */
	/* release the buffer */
	lbmFree(bp);

	return rc;
}
2467
#ifdef CONFIG_JFS_STATISTICS
/*
 * Emit the cumulative log manager counters, one per line, into the
 * given seq_file (backing the /proc statistics entry).
 */
int jfs_lmstats_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m, "JFS Logmgr stats\n");
	seq_printf(m, "================\n");
	seq_printf(m, "commits = %d\n", lmStat.commit);
	seq_printf(m, "writes submitted = %d\n", lmStat.submitted);
	seq_printf(m, "writes completed = %d\n", lmStat.pagedone);
	seq_printf(m, "full pages submitted = %d\n", lmStat.full_page);
	seq_printf(m, "partial pages submitted = %d\n", lmStat.partial_page);
	return 0;
}
#endif /* CONFIG_JFS_STATISTICS */
/* ==== older revision (v5.4) of jfs_logmgr.c follows ==== */
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *   Copyright (C) International Business Machines Corp., 2000-2004
   4 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   5 */
   6
   7/*
   8 *	jfs_logmgr.c: log manager
   9 *
  10 * for related information, see transaction manager (jfs_txnmgr.c), and
  11 * recovery manager (jfs_logredo.c).
  12 *
  13 * note: for detail, RTFS.
  14 *
  15 *	log buffer manager:
  16 * special purpose buffer manager supporting log i/o requirements.
  17 * per log serial pageout of logpage
  18 * queuing i/o requests and redrive i/o at iodone
  19 * maintain current logpage buffer
  20 * no caching since append only
  21 * appropriate jfs buffer cache buffers as needed
  22 *
  23 *	group commit:
  24 * transactions which wrote COMMIT records in the same in-memory
  25 * log page during the pageout of previous/current log page(s) are
  26 * committed together by the pageout of the page.
  27 *
  28 *	TBD lazy commit:
  29 * transactions are committed asynchronously when the log page
  30 * containing it COMMIT is paged out when it becomes full;
  31 *
  32 *	serialization:
  33 * . a per log lock serialize log write.
  34 * . a per log lock serialize group commit.
  35 * . a per log lock serialize log open/close;
  36 *
  37 *	TBD log integrity:
  38 * careful-write (ping-pong) of last logpage to recover from crash
  39 * in overwrite.
  40 * detection of split (out-of-order) write of physical sectors
  41 * of last logpage via timestamp at end of each sector
  42 * with its mirror data array at trailer).
  43 *
  44 *	alternatives:
  45 * lsn - 64-bit monotonically increasing integer vs
  46 * 32-bit lspn and page eor.
  47 */
  48
  49#include <linux/fs.h>
  50#include <linux/blkdev.h>
  51#include <linux/interrupt.h>
  52#include <linux/completion.h>
  53#include <linux/kthread.h>
  54#include <linux/buffer_head.h>		/* for sync_blockdev() */
  55#include <linux/bio.h>
  56#include <linux/freezer.h>
  57#include <linux/export.h>
  58#include <linux/delay.h>
  59#include <linux/mutex.h>
  60#include <linux/seq_file.h>
  61#include <linux/slab.h>
  62#include "jfs_incore.h"
  63#include "jfs_filsys.h"
  64#include "jfs_metapage.h"
  65#include "jfs_superblock.h"
  66#include "jfs_txnmgr.h"
  67#include "jfs_debug.h"
  68
  69
  70/*
  71 * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
  72 */
  73static struct lbuf *log_redrive_list;
  74static DEFINE_SPINLOCK(log_redrive_lock);
  75
  76
  77/*
  78 *	log read/write serialization (per log)
  79 */
  80#define LOG_LOCK_INIT(log)	mutex_init(&(log)->loglock)
  81#define LOG_LOCK(log)		mutex_lock(&((log)->loglock))
  82#define LOG_UNLOCK(log)		mutex_unlock(&((log)->loglock))
  83
  84
  85/*
  86 *	log group commit serialization (per log)
  87 */
  88
  89#define LOGGC_LOCK_INIT(log)	spin_lock_init(&(log)->gclock)
  90#define LOGGC_LOCK(log)		spin_lock_irq(&(log)->gclock)
  91#define LOGGC_UNLOCK(log)	spin_unlock_irq(&(log)->gclock)
  92#define LOGGC_WAKEUP(tblk)	wake_up_all(&(tblk)->gcwait)
  93
  94/*
  95 *	log sync serialization (per log)
  96 */
  97#define	LOGSYNC_DELTA(logsize)		min((logsize)/8, 128*LOGPSIZE)
  98#define	LOGSYNC_BARRIER(logsize)	((logsize)/4)
  99/*
 100#define	LOGSYNC_DELTA(logsize)		min((logsize)/4, 256*LOGPSIZE)
 101#define	LOGSYNC_BARRIER(logsize)	((logsize)/2)
 102*/
 103
 104
 105/*
 106 *	log buffer cache synchronization
 107 */
 108static DEFINE_SPINLOCK(jfsLCacheLock);
 109
 110#define	LCACHE_LOCK(flags)	spin_lock_irqsave(&jfsLCacheLock, flags)
 111#define	LCACHE_UNLOCK(flags)	spin_unlock_irqrestore(&jfsLCacheLock, flags)
 112
 113/*
 114 * See __SLEEP_COND in jfs_locks.h
 115 */
 116#define LCACHE_SLEEP_COND(wq, cond, flags)	\
 117do {						\
 118	if (cond)				\
 119		break;				\
 120	__SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
 121} while (0)
 122
 123#define	LCACHE_WAKEUP(event)	wake_up(event)
 124
 125
 126/*
 127 *	lbuf buffer cache (lCache) control
 128 */
 129/* log buffer manager pageout control (cumulative, inclusive) */
 130#define	lbmREAD		0x0001
 131#define	lbmWRITE	0x0002	/* enqueue at tail of write queue;
 132				 * init pageout if at head of queue;
 133				 */
 134#define	lbmRELEASE	0x0004	/* remove from write queue
 135				 * at completion of pageout;
 136				 * do not free/recycle it yet:
 137				 * caller will free it;
 138				 */
 139#define	lbmSYNC		0x0008	/* do not return to freelist
 140				 * when removed from write queue;
 141				 */
 142#define lbmFREE		0x0010	/* return to freelist
 143				 * at completion of pageout;
 144				 * the buffer may be recycled;
 145				 */
 146#define	lbmDONE		0x0020
 147#define	lbmERROR	0x0040
 148#define lbmGC		0x0080	/* lbmIODone to perform post-GC processing
 149				 * of log page
 150				 */
 151#define lbmDIRECT	0x0100
 152
 153/*
 154 * Global list of active external journals
 155 */
 156static LIST_HEAD(jfs_external_logs);
 157static struct jfs_log *dummy_log;
 158static DEFINE_MUTEX(jfs_log_mutex);
 159
 160/*
 161 * forward references
 162 */
 163static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
 164			 struct lrd * lrd, struct tlock * tlck);
 165
 166static int lmNextPage(struct jfs_log * log);
 167static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
 168			   int activate);
 169
 170static int open_inline_log(struct super_block *sb);
 171static int open_dummy_log(struct super_block *sb);
 172static int lbmLogInit(struct jfs_log * log);
 173static void lbmLogShutdown(struct jfs_log * log);
 174static struct lbuf *lbmAllocate(struct jfs_log * log, int);
 175static void lbmFree(struct lbuf * bp);
 176static void lbmfree(struct lbuf * bp);
 177static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
 178static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
 179static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
 180static int lbmIOWait(struct lbuf * bp, int flag);
 181static bio_end_io_t lbmIODone;
 182static void lbmStartIO(struct lbuf * bp);
 183static void lmGCwrite(struct jfs_log * log, int cant_block);
 184static int lmLogSync(struct jfs_log * log, int hard_sync);
 185
 186
 187
 188/*
 189 *	statistics
 190 */
#ifdef CONFIG_JFS_STATISTICS
/* cumulative log manager counters; reported by jfs_lmstats_proc_show() */
static struct lmStat {
	uint commit;		/* # of commit */
	uint pagedone;		/* # of page written */
	uint submitted;		/* # of pages submitted */
	uint full_page;		/* # of full pages submitted */
	uint partial_page;	/* # of partial pages submitted */
} lmStat;
#endif
 200
/*
 * Apply @writer (e.g. filemap_fdatawrite or filemap_flush) to the
 * mappings of the special inodes - block allocation map, inode
 * allocation map and direct inode - of every file system sharing
 * this log.  Return values of @writer are ignored.
 */
static void write_special_inodes(struct jfs_log *log,
				 int (*writer)(struct address_space *))
{
	struct jfs_sb_info *sbi;

	list_for_each_entry(sbi, &log->sb_list, log_list) {
		writer(sbi->ipbmap->i_mapping);
		writer(sbi->ipimap->i_mapping);
		writer(sbi->direct_inode->i_mapping);
	}
}
 212
 213/*
 214 * NAME:	lmLog()
 215 *
 216 * FUNCTION:	write a log record;
 217 *
 218 * PARAMETER:
 219 *
 220 * RETURN:	lsn - offset to the next log record to write (end-of-log);
 221 *		-1  - error;
 222 *
 223 * note: todo: log error handler
 224 */
 225int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 226	  struct tlock * tlck)
 227{
 228	int lsn;
 229	int diffp, difft;
 230	struct metapage *mp = NULL;
 231	unsigned long flags;
 232
 233	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
 234		 log, tblk, lrd, tlck);
 235
 236	LOG_LOCK(log);
 237
 238	/* log by (out-of-transaction) JFS ? */
 239	if (tblk == NULL)
 240		goto writeRecord;
 241
 242	/* log from page ? */
 243	if (tlck == NULL ||
 244	    tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
 245		goto writeRecord;
 246
 247	/*
 248	 *	initialize/update page/transaction recovery lsn
 249	 */
 250	lsn = log->lsn;
 251
 252	LOGSYNC_LOCK(log, flags);
 253
 254	/*
 255	 * initialize page lsn if first log write of the page
 256	 */
 257	if (mp->lsn == 0) {
 258		mp->log = log;
 259		mp->lsn = lsn;
 260		log->count++;
 261
 262		/* insert page at tail of logsynclist */
 263		list_add_tail(&mp->synclist, &log->synclist);
 264	}
 265
 266	/*
 267	 *	initialize/update lsn of tblock of the page
 268	 *
 269	 * transaction inherits oldest lsn of pages associated
 270	 * with allocation/deallocation of resources (their
 271	 * log records are used to reconstruct allocation map
 272	 * at recovery time: inode for inode allocation map,
 273	 * B+-tree index of extent descriptors for block
 274	 * allocation map);
 275	 * allocation map pages inherit transaction lsn at
 276	 * commit time to allow forwarding log syncpt past log
 277	 * records associated with allocation/deallocation of
 278	 * resources only after persistent map of these map pages
 279	 * have been updated and propagated to home.
 280	 */
 281	/*
 282	 * initialize transaction lsn:
 283	 */
 284	if (tblk->lsn == 0) {
 285		/* inherit lsn of its first page logged */
 286		tblk->lsn = mp->lsn;
 287		log->count++;
 288
 289		/* insert tblock after the page on logsynclist */
 290		list_add(&tblk->synclist, &mp->synclist);
 291	}
 292	/*
 293	 * update transaction lsn:
 294	 */
 295	else {
 296		/* inherit oldest/smallest lsn of page */
 297		logdiff(diffp, mp->lsn, log);
 298		logdiff(difft, tblk->lsn, log);
 299		if (diffp < difft) {
 300			/* update tblock lsn with page lsn */
 301			tblk->lsn = mp->lsn;
 302
 303			/* move tblock after page on logsynclist */
 304			list_move(&tblk->synclist, &mp->synclist);
 305		}
 306	}
 307
 308	LOGSYNC_UNLOCK(log, flags);
 309
 310	/*
 311	 *	write the log record
 312	 */
 313      writeRecord:
 314	lsn = lmWriteRecord(log, tblk, lrd, tlck);
 315
 316	/*
 317	 * forward log syncpt if log reached next syncpt trigger
 318	 */
 319	logdiff(diffp, lsn, log);
 320	if (diffp >= log->nextsync)
 321		lsn = lmLogSync(log, 0);
 322
 323	/* update end-of-log lsn */
 324	log->lsn = lsn;
 325
 326	LOG_UNLOCK(log);
 327
 328	/* return end-of-log address */
 329	return lsn;
 330}
 331
 332/*
 333 * NAME:	lmWriteRecord()
 334 *
 335 * FUNCTION:	move the log record to current log page
 336 *
 337 * PARAMETER:	cd	- commit descriptor
 338 *
 339 * RETURN:	end-of-log address
 340 *
 341 * serialization: LOG_LOCK() held on entry/exit
 342 */
 343static int
 344lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 345	      struct tlock * tlck)
 346{
 347	int lsn = 0;		/* end-of-log address */
 348	struct lbuf *bp;	/* dst log page buffer */
 349	struct logpage *lp;	/* dst log page */
 350	caddr_t dst;		/* destination address in log page */
 351	int dstoffset;		/* end-of-log offset in log page */
 352	int freespace;		/* free space in log page */
 353	caddr_t p;		/* src meta-data page */
 354	caddr_t src;
 355	int srclen;
 356	int nbytes;		/* number of bytes to move */
 357	int i;
 358	int len;
 359	struct linelock *linelock;
 360	struct lv *lv;
 361	struct lvd *lvd;
 362	int l2linesize;
 363
 364	len = 0;
 365
 366	/* retrieve destination log page to write */
 367	bp = (struct lbuf *) log->bp;
 368	lp = (struct logpage *) bp->l_ldata;
 369	dstoffset = log->eor;
 370
 371	/* any log data to write ? */
 372	if (tlck == NULL)
 373		goto moveLrd;
 374
 375	/*
 376	 *	move log record data
 377	 */
 378	/* retrieve source meta-data page to log */
 379	if (tlck->flag & tlckPAGELOCK) {
 380		p = (caddr_t) (tlck->mp->data);
 381		linelock = (struct linelock *) & tlck->lock;
 382	}
 383	/* retrieve source in-memory inode to log */
 384	else if (tlck->flag & tlckINODELOCK) {
 385		if (tlck->type & tlckDTREE)
 386			p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
 387		else
 388			p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
 389		linelock = (struct linelock *) & tlck->lock;
 390	}
 391#ifdef	_JFS_WIP
 392	else if (tlck->flag & tlckINLINELOCK) {
 393
 394		inlinelock = (struct inlinelock *) & tlck;
 395		p = (caddr_t) & inlinelock->pxd;
 396		linelock = (struct linelock *) & tlck;
 397	}
 398#endif				/* _JFS_WIP */
 399	else {
 400		jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
 401		return 0;	/* Probably should trap */
 402	}
 403	l2linesize = linelock->l2linesize;
 404
 405      moveData:
 406	ASSERT(linelock->index <= linelock->maxcnt);
 407
 408	lv = linelock->lv;
 409	for (i = 0; i < linelock->index; i++, lv++) {
 410		if (lv->length == 0)
 411			continue;
 412
 413		/* is page full ? */
 414		if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
 415			/* page become full: move on to next page */
 416			lmNextPage(log);
 417
 418			bp = log->bp;
 419			lp = (struct logpage *) bp->l_ldata;
 420			dstoffset = LOGPHDRSIZE;
 421		}
 422
 423		/*
 424		 * move log vector data
 425		 */
 426		src = (u8 *) p + (lv->offset << l2linesize);
 427		srclen = lv->length << l2linesize;
 428		len += srclen;
 429		while (srclen > 0) {
 430			freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 431			nbytes = min(freespace, srclen);
 432			dst = (caddr_t) lp + dstoffset;
 433			memcpy(dst, src, nbytes);
 434			dstoffset += nbytes;
 435
 436			/* is page not full ? */
 437			if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 438				break;
 439
 440			/* page become full: move on to next page */
 441			lmNextPage(log);
 442
 443			bp = (struct lbuf *) log->bp;
 444			lp = (struct logpage *) bp->l_ldata;
 445			dstoffset = LOGPHDRSIZE;
 446
 447			srclen -= nbytes;
 448			src += nbytes;
 449		}
 450
 451		/*
 452		 * move log vector descriptor
 453		 */
 454		len += 4;
 455		lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
 456		lvd->offset = cpu_to_le16(lv->offset);
 457		lvd->length = cpu_to_le16(lv->length);
 458		dstoffset += 4;
 459		jfs_info("lmWriteRecord: lv offset:%d length:%d",
 460			 lv->offset, lv->length);
 461	}
 462
 463	if ((i = linelock->next)) {
 464		linelock = (struct linelock *) lid_to_tlock(i);
 465		goto moveData;
 466	}
 467
 468	/*
 469	 *	move log record descriptor
 470	 */
 471      moveLrd:
 472	lrd->length = cpu_to_le16(len);
 473
 474	src = (caddr_t) lrd;
 475	srclen = LOGRDSIZE;
 476
 477	while (srclen > 0) {
 478		freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 479		nbytes = min(freespace, srclen);
 480		dst = (caddr_t) lp + dstoffset;
 481		memcpy(dst, src, nbytes);
 482
 483		dstoffset += nbytes;
 484		srclen -= nbytes;
 485
 486		/* are there more to move than freespace of page ? */
 487		if (srclen)
 488			goto pageFull;
 489
 490		/*
 491		 * end of log record descriptor
 492		 */
 493
 494		/* update last log record eor */
 495		log->eor = dstoffset;
 496		bp->l_eor = dstoffset;
 497		lsn = (log->page << L2LOGPSIZE) + dstoffset;
 498
 499		if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
 500			tblk->clsn = lsn;
 501			jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
 502				 bp->l_eor);
 503
 504			INCREMENT(lmStat.commit);	/* # of commit */
 505
 506			/*
 507			 * enqueue tblock for group commit:
 508			 *
 509			 * enqueue tblock of non-trivial/synchronous COMMIT
 510			 * at tail of group commit queue
 511			 * (trivial/asynchronous COMMITs are ignored by
 512			 * group commit.)
 513			 */
 514			LOGGC_LOCK(log);
 515
 516			/* init tblock gc state */
 517			tblk->flag = tblkGC_QUEUE;
 518			tblk->bp = log->bp;
 519			tblk->pn = log->page;
 520			tblk->eor = log->eor;
 521
 522			/* enqueue transaction to commit queue */
 523			list_add_tail(&tblk->cqueue, &log->cqueue);
 524
 525			LOGGC_UNLOCK(log);
 526		}
 527
 528		jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
 529			le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
 530
 531		/* page not full ? */
 532		if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 533			return lsn;
 534
 535	      pageFull:
 536		/* page become full: move on to next page */
 537		lmNextPage(log);
 538
 539		bp = (struct lbuf *) log->bp;
 540		lp = (struct logpage *) bp->l_ldata;
 541		dstoffset = LOGPHDRSIZE;
 542		src += nbytes;
 543	}
 544
 545	return lsn;
 546}
 547
 548
 549/*
 550 * NAME:	lmNextPage()
 551 *
 552 * FUNCTION:	write current page and allocate next page.
 553 *
 554 * PARAMETER:	log
 555 *
 556 * RETURN:	0
 557 *
 558 * serialization: LOG_LOCK() held on entry/exit
 559 */
 560static int lmNextPage(struct jfs_log * log)
 561{
 562	struct logpage *lp;
 563	int lspn;		/* log sequence page number */
 564	int pn;			/* current page number */
 565	struct lbuf *bp;
 566	struct lbuf *nextbp;
 567	struct tblock *tblk;
 568
 569	/* get current log page number and log sequence page number */
 570	pn = log->page;
 571	bp = log->bp;
 572	lp = (struct logpage *) bp->l_ldata;
 573	lspn = le32_to_cpu(lp->h.page);
 574
 575	LOGGC_LOCK(log);
 576
 577	/*
 578	 *	write or queue the full page at the tail of write queue
 579	 */
 580	/* get the tail tblk on commit queue */
 581	if (list_empty(&log->cqueue))
 582		tblk = NULL;
 583	else
 584		tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
 585
 586	/* every tblk who has COMMIT record on the current page,
 587	 * and has not been committed, must be on commit queue
 588	 * since tblk is queued at commit queueu at the time
 589	 * of writing its COMMIT record on the page before
 590	 * page becomes full (even though the tblk thread
 591	 * who wrote COMMIT record may have been suspended
 592	 * currently);
 593	 */
 594
 595	/* is page bound with outstanding tail tblk ? */
 596	if (tblk && tblk->pn == pn) {
 597		/* mark tblk for end-of-page */
 598		tblk->flag |= tblkGC_EOP;
 599
 600		if (log->cflag & logGC_PAGEOUT) {
 601			/* if page is not already on write queue,
 602			 * just enqueue (no lbmWRITE to prevent redrive)
 603			 * buffer to wqueue to ensure correct serial order
 604			 * of the pages since log pages will be added
 605			 * continuously
 606			 */
 607			if (bp->l_wqnext == NULL)
 608				lbmWrite(log, bp, 0, 0);
 609		} else {
 610			/*
 611			 * No current GC leader, initiate group commit
 612			 */
 613			log->cflag |= logGC_PAGEOUT;
 614			lmGCwrite(log, 0);
 615		}
 616	}
 617	/* page is not bound with outstanding tblk:
 618	 * init write or mark it to be redriven (lbmWRITE)
 619	 */
 620	else {
 621		/* finalize the page */
 622		bp->l_ceor = bp->l_eor;
 623		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 624		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
 625	}
 626	LOGGC_UNLOCK(log);
 627
 628	/*
 629	 *	allocate/initialize next page
 630	 */
 631	/* if log wraps, the first data page of log is 2
 632	 * (0 never used, 1 is superblock).
 633	 */
 634	log->page = (pn == log->size - 1) ? 2 : pn + 1;
 635	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */
 636
 637	/* allocate/initialize next log page buffer */
 638	nextbp = lbmAllocate(log, log->page);
 639	nextbp->l_eor = log->eor;
 640	log->bp = nextbp;
 641
 642	/* initialize next log page */
 643	lp = (struct logpage *) nextbp->l_ldata;
 644	lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
 645	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
 646
 647	return 0;
 648}
 649
 650
 651/*
 652 * NAME:	lmGroupCommit()
 653 *
 654 * FUNCTION:	group commit
 655 *	initiate pageout of the pages with COMMIT in the order of
 656 *	page number - redrive pageout of the page at the head of
 657 *	pageout queue until full page has been written.
 658 *
 659 * RETURN:
 660 *
 661 * NOTE:
 662 *	LOGGC_LOCK serializes log group commit queue, and
 663 *	transaction blocks on the commit queue.
 664 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 665 */
 666int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
 667{
 668	int rc = 0;
 669
 670	LOGGC_LOCK(log);
 671
 672	/* group committed already ? */
 673	if (tblk->flag & tblkGC_COMMITTED) {
 674		if (tblk->flag & tblkGC_ERROR)
 675			rc = -EIO;
 676
 677		LOGGC_UNLOCK(log);
 678		return rc;
 679	}
 680	jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
 681
 682	if (tblk->xflag & COMMIT_LAZY)
 683		tblk->flag |= tblkGC_LAZY;
 684
 685	if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
 686	    (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
 687	     || jfs_tlocks_low)) {
 688		/*
 689		 * No pageout in progress
 690		 *
 691		 * start group commit as its group leader.
 692		 */
 693		log->cflag |= logGC_PAGEOUT;
 694
 695		lmGCwrite(log, 0);
 696	}
 697
 698	if (tblk->xflag & COMMIT_LAZY) {
 699		/*
 700		 * Lazy transactions can leave now
 701		 */
 702		LOGGC_UNLOCK(log);
 703		return 0;
 704	}
 705
 706	/* lmGCwrite gives up LOGGC_LOCK, check again */
 707
 708	if (tblk->flag & tblkGC_COMMITTED) {
 709		if (tblk->flag & tblkGC_ERROR)
 710			rc = -EIO;
 711
 712		LOGGC_UNLOCK(log);
 713		return rc;
 714	}
 715
 716	/* upcount transaction waiting for completion
 717	 */
 718	log->gcrtc++;
 719	tblk->flag |= tblkGC_READY;
 720
 721	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
 722		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));
 723
 724	/* removed from commit queue */
 725	if (tblk->flag & tblkGC_ERROR)
 726		rc = -EIO;
 727
 728	LOGGC_UNLOCK(log);
 729	return rc;
 730}
 731
 732/*
 733 * NAME:	lmGCwrite()
 734 *
 735 * FUNCTION:	group commit write
 736 *	initiate write of log page, building a group of all transactions
 737 *	with commit records on that page.
 738 *
 739 * RETURN:	None
 740 *
 741 * NOTE:
 742 *	LOGGC_LOCK must be held by caller.
 743 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 744 */
 745static void lmGCwrite(struct jfs_log * log, int cant_write)
 746{
 747	struct lbuf *bp;
 748	struct logpage *lp;
 749	int gcpn;		/* group commit page number */
 750	struct tblock *tblk;
 751	struct tblock *xtblk = NULL;
 752
 753	/*
 754	 * build the commit group of a log page
 755	 *
 756	 * scan commit queue and make a commit group of all
 757	 * transactions with COMMIT records on the same log page.
 758	 */
 759	/* get the head tblk on the commit queue */
 760	gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
 761
 762	list_for_each_entry(tblk, &log->cqueue, cqueue) {
 763		if (tblk->pn != gcpn)
 764			break;
 765
 766		xtblk = tblk;
 767
 768		/* state transition: (QUEUE, READY) -> COMMIT */
 769		tblk->flag |= tblkGC_COMMIT;
 770	}
 771	tblk = xtblk;		/* last tblk of the page */
 772
 773	/*
 774	 * pageout to commit transactions on the log page.
 775	 */
 776	bp = (struct lbuf *) tblk->bp;
 777	lp = (struct logpage *) bp->l_ldata;
 778	/* is page already full ? */
 779	if (tblk->flag & tblkGC_EOP) {
 780		/* mark page to free at end of group commit of the page */
 781		tblk->flag &= ~tblkGC_EOP;
 782		tblk->flag |= tblkGC_FREE;
 783		bp->l_ceor = bp->l_eor;
 784		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 785		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
 786			 cant_write);
 787		INCREMENT(lmStat.full_page);
 788	}
 789	/* page is not yet full */
 790	else {
 791		bp->l_ceor = tblk->eor;	/* ? bp->l_ceor = bp->l_eor; */
 792		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 793		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
 794		INCREMENT(lmStat.partial_page);
 795	}
 796}
 797
 798/*
 799 * NAME:	lmPostGC()
 800 *
 801 * FUNCTION:	group commit post-processing
 802 *	Processes transactions after their commit records have been written
 803 *	to disk, redriving log I/O if necessary.
 804 *
 805 * RETURN:	None
 806 *
 807 * NOTE:
 808 *	This routine is called a interrupt time by lbmIODone
 809 */
 810static void lmPostGC(struct lbuf * bp)
 811{
 812	unsigned long flags;
 813	struct jfs_log *log = bp->l_log;
 814	struct logpage *lp;
 815	struct tblock *tblk, *temp;
 816
 817	//LOGGC_LOCK(log);
 818	spin_lock_irqsave(&log->gclock, flags);
 819	/*
 820	 * current pageout of group commit completed.
 821	 *
 822	 * remove/wakeup transactions from commit queue who were
 823	 * group committed with the current log page
 824	 */
 825	list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
 826		if (!(tblk->flag & tblkGC_COMMIT))
 827			break;
 828		/* if transaction was marked GC_COMMIT then
 829		 * it has been shipped in the current pageout
 830		 * and made it to disk - it is committed.
 831		 */
 832
 833		if (bp->l_flag & lbmERROR)
 834			tblk->flag |= tblkGC_ERROR;
 835
 836		/* remove it from the commit queue */
 837		list_del(&tblk->cqueue);
 838		tblk->flag &= ~tblkGC_QUEUE;
 839
 840		if (tblk == log->flush_tblk) {
 841			/* we can stop flushing the log now */
 842			clear_bit(log_FLUSH, &log->flag);
 843			log->flush_tblk = NULL;
 844		}
 845
 846		jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
 847			 tblk->flag);
 848
 849		if (!(tblk->xflag & COMMIT_FORCE))
 850			/*
 851			 * Hand tblk over to lazy commit thread
 852			 */
 853			txLazyUnlock(tblk);
 854		else {
 855			/* state transition: COMMIT -> COMMITTED */
 856			tblk->flag |= tblkGC_COMMITTED;
 857
 858			if (tblk->flag & tblkGC_READY)
 859				log->gcrtc--;
 860
 861			LOGGC_WAKEUP(tblk);
 862		}
 863
 864		/* was page full before pageout ?
 865		 * (and this is the last tblk bound with the page)
 866		 */
 867		if (tblk->flag & tblkGC_FREE)
 868			lbmFree(bp);
 869		/* did page become full after pageout ?
 870		 * (and this is the last tblk bound with the page)
 871		 */
 872		else if (tblk->flag & tblkGC_EOP) {
 873			/* finalize the page */
 874			lp = (struct logpage *) bp->l_ldata;
 875			bp->l_ceor = bp->l_eor;
 876			lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
 877			jfs_info("lmPostGC: calling lbmWrite");
 878			lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
 879				 1);
 880		}
 881
 882	}
 883
 884	/* are there any transactions who have entered lnGroupCommit()
 885	 * (whose COMMITs are after that of the last log page written.
 886	 * They are waiting for new group commit (above at (SLEEP 1))
 887	 * or lazy transactions are on a full (queued) log page,
 888	 * select the latest ready transaction as new group leader and
 889	 * wake her up to lead her group.
 890	 */
 891	if ((!list_empty(&log->cqueue)) &&
 892	    ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
 893	     test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
 894		/*
 895		 * Call lmGCwrite with new group leader
 896		 */
 897		lmGCwrite(log, 1);
 898
 899	/* no transaction are ready yet (transactions are only just
 900	 * queued (GC_QUEUE) and not entered for group commit yet).
 901	 * the first transaction entering group commit
 902	 * will elect herself as new group leader.
 903	 */
 904	else
 905		log->cflag &= ~logGC_PAGEOUT;
 906
 907	//LOGGC_UNLOCK(log);
 908	spin_unlock_irqrestore(&log->gclock, flags);
 909	return;
 910}
 911
 912/*
 913 * NAME:	lmLogSync()
 914 *
 915 * FUNCTION:	write log SYNCPT record for specified log
 916 *	if new sync address is available
 917 *	(normally the case if sync() is executed by back-ground
 918 *	process).
 919 *	calculate new value of i_nextsync which determines when
 920 *	this code is called again.
 921 *
 922 * PARAMETERS:	log	- log structure
 923 *		hard_sync - 1 to force all metadata to be written
 924 *
 * RETURN:	lsn of the SYNCPT record written (or the current log lsn
 *		if no new SYNCPT record was needed)
 926 *
 927 * serialization: LOG_LOCK() held on entry/exit
 928 */
static int lmLogSync(struct jfs_log * log, int hard_sync)
{
	int logsize;
	int written;		/* written since last syncpt */
	int free;		/* free space left available */
	int delta;		/* additional delta to write normally */
	int more;		/* additional write granted */
	struct lrd lrd;
	int lsn;
	struct logsyncblk *lp;
	unsigned long flags;

	/* push dirty metapages out to disk */
	if (hard_sync)
		/* initiate writeback of all dirty metapages immediately */
		write_special_inodes(log, filemap_fdatawrite);
	else
		/* only schedule writeback; don't wait for initiation */
		write_special_inodes(log, filemap_flush);

	/*
	 *	forward syncpt
	 */
	/* if last sync is same as last syncpt,
	 * invoke sync point forward processing to update sync.
	 */

	if (log->sync == log->syncpt) {
		LOGSYNC_LOCK(log, flags);
		if (list_empty(&log->synclist))
			/* no outstanding pages: sync can advance all the
			 * way to the current end of log
			 */
			log->sync = log->lsn;
		else {
			/* sync is bounded by the oldest (head) entry
			 * still on the synclist
			 */
			lp = list_entry(log->synclist.next,
					struct logsyncblk, synclist);
			log->sync = lp->lsn;
		}
		LOGSYNC_UNLOCK(log, flags);

	}

	/* if sync is different from last syncpt,
	 * write a SYNCPT record with syncpt = sync.
	 * reset syncpt = sync
	 */
	if (log->sync != log->syncpt) {
		lrd.logtid = 0;
		lrd.backchain = 0;
		lrd.type = cpu_to_le16(LOG_SYNCPT);
		lrd.length = 0;
		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
		lsn = lmWriteRecord(log, NULL, &lrd, NULL);

		log->syncpt = log->sync;
	} else
		lsn = log->lsn;

	/*
	 *	setup next syncpt trigger (SWAG)
	 */
	logsize = log->logsize;

	/* written = bytes of log consumed since the last syncpt */
	logdiff(written, lsn, log);
	free = logsize - written;
	delta = LOGSYNC_DELTA(logsize);
	more = min(free / 2, delta);
	if (more < 2 * LOGPSIZE) {
		/* less than two pages of headroom left: log has wrapped */
		jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
		/*
		 *	log wrapping
		 *
		 * option 1 - panic ? No.!
		 * option 2 - shutdown file systems
		 *	      associated with log ?
		 * option 3 - extend log ?
		 * option 4 - second chance
		 *
		 * mark log wrapped, and continue.
		 * when all active transactions are completed,
		 * mark log valid for recovery.
		 * if crashed during invalid state, log state
		 * implies invalid log, forcing fsck().
		 */
		/* mark log state log wrap in log superblock */
		/* log->state = LOGWRAP; */

		/* reset sync point computation */
		log->syncpt = log->sync = lsn;
		log->nextsync = delta;
	} else
		/* next syncpt trigger = written + more */
		log->nextsync = written + more;

	/* if number of bytes written from last sync point is more
	 * than 1/4 of the log size, stop new transactions from
	 * starting until all current transactions are completed
	 * by setting syncbarrier flag.
	 */
	if (!test_bit(log_SYNCBARRIER, &log->flag) &&
	    (written > LOGSYNC_BARRIER(logsize)) && log->active) {
		set_bit(log_SYNCBARRIER, &log->flag);
		jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
			 log->syncpt);
		/*
		 * We may have to initiate group commit
		 */
		jfs_flush_journal(log, 0);
	}

	/* note: the lsn of the SYNCPT record (or current lsn) is
	 * returned, not 0 as the header comment suggests
	 */
	return lsn;
}
1037
1038/*
1039 * NAME:	jfs_syncpt
1040 *
1041 * FUNCTION:	write log SYNCPT record for specified log
1042 *
1043 * PARAMETERS:	log	  - log structure
1044 *		hard_sync - set to 1 to force metadata to be written
1045 */
1046void jfs_syncpt(struct jfs_log *log, int hard_sync)
1047{	LOG_LOCK(log);
1048	if (!test_bit(log_QUIESCE, &log->flag))
1049		lmLogSync(log, hard_sync);
1050	LOG_UNLOCK(log);
1051}
1052
1053/*
1054 * NAME:	lmLogOpen()
1055 *
1056 * FUNCTION:	open the log on first open;
1057 *	insert filesystem in the active list of the log.
1058 *
1059 * PARAMETER:	ipmnt	- file system mount inode
1060 *		iplog	- log inode (out)
1061 *
1062 * RETURN:
1063 *
1064 * serialization:
1065 */
int lmLogOpen(struct super_block *sb)
{
	int rc;
	struct block_device *bdev;
	struct jfs_log *log;
	struct jfs_sb_info *sbi = JFS_SBI(sb);

	/* no-integrity mounts all share a single in-memory dummy log */
	if (sbi->flag & JFS_NOINTEGRITY)
		return open_dummy_log(sb);

	/* log lives inside the file system's own volume */
	if (sbi->mntflag & JFS_INLINELOG)
		return open_inline_log(sb);

	/* serialize open/close of external (shared) logs */
	mutex_lock(&jfs_log_mutex);
	/* an external log may already be open and shared by other file
	 * systems; look it up by device number before creating a new one
	 */
	list_for_each_entry(log, &jfs_external_logs, journal_list) {
		if (log->bdev->bd_dev == sbi->logdev) {
			if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
				jfs_warn("wrong uuid on JFS journal");
				mutex_unlock(&jfs_log_mutex);
				return -EINVAL;
			}
			/*
			 * add file system to log active file system list
			 */
			if ((rc = lmLogFileSystem(log, sbi, 1))) {
				mutex_unlock(&jfs_log_mutex);
				return rc;
			}
			goto journal_found;
		}
	}

	/* not found: allocate and open a new external log */
	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
		mutex_unlock(&jfs_log_mutex);
		return -ENOMEM;
	}
	INIT_LIST_HEAD(&log->sb_list);
	init_waitqueue_head(&log->syncwait);

	/*
	 *	external log as separate logical volume
	 *
	 * file systems to log may have n-to-1 relationship;
	 */

	/* open the log device exclusively (holder = log) */
	bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
				 log);
	if (IS_ERR(bdev)) {
		rc = PTR_ERR(bdev);
		goto free;
	}

	log->bdev = bdev;
	uuid_copy(&log->uuid, &sbi->loguuid);

	/*
	 * initialize log:
	 */
	if ((rc = lmLogInit(log)))
		goto close;

	/* make the log visible for sharing by other mounts */
	list_add(&log->journal_list, &jfs_external_logs);

	/*
	 * add file system to log active file system list
	 */
	if ((rc = lmLogFileSystem(log, sbi, 1)))
		goto shutdown;

journal_found:
	/* bind this superblock to the (new or shared) log */
	LOG_LOCK(log);
	list_add(&sbi->log_list, &log->sb_list);
	sbi->log = log;
	LOG_UNLOCK(log);

	mutex_unlock(&jfs_log_mutex);
	return 0;

	/*
	 *	unwind on error
	 */
      shutdown:		/* unwind lbmLogInit() */
	list_del(&log->journal_list);
	lbmLogShutdown(log);

      close:		/* close external log device */
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);

      free:		/* free log descriptor */
	mutex_unlock(&jfs_log_mutex);
	kfree(log);

	jfs_warn("lmLogOpen: exit(%d)", rc);
	return rc;
}
1161
1162static int open_inline_log(struct super_block *sb)
1163{
1164	struct jfs_log *log;
1165	int rc;
1166
1167	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1168		return -ENOMEM;
1169	INIT_LIST_HEAD(&log->sb_list);
1170	init_waitqueue_head(&log->syncwait);
1171
1172	set_bit(log_INLINELOG, &log->flag);
1173	log->bdev = sb->s_bdev;
1174	log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1175	log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1176	    (L2LOGPSIZE - sb->s_blocksize_bits);
1177	log->l2bsize = sb->s_blocksize_bits;
1178	ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1179
1180	/*
1181	 * initialize log.
1182	 */
1183	if ((rc = lmLogInit(log))) {
1184		kfree(log);
1185		jfs_warn("lmLogOpen: exit(%d)", rc);
1186		return rc;
1187	}
1188
1189	list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
1190	JFS_SBI(sb)->log = log;
1191
1192	return rc;
1193}
1194
1195static int open_dummy_log(struct super_block *sb)
1196{
1197	int rc;
1198
1199	mutex_lock(&jfs_log_mutex);
1200	if (!dummy_log) {
1201		dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
1202		if (!dummy_log) {
1203			mutex_unlock(&jfs_log_mutex);
1204			return -ENOMEM;
1205		}
1206		INIT_LIST_HEAD(&dummy_log->sb_list);
1207		init_waitqueue_head(&dummy_log->syncwait);
1208		dummy_log->no_integrity = 1;
1209		/* Make up some stuff */
1210		dummy_log->base = 0;
1211		dummy_log->size = 1024;
1212		rc = lmLogInit(dummy_log);
1213		if (rc) {
1214			kfree(dummy_log);
1215			dummy_log = NULL;
1216			mutex_unlock(&jfs_log_mutex);
1217			return rc;
1218		}
1219	}
1220
1221	LOG_LOCK(dummy_log);
1222	list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
1223	JFS_SBI(sb)->log = dummy_log;
1224	LOG_UNLOCK(dummy_log);
1225	mutex_unlock(&jfs_log_mutex);
1226
1227	return 0;
1228}
1229
1230/*
1231 * NAME:	lmLogInit()
1232 *
1233 * FUNCTION:	log initialization at first log open.
1234 *
1235 *	logredo() (or logformat()) should have been run previously.
1236 *	initialize the log from log superblock.
1237 *	set the log state in the superblock to LOGMOUNT and
1238 *	write SYNCPT log record.
1239 *
1240 * PARAMETER:	log	- log structure
1241 *
1242 * RETURN:	0	- if ok
1243 *		-EINVAL	- bad log magic number or superblock dirty
1244 *		error returned from logwait()
1245 *
1246 * serialization: single first open thread
1247 */
1248int lmLogInit(struct jfs_log * log)
1249{
1250	int rc = 0;
1251	struct lrd lrd;
1252	struct logsuper *logsuper;
1253	struct lbuf *bpsuper;
1254	struct lbuf *bp;
1255	struct logpage *lp;
1256	int lsn = 0;
1257
1258	jfs_info("lmLogInit: log:0x%p", log);
1259
1260	/* initialize the group commit serialization lock */
1261	LOGGC_LOCK_INIT(log);
1262
1263	/* allocate/initialize the log write serialization lock */
1264	LOG_LOCK_INIT(log);
1265
1266	LOGSYNC_LOCK_INIT(log);
1267
1268	INIT_LIST_HEAD(&log->synclist);
1269
1270	INIT_LIST_HEAD(&log->cqueue);
1271	log->flush_tblk = NULL;
1272
1273	log->count = 0;
1274
1275	/*
1276	 * initialize log i/o
1277	 */
1278	if ((rc = lbmLogInit(log)))
1279		return rc;
1280
1281	if (!test_bit(log_INLINELOG, &log->flag))
1282		log->l2bsize = L2LOGPSIZE;
1283
1284	/* check for disabled journaling to disk */
1285	if (log->no_integrity) {
1286		/*
1287		 * Journal pages will still be filled.  When the time comes
1288		 * to actually do the I/O, the write is not done, and the
1289		 * endio routine is called directly.
1290		 */
1291		bp = lbmAllocate(log , 0);
1292		log->bp = bp;
1293		bp->l_pn = bp->l_eor = 0;
1294	} else {
1295		/*
1296		 * validate log superblock
1297		 */
1298		if ((rc = lbmRead(log, 1, &bpsuper)))
1299			goto errout10;
1300
1301		logsuper = (struct logsuper *) bpsuper->l_ldata;
1302
1303		if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1304			jfs_warn("*** Log Format Error ! ***");
1305			rc = -EINVAL;
1306			goto errout20;
1307		}
1308
1309		/* logredo() should have been run successfully. */
1310		if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1311			jfs_warn("*** Log Is Dirty ! ***");
1312			rc = -EINVAL;
1313			goto errout20;
1314		}
1315
1316		/* initialize log from log superblock */
1317		if (test_bit(log_INLINELOG,&log->flag)) {
1318			if (log->size != le32_to_cpu(logsuper->size)) {
1319				rc = -EINVAL;
1320				goto errout20;
1321			}
1322			jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1323				 log, (unsigned long long)log->base, log->size);
1324		} else {
1325			if (!uuid_equal(&logsuper->uuid, &log->uuid)) {
1326				jfs_warn("wrong uuid on JFS log device");
 
1327				goto errout20;
1328			}
1329			log->size = le32_to_cpu(logsuper->size);
1330			log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1331			jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1332				 log, (unsigned long long)log->base, log->size);
1333		}
1334
1335		log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1336		log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1337
1338		/*
1339		 * initialize for log append write mode
1340		 */
1341		/* establish current/end-of-log page/buffer */
1342		if ((rc = lbmRead(log, log->page, &bp)))
1343			goto errout20;
1344
1345		lp = (struct logpage *) bp->l_ldata;
1346
1347		jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1348			 le32_to_cpu(logsuper->end), log->page, log->eor,
1349			 le16_to_cpu(lp->h.eor));
1350
1351		log->bp = bp;
1352		bp->l_pn = log->page;
1353		bp->l_eor = log->eor;
1354
1355		/* if current page is full, move on to next page */
1356		if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1357			lmNextPage(log);
1358
1359		/*
1360		 * initialize log syncpoint
1361		 */
1362		/*
1363		 * write the first SYNCPT record with syncpoint = 0
1364		 * (i.e., log redo up to HERE !);
1365		 * remove current page from lbm write queue at end of pageout
1366		 * (to write log superblock update), but do not release to
1367		 * freelist;
1368		 */
1369		lrd.logtid = 0;
1370		lrd.backchain = 0;
1371		lrd.type = cpu_to_le16(LOG_SYNCPT);
1372		lrd.length = 0;
1373		lrd.log.syncpt.sync = 0;
1374		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1375		bp = log->bp;
1376		bp->l_ceor = bp->l_eor;
1377		lp = (struct logpage *) bp->l_ldata;
1378		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1379		lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1380		if ((rc = lbmIOWait(bp, 0)))
1381			goto errout30;
1382
1383		/*
1384		 * update/write superblock
1385		 */
1386		logsuper->state = cpu_to_le32(LOGMOUNT);
1387		log->serial = le32_to_cpu(logsuper->serial) + 1;
1388		logsuper->serial = cpu_to_le32(log->serial);
1389		lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1390		if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1391			goto errout30;
1392	}
1393
1394	/* initialize logsync parameters */
1395	log->logsize = (log->size - 2) << L2LOGPSIZE;
1396	log->lsn = lsn;
1397	log->syncpt = lsn;
1398	log->sync = log->syncpt;
1399	log->nextsync = LOGSYNC_DELTA(log->logsize);
1400
1401	jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1402		 log->lsn, log->syncpt, log->sync);
1403
1404	/*
1405	 * initialize for lazy/group commit
1406	 */
1407	log->clsn = lsn;
1408
1409	return 0;
1410
1411	/*
1412	 *	unwind on error
1413	 */
1414      errout30:		/* release log page */
1415	log->wqueue = NULL;
1416	bp->l_wqnext = NULL;
1417	lbmFree(bp);
1418
1419      errout20:		/* release log superblock */
1420	lbmFree(bpsuper);
1421
1422      errout10:		/* unwind lbmLogInit() */
1423	lbmLogShutdown(log);
1424
1425	jfs_warn("lmLogInit: exit(%d)", rc);
1426	return rc;
1427}
1428
1429
1430/*
1431 * NAME:	lmLogClose()
1432 *
1433 * FUNCTION:	remove file system <ipmnt> from active list of log <iplog>
1434 *		and close it on last close.
1435 *
1436 * PARAMETER:	sb	- superblock
1437 *
1438 * RETURN:	errors from subroutines
1439 *
1440 * serialization:
1441 */
int lmLogClose(struct super_block *sb)
{
	struct jfs_sb_info *sbi = JFS_SBI(sb);
	struct jfs_log *log = sbi->log;
	struct block_device *bdev;
	int rc = 0;

	jfs_info("lmLogClose: log:0x%p", log);

	mutex_lock(&jfs_log_mutex);
	/* detach this superblock from the log */
	LOG_LOCK(log);
	list_del(&sbi->log_list);
	LOG_UNLOCK(log);
	sbi->log = NULL;

	/*
	 * We need to make sure all of the "written" metapages
	 * actually make it to disk
	 */
	sync_blockdev(sb->s_bdev);

	if (test_bit(log_INLINELOG, &log->flag)) {
		/*
		 *	in-line log in host file system
		 *
		 * inline logs are private to one superblock, so always
		 * shut down and free here
		 */
		rc = lmLogShutdown(log);
		kfree(log);
		goto out;
	}

	/* deactivate this file system in the log superblock on disk */
	if (!log->no_integrity)
		lmLogFileSystem(log, sbi, 0);

	/* other file systems still share this log: keep it open */
	if (!list_empty(&log->sb_list))
		goto out;

	/*
	 * TODO: ensure that the dummy_log is in a state to allow
	 * lbmLogShutdown to deallocate all the buffers and call
	 * kfree against dummy_log.  For now, leave dummy_log & its
	 * buffers in memory, and reuse if another no-integrity mount
	 * is requested.
	 */
	if (log->no_integrity)
		goto out;

	/*
	 *	external log as separate logical volume
	 *
	 * last user is gone: shut down and release the log device
	 */
	list_del(&log->journal_list);
	bdev = log->bdev;
	rc = lmLogShutdown(log);

	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);

	kfree(log);

      out:
	mutex_unlock(&jfs_log_mutex);
	jfs_info("lmLogClose: exit(%d)", rc);
	return rc;
}
1504
1505
1506/*
1507 * NAME:	jfs_flush_journal()
1508 *
1509 * FUNCTION:	initiate write of any outstanding transactions to the journal
1510 *		and optionally wait until they are all written to disk
1511 *
1512 *		wait == 0  flush until latest txn is committed, don't wait
1513 *		wait == 1  flush until latest txn is committed, wait
1514 *		wait > 1   flush until all txn's are complete, wait
1515 */
void jfs_flush_journal(struct jfs_log *log, int wait)
{
	int i;
	struct tblock *target = NULL;

	/* jfs_write_inode may call us during read-only mount */
	if (!log)
		return;

	jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);

	LOGGC_LOCK(log);

	if (!list_empty(&log->cqueue)) {
		/*
		 * This ensures that we will keep writing to the journal as long
		 * as there are unwritten commit records
		 */
		target = list_entry(log->cqueue.prev, struct tblock, cqueue);

		if (test_bit(log_FLUSH, &log->flag)) {
			/*
			 * We're already flushing.
			 * if flush_tblk is NULL, we are flushing everything,
			 * so leave it that way.  Otherwise, update it to the
			 * latest transaction
			 */
			if (log->flush_tblk)
				log->flush_tblk = target;
		} else {
			/* Only flush until latest transaction is committed */
			log->flush_tblk = target;
			set_bit(log_FLUSH, &log->flag);

			/*
			 * Initiate I/O on outstanding transactions
			 */
			if (!(log->cflag & logGC_PAGEOUT)) {
				log->cflag |= logGC_PAGEOUT;
				lmGCwrite(log, 0);
			}
		}
	}
	if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
		/* Flush until all activity complete */
		set_bit(log_FLUSH, &log->flag);
		log->flush_tblk = NULL;
	}

	if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
		/* sleep until group commit has written target's COMMIT
		 * record; LOGGC_LOCK is dropped across the schedule()
		 */
		DECLARE_WAITQUEUE(__wait, current);

		add_wait_queue(&target->gcwait, &__wait);
		set_current_state(TASK_UNINTERRUPTIBLE);
		LOGGC_UNLOCK(log);
		schedule();
		LOGGC_LOCK(log);
		remove_wait_queue(&target->gcwait, &__wait);
	}
	LOGGC_UNLOCK(log);

	if (wait < 2)
		return;

	/* wait > 1: also push metadata and wait for all activity to drain */
	write_special_inodes(log, filemap_fdatawrite);

	/*
	 * If there was recent activity, we may need to wait
	 * for the lazycommit thread to catch up
	 */
	if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
		/* poll for up to 200 * 250ms = 50 seconds */
		for (i = 0; i < 200; i++) {	/* Too much? */
			msleep(250);
			write_special_inodes(log, filemap_fdatawrite);
			if (list_empty(&log->cqueue) &&
			    list_empty(&log->synclist))
				break;
		}
	}
	assert(list_empty(&log->cqueue));

#ifdef CONFIG_JFS_DEBUG
	if (!list_empty(&log->synclist)) {
		struct logsyncblk *lp;

		/* dump whatever is still stuck on the synclist to aid
		 * debugging of the stalled flush
		 */
		printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
		list_for_each_entry(lp, &log->synclist, synclist) {
			if (lp->xflag & COMMIT_PAGE) {
				struct metapage *mp = (struct metapage *)lp;
				print_hex_dump(KERN_ERR, "metapage: ",
					       DUMP_PREFIX_ADDRESS, 16, 4,
					       mp, sizeof(struct metapage), 0);
				print_hex_dump(KERN_ERR, "page: ",
					       DUMP_PREFIX_ADDRESS, 16,
					       sizeof(long), mp->page,
					       sizeof(struct page), 0);
			} else
				print_hex_dump(KERN_ERR, "tblock:",
					       DUMP_PREFIX_ADDRESS, 16, 4,
					       lp, sizeof(struct tblock), 0);
		}
	}
#else
	WARN_ON(!list_empty(&log->synclist));
#endif
	clear_bit(log_FLUSH, &log->flag);
}
1623
1624/*
1625 * NAME:	lmLogShutdown()
1626 *
1627 * FUNCTION:	log shutdown at last LogClose().
1628 *
1629 *		write log syncpt record.
1630 *		update super block to set redone flag to 0.
1631 *
1632 * PARAMETER:	log	- log inode
1633 *
1634 * RETURN:	0	- success
1635 *
1636 * serialization: single last close thread
1637 */
int lmLogShutdown(struct jfs_log * log)
{
	int rc;
	struct lrd lrd;
	int lsn;
	struct logsuper *logsuper;
	struct lbuf *bpsuper;
	struct lbuf *bp;
	struct logpage *lp;

	jfs_info("lmLogShutdown: log:0x%p", log);

	/* quiesce the log: flush and wait for all transactions (wait == 2) */
	jfs_flush_journal(log, 2);

	/*
	 * write the last SYNCPT record with syncpoint = 0
	 * (i.e., log redo up to HERE !)
	 */
	lrd.logtid = 0;
	lrd.backchain = 0;
	lrd.type = cpu_to_le16(LOG_SYNCPT);
	lrd.length = 0;
	lrd.log.syncpt.sync = 0;

	lsn = lmWriteRecord(log, NULL, &lrd, NULL);
	bp = log->bp;
	lp = (struct logpage *) bp->l_ldata;
	/* finalize the page trailer/header eor before the final pageout */
	lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
	lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
	lbmIOWait(log->bp, lbmFREE);
	log->bp = NULL;

	/*
	 * synchronous update log superblock
	 * mark log state as shutdown cleanly
	 * (i.e., Log does not need to be replayed).
	 */
	if ((rc = lbmRead(log, 1, &bpsuper)))
		goto out;

	logsuper = (struct logsuper *) bpsuper->l_ldata;
	logsuper->state = cpu_to_le32(LOGREDONE);
	logsuper->end = cpu_to_le32(lsn);
	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
	rc = lbmIOWait(bpsuper, lbmFREE);

	jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
		 lsn, log->page, log->eor);

      out:
	/*
	 * shutdown per log i/o
	 */
	lbmLogShutdown(log);

	if (rc) {
		jfs_warn("lmLogShutdown: exit(%d)", rc);
	}
	return rc;
}
1698
1699
1700/*
1701 * NAME:	lmLogFileSystem()
1702 *
1703 * FUNCTION:	insert (<activate> = true)/remove (<activate> = false)
1704 *	file system into/from log active file system list.
1705 *
 * PARAMETERS:	log	- pointer to log structure.
 *		sbi	- superblock info of the file system to
 *			  insert into/remove from the active list.
 *		activate - insert (nonzero)/remove (zero) from active list.
1710 *
1711 * RETURN:	0	- success
1712 *		errors returned by vms_iowait().
1713 */
static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
			   int activate)
{
	int rc = 0;
	int i;
	struct logsuper *logsuper;
	struct lbuf *bpsuper;
	uuid_t *uuid = &sbi->uuid;

	/*
	 * insert/remove file system device to log active file system list.
	 */
	/* read the log superblock (page 1) */
	if ((rc = lbmRead(log, 1, &bpsuper)))
		return rc;

	logsuper = (struct logsuper *) bpsuper->l_ldata;
	if (activate) {
		/* claim the first free (null-uuid) slot in the active list */
		for (i = 0; i < MAX_ACTIVE; i++)
			if (uuid_is_null(&logsuper->active[i].uuid)) {
				uuid_copy(&logsuper->active[i].uuid, uuid);
				/* remember our slot for later removal */
				sbi->aggregate = i;
				break;
			}
		if (i == MAX_ACTIVE) {
			jfs_warn("Too many file systems sharing journal!");
			lbmFree(bpsuper);
			return -EMFILE;	/* Is there a better rc? */
		}
	} else {
		/* clear the slot holding this file system's uuid */
		for (i = 0; i < MAX_ACTIVE; i++)
			if (uuid_equal(&logsuper->active[i].uuid, uuid)) {
				uuid_copy(&logsuper->active[i].uuid,
					  &uuid_null);
				break;
			}
		if (i == MAX_ACTIVE) {
			/* our uuid vanished from the on-disk active list */
			jfs_warn("Somebody stomped on the journal!");
			lbmFree(bpsuper);
			return -EIO;
		}

	}

	/*
	 * synchronous write log superblock:
	 *
	 * write sidestream bypassing write queue:
	 * at file system mount, log super block is updated for
	 * activation of the file system before any log record
	 * (MOUNT record) of the file system, and at file system
	 * unmount, all meta data for the file system has been
	 * flushed before log super block is updated for deactivation
	 * of the file system.
	 */
	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
	rc = lbmIOWait(bpsuper, lbmFREE);

	return rc;
}
1773
1774/*
1775 *		log buffer manager (lbm)
1776 *		------------------------
1777 *
1778 * special purpose buffer manager supporting log i/o requirements.
1779 *
 * per log write queue:
 * log pageout occurs in serial order by fifo write queue and
 * restricting to a single i/o in progress at any one time.
 * a circular singly-linked list
 * (log->wrqueue points to the tail, and buffers are linked via
 * bp->wrqueue field), and
 * maintains log pages in pageout or waiting for pageout in serial pageout.
 */
1788
1789/*
1790 *	lbmLogInit()
1791 *
1792 * initialize per log I/O setup at lmLogInit()
1793 */
static int lbmLogInit(struct jfs_log * log)
{				/* log inode */
	int i;
	struct lbuf *lbuf;

	jfs_info("lbmLogInit: log:0x%p", log);

	/* initialize current buffer cursor */
	log->bp = NULL;

	/* initialize log device write queue */
	log->wqueue = NULL;

	/*
	 * Each log has its own buffer pages allocated to it.  These are
	 * not managed by the page cache.  This ensures that a transaction
	 * writing to the log does not block trying to allocate a page from
	 * the page cache (for the log).  This would be bad, since page
	 * allocation waits on the kswapd thread that may be committing inodes
	 * which would cause log activity.  Was that clear?  I'm trying to
	 * avoid deadlock here.
	 */
	init_waitqueue_head(&log->free_wait);

	log->lbuf_free = NULL;

	/* carve each allocated page into LOGPSIZE-sized lbuf slices and
	 * push them all onto the freelist; i counts lbufs, not pages
	 */
	for (i = 0; i < LOGPAGES;) {
		char *buffer;
		uint offset;
		struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);

		if (!page)
			goto error;
		buffer = page_address(page);
		for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
			lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
			if (lbuf == NULL) {
				/* only free the page if no lbuf holds a
				 * reference to it yet; otherwise the
				 * freelist teardown in lbmLogShutdown
				 * will drop the references
				 */
				if (offset == 0)
					__free_page(page);
				goto error;
			}
			if (offset) /* we already have one reference */
				get_page(page);
			lbuf->l_offset = offset;
			lbuf->l_ldata = buffer + offset;
			lbuf->l_page = page;
			lbuf->l_log = log;
			init_waitqueue_head(&lbuf->l_ioevent);

			/* push onto head of freelist */
			lbuf->l_freelist = log->lbuf_free;
			log->lbuf_free = lbuf;
			i++;
		}
	}

	return (0);

      error:
	/* free whatever made it onto the freelist so far */
	lbmLogShutdown(log);
	return -ENOMEM;
}
1855
1856
1857/*
1858 *	lbmLogShutdown()
1859 *
1860 * finalize per log I/O setup at lmLogShutdown()
1861 */
1862static void lbmLogShutdown(struct jfs_log * log)
1863{
1864	struct lbuf *lbuf;
1865
1866	jfs_info("lbmLogShutdown: log:0x%p", log);
1867
1868	lbuf = log->lbuf_free;
1869	while (lbuf) {
1870		struct lbuf *next = lbuf->l_freelist;
1871		__free_page(lbuf->l_page);
1872		kfree(lbuf);
1873		lbuf = next;
1874	}
1875}
1876
1877
1878/*
1879 *	lbmAllocate()
1880 *
1881 * allocate an empty log buffer
1882 */
static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
{
	struct lbuf *bp;
	unsigned long flags;

	/*
	 * recycle from log buffer freelist if any
	 */
	LCACHE_LOCK(flags);
	/* sleep (releasing/reacquiring the lock) until a free buffer
	 * appears on the freelist; never returns NULL
	 */
	LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
	log->lbuf_free = bp->l_freelist;
	LCACHE_UNLOCK(flags);

	/* reset buffer state for reuse */
	bp->l_flag = 0;

	bp->l_wqnext = NULL;
	bp->l_freelist = NULL;

	/* bind the buffer to log page pn and its device block address */
	bp->l_pn = pn;
	bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
	bp->l_ceor = 0;

	return bp;
}
1907
1908
1909/*
1910 *	lbmFree()
1911 *
1912 * release a log buffer to freelist
1913 */
/*
 *	lbmFree()
 *
 * release a log buffer to freelist; locked wrapper around lbmfree()
 */
static void lbmFree(struct lbuf * bp)
{
	unsigned long flags;

	LCACHE_LOCK(flags);
	lbmfree(bp);
	LCACHE_UNLOCK(flags);
}
1924
1925static void lbmfree(struct lbuf * bp)
1926{
1927	struct jfs_log *log = bp->l_log;
1928
1929	assert(bp->l_wqnext == NULL);
1930
1931	/*
1932	 * return the buffer to head of freelist
1933	 */
1934	bp->l_freelist = log->lbuf_free;
1935	log->lbuf_free = bp;
1936
1937	wake_up(&log->free_wait);
1938	return;
1939}
1940
1941
1942/*
1943 * NAME:	lbmRedrive
1944 *
1945 * FUNCTION:	add a log buffer to the log redrive list
1946 *
1947 * PARAMETER:
1948 *	bp	- log buffer
1949 *
1950 * NOTES:
1951 *	Takes log_redrive_lock.
1952 */
static inline void lbmRedrive(struct lbuf *bp)
{
	unsigned long flags;

	/* push the buffer onto the global redrive list; the jfsIO
	 * thread will issue the actual i/o from process context
	 */
	spin_lock_irqsave(&log_redrive_lock, flags);
	bp->l_redrive_next = log_redrive_list;
	log_redrive_list = bp;
	spin_unlock_irqrestore(&log_redrive_lock, flags);

	wake_up_process(jfsIOthread);
}
1964
1965
1966/*
1967 *	lbmRead()
1968 */
/* synchronously read log page pn into a freshly allocated lbuf,
 * returned through *bpp; always returns 0 (errors surface via the
 * lbmERROR flag set by lbmIODone)
 */
static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
{
	struct bio *bio;
	struct lbuf *bp;

	/*
	 * allocate a log buffer
	 */
	*bpp = bp = lbmAllocate(log, pn);
	jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);

	bp->l_flag |= lbmREAD;

	bio = bio_alloc(GFP_NOFS, 1);

	/* convert log block number to a 512-byte sector address */
	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
	bio_set_dev(bio, log->bdev);

	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);

	bio->bi_end_io = lbmIODone;
	bio->bi_private = bp;
	bio->bi_opf = REQ_OP_READ;
	/*check if journaling to disk has been disabled*/
	if (log->no_integrity) {
		/* fake an immediate completion instead of doing i/o */
		bio->bi_iter.bi_size = 0;
		lbmIODone(bio);
	} else {
		submit_bio(bio);
	}

	/* wait for lbmIODone to clear/replace the lbmREAD flag */
	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));

	return 0;
}
2005
2006
2007/*
2008 *	lbmWrite()
2009 *
2010 * buffer at head of pageout queue stays after completion of
2011 * partial-page pageout and redriven by explicit initiation of
2012 * pageout by caller until full-page pageout is completed and
2013 * released.
2014 *
2015 * device driver i/o done redrives pageout of new buffer at
2016 * head of pageout queue when current buffer at head of pageout
2017 * queue is released at the completion of its full-page pageout.
2018 *
2019 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2020 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2021 */
static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
		     int cant_block)
{
	struct lbuf *tail;
	unsigned long flags;

	jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);

	/* map the logical block address to physical block address */
	bp->l_blkno =
	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));

	LCACHE_LOCK(flags);		/* disable+lock */

	/*
	 * initialize buffer for device driver
	 */
	bp->l_flag = flag;

	/*
	 *	insert bp at tail of write queue associated with log
	 *
	 * (request is either for bp already/currently at head of queue
	 * or new bp to be inserted at tail)
	 */
	tail = log->wqueue;

	/* is buffer not already on write queue ? */
	if (bp->l_wqnext == NULL) {
		/* insert at tail of wqueue */
		if (tail == NULL) {
			/* empty queue: bp is both head and tail, and the
			 * circular list points to itself
			 */
			log->wqueue = bp;
			bp->l_wqnext = bp;
		} else {
			/* log->wqueue tracks the tail; head is tail->l_wqnext */
			log->wqueue = bp;
			bp->l_wqnext = tail->l_wqnext;
			tail->l_wqnext = bp;
		}

		tail = bp;
	}

	/* is buffer at head of wqueue and for write ? */
	if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
		/* not our turn yet (or read-only request): the i/o will
		 * be redriven when the current head completes
		 */
		LCACHE_UNLOCK(flags);	/* unlock+enable */
		return;
	}

	LCACHE_UNLOCK(flags);	/* unlock+enable */

	if (cant_block)
		/* defer actual i/o submission to the jfsIO thread */
		lbmRedrive(bp);
	else if (flag & lbmSYNC)
		lbmStartIO(bp);
	else {
		/* drop LOGGC_LOCK around submission so group commit can
		 * proceed while we start the i/o
		 */
		LOGGC_UNLOCK(log);
		lbmStartIO(bp);
		LOGGC_LOCK(log);
	}
}
2082
2083
2084/*
2085 *	lbmDirectWrite()
2086 *
2087 * initiate pageout bypassing write queue for sidestream
2088 * (e.g., log superblock) write;
2089 */
2090static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2091{
2092	jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2093		 bp, flag, bp->l_pn);
2094
2095	/*
2096	 * initialize buffer for device driver
2097	 */
2098	bp->l_flag = flag | lbmDIRECT;
2099
2100	/* map the logical block address to physical block address */
2101	bp->l_blkno =
2102	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2103
2104	/*
2105	 *	initiate pageout of the page
2106	 */
2107	lbmStartIO(bp);
2108}
2109
2110
2111/*
2112 * NAME:	lbmStartIO()
2113 *
2114 * FUNCTION:	Interface to DD strategy routine
2115 *
2116 * RETURN:	none
2117 *
2118 * serialization: LCACHE_LOCK() is NOT held during log i/o;
2119 */
2120static void lbmStartIO(struct lbuf * bp)
2121{
2122	struct bio *bio;
2123	struct jfs_log *log = bp->l_log;
2124
2125	jfs_info("lbmStartIO");
2126
2127	bio = bio_alloc(GFP_NOFS, 1);
2128	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2129	bio_set_dev(bio, log->bdev);
2130
2131	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
2132	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
2133
2134	bio->bi_end_io = lbmIODone;
2135	bio->bi_private = bp;
2136	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
2137
2138	/* check if journaling to disk has been disabled */
2139	if (log->no_integrity) {
2140		bio->bi_iter.bi_size = 0;
2141		lbmIODone(bio);
2142	} else {
2143		submit_bio(bio);
2144		INCREMENT(lmStat.submitted);
2145	}
2146}
2147
2148
2149/*
2150 *	lbmIOWait()
2151 */
2152static int lbmIOWait(struct lbuf * bp, int flag)
2153{
2154	unsigned long flags;
2155	int rc = 0;
2156
2157	jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2158
2159	LCACHE_LOCK(flags);		/* disable+lock */
2160
2161	LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2162
2163	rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2164
2165	if (flag & lbmFREE)
2166		lbmfree(bp);
2167
2168	LCACHE_UNLOCK(flags);	/* unlock+enable */
2169
2170	jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2171	return rc;
2172}
2173
2174/*
2175 *	lbmIODone()
2176 *
2177 * executed at INTIODONE level
2178 */
2179static void lbmIODone(struct bio *bio)
2180{
2181	struct lbuf *bp = bio->bi_private;
2182	struct lbuf *nextbp, *tail;
2183	struct jfs_log *log;
2184	unsigned long flags;
2185
2186	/*
2187	 * get back jfs buffer bound to the i/o buffer
2188	 */
2189	jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
2190
2191	LCACHE_LOCK(flags);		/* disable+lock */
2192
2193	bp->l_flag |= lbmDONE;
2194
2195	if (bio->bi_status) {
2196		bp->l_flag |= lbmERROR;
2197
2198		jfs_err("lbmIODone: I/O error in JFS log");
2199	}
2200
2201	bio_put(bio);
2202
2203	/*
2204	 *	pagein completion
2205	 */
2206	if (bp->l_flag & lbmREAD) {
2207		bp->l_flag &= ~lbmREAD;
2208
2209		LCACHE_UNLOCK(flags);	/* unlock+enable */
2210
2211		/* wakeup I/O initiator */
2212		LCACHE_WAKEUP(&bp->l_ioevent);
2213
2214		return;
2215	}
2216
2217	/*
2218	 *	pageout completion
2219	 *
2220	 * the bp at the head of write queue has completed pageout.
2221	 *
2222	 * if single-commit/full-page pageout, remove the current buffer
2223	 * from head of pageout queue, and redrive pageout with
2224	 * the new buffer at head of pageout queue;
2225	 * otherwise, the partial-page pageout buffer stays at
2226	 * the head of pageout queue to be redriven for pageout
2227	 * by lmGroupCommit() until full-page pageout is completed.
2228	 */
2229	bp->l_flag &= ~lbmWRITE;
2230	INCREMENT(lmStat.pagedone);
2231
2232	/* update committed lsn */
2233	log = bp->l_log;
2234	log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
2235
2236	if (bp->l_flag & lbmDIRECT) {
2237		LCACHE_WAKEUP(&bp->l_ioevent);
2238		LCACHE_UNLOCK(flags);
2239		return;
2240	}
2241
2242	tail = log->wqueue;
2243
2244	/* single element queue */
2245	if (bp == tail) {
2246		/* remove head buffer of full-page pageout
2247		 * from log device write queue
2248		 */
2249		if (bp->l_flag & lbmRELEASE) {
2250			log->wqueue = NULL;
2251			bp->l_wqnext = NULL;
2252		}
2253	}
2254	/* multi element queue */
2255	else {
2256		/* remove head buffer of full-page pageout
2257		 * from log device write queue
2258		 */
2259		if (bp->l_flag & lbmRELEASE) {
2260			nextbp = tail->l_wqnext = bp->l_wqnext;
2261			bp->l_wqnext = NULL;
2262
2263			/*
2264			 * redrive pageout of next page at head of write queue:
2265			 * redrive next page without any bound tblk
2266			 * (i.e., page w/o any COMMIT records), or
2267			 * first page of new group commit which has been
2268			 * queued after current page (subsequent pageout
2269			 * is performed synchronously, except page without
2270			 * any COMMITs) by lmGroupCommit() as indicated
2271			 * by lbmWRITE flag;
2272			 */
2273			if (nextbp->l_flag & lbmWRITE) {
2274				/*
2275				 * We can't do the I/O at interrupt time.
2276				 * The jfsIO thread can do it
2277				 */
2278				lbmRedrive(nextbp);
2279			}
2280		}
2281	}
2282
2283	/*
2284	 *	synchronous pageout:
2285	 *
2286	 * buffer has not necessarily been removed from write queue
2287	 * (e.g., synchronous write of partial-page with COMMIT):
2288	 * leave buffer for i/o initiator to dispose
2289	 */
2290	if (bp->l_flag & lbmSYNC) {
2291		LCACHE_UNLOCK(flags);	/* unlock+enable */
2292
2293		/* wakeup I/O initiator */
2294		LCACHE_WAKEUP(&bp->l_ioevent);
2295	}
2296
2297	/*
2298	 *	Group Commit pageout:
2299	 */
2300	else if (bp->l_flag & lbmGC) {
2301		LCACHE_UNLOCK(flags);
2302		lmPostGC(bp);
2303	}
2304
2305	/*
2306	 *	asynchronous pageout:
2307	 *
2308	 * buffer must have been removed from write queue:
2309	 * insert buffer at head of freelist where it can be recycled
2310	 */
2311	else {
2312		assert(bp->l_flag & lbmRELEASE);
2313		assert(bp->l_flag & lbmFREE);
2314		lbmfree(bp);
2315
2316		LCACHE_UNLOCK(flags);	/* unlock+enable */
2317	}
2318}
2319
/*
 * jfsIOWait - main loop of the jfsIO kernel thread
 *
 * Drains the global log_redrive_list, issuing the deferred log page
 * writes that lbmIODone() could not start at interrupt time, then
 * sleeps until new work arrives or the thread is asked to stop.
 */
int jfsIOWait(void *arg)
{
	struct lbuf *bp;

	do {
		spin_lock_irq(&log_redrive_lock);
		while ((bp = log_redrive_list)) {
			/* pop one buffer, then drop the lock across the
			 * (potentially blocking) i/o submission
			 */
			log_redrive_list = bp->l_redrive_next;
			bp->l_redrive_next = NULL;
			spin_unlock_irq(&log_redrive_lock);
			lbmStartIO(bp);
			spin_lock_irq(&log_redrive_lock);
		}

		if (freezing(current)) {
			spin_unlock_irq(&log_redrive_lock);
			try_to_freeze();
		} else {
			/* set state before unlocking so a wakeup racing
			 * with new work cannot be lost
			 */
			set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock_irq(&log_redrive_lock);
			schedule();
		}
	} while (!kthread_should_stop());

	jfs_info("jfsIOWait being killed!");
	return 0;
}
2347
2348/*
2349 * NAME:	lmLogFormat()/jfs_logform()
2350 *
2351 * FUNCTION:	format file system log
2352 *
2353 * PARAMETERS:
2354 *	log	- volume log
2355 *	logAddress - start address of log space in FS block
2356 *	logSize	- length of log space in FS block;
2357 *
2358 * RETURN:	0	- success
2359 *		-EIO	- i/o error
2360 *
2361 * XXX: We're synchronously writing one page at a time.  This needs to
2362 *	be improved by writing multiple pages at once.
2363 */
int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
{
	int rc = -EIO;
	struct jfs_sb_info *sbi;
	struct logsuper *logsuper;
	struct logpage *lp;
	int lspn;		/* log sequence page number */
	struct lrd *lrd_ptr;
	int npages = 0;
	struct lbuf *bp;

	jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
		 (long long)logAddress, logSize);

	sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);

	/* allocate a log buffer */
	/* NOTE(review): return value is not checked here; presumably
	 * lbmAllocate() sleeps until a buffer is available rather than
	 * failing — confirm against its definition
	 */
	bp = lbmAllocate(log, 1);

	/* number of log pages covered by logSize FS blocks */
	npages = logSize >> sbi->l2nbperpage;

	/*
	 *	log space:
	 *
	 * page 0 - reserved;
	 * page 1 - log superblock;
	 * page 2 - log data page: A SYNC log record is written
	 *	    into this page at logform time;
	 * pages 3-N - log data page: set to empty log data pages;
	 */
	/*
	 *	init log superblock: log page 1
	 */
	logsuper = (struct logsuper *) bp->l_ldata;

	logsuper->magic = cpu_to_le32(LOGMAGIC);
	logsuper->version = cpu_to_le32(LOGVERSION);
	logsuper->state = cpu_to_le32(LOGREDONE);
	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
	logsuper->size = cpu_to_le32(npages);
	logsuper->bsize = cpu_to_le32(sbi->bsize);
	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
	/* log end = just past the SYNCPT record written into page 2 */
	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);

	/* write the superblock synchronously, bypassing the write queue */
	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	bp->l_blkno = logAddress + sbi->nbperpage;
	lbmStartIO(bp);
	if ((rc = lbmIOWait(bp, 0)))
		goto exit;

	/*
	 *	init pages 2 to npages-1 as log data pages:
	 *
	 * log page sequence number (lpsn) initialization:
	 *
	 * pn:   0     1     2     3                 n-1
	 *       +-----+-----+=====+=====+===.....===+=====+
	 * lspn:             N-1   0     1           N-2
	 *                   <--- N page circular file ---->
	 *
	 * the N (= npages-2) data pages of the log is maintained as
	 * a circular file for the log records;
	 * lpsn grows by 1 monotonically as each log page is written
	 * to the circular file of the log;
	 * and setLogpage() will not reset the page number even if
	 * the eor is equal to LOGPHDRSIZE. In order for binary search
	 * still work in find log end process, we have to simulate the
	 * log wrap situation at the log format time.
	 * The 1st log page written will have the highest lpsn. Then
	 * the succeeding log pages will have ascending order of
	 * the lspn starting from 0, ... (N-2)
	 */
	lp = (struct logpage *) bp->l_ldata;
	/*
	 * initialize 1st log page to be written: lpsn = N - 1,
	 * write a SYNCPT log record is written to this page
	 */
	lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);

	/* the page's single record: an empty SYNCPT */
	lrd_ptr = (struct lrd *) &lp->data;
	lrd_ptr->logtid = 0;
	lrd_ptr->backchain = 0;
	lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
	lrd_ptr->length = 0;
	lrd_ptr->log.syncpt.sync = 0;

	bp->l_blkno += sbi->nbperpage;
	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	lbmStartIO(bp);
	if ((rc = lbmIOWait(bp, 0)))
		goto exit;

	/*
	 *	initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
	 *
	 * the same buffer is reused: only the lpsn and eor fields
	 * change between pages, and each write completes before the
	 * buffer is modified for the next one
	 */
	for (lspn = 0; lspn < npages - 3; lspn++) {
		lp->h.page = lp->t.page = cpu_to_le32(lspn);
		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);

		bp->l_blkno += sbi->nbperpage;
		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
		lbmStartIO(bp);
		if ((rc = lbmIOWait(bp, 0)))
			goto exit;
	}

	rc = 0;
exit:
	/*
	 *	finalize log
	 */
	/* release the buffer */
	lbmFree(bp);

	return rc;
}
2481
2482#ifdef CONFIG_JFS_STATISTICS
2483int jfs_lmstats_proc_show(struct seq_file *m, void *v)
2484{
2485	seq_printf(m,
2486		       "JFS Logmgr stats\n"
2487		       "================\n"
2488		       "commits = %d\n"
2489		       "writes submitted = %d\n"
2490		       "writes completed = %d\n"
2491		       "full pages submitted = %d\n"
2492		       "partial pages submitted = %d\n",
2493		       lmStat.commit,
2494		       lmStat.submitted,
2495		       lmStat.pagedone,
2496		       lmStat.full_page,
2497		       lmStat.partial_page);
2498	return 0;
2499}
2500#endif /* CONFIG_JFS_STATISTICS */