   1/*
   2 *   Copyright (C) International Business Machines Corp., 2000-2004
   3 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   4 *
   5 *   This program is free software;  you can redistribute it and/or modify
   6 *   it under the terms of the GNU General Public License as published by
   7 *   the Free Software Foundation; either version 2 of the License, or
   8 *   (at your option) any later version.
   9 *
  10 *   This program is distributed in the hope that it will be useful,
  11 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  13 *   the GNU General Public License for more details.
  14 *
  15 *   You should have received a copy of the GNU General Public License
  16 *   along with this program;  if not, write to the Free Software
  17 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18 */
  19
  20/*
  21 *	jfs_logmgr.c: log manager
  22 *
  23 * for related information, see transaction manager (jfs_txnmgr.c), and
  24 * recovery manager (jfs_logredo.c).
  25 *
  26 * note: for detail, RTFS.
  27 *
  28 *	log buffer manager:
  29 * special purpose buffer manager supporting log i/o requirements.
  30 * per log serial pageout of logpage
  31 * queuing i/o requests and redrive i/o at iodone
  32 * maintain current logpage buffer
  33 * no caching since append only
  34 * appropriate jfs buffer cache buffers as needed
  35 *
  36 *	group commit:
  37 * transactions which wrote COMMIT records in the same in-memory
  38 * log page during the pageout of previous/current log page(s) are
  39 * committed together by the pageout of the page.
  40 *
  41 *	TBD lazy commit:
  42 * transactions are committed asynchronously when the log page
  43 * containing its COMMIT record is paged out when it becomes full;
  44 *
  45 *	serialization:
  46 * . a per-log lock serializes log writes.
  47 * . a per-log lock serializes group commit.
  48 * . a per-log lock serializes log open/close;
  49 *
  50 *	TBD log integrity:
  51 * careful-write (ping-pong) of last logpage to recover from crash
  52 * in overwrite.
  53 * detection of split (out-of-order) write of physical sectors
  54 * of last logpage via timestamp at end of each sector
  55 * with its mirror data array at the trailer.
  56 *
  57 *	alternatives:
  58 * lsn - 64-bit monotonically increasing integer vs
  59 * 32-bit lspn and page eor.
  60 */
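
/*
 * For orientation: in the 32-bit lsn scheme used below, an lsn encodes a
 * log page number plus a byte offset within that page; lmWriteRecord()
 * computes it as
 *
 *	lsn = (log->page << L2LOGPSIZE) + dstoffset;
 *
 * and logdiff() (see jfs_logmgr.h) measures the distance of an lsn from
 * the last sync point, wrapping by log->logsize when the difference
 * goes negative.
 */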
  61
  62#include <linux/fs.h>
  63#include <linux/blkdev.h>
  64#include <linux/interrupt.h>
  65#include <linux/completion.h>
  66#include <linux/kthread.h>
  67#include <linux/buffer_head.h>		/* for sync_blockdev() */
  68#include <linux/bio.h>
  69#include <linux/freezer.h>
  70#include <linux/export.h>
  71#include <linux/delay.h>
  72#include <linux/mutex.h>
  73#include <linux/seq_file.h>
  74#include <linux/slab.h>
  75#include "jfs_incore.h"
  76#include "jfs_filsys.h"
  77#include "jfs_metapage.h"
  78#include "jfs_superblock.h"
  79#include "jfs_txnmgr.h"
  80#include "jfs_debug.h"
  81
  82
  83/*
  84 * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
  85 */
  86static struct lbuf *log_redrive_list;
  87static DEFINE_SPINLOCK(log_redrive_lock);
  88
  89
  90/*
  91 *	log read/write serialization (per log)
  92 */
  93#define LOG_LOCK_INIT(log)	mutex_init(&(log)->loglock)
  94#define LOG_LOCK(log)		mutex_lock(&((log)->loglock))
  95#define LOG_UNLOCK(log)		mutex_unlock(&((log)->loglock))
  96
  97
  98/*
  99 *	log group commit serialization (per log)
 100 */
 101
 102#define LOGGC_LOCK_INIT(log)	spin_lock_init(&(log)->gclock)
 103#define LOGGC_LOCK(log)		spin_lock_irq(&(log)->gclock)
 104#define LOGGC_UNLOCK(log)	spin_unlock_irq(&(log)->gclock)
 105#define LOGGC_WAKEUP(tblk)	wake_up_all(&(tblk)->gcwait)
 106
 107/*
 108 *	log sync serialization (per log)
 109 */
 110#define	LOGSYNC_DELTA(logsize)		min((logsize)/8, 128*LOGPSIZE)
 111#define	LOGSYNC_BARRIER(logsize)	((logsize)/4)
 112/*
 113#define	LOGSYNC_DELTA(logsize)		min((logsize)/4, 256*LOGPSIZE)
 114#define	LOGSYNC_BARRIER(logsize)	((logsize)/2)
 115*/
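
/*
 * Worked example (assuming LOGPSIZE is 4096, per jfs_filsys.h): for a
 * 32 MiB log, LOGSYNC_DELTA = min(4 MiB, 128 * 4 KiB) = 512 KiB, i.e. a
 * SYNCPT record is triggered roughly every 512 KiB of log written, and
 * LOGSYNC_BARRIER = 8 MiB of log written since the last sync point
 * before new transactions are stalled (see lmLogSync() below).
 */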
 116
 117
 118/*
 119 *	log buffer cache synchronization
 120 */
 121static DEFINE_SPINLOCK(jfsLCacheLock);
 122
 123#define	LCACHE_LOCK(flags)	spin_lock_irqsave(&jfsLCacheLock, flags)
 124#define	LCACHE_UNLOCK(flags)	spin_unlock_irqrestore(&jfsLCacheLock, flags)
 125
 126/*
 127 * See __SLEEP_COND in jfs_locks.h
 128 */
 129#define LCACHE_SLEEP_COND(wq, cond, flags)	\
 130do {						\
 131	if (cond)				\
 132		break;				\
 133	__SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
 134} while (0)
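
/*
 * Sketch of __SLEEP_COND (from jfs_locks.h, shown here for reference;
 * see that header for the authoritative definition):
 *
 *	DECLARE_WAITQUEUE(__wait, current);
 *	add_wait_queue(&wq, &__wait);
 *	for (;;) {
 *		set_current_state(TASK_UNINTERRUPTIBLE);
 *		if (cond)
 *			break;
 *		unlock_cmd;
 *		io_schedule();
 *		lock_cmd;
 *	}
 *	__set_current_state(TASK_RUNNING);
 *	remove_wait_queue(&wq, &__wait);
 *
 * i.e. the caller drops LCACHE_LOCK while sleeping and retakes it
 * before re-testing the condition.
 */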
 135
 136#define	LCACHE_WAKEUP(event)	wake_up(event)
 137
 138
 139/*
 140 *	lbuf buffer cache (lCache) control
 141 */
 142/* log buffer manager pageout control (cumulative, inclusive) */
 143#define	lbmREAD		0x0001
 144#define	lbmWRITE	0x0002	/* enqueue at tail of write queue;
 145				 * init pageout if at head of queue;
 146				 */
 147#define	lbmRELEASE	0x0004	/* remove from write queue
 148				 * at completion of pageout;
 149				 * do not free/recycle it yet:
 150				 * caller will free it;
 151				 */
 152#define	lbmSYNC		0x0008	/* do not return to freelist
 153				 * when removed from write queue;
 154				 */
 155#define lbmFREE		0x0010	/* return to freelist
 156				 * at completion of pageout;
 157				 * the buffer may be recycled;
 158				 */
 159#define	lbmDONE		0x0020
 160#define	lbmERROR	0x0040
 161#define lbmGC		0x0080	/* lbmIODone to perform post-GC processing
 162				 * of log page
 163				 */
 164#define lbmDIRECT	0x0100
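
/*
 * Typical combinations used below: a full log page is paged out with
 * (lbmWRITE | lbmRELEASE | lbmFREE) so the buffer is recycled at iodone,
 * while the log superblock is written via lbmDirectWrite() with
 * (lbmWRITE | lbmRELEASE | lbmSYNC) and the caller waits in lbmIOWait().
 */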
 165
 166/*
 167 * Global list of active external journals
 168 */
 169static LIST_HEAD(jfs_external_logs);
 170static struct jfs_log *dummy_log;
 171static DEFINE_MUTEX(jfs_log_mutex);
 172
 173/*
 174 * forward references
 175 */
 176static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
 177			 struct lrd * lrd, struct tlock * tlck);
 178
 179static int lmNextPage(struct jfs_log * log);
 180static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
 181			   int activate);
 182
 183static int open_inline_log(struct super_block *sb);
 184static int open_dummy_log(struct super_block *sb);
 185static int lbmLogInit(struct jfs_log * log);
 186static void lbmLogShutdown(struct jfs_log * log);
 187static struct lbuf *lbmAllocate(struct jfs_log * log, int);
 188static void lbmFree(struct lbuf * bp);
 189static void lbmfree(struct lbuf * bp);
 190static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
 191static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
 192static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
 193static int lbmIOWait(struct lbuf * bp, int flag);
 194static bio_end_io_t lbmIODone;
 195static void lbmStartIO(struct lbuf * bp);
 196static void lmGCwrite(struct jfs_log * log, int cant_block);
 197static int lmLogSync(struct jfs_log * log, int hard_sync);
 198
 199
 200
 201/*
 202 *	statistics
 203 */
 204#ifdef CONFIG_JFS_STATISTICS
 205static struct lmStat {
 206	uint commit;		/* # of commit */
 207	uint pagedone;		/* # of page written */
 208	uint submitted;		/* # of pages submitted */
 209	uint full_page;		/* # of full pages submitted */
 210	uint partial_page;	/* # of partial pages submitted */
 211} lmStat;
 212#endif
 213
 214static void write_special_inodes(struct jfs_log *log,
 215				 int (*writer)(struct address_space *))
 216{
 217	struct jfs_sb_info *sbi;
 218
 219	list_for_each_entry(sbi, &log->sb_list, log_list) {
 220		writer(sbi->ipbmap->i_mapping);
 221		writer(sbi->ipimap->i_mapping);
 222		writer(sbi->direct_inode->i_mapping);
 223	}
 224}
 225
 226/*
 227 * NAME:	lmLog()
 228 *
 229 * FUNCTION:	write a log record;
 230 *
 231 * PARAMETER:
 232 *
 233 * RETURN:	lsn - offset to the next log record to write (end-of-log);
 234 *		-1  - error;
 235 *
 236 * note: todo: log error handler
 237 */
 238int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 239	  struct tlock * tlck)
 240{
 241	int lsn;
 242	int diffp, difft;
 243	struct metapage *mp = NULL;
 244	unsigned long flags;
 245
 246	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
 247		 log, tblk, lrd, tlck);
 248
 249	LOG_LOCK(log);
 250
 251	/* log by (out-of-transaction) JFS ? */
 252	if (tblk == NULL)
 253		goto writeRecord;
 254
 255	/* log from page ? */
 256	if (tlck == NULL ||
 257	    tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
 258		goto writeRecord;
 259
 260	/*
 261	 *	initialize/update page/transaction recovery lsn
 262	 */
 263	lsn = log->lsn;
 264
 265	LOGSYNC_LOCK(log, flags);
 266
 267	/*
 268	 * initialize page lsn if first log write of the page
 269	 */
 270	if (mp->lsn == 0) {
 271		mp->log = log;
 272		mp->lsn = lsn;
 273		log->count++;
 274
 275		/* insert page at tail of logsynclist */
 276		list_add_tail(&mp->synclist, &log->synclist);
 277	}
 278
 279	/*
 280	 *	initialize/update lsn of tblock of the page
 281	 *
 282	 * transaction inherits oldest lsn of pages associated
 283	 * with allocation/deallocation of resources (their
 284	 * log records are used to reconstruct allocation map
 285	 * at recovery time: inode for inode allocation map,
 286	 * B+-tree index of extent descriptors for block
 287	 * allocation map);
 288	 * allocation map pages inherit transaction lsn at
 289	 * commit time to allow forwarding log syncpt past log
 290	 * records associated with allocation/deallocation of
 291	 * resources only after persistent map of these map pages
 292	 * have been updated and propagated to home.
 293	 */
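	/*
	 * e.g., if this page was first logged at lsn 100 while the
	 * transaction had inherited lsn 150 from another page, the
	 * tblock takes lsn 100 below, so the sync point cannot be
	 * forwarded past 100 until the transaction commits.
	 */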
 294	/*
 295	 * initialize transaction lsn:
 296	 */
 297	if (tblk->lsn == 0) {
 298		/* inherit lsn of its first page logged */
 299		tblk->lsn = mp->lsn;
 300		log->count++;
 301
 302		/* insert tblock after the page on logsynclist */
 303		list_add(&tblk->synclist, &mp->synclist);
 304	}
 305	/*
 306	 * update transaction lsn:
 307	 */
 308	else {
 309		/* inherit oldest/smallest lsn of page */
 310		logdiff(diffp, mp->lsn, log);
 311		logdiff(difft, tblk->lsn, log);
 312		if (diffp < difft) {
 313			/* update tblock lsn with page lsn */
 314			tblk->lsn = mp->lsn;
 315
 316			/* move tblock after page on logsynclist */
 317			list_move(&tblk->synclist, &mp->synclist);
 318		}
 319	}
 320
 321	LOGSYNC_UNLOCK(log, flags);
 322
 323	/*
 324	 *	write the log record
 325	 */
 326      writeRecord:
 327	lsn = lmWriteRecord(log, tblk, lrd, tlck);
 328
 329	/*
 330	 * forward log syncpt if log reached next syncpt trigger
 331	 */
 332	logdiff(diffp, lsn, log);
 333	if (diffp >= log->nextsync)
 334		lsn = lmLogSync(log, 0);
 335
 336	/* update end-of-log lsn */
 337	log->lsn = lsn;
 338
 339	LOG_UNLOCK(log);
 340
 341	/* return end-of-log address */
 342	return lsn;
 343}
 344
 345/*
 346 * NAME:	lmWriteRecord()
 347 *
 348 * FUNCTION:	move the log record to current log page
 349 *
 350 * PARAMETER:	cd	- commit descriptor
 351 *
 352 * RETURN:	end-of-log address
 353 *
 354 * serialization: LOG_LOCK() held on entry/exit
 355 */
 356static int
 357lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 358	      struct tlock * tlck)
 359{
 360	int lsn = 0;		/* end-of-log address */
 361	struct lbuf *bp;	/* dst log page buffer */
 362	struct logpage *lp;	/* dst log page */
 363	caddr_t dst;		/* destination address in log page */
 364	int dstoffset;		/* end-of-log offset in log page */
 365	int freespace;		/* free space in log page */
 366	caddr_t p;		/* src meta-data page */
 367	caddr_t src;
 368	int srclen;
 369	int nbytes;		/* number of bytes to move */
 370	int i;
 371	int len;
 372	struct linelock *linelock;
 373	struct lv *lv;
 374	struct lvd *lvd;
 375	int l2linesize;
 376
 377	len = 0;
 378
 379	/* retrieve destination log page to write */
 380	bp = (struct lbuf *) log->bp;
 381	lp = (struct logpage *) bp->l_ldata;
 382	dstoffset = log->eor;
 383
 384	/* any log data to write ? */
 385	if (tlck == NULL)
 386		goto moveLrd;
 387
 388	/*
 389	 *	move log record data
 390	 */
 391	/* retrieve source meta-data page to log */
 392	if (tlck->flag & tlckPAGELOCK) {
 393		p = (caddr_t) (tlck->mp->data);
 394		linelock = (struct linelock *) & tlck->lock;
 395	}
 396	/* retrieve source in-memory inode to log */
 397	else if (tlck->flag & tlckINODELOCK) {
 398		if (tlck->type & tlckDTREE)
 399			p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
 400		else
 401			p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
 402		linelock = (struct linelock *) & tlck->lock;
 403	}
 404#ifdef	_JFS_WIP
 405	else if (tlck->flag & tlckINLINELOCK) {
 406
 407		inlinelock = (struct inlinelock *) & tlck;
 408		p = (caddr_t) & inlinelock->pxd;
 409		linelock = (struct linelock *) & tlck;
 410	}
 411#endif				/* _JFS_WIP */
 412	else {
 413		jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
 414		return 0;	/* Probably should trap */
 415	}
 416	l2linesize = linelock->l2linesize;
 417
 418      moveData:
 419	ASSERT(linelock->index <= linelock->maxcnt);
 420
 421	lv = linelock->lv;
 422	for (i = 0; i < linelock->index; i++, lv++) {
 423		if (lv->length == 0)
 424			continue;
 425
 426		/* is page full ? */
 427		if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
 428			/* page is full: move on to next page */
 429			lmNextPage(log);
 430
 431			bp = log->bp;
 432			lp = (struct logpage *) bp->l_ldata;
 433			dstoffset = LOGPHDRSIZE;
 434		}
 435
 436		/*
 437		 * move log vector data
 438		 */
 439		src = (u8 *) p + (lv->offset << l2linesize);
 440		srclen = lv->length << l2linesize;
 441		len += srclen;
 442		while (srclen > 0) {
 443			freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 444			nbytes = min(freespace, srclen);
 445			dst = (caddr_t) lp + dstoffset;
 446			memcpy(dst, src, nbytes);
 447			dstoffset += nbytes;
 448
 449			/* is page not full ? */
 450			if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 451				break;
 452
 453			/* page became full: move on to next page */
 454			lmNextPage(log);
 455
 456			bp = (struct lbuf *) log->bp;
 457			lp = (struct logpage *) bp->l_ldata;
 458			dstoffset = LOGPHDRSIZE;
 459
 460			srclen -= nbytes;
 461			src += nbytes;
 462		}
 463
 464		/*
 465		 * move log vector descriptor
 466		 */
 467		len += 4;
 468		lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
 469		lvd->offset = cpu_to_le16(lv->offset);
 470		lvd->length = cpu_to_le16(lv->length);
 471		dstoffset += 4;
 472		jfs_info("lmWriteRecord: lv offset:%d length:%d",
 473			 lv->offset, lv->length);
 474	}
 475
 476	if ((i = linelock->next)) {
 477		linelock = (struct linelock *) lid_to_tlock(i);
 478		goto moveData;
 479	}
 480
 481	/*
 482	 *	move log record descriptor
 483	 */
 484      moveLrd:
 485	lrd->length = cpu_to_le16(len);
 486
 487	src = (caddr_t) lrd;
 488	srclen = LOGRDSIZE;
 489
 490	while (srclen > 0) {
 491		freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 492		nbytes = min(freespace, srclen);
 493		dst = (caddr_t) lp + dstoffset;
 494		memcpy(dst, src, nbytes);
 495
 496		dstoffset += nbytes;
 497		srclen -= nbytes;
 498
 499		/* is there more to move than the page's free space ? */
 500		if (srclen)
 501			goto pageFull;
 502
 503		/*
 504		 * end of log record descriptor
 505		 */
 506
 507		/* update last log record eor */
 508		log->eor = dstoffset;
 509		bp->l_eor = dstoffset;
 510		lsn = (log->page << L2LOGPSIZE) + dstoffset;
 511
 512		if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
 513			tblk->clsn = lsn;
 514			jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
 515				 bp->l_eor);
 516
 517			INCREMENT(lmStat.commit);	/* # of commit */
 518
 519			/*
 520			 * enqueue tblock for group commit:
 521			 *
 522			 * enqueue tblock of non-trivial/synchronous COMMIT
 523			 * at tail of group commit queue
 524			 * (trivial/asynchronous COMMITs are ignored by
 525			 * group commit.)
 526			 */
 527			LOGGC_LOCK(log);
 528
 529			/* init tblock gc state */
 530			tblk->flag = tblkGC_QUEUE;
 531			tblk->bp = log->bp;
 532			tblk->pn = log->page;
 533			tblk->eor = log->eor;
 534
 535			/* enqueue transaction to commit queue */
 536			list_add_tail(&tblk->cqueue, &log->cqueue);
 537
 538			LOGGC_UNLOCK(log);
 539		}
 540
 541		jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
 542			le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
 543
 544		/* page not full ? */
 545		if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 546			return lsn;
 547
 548	      pageFull:
 549		/* page became full: move on to next page */
 550		lmNextPage(log);
 551
 552		bp = (struct lbuf *) log->bp;
 553		lp = (struct logpage *) bp->l_ldata;
 554		dstoffset = LOGPHDRSIZE;
 555		src += nbytes;
 556	}
 557
 558	return lsn;
 559}
 560
 561
 562/*
 563 * NAME:	lmNextPage()
 564 *
 565 * FUNCTION:	write current page and allocate next page.
 566 *
 567 * PARAMETER:	log
 568 *
 569 * RETURN:	0
 570 *
 571 * serialization: LOG_LOCK() held on entry/exit
 572 */
 573static int lmNextPage(struct jfs_log * log)
 574{
 575	struct logpage *lp;
 576	int lspn;		/* log sequence page number */
 577	int pn;			/* current page number */
 578	struct lbuf *bp;
 579	struct lbuf *nextbp;
 580	struct tblock *tblk;
 581
 582	/* get current log page number and log sequence page number */
 583	pn = log->page;
 584	bp = log->bp;
 585	lp = (struct logpage *) bp->l_ldata;
 586	lspn = le32_to_cpu(lp->h.page);
 587
 588	LOGGC_LOCK(log);
 589
 590	/*
 591	 *	write or queue the full page at the tail of write queue
 592	 */
 593	/* get the tail tblk on commit queue */
 594	if (list_empty(&log->cqueue))
 595		tblk = NULL;
 596	else
 597		tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
 598
 599	/* every tblk that has a COMMIT record on the current page,
 600	 * and has not been committed, must be on the commit queue,
 601	 * since a tblk is queued on the commit queue at the time
 602	 * of writing its COMMIT record on the page, before the
 603	 * page becomes full (even though the tblk thread
 604	 * that wrote the COMMIT record may currently be
 605	 * suspended);
 606	 */
 607
 608	/* is page bound with outstanding tail tblk ? */
 609	if (tblk && tblk->pn == pn) {
 610		/* mark tblk for end-of-page */
 611		tblk->flag |= tblkGC_EOP;
 612
 613		if (log->cflag & logGC_PAGEOUT) {
 614			/* if page is not already on write queue,
 615			 * just enqueue (no lbmWRITE to prevent redrive)
 616			 * buffer to wqueue to ensure correct serial order
 617			 * of the pages since log pages will be added
 618			 * continuously
 619			 */
 620			if (bp->l_wqnext == NULL)
 621				lbmWrite(log, bp, 0, 0);
 622		} else {
 623			/*
 624			 * No current GC leader, initiate group commit
 625			 */
 626			log->cflag |= logGC_PAGEOUT;
 627			lmGCwrite(log, 0);
 628		}
 629	}
 630	/* page is not bound with outstanding tblk:
 631	 * init write or mark it to be redriven (lbmWRITE)
 632	 */
 633	else {
 634		/* finalize the page */
 635		bp->l_ceor = bp->l_eor;
 636		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 637		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
 638	}
 639	LOGGC_UNLOCK(log);
 640
 641	/*
 642	 *	allocate/initialize next page
 643	 */
 644	/* if log wraps, the first data page of log is 2
 645	 * (0 never used, 1 is superblock).
 646	 */
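	/* e.g., with log->size == 1024 pages, page 1023 wraps to page 2 */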
 647	log->page = (pn == log->size - 1) ? 2 : pn + 1;
 648	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */
 649
 650	/* allocate/initialize next log page buffer */
 651	nextbp = lbmAllocate(log, log->page);
 652	nextbp->l_eor = log->eor;
 653	log->bp = nextbp;
 654
 655	/* initialize next log page */
 656	lp = (struct logpage *) nextbp->l_ldata;
 657	lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
 658	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
 659
 660	return 0;
 661}
 662
 663
 664/*
 665 * NAME:	lmGroupCommit()
 666 *
 667 * FUNCTION:	group commit
 668 *	initiate pageout of the pages with COMMIT in the order of
 669 *	page number - redrive pageout of the page at the head of
 670 *	pageout queue until full page has been written.
 671 *
 672 * RETURN:
 673 *
 674 * NOTE:
 675 *	LOGGC_LOCK serializes log group commit queue, and
 676 *	transaction blocks on the commit queue.
 677 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 678 */
 679int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
 680{
 681	int rc = 0;
 682
 683	LOGGC_LOCK(log);
 684
 685	/* group committed already ? */
 686	if (tblk->flag & tblkGC_COMMITTED) {
 687		if (tblk->flag & tblkGC_ERROR)
 688			rc = -EIO;
 689
 690		LOGGC_UNLOCK(log);
 691		return rc;
 692	}
 693	jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
 694
 695	if (tblk->xflag & COMMIT_LAZY)
 696		tblk->flag |= tblkGC_LAZY;
 697
 698	if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
 699	    (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
 700	     || jfs_tlocks_low)) {
 701		/*
 702		 * No pageout in progress
 703		 *
 704		 * start group commit as its group leader.
 705		 */
 706		log->cflag |= logGC_PAGEOUT;
 707
 708		lmGCwrite(log, 0);
 709	}
 710
 711	if (tblk->xflag & COMMIT_LAZY) {
 712		/*
 713		 * Lazy transactions can leave now
 714		 */
 715		LOGGC_UNLOCK(log);
 716		return 0;
 717	}
 718
 719	/* lmGCwrite gives up LOGGC_LOCK, check again */
 720
 721	if (tblk->flag & tblkGC_COMMITTED) {
 722		if (tblk->flag & tblkGC_ERROR)
 723			rc = -EIO;
 724
 725		LOGGC_UNLOCK(log);
 726		return rc;
 727	}
 728
 729	/* upcount transaction waiting for completion
 730	 */
 731	log->gcrtc++;
 732	tblk->flag |= tblkGC_READY;
 733
 734	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
 735		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));
 736
 737	/* removed from commit queue */
 738	if (tblk->flag & tblkGC_ERROR)
 739		rc = -EIO;
 740
 741	LOGGC_UNLOCK(log);
 742	return rc;
 743}
 744
 745/*
 746 * NAME:	lmGCwrite()
 747 *
 748 * FUNCTION:	group commit write
 749 *	initiate write of log page, building a group of all transactions
 750 *	with commit records on that page.
 751 *
 752 * RETURN:	None
 753 *
 754 * NOTE:
 755 *	LOGGC_LOCK must be held by caller.
 756 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 757 */
 758static void lmGCwrite(struct jfs_log * log, int cant_write)
 759{
 760	struct lbuf *bp;
 761	struct logpage *lp;
 762	int gcpn;		/* group commit page number */
 763	struct tblock *tblk;
 764	struct tblock *xtblk = NULL;
 765
 766	/*
 767	 * build the commit group of a log page
 768	 *
 769	 * scan commit queue and make a commit group of all
 770	 * transactions with COMMIT records on the same log page.
 771	 */
 772	/* get the head tblk on the commit queue */
 773	gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
 774
 775	list_for_each_entry(tblk, &log->cqueue, cqueue) {
 776		if (tblk->pn != gcpn)
 777			break;
 778
 779		xtblk = tblk;
 780
 781		/* state transition: (QUEUE, READY) -> COMMIT */
 782		tblk->flag |= tblkGC_COMMIT;
 783	}
 784	tblk = xtblk;		/* last tblk of the page */
 785
 786	/*
 787	 * pageout to commit transactions on the log page.
 788	 */
 789	bp = (struct lbuf *) tblk->bp;
 790	lp = (struct logpage *) bp->l_ldata;
 791	/* is page already full ? */
 792	if (tblk->flag & tblkGC_EOP) {
 793		/* mark page to free at end of group commit of the page */
 794		tblk->flag &= ~tblkGC_EOP;
 795		tblk->flag |= tblkGC_FREE;
 796		bp->l_ceor = bp->l_eor;
 797		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 798		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
 799			 cant_write);
 800		INCREMENT(lmStat.full_page);
 801	}
 802	/* page is not yet full */
 803	else {
 804		bp->l_ceor = tblk->eor;	/* ? bp->l_ceor = bp->l_eor; */
 805		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 806		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
 807		INCREMENT(lmStat.partial_page);
 808	}
 809}
 810
 811/*
 812 * NAME:	lmPostGC()
 813 *
 814 * FUNCTION:	group commit post-processing
 815 *	Processes transactions after their commit records have been written
 816 *	to disk, redriving log I/O if necessary.
 817 *
 818 * RETURN:	None
 819 *
 820 * NOTE:
 821 *	This routine is called at interrupt time by lbmIODone
 822 */
 823static void lmPostGC(struct lbuf * bp)
 824{
 825	unsigned long flags;
 826	struct jfs_log *log = bp->l_log;
 827	struct logpage *lp;
 828	struct tblock *tblk, *temp;
 829
 830	//LOGGC_LOCK(log);
 831	spin_lock_irqsave(&log->gclock, flags);
 832	/*
 833	 * current pageout of group commit completed.
 834	 *
 835	 * remove/wakeup transactions from commit queue that were
 836	 * group committed with the current log page
 837	 */
 838	list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
 839		if (!(tblk->flag & tblkGC_COMMIT))
 840			break;
 841		/* if transaction was marked GC_COMMIT then
 842		 * it has been shipped in the current pageout
 843		 * and made it to disk - it is committed.
 844		 */
 845
 846		if (bp->l_flag & lbmERROR)
 847			tblk->flag |= tblkGC_ERROR;
 848
 849		/* remove it from the commit queue */
 850		list_del(&tblk->cqueue);
 851		tblk->flag &= ~tblkGC_QUEUE;
 852
 853		if (tblk == log->flush_tblk) {
 854			/* we can stop flushing the log now */
 855			clear_bit(log_FLUSH, &log->flag);
 856			log->flush_tblk = NULL;
 857		}
 858
 859		jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
 860			 tblk->flag);
 861
 862		if (!(tblk->xflag & COMMIT_FORCE))
 863			/*
 864			 * Hand tblk over to lazy commit thread
 865			 */
 866			txLazyUnlock(tblk);
 867		else {
 868			/* state transition: COMMIT -> COMMITTED */
 869			tblk->flag |= tblkGC_COMMITTED;
 870
 871			if (tblk->flag & tblkGC_READY)
 872				log->gcrtc--;
 873
 874			LOGGC_WAKEUP(tblk);
 875		}
 876
 877		/* was page full before pageout ?
 878		 * (and this is the last tblk bound with the page)
 879		 */
 880		if (tblk->flag & tblkGC_FREE)
 881			lbmFree(bp);
 882		/* did page become full after pageout ?
 883		 * (and this is the last tblk bound with the page)
 884		 */
 885		else if (tblk->flag & tblkGC_EOP) {
 886			/* finalize the page */
 887			lp = (struct logpage *) bp->l_ldata;
 888			bp->l_ceor = bp->l_eor;
 889			lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
 890			jfs_info("lmPostGC: calling lbmWrite");
 891			lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
 892				 1);
 893		}
 894
 895	}
 896
 897	/* are there any transactions that have entered lmGroupCommit()
 898	 * (whose COMMITs are after that of the last log page written)?
 899	 * they are waiting for a new group commit (above at (SLEEP 1)),
 900	 * or lazy transactions are on a full (queued) log page;
 901	 * select the latest ready transaction as the new group leader
 902	 * and wake it up to lead its group.
 903	 */
 904	if ((!list_empty(&log->cqueue)) &&
 905	    ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
 906	     test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
 907		/*
 908		 * Call lmGCwrite with new group leader
 909		 */
 910		lmGCwrite(log, 1);
 911
 912	/* no transactions are ready yet (transactions are only just
 913	 * queued (GC_QUEUE) and not entered for group commit yet).
 914	 * the first transaction entering group commit
 915	 * will elect itself as the new group leader.
 916	 */
 917	else
 918		log->cflag &= ~logGC_PAGEOUT;
 919
 920	//LOGGC_UNLOCK(log);
 921	spin_unlock_irqrestore(&log->gclock, flags);
 922	return;
 923}
 924
 925/*
 926 * NAME:	lmLogSync()
 927 *
 928 * FUNCTION:	write log SYNCPT record for specified log
 929 *	if new sync address is available
 930 *	(normally the case if sync() is executed by a background
 931 *	process).
 932 *	calculate new value of log->nextsync, which determines when
 933 *	this code is called again.
 934 *
 935 * PARAMETERS:	log	- log structure
 936 *		hard_sync - 1 to force all metadata to be written
 937 *
 938 * RETURN:	lsn	- end-of-log address
 939 *
 940 * serialization: LOG_LOCK() held on entry/exit
 941 */
 942static int lmLogSync(struct jfs_log * log, int hard_sync)
 943{
 944	int logsize;
 945	int written;		/* written since last syncpt */
 946	int free;		/* free space left available */
 947	int delta;		/* additional delta to write normally */
 948	int more;		/* additional write granted */
 949	struct lrd lrd;
 950	int lsn;
 951	struct logsyncblk *lp;
 952	unsigned long flags;
 953
 954	/* push dirty metapages out to disk */
 955	if (hard_sync)
 956		write_special_inodes(log, filemap_fdatawrite);
 957	else
 958		write_special_inodes(log, filemap_flush);
 959
 960	/*
 961	 *	forward syncpt
 962	 */
 963	/* if last sync is same as last syncpt,
 964	 * invoke sync point forward processing to update sync.
 965	 */
 966
 967	if (log->sync == log->syncpt) {
 968		LOGSYNC_LOCK(log, flags);
 969		if (list_empty(&log->synclist))
 970			log->sync = log->lsn;
 971		else {
 972			lp = list_entry(log->synclist.next,
 973					struct logsyncblk, synclist);
 974			log->sync = lp->lsn;
 975		}
 976		LOGSYNC_UNLOCK(log, flags);
 977
 978	}
 979
 980	/* if sync is different from last syncpt,
 981	 * write a SYNCPT record with syncpt = sync.
 982	 * reset syncpt = sync
 983	 */
 984	if (log->sync != log->syncpt) {
 985		lrd.logtid = 0;
 986		lrd.backchain = 0;
 987		lrd.type = cpu_to_le16(LOG_SYNCPT);
 988		lrd.length = 0;
 989		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
 990		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
 991
 992		log->syncpt = log->sync;
 993	} else
 994		lsn = log->lsn;
 995
 996	/*
 997	 *	setup next syncpt trigger (SWAG)
 998	 */
 999	logsize = log->logsize;
1000
1001	logdiff(written, lsn, log);
1002	free = logsize - written;
1003	delta = LOGSYNC_DELTA(logsize);
1004	more = min(free / 2, delta);
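	/*
	 * e.g., for a 32 MiB log with 24 MiB written since the last
	 * syncpt: free = 8 MiB, delta = 512 KiB, so more =
	 * min(4 MiB, 512 KiB) = 512 KiB and the next syncpt triggers
	 * at written + 512 KiB; only when more shrinks below two log
	 * pages is the log treated as wrapped below.
	 */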
1005	if (more < 2 * LOGPSIZE) {
1006		jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
1007		/*
1008		 *	log wrapping
1009		 *
1010		 * option 1 - panic ? No!
1011		 * option 2 - shutdown file systems
1012		 *	      associated with log ?
1013		 * option 3 - extend log ?
1014		 * option 4 - second chance
1015		 *
1016		 * mark log wrapped, and continue.
1017		 * when all active transactions are completed,
1018		 * mark log valid for recovery.
1019		 * if crashed during invalid state, log state
1020		 * implies invalid log, forcing fsck().
1021		 */
1022		/* mark log state log wrap in log superblock */
1023		/* log->state = LOGWRAP; */
1024
1025		/* reset sync point computation */
1026		log->syncpt = log->sync = lsn;
1027		log->nextsync = delta;
1028	} else
1029		/* next syncpt trigger = written + more */
1030		log->nextsync = written + more;
1031
1032	/* if number of bytes written from last sync point is more
1033	 * than 1/4 of the log size, stop new transactions from
1034	 * starting until all current transactions are completed
1035	 * by setting syncbarrier flag.
1036	 */
1037	if (!test_bit(log_SYNCBARRIER, &log->flag) &&
1038	    (written > LOGSYNC_BARRIER(logsize)) && log->active) {
1039		set_bit(log_SYNCBARRIER, &log->flag);
1040		jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1041			 log->syncpt);
1042		/*
1043		 * We may have to initiate group commit
1044		 */
1045		jfs_flush_journal(log, 0);
1046	}
1047
1048	return lsn;
1049}
1050
1051/*
1052 * NAME:	jfs_syncpt
1053 *
1054 * FUNCTION:	write log SYNCPT record for specified log
1055 *
1056 * PARAMETERS:	log	  - log structure
1057 *		hard_sync - set to 1 to force metadata to be written
1058 */
1059void jfs_syncpt(struct jfs_log *log, int hard_sync)
1060{
	LOG_LOCK(log);
1061	if (!test_bit(log_QUIESCE, &log->flag))
1062		lmLogSync(log, hard_sync);
1063	LOG_UNLOCK(log);
1064}
1065
1066/*
1067 * NAME:	lmLogOpen()
1068 *
1069 * FUNCTION:	open the log on first open;
1070 *	insert filesystem in the active list of the log.
1071 *
1072 * PARAMETER:	sb	- super block of the file system
1073 *			  being mounted
1074 *
1075 * RETURN:
1076 *
1077 * serialization:
1078 */
1079int lmLogOpen(struct super_block *sb)
1080{
1081	int rc;
1082	struct block_device *bdev;
1083	struct jfs_log *log;
1084	struct jfs_sb_info *sbi = JFS_SBI(sb);
1085
1086	if (sbi->flag & JFS_NOINTEGRITY)
1087		return open_dummy_log(sb);
1088
1089	if (sbi->mntflag & JFS_INLINELOG)
1090		return open_inline_log(sb);
1091
1092	mutex_lock(&jfs_log_mutex);
1093	list_for_each_entry(log, &jfs_external_logs, journal_list) {
1094		if (log->bdev->bd_dev == sbi->logdev) {
1095			if (memcmp(log->uuid, sbi->loguuid,
1096				   sizeof(log->uuid))) {
1097				jfs_warn("wrong uuid on JFS journal");
1098				mutex_unlock(&jfs_log_mutex);
1099				return -EINVAL;
1100			}
1101			/*
1102			 * add file system to log active file system list
1103			 */
1104			if ((rc = lmLogFileSystem(log, sbi, 1))) {
1105				mutex_unlock(&jfs_log_mutex);
1106				return rc;
1107			}
1108			goto journal_found;
1109		}
1110	}
1111
1112	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
1113		mutex_unlock(&jfs_log_mutex);
1114		return -ENOMEM;
1115	}
1116	INIT_LIST_HEAD(&log->sb_list);
1117	init_waitqueue_head(&log->syncwait);
1118
1119	/*
1120	 *	external log as separate logical volume
1121	 *
1122	 * file systems to log may have n-to-1 relationship;
1123	 */
1124
1125	bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1126				 log);
1127	if (IS_ERR(bdev)) {
1128		rc = PTR_ERR(bdev);
1129		goto free;
1130	}
1131
1132	log->bdev = bdev;
1133	memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid));
1134
1135	/*
1136	 * initialize log:
1137	 */
1138	if ((rc = lmLogInit(log)))
1139		goto close;
1140
1141	list_add(&log->journal_list, &jfs_external_logs);
1142
1143	/*
1144	 * add file system to log active file system list
1145	 */
1146	if ((rc = lmLogFileSystem(log, sbi, 1)))
1147		goto shutdown;
1148
1149journal_found:
1150	LOG_LOCK(log);
1151	list_add(&sbi->log_list, &log->sb_list);
1152	sbi->log = log;
1153	LOG_UNLOCK(log);
1154
1155	mutex_unlock(&jfs_log_mutex);
1156	return 0;
1157
1158	/*
1159	 *	unwind on error
1160	 */
1161      shutdown:		/* unwind lbmLogInit() */
1162	list_del(&log->journal_list);
1163	lbmLogShutdown(log);
1164
1165      close:		/* close external log device */
1166	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1167
1168      free:		/* free log descriptor */
1169	mutex_unlock(&jfs_log_mutex);
1170	kfree(log);
1171
1172	jfs_warn("lmLogOpen: exit(%d)", rc);
1173	return rc;
1174}
1175
1176static int open_inline_log(struct super_block *sb)
1177{
1178	struct jfs_log *log;
1179	int rc;
1180
1181	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1182		return -ENOMEM;
1183	INIT_LIST_HEAD(&log->sb_list);
1184	init_waitqueue_head(&log->syncwait);
1185
1186	set_bit(log_INLINELOG, &log->flag);
1187	log->bdev = sb->s_bdev;
1188	log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1189	log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1190	    (L2LOGPSIZE - sb->s_blocksize_bits);
1191	log->l2bsize = sb->s_blocksize_bits;
1192	ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1193
1194	/*
1195	 * initialize log.
1196	 */
1197	if ((rc = lmLogInit(log))) {
1198		kfree(log);
1199		jfs_warn("lmLogOpen: exit(%d)", rc);
1200		return rc;
1201	}
1202
1203	list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
1204	JFS_SBI(sb)->log = log;
1205
1206	return rc;
1207}
1208
1209static int open_dummy_log(struct super_block *sb)
1210{
1211	int rc;
1212
1213	mutex_lock(&jfs_log_mutex);
1214	if (!dummy_log) {
1215		dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
1216		if (!dummy_log) {
1217			mutex_unlock(&jfs_log_mutex);
1218			return -ENOMEM;
1219		}
1220		INIT_LIST_HEAD(&dummy_log->sb_list);
1221		init_waitqueue_head(&dummy_log->syncwait);
1222		dummy_log->no_integrity = 1;
1223		/* Make up some stuff */
1224		dummy_log->base = 0;
1225		dummy_log->size = 1024;
1226		rc = lmLogInit(dummy_log);
1227		if (rc) {
1228			kfree(dummy_log);
1229			dummy_log = NULL;
1230			mutex_unlock(&jfs_log_mutex);
1231			return rc;
1232		}
1233	}
1234
1235	LOG_LOCK(dummy_log);
1236	list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
1237	JFS_SBI(sb)->log = dummy_log;
1238	LOG_UNLOCK(dummy_log);
1239	mutex_unlock(&jfs_log_mutex);
1240
1241	return 0;
1242}
1243
1244/*
1245 * NAME:	lmLogInit()
1246 *
1247 * FUNCTION:	log initialization at first log open.
1248 *
1249 *	logredo() (or logformat()) should have been run previously.
1250 *	initialize the log from log superblock.
1251 *	set the log state in the superblock to LOGMOUNT and
1252 *	write SYNCPT log record.
1253 *
1254 * PARAMETER:	log	- log structure
1255 *
1256 * RETURN:	0	- if ok
1257 *		-EINVAL	- bad log magic number or superblock dirty
1258 *		error returned from logwait()
1259 *
1260 * serialization: single first open thread
1261 */
1262int lmLogInit(struct jfs_log * log)
1263{
1264	int rc = 0;
1265	struct lrd lrd;
1266	struct logsuper *logsuper;
1267	struct lbuf *bpsuper;
1268	struct lbuf *bp;
1269	struct logpage *lp;
1270	int lsn = 0;
1271
1272	jfs_info("lmLogInit: log:0x%p", log);
1273
1274	/* initialize the group commit serialization lock */
1275	LOGGC_LOCK_INIT(log);
1276
1277	/* allocate/initialize the log write serialization lock */
1278	LOG_LOCK_INIT(log);
1279
1280	LOGSYNC_LOCK_INIT(log);
1281
1282	INIT_LIST_HEAD(&log->synclist);
1283
1284	INIT_LIST_HEAD(&log->cqueue);
1285	log->flush_tblk = NULL;
1286
1287	log->count = 0;
1288
1289	/*
1290	 * initialize log i/o
1291	 */
1292	if ((rc = lbmLogInit(log)))
1293		return rc;
1294
1295	if (!test_bit(log_INLINELOG, &log->flag))
1296		log->l2bsize = L2LOGPSIZE;
1297
1298	/* check for disabled journaling to disk */
1299	if (log->no_integrity) {
1300		/*
1301		 * Journal pages will still be filled.  When the time comes
1302		 * to actually do the I/O, the write is not done, and the
1303		 * endio routine is called directly.
1304		 */
1305		bp = lbmAllocate(log , 0);
1306		log->bp = bp;
1307		bp->l_pn = bp->l_eor = 0;
1308	} else {
1309		/*
1310		 * validate log superblock
1311		 */
1312		if ((rc = lbmRead(log, 1, &bpsuper)))
1313			goto errout10;
1314
1315		logsuper = (struct logsuper *) bpsuper->l_ldata;
1316
1317		if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1318			jfs_warn("*** Log Format Error ! ***");
1319			rc = -EINVAL;
1320			goto errout20;
1321		}
1322
1323		/* logredo() should have been run successfully. */
1324		if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1325			jfs_warn("*** Log Is Dirty ! ***");
1326			rc = -EINVAL;
1327			goto errout20;
1328		}
1329
1330		/* initialize log from log superblock */
1331		if (test_bit(log_INLINELOG,&log->flag)) {
1332			if (log->size != le32_to_cpu(logsuper->size)) {
1333				rc = -EINVAL;
1334				goto errout20;
1335			}
1336			jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1337				 log, (unsigned long long)log->base, log->size);
1338		} else {
1339			if (memcmp(logsuper->uuid, log->uuid, 16)) {
1340				jfs_warn("wrong uuid on JFS log device");
				rc = -EINVAL;
1341				goto errout20;
1342			}
1343			log->size = le32_to_cpu(logsuper->size);
1344			log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1345			jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1346				 log, (unsigned long long)log->base, log->size);
1347		}
1348
1349		log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1350		log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1351
1352		/*
1353		 * initialize for log append write mode
1354		 */
1355		/* establish current/end-of-log page/buffer */
1356		if ((rc = lbmRead(log, log->page, &bp)))
1357			goto errout20;
1358
1359		lp = (struct logpage *) bp->l_ldata;
1360
1361		jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1362			 le32_to_cpu(logsuper->end), log->page, log->eor,
1363			 le16_to_cpu(lp->h.eor));
1364
1365		log->bp = bp;
1366		bp->l_pn = log->page;
1367		bp->l_eor = log->eor;
1368
1369		/* if current page is full, move on to next page */
1370		if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1371			lmNextPage(log);
1372
1373		/*
1374		 * initialize log syncpoint
1375		 */
1376		/*
1377		 * write the first SYNCPT record with syncpoint = 0
1378		 * (i.e., log redo up to HERE !);
1379		 * remove current page from lbm write queue at end of pageout
1380		 * (to write log superblock update), but do not release to
1381		 * freelist;
1382		 */
1383		lrd.logtid = 0;
1384		lrd.backchain = 0;
1385		lrd.type = cpu_to_le16(LOG_SYNCPT);
1386		lrd.length = 0;
1387		lrd.log.syncpt.sync = 0;
1388		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1389		bp = log->bp;
1390		bp->l_ceor = bp->l_eor;
1391		lp = (struct logpage *) bp->l_ldata;
1392		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1393		lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1394		if ((rc = lbmIOWait(bp, 0)))
1395			goto errout30;
1396
1397		/*
1398		 * update/write superblock
1399		 */
1400		logsuper->state = cpu_to_le32(LOGMOUNT);
1401		log->serial = le32_to_cpu(logsuper->serial) + 1;
1402		logsuper->serial = cpu_to_le32(log->serial);
1403		lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1404		if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1405			goto errout30;
1406	}
1407
1408	/* initialize logsync parameters */
1409	log->logsize = (log->size - 2) << L2LOGPSIZE;
1410	log->lsn = lsn;
1411	log->syncpt = lsn;
1412	log->sync = log->syncpt;
1413	log->nextsync = LOGSYNC_DELTA(log->logsize);
1414
1415	jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1416		 log->lsn, log->syncpt, log->sync);
1417
1418	/*
1419	 * initialize for lazy/group commit
1420	 */
1421	log->clsn = lsn;
1422
1423	return 0;
1424
1425	/*
1426	 *	unwind on error
1427	 */
1428      errout30:		/* release log page */
1429	log->wqueue = NULL;
1430	bp->l_wqnext = NULL;
1431	lbmFree(bp);
1432
1433      errout20:		/* release log superblock */
1434	lbmFree(bpsuper);
1435
1436      errout10:		/* unwind lbmLogInit() */
1437	lbmLogShutdown(log);
1438
1439	jfs_warn("lmLogInit: exit(%d)", rc);
1440	return rc;
1441}
1442
1443
1444/*
1445 * NAME:	lmLogClose()
1446 *
1447 * FUNCTION:	remove file system <ipmnt> from active list of log <iplog>
1448 *		and close it on last close.
1449 *
1450 * PARAMETER:	sb	- superblock
1451 *
1452 * RETURN:	errors from subroutines
1453 *
1454 * serialization:
1455 */
1456int lmLogClose(struct super_block *sb)
1457{
1458	struct jfs_sb_info *sbi = JFS_SBI(sb);
1459	struct jfs_log *log = sbi->log;
1460	struct block_device *bdev;
1461	int rc = 0;
1462
1463	jfs_info("lmLogClose: log:0x%p", log);
1464
1465	mutex_lock(&jfs_log_mutex);
1466	LOG_LOCK(log);
1467	list_del(&sbi->log_list);
1468	LOG_UNLOCK(log);
1469	sbi->log = NULL;
1470
1471	/*
1472	 * We need to make sure all of the "written" metapages
1473	 * actually make it to disk
1474	 */
1475	sync_blockdev(sb->s_bdev);
1476
1477	if (test_bit(log_INLINELOG, &log->flag)) {
1478		/*
1479		 *	in-line log in host file system
1480		 */
1481		rc = lmLogShutdown(log);
1482		kfree(log);
1483		goto out;
1484	}
1485
1486	if (!log->no_integrity)
1487		lmLogFileSystem(log, sbi, 0);
1488
1489	if (!list_empty(&log->sb_list))
1490		goto out;
1491
1492	/*
1493	 * TODO: ensure that the dummy_log is in a state to allow
1494	 * lbmLogShutdown to deallocate all the buffers and call
1495	 * kfree against dummy_log.  For now, leave dummy_log & its
1496 * buffers in memory, and reuse them if another no-integrity mount
1497	 * is requested.
1498	 */
1499	if (log->no_integrity)
1500		goto out;
1501
1502	/*
1503	 *	external log as separate logical volume
1504	 */
1505	list_del(&log->journal_list);
1506	bdev = log->bdev;
1507	rc = lmLogShutdown(log);
1508
1509	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1510
1511	kfree(log);
1512
1513      out:
1514	mutex_unlock(&jfs_log_mutex);
1515	jfs_info("lmLogClose: exit(%d)", rc);
1516	return rc;
1517}
1518
1519
1520/*
1521 * NAME:	jfs_flush_journal()
1522 *
1523 * FUNCTION:	initiate write of any outstanding transactions to the journal
1524 *		and optionally wait until they are all written to disk
1525 *
1526 *		wait == 0  flush until latest txn is committed, don't wait
1527 *		wait == 1  flush until latest txn is committed, wait
1528 *		wait > 1   flush until all txn's are complete, wait
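 *
 *		e.g., lmLogShutdown() below calls jfs_flush_journal(log, 2)
 *		to drain the commit queue and sync list completely before
 *		writing the final SYNCPT record.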
1529 */
1530void jfs_flush_journal(struct jfs_log *log, int wait)
1531{
1532	int i;
1533	struct tblock *target = NULL;
1534
1535	/* jfs_write_inode may call us during read-only mount */
1536	if (!log)
1537		return;
1538
1539	jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);
1540
1541	LOGGC_LOCK(log);
1542
1543	if (!list_empty(&log->cqueue)) {
1544		/*
1545		 * This ensures that we will keep writing to the journal as long
1546		 * as there are unwritten commit records
1547		 */
1548		target = list_entry(log->cqueue.prev, struct tblock, cqueue);
1549
1550		if (test_bit(log_FLUSH, &log->flag)) {
1551			/*
1552			 * We're already flushing.
1553			 * if flush_tblk is NULL, we are flushing everything,
1554			 * so leave it that way.  Otherwise, update it to the
1555			 * latest transaction
1556			 */
1557			if (log->flush_tblk)
1558				log->flush_tblk = target;
1559		} else {
1560			/* Only flush until latest transaction is committed */
1561			log->flush_tblk = target;
1562			set_bit(log_FLUSH, &log->flag);
1563
1564			/*
1565			 * Initiate I/O on outstanding transactions
1566			 */
1567			if (!(log->cflag & logGC_PAGEOUT)) {
1568				log->cflag |= logGC_PAGEOUT;
1569				lmGCwrite(log, 0);
1570			}
1571		}
1572	}
1573	if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
1574		/* Flush until all activity complete */
1575		set_bit(log_FLUSH, &log->flag);
1576		log->flush_tblk = NULL;
1577	}
1578
1579	if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
1580		DECLARE_WAITQUEUE(__wait, current);
1581
1582		add_wait_queue(&target->gcwait, &__wait);
1583		set_current_state(TASK_UNINTERRUPTIBLE);
1584		LOGGC_UNLOCK(log);
1585		schedule();
1586		LOGGC_LOCK(log);
1587		remove_wait_queue(&target->gcwait, &__wait);
1588	}
1589	LOGGC_UNLOCK(log);
1590
1591	if (wait < 2)
1592		return;
1593
1594	write_special_inodes(log, filemap_fdatawrite);
1595
1596	/*
1597	 * If there was recent activity, we may need to wait
1598	 * for the lazycommit thread to catch up
1599	 */
1600	if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
1601		for (i = 0; i < 200; i++) {	/* Too much? */
1602			msleep(250);
1603			write_special_inodes(log, filemap_fdatawrite);
1604			if (list_empty(&log->cqueue) &&
1605			    list_empty(&log->synclist))
1606				break;
1607		}
1608	}
1609	assert(list_empty(&log->cqueue));
1610
1611#ifdef CONFIG_JFS_DEBUG
1612	if (!list_empty(&log->synclist)) {
1613		struct logsyncblk *lp;
1614
1615		printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
1616		list_for_each_entry(lp, &log->synclist, synclist) {
1617			if (lp->xflag & COMMIT_PAGE) {
1618				struct metapage *mp = (struct metapage *)lp;
1619				print_hex_dump(KERN_ERR, "metapage: ",
1620					       DUMP_PREFIX_ADDRESS, 16, 4,
1621					       mp, sizeof(struct metapage), 0);
1622				print_hex_dump(KERN_ERR, "page: ",
1623					       DUMP_PREFIX_ADDRESS, 16,
1624					       sizeof(long), mp->page,
1625					       sizeof(struct page), 0);
1626			} else
1627				print_hex_dump(KERN_ERR, "tblock:",
1628					       DUMP_PREFIX_ADDRESS, 16, 4,
1629					       lp, sizeof(struct tblock), 0);
1630		}
1631	}
1632#else
1633	WARN_ON(!list_empty(&log->synclist));
1634#endif
1635	clear_bit(log_FLUSH, &log->flag);
1636}
1637
1638/*
1639 * NAME:	lmLogShutdown()
1640 *
1641 * FUNCTION:	log shutdown at last LogClose().
1642 *
1643 *		write log syncpt record.
1644 *		update super block to set redone flag to 0.
1645 *
1646 * PARAMETER:	log	- log inode
1647 *
1648 * RETURN:	0	- success
1649 *
1650 * serialization: single last close thread
1651 */
1652int lmLogShutdown(struct jfs_log * log)
1653{
1654	int rc;
1655	struct lrd lrd;
1656	int lsn;
1657	struct logsuper *logsuper;
1658	struct lbuf *bpsuper;
1659	struct lbuf *bp;
1660	struct logpage *lp;
1661
1662	jfs_info("lmLogShutdown: log:0x%p", log);
1663
1664	jfs_flush_journal(log, 2);
1665
1666	/*
1667	 * write the last SYNCPT record with syncpoint = 0
1668	 * (i.e., log redo up to HERE !)
1669	 */
1670	lrd.logtid = 0;
1671	lrd.backchain = 0;
1672	lrd.type = cpu_to_le16(LOG_SYNCPT);
1673	lrd.length = 0;
1674	lrd.log.syncpt.sync = 0;
1675
1676	lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1677	bp = log->bp;
1678	lp = (struct logpage *) bp->l_ldata;
1679	lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1680	lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
1681	lbmIOWait(log->bp, lbmFREE);
1682	log->bp = NULL;
1683
1684	/*
1685	 * synchronous update log superblock
1686	 * mark log state as shutdown cleanly
1687	 * (i.e., Log does not need to be replayed).
1688	 */
1689	if ((rc = lbmRead(log, 1, &bpsuper)))
1690		goto out;
1691
1692	logsuper = (struct logsuper *) bpsuper->l_ldata;
1693	logsuper->state = cpu_to_le32(LOGREDONE);
1694	logsuper->end = cpu_to_le32(lsn);
1695	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1696	rc = lbmIOWait(bpsuper, lbmFREE);
1697
1698	jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
1699		 lsn, log->page, log->eor);
1700
1701      out:
1702	/*
1703	 * shutdown per log i/o
1704	 */
1705	lbmLogShutdown(log);
1706
1707	if (rc) {
1708		jfs_warn("lmLogShutdown: exit(%d)", rc);
1709	}
1710	return rc;
1711}
1712
1713
1714/*
1715 * NAME:	lmLogFileSystem()
1716 *
1717 * FUNCTION:	insert (<activate> = true)/remove (<activate> = false)
1718 *	file system into/from log active file system list.
1719 *
1720 * PARAMETER:	log	- pointer to the log's inode.
1721 *		sbi	- jfs_sb_info of the file system being
1722 *			  inserted/removed.
1723 *		activate - insert/remove device from active list.
1724 *
1725 * RETURN:	0	- success
1726 *		errors returned by lbmIOWait().
1727 */
1728static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
1729			   int activate)
1730{
1731	int rc = 0;
1732	int i;
1733	struct logsuper *logsuper;
1734	struct lbuf *bpsuper;
1735	char *uuid = sbi->uuid;
1736
1737	/*
1738	 * insert/remove file system device to log active file system list.
1739	 */
1740	if ((rc = lbmRead(log, 1, &bpsuper)))
1741		return rc;
1742
1743	logsuper = (struct logsuper *) bpsuper->l_ldata;
1744	if (activate) {
1745		for (i = 0; i < MAX_ACTIVE; i++)
1746			if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) {
1747				memcpy(logsuper->active[i].uuid, uuid, 16);
1748				sbi->aggregate = i;
1749				break;
1750			}
1751		if (i == MAX_ACTIVE) {
1752			jfs_warn("Too many file systems sharing journal!");
1753			lbmFree(bpsuper);
1754			return -EMFILE;	/* Is there a better rc? */
1755		}
1756	} else {
1757		for (i = 0; i < MAX_ACTIVE; i++)
1758			if (!memcmp(logsuper->active[i].uuid, uuid, 16)) {
1759				memcpy(logsuper->active[i].uuid, NULL_UUID, 16);
1760				break;
1761			}
1762		if (i == MAX_ACTIVE) {
1763			jfs_warn("Somebody stomped on the journal!");
1764			lbmFree(bpsuper);
1765			return -EIO;
1766		}
1767
1768	}
1769
1770	/*
1771	 * synchronous write log superblock:
1772	 *
1773	 * write sidestream bypassing write queue:
1774	 * at file system mount, log super block is updated for
1775	 * activation of the file system before any log record
1776	 * (MOUNT record) of the file system, and at file system
1777	 * unmount, all meta data for the file system has been
1778	 * flushed before log super block is updated for deactivation
1779	 * of the file system.
1780	 */
1781	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1782	rc = lbmIOWait(bpsuper, lbmFREE);
1783
1784	return rc;
1785}
1786
1787/*
1788 *		log buffer manager (lbm)
1789 *		------------------------
1790 *
1791 * special purpose buffer manager supporting log i/o requirements.
1792 *
1793 * per log write queue:
 1794 * log pageout occurs in serial order through a fifo write queue,
 1795 * restricted to a single i/o in progress at any one time.
 1796 * the queue is a circular singly-linked list
 1797 * (log->wqueue points to the tail, and buffers are linked via the
 1798 * bp->l_wqnext field), and
 1799 * holds log pages in pageout or waiting for pageout, in serial order.
1800 */
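
/*
 * Sketch of the tail-pointer queue as maintained by lbmWrite() below:
 * with buffers A and B queued, log->wqueue points at B (the tail),
 * B->l_wqnext points at A (the head), and A->l_wqnext points back at
 * B; enqueueing C makes log->wqueue = C, C->l_wqnext = A and
 * B->l_wqnext = C.
 */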
1801
1802/*
1803 *	lbmLogInit()
1804 *
1805 * initialize per log I/O setup at lmLogInit()
1806 */
1807static int lbmLogInit(struct jfs_log * log)
1808{				/* log inode */
1809	int i;
1810	struct lbuf *lbuf;
1811
1812	jfs_info("lbmLogInit: log:0x%p", log);
1813
1814	/* initialize current buffer cursor */
1815	log->bp = NULL;
1816
1817	/* initialize log device write queue */
1818	log->wqueue = NULL;
1819
1820	/*
1821	 * Each log has its own buffer pages allocated to it.  These are
1822	 * not managed by the page cache.  This ensures that a transaction
1823	 * writing to the log does not block trying to allocate a page from
1824	 * the page cache (for the log).  This would be bad, since page
1825	 * allocation waits on the kswapd thread that may be committing inodes
1826	 * which would cause log activity.  Was that clear?  I'm trying to
1827	 * avoid deadlock here.
1828	 */
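	/*
	 * e.g., with LOGPSIZE == 4096: on a 4 KiB-page machine each page
	 * holds one lbuf's data, while on a 64 KiB-page machine sixteen
	 * lbufs share one page, which is why every lbuf after the first
	 * takes an extra get_page() reference in the loop below.
	 */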
1829	init_waitqueue_head(&log->free_wait);
1830
1831	log->lbuf_free = NULL;
1832
1833	for (i = 0; i < LOGPAGES;) {
1834		char *buffer;
1835		uint offset;
1836		struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
1837
1838		if (!page)
1839			goto error;
1840		buffer = page_address(page);
1841		for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
1842			lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1843			if (lbuf == NULL) {
1844				if (offset == 0)
1845					__free_page(page);
1846				goto error;
1847			}
1848			if (offset) /* we already have one reference */
1849				get_page(page);
1850			lbuf->l_offset = offset;
1851			lbuf->l_ldata = buffer + offset;
1852			lbuf->l_page = page;
1853			lbuf->l_log = log;
1854			init_waitqueue_head(&lbuf->l_ioevent);
1855
1856			lbuf->l_freelist = log->lbuf_free;
1857			log->lbuf_free = lbuf;
1858			i++;
1859		}
1860	}
1861
1862	return (0);
1863
1864      error:
1865	lbmLogShutdown(log);
1866	return -ENOMEM;
1867}
1868
1869
1870/*
1871 *	lbmLogShutdown()
1872 *
1873 * finalize per log I/O setup at lmLogShutdown()
1874 */
1875static void lbmLogShutdown(struct jfs_log * log)
1876{
1877	struct lbuf *lbuf;
1878
1879	jfs_info("lbmLogShutdown: log:0x%p", log);
1880
1881	lbuf = log->lbuf_free;
1882	while (lbuf) {
1883		struct lbuf *next = lbuf->l_freelist;
1884		__free_page(lbuf->l_page);
1885		kfree(lbuf);
1886		lbuf = next;
1887	}
1888}
1889
1890
1891/*
1892 *	lbmAllocate()
1893 *
1894 * allocate an empty log buffer
1895 */
1896static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1897{
1898	struct lbuf *bp;
1899	unsigned long flags;
1900
1901	/*
1902	 * recycle from log buffer freelist if any
1903	 */
1904	LCACHE_LOCK(flags);
1905	LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1906	log->lbuf_free = bp->l_freelist;
1907	LCACHE_UNLOCK(flags);
1908
1909	bp->l_flag = 0;
1910
1911	bp->l_wqnext = NULL;
1912	bp->l_freelist = NULL;
1913
1914	bp->l_pn = pn;
1915	bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1916	bp->l_ceor = 0;
1917
1918	return bp;
1919}
1920
1921
1922/*
1923 *	lbmFree()
1924 *
1925 * release a log buffer to freelist
1926 */
1927static void lbmFree(struct lbuf * bp)
1928{
1929	unsigned long flags;
1930
1931	LCACHE_LOCK(flags);
1932
1933	lbmfree(bp);
1934
1935	LCACHE_UNLOCK(flags);
1936}
1937
1938static void lbmfree(struct lbuf * bp)
1939{
1940	struct jfs_log *log = bp->l_log;
1941
1942	assert(bp->l_wqnext == NULL);
1943
1944	/*
1945	 * return the buffer to head of freelist
1946	 */
1947	bp->l_freelist = log->lbuf_free;
1948	log->lbuf_free = bp;
1949
1950	wake_up(&log->free_wait);
1951	return;
1952}
1953
1954
1955/*
1956 * NAME:	lbmRedrive
1957 *
1958 * FUNCTION:	add a log buffer to the log redrive list
1959 *
1960 * PARAMETER:
1961 *	bp	- log buffer
1962 *
1963 * NOTES:
1964 *	Takes log_redrive_lock.
1965 */
1966static inline void lbmRedrive(struct lbuf *bp)
1967{
1968	unsigned long flags;
1969
1970	spin_lock_irqsave(&log_redrive_lock, flags);
1971	bp->l_redrive_next = log_redrive_list;
1972	log_redrive_list = bp;
1973	spin_unlock_irqrestore(&log_redrive_lock, flags);
1974
1975	wake_up_process(jfsIOthread);
1976}
1977
1978
1979/*
1980 *	lbmRead()
1981 */
1982static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1983{
1984	struct bio *bio;
1985	struct lbuf *bp;
1986
1987	/*
1988	 * allocate a log buffer
1989	 */
1990	*bpp = bp = lbmAllocate(log, pn);
1991	jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1992
1993	bp->l_flag |= lbmREAD;
1994
1995	bio = bio_alloc(GFP_NOFS, 1);
1996
1997	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
1998	bio_set_dev(bio, log->bdev);
1999
2000	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
2001	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
2002
2003	bio->bi_end_io = lbmIODone;
2004	bio->bi_private = bp;
2005	bio->bi_opf = REQ_OP_READ;
 2006	/* check if journaling to disk has been disabled */
2007	if (log->no_integrity) {
2008		bio->bi_iter.bi_size = 0;
2009		lbmIODone(bio);
2010	} else {
2011		submit_bio(bio);
2012	}
2013
2014	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
2015
2016	return 0;
2017}
2018
2019
2020/*
2021 *	lbmWrite()
2022 *
 2023 * the buffer at the head of the pageout queue stays there after
 2024 * completion of a partial-page pageout and is redriven by explicit
 2025 * initiation of pageout by the caller until the full-page pageout
 2026 * is completed and the buffer released.
2027 *
2028 * device driver i/o done redrives pageout of new buffer at
2029 * head of pageout queue when current buffer at head of pageout
2030 * queue is released at the completion of its full-page pageout.
2031 *
2032 * LOGGC_LOCK() serializes calls to lbmWrite() from lmNextPage() and lmGroupCommit().
2033 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2034 */
2035static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2036		     int cant_block)
2037{
2038	struct lbuf *tail;
2039	unsigned long flags;
2040
2041	jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
2042
2043	/* map the logical block address to physical block address */
2044	bp->l_blkno =
2045	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2046
2047	LCACHE_LOCK(flags);		/* disable+lock */
2048
2049	/*
2050	 * initialize buffer for device driver
2051	 */
2052	bp->l_flag = flag;
2053
2054	/*
2055	 *	insert bp at tail of write queue associated with log
2056	 *
2057	 * (request is either for bp already/currently at head of queue
2058	 * or new bp to be inserted at tail)
2059	 */
2060	tail = log->wqueue;
2061
2062	/* is buffer not already on write queue ? */
2063	if (bp->l_wqnext == NULL) {
2064		/* insert at tail of wqueue */
2065		if (tail == NULL) {
2066			log->wqueue = bp;
2067			bp->l_wqnext = bp;
2068		} else {
2069			log->wqueue = bp;
2070			bp->l_wqnext = tail->l_wqnext;
2071			tail->l_wqnext = bp;
2072		}
2073
2074		tail = bp;
2075	}
2076
2077	/* is buffer at head of wqueue and for write ? */
2078	if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
2079		LCACHE_UNLOCK(flags);	/* unlock+enable */
2080		return;
2081	}
2082
2083	LCACHE_UNLOCK(flags);	/* unlock+enable */
2084
2085	if (cant_block)
2086		lbmRedrive(bp);
2087	else if (flag & lbmSYNC)
2088		lbmStartIO(bp);
2089	else {
2090		LOGGC_UNLOCK(log);
2091		lbmStartIO(bp);
2092		LOGGC_LOCK(log);
2093	}
2094}
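
/*
 * Editorial note: log->wqueue points at the TAIL of a singly linked
 * circular list, so tail->l_wqnext is the head; that is the element
 * the "bp != tail->l_wqnext" test above is checking.  A hypothetical
 * predicate spelling out the same invariant:
 */
static int lbm_is_wqueue_head_sketch(struct jfs_log *log, struct lbuf *bp)
{
	/* with pages A,B,C queued in order: wqueue -> C -> A -> B -> C... */
	return log->wqueue != NULL && log->wqueue->l_wqnext == bp;
}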
2095
2096
2097/*
2098 *	lbmDirectWrite()
2099 *
2100 * initiate pageout bypassing write queue for sidestream
2101 * (e.g., log superblock) write;
2102 */
2103static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2104{
2105	jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2106		 bp, flag, bp->l_pn);
2107
2108	/*
2109	 * initialize buffer for device driver
2110	 */
2111	bp->l_flag = flag | lbmDIRECT;
2112
2113	/* map the logical block address to physical block address */
2114	bp->l_blkno =
2115	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2116
2117	/*
2118	 *	initiate pageout of the page
2119	 */
2120	lbmStartIO(bp);
2121}
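
/*
 * Editorial sketch: a typical caller pairs lbmDirectWrite() with
 * lbmIOWait(), as lmLogInit() does for the log superblock.  The
 * helper below is hypothetical and merely condenses that pattern:
 */
static int lbm_sync_direct_write_sketch(struct jfs_log *log,
					struct lbuf *bpsuper)
{
	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
	return lbmIOWait(bpsuper, lbmFREE);	/* wait, then recycle */
}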
2122
2123
2124/*
2125 * NAME:	lbmStartIO()
2126 *
2127 * FUNCTION:	Interface to DD strategy routine
2128 *
2129 * RETURN:	none
2130 *
2131 * serialization: LCACHE_LOCK() is NOT held during log i/o;
2132 */
2133static void lbmStartIO(struct lbuf * bp)
2134{
2135	struct bio *bio;
2136	struct jfs_log *log = bp->l_log;
2137
2138	jfs_info("lbmStartIO");
2139
2140	bio = bio_alloc(GFP_NOFS, 1);
2141	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2142	bio_set_dev(bio, log->bdev);
2143
2144	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
2145	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
2146
2147	bio->bi_end_io = lbmIODone;
2148	bio->bi_private = bp;
2149	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
2150
2151	/* check if journaling to disk has been disabled */
2152	if (log->no_integrity) {
2153		bio->bi_iter.bi_size = 0;
2154		lbmIODone(bio);
2155	} else {
2156		submit_bio(bio);
2157		INCREMENT(lmStat.submitted);
2158	}
2159}
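
/*
 * Editorial note: the "l_blkno << (l2bsize - 9)" expression above
 * converts a filesystem block number into a 512-byte sector number;
 * e.g., with 4 KiB blocks (l2bsize = 12), block 10 becomes sector
 * 10 << 3 = 80.  Hypothetical helper restating the conversion:
 */
static sector_t lbm_blkno_to_sector_sketch(struct jfs_log *log, s64 blkno)
{
	/* blocks are 2^l2bsize bytes, sectors are 2^9 bytes */
	return blkno << (log->l2bsize - 9);
}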
2160
2161
2162/*
2163 *	lbmIOWait()
2164 */
2165static int lbmIOWait(struct lbuf * bp, int flag)
2166{
2167	unsigned long flags;
2168	int rc = 0;
2169
2170	jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2171
2172	LCACHE_LOCK(flags);		/* disable+lock */
2173
2174	LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2175
2176	rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2177
2178	if (flag & lbmFREE)
2179		lbmfree(bp);
2180
2181	LCACHE_UNLOCK(flags);	/* unlock+enable */
2182
2183	jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2184	return rc;
2185}
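
/*
 * Editorial sketch: the canonical submit-and-wait sequence used by
 * lmLogFormat() below; -EIO is reported if lbmIODone() saw an error.
 * This helper is illustrative only:
 */
static int lbm_sync_write_sketch(struct lbuf *bp)
{
	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	lbmStartIO(bp);
	return lbmIOWait(bp, 0);	/* sleeps until lbmDONE is set */
}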
2186
2187/*
2188 *	lbmIODone()
2189 *
2190 * executed at INTIODONE level
2191 */
2192static void lbmIODone(struct bio *bio)
2193{
2194	struct lbuf *bp = bio->bi_private;
2195	struct lbuf *nextbp, *tail;
2196	struct jfs_log *log;
2197	unsigned long flags;
2198
2199	/*
2200	 * get back jfs buffer bound to the i/o buffer
2201	 */
2202	jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
2203
2204	LCACHE_LOCK(flags);		/* disable+lock */
2205
2206	bp->l_flag |= lbmDONE;
2207
2208	if (bio->bi_status) {
2209		bp->l_flag |= lbmERROR;
2210
2211		jfs_err("lbmIODone: I/O error in JFS log");
2212	}
2213
2214	bio_put(bio);
2215
2216	/*
2217	 *	pagein completion
2218	 */
2219	if (bp->l_flag & lbmREAD) {
2220		bp->l_flag &= ~lbmREAD;
2221
2222		LCACHE_UNLOCK(flags);	/* unlock+enable */
2223
2224		/* wakeup I/O initiator */
2225		LCACHE_WAKEUP(&bp->l_ioevent);
2226
2227		return;
2228	}
2229
2230	/*
2231	 *	pageout completion
2232	 *
2233	 * the bp at the head of write queue has completed pageout.
2234	 *
2235	 * if single-commit/full-page pageout, remove the current buffer
2236	 * from head of pageout queue, and redrive pageout with
2237	 * the new buffer at head of pageout queue;
2238	 * otherwise, the partial-page pageout buffer stays at
2239	 * the head of pageout queue to be redriven for pageout
2240	 * by lmGroupCommit() until full-page pageout is completed.
2241	 */
2242	bp->l_flag &= ~lbmWRITE;
2243	INCREMENT(lmStat.pagedone);
2244
2245	/* update committed lsn */
2246	log = bp->l_log;
2247	log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
2248
2249	if (bp->l_flag & lbmDIRECT) {
2250		LCACHE_WAKEUP(&bp->l_ioevent);
2251		LCACHE_UNLOCK(flags);
2252		return;
2253	}
2254
2255	tail = log->wqueue;
2256
2257	/* single element queue */
2258	if (bp == tail) {
2259		/* remove head buffer of full-page pageout
2260		 * from log device write queue
2261		 */
2262		if (bp->l_flag & lbmRELEASE) {
2263			log->wqueue = NULL;
2264			bp->l_wqnext = NULL;
2265		}
2266	}
2267	/* multi element queue */
2268	else {
2269		/* remove head buffer of full-page pageout
2270		 * from log device write queue
2271		 */
2272		if (bp->l_flag & lbmRELEASE) {
2273			nextbp = tail->l_wqnext = bp->l_wqnext;
2274			bp->l_wqnext = NULL;
2275
2276			/*
2277			 * redrive pageout of the next page at the head of the
2278			 * write queue:
2279			 * either the next page has no bound tblk (i.e., no
2280			 * COMMIT records), or it is the first page of a new
2281			 * group commit queued behind the current page by
2282			 * lmGroupCommit(), as indicated by the lbmWRITE flag
2283			 * (subsequent pageout is performed synchronously,
2284			 * except for pages without any COMMITs);
2285			 */
2286			if (nextbp->l_flag & lbmWRITE) {
2287				/*
2288				 * We can't do the I/O at interrupt time.
2289				 * The jfsIO thread can do it
2290				 */
2291				lbmRedrive(nextbp);
2292			}
2293		}
2294	}
2295
2296	/*
2297	 *	synchronous pageout:
2298	 *
2299	 * buffer has not necessarily been removed from write queue
2300	 * (e.g., synchronous write of partial-page with COMMIT):
2301	 * leave buffer for i/o initiator to dispose
2302	 */
2303	if (bp->l_flag & lbmSYNC) {
2304		LCACHE_UNLOCK(flags);	/* unlock+enable */
2305
2306		/* wakeup I/O initiator */
2307		LCACHE_WAKEUP(&bp->l_ioevent);
2308	}
2309
2310	/*
2311	 *	Group Commit pageout:
2312	 */
2313	else if (bp->l_flag & lbmGC) {
2314		LCACHE_UNLOCK(flags);
2315		lmPostGC(bp);
2316	}
2317
2318	/*
2319	 *	asynchronous pageout:
2320	 *
2321	 * buffer must have been removed from write queue:
2322	 * insert buffer at head of freelist where it can be recycled
2323	 */
2324	else {
2325		assert(bp->l_flag & lbmRELEASE);
2326		assert(bp->l_flag & lbmFREE);
2327		lbmfree(bp);
2328
2329		LCACHE_UNLOCK(flags);	/* unlock+enable */
2330	}
2331}
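
/*
 * Editorial summary: after the write-queue manipulation above, the
 * completed pageout buffer takes exactly one of three dispositions.
 * Hypothetical helper restating the dispatch at the end of
 * lbmIODone():
 */
static const char *lbm_iodone_disposition_sketch(int l_flag)
{
	if (l_flag & lbmSYNC)
		return "wake initiator, leave buffer to caller"; /* lbmIOWait() */
	if (l_flag & lbmGC)
		return "group commit post-processing";		/* lmPostGC() */
	return "recycle buffer to freelist";			/* lbmfree() */
}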
2332
2333int jfsIOWait(void *arg)
2334{
2335	struct lbuf *bp;
2336
2337	do {
2338		spin_lock_irq(&log_redrive_lock);
2339		while ((bp = log_redrive_list)) {
2340			log_redrive_list = bp->l_redrive_next;
2341			bp->l_redrive_next = NULL;
2342			spin_unlock_irq(&log_redrive_lock);
2343			lbmStartIO(bp);
2344			spin_lock_irq(&log_redrive_lock);
2345		}
2346
2347		if (freezing(current)) {
2348			spin_unlock_irq(&log_redrive_lock);
2349			try_to_freeze();
2350		} else {
2351			set_current_state(TASK_INTERRUPTIBLE);
2352			spin_unlock_irq(&log_redrive_lock);
2353			schedule();
2354		}
2355	} while (!kthread_should_stop());
2356
2357	jfs_info("jfsIOWait being killed!");
2358	return 0;
2359}
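
/*
 * Editorial sketch: jfsIOWait() is the body of the jfsIO kernel
 * thread.  It is started during JFS module initialization elsewhere;
 * the call below is an illustration of that hand-off, not a copy of
 * the upstream code:
 */
static int jfs_io_thread_start_sketch(void)
{
	struct task_struct *t = kthread_run(jfsIOWait, NULL, "jfsIO");

	return IS_ERR(t) ? PTR_ERR(t) : 0;
}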
2360
2361/*
2362 * NAME:	lmLogFormat()/jfs_logform()
2363 *
2364 * FUNCTION:	format file system log
2365 *
2366 * PARAMETERS:
2367 *	log	- volume log
2368 *	logAddress - start address of log space in FS block
2369 *	logSize	- length of log space in FS block;
2370 *
2371 * RETURN:	0	- success
2372 *		-EIO	- i/o error
2373 *
2374 * XXX: We're synchronously writing one page at a time.  This needs to
2375 *	be improved by writing multiple pages at once.
2376 */
2377int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2378{
2379	int rc = -EIO;
2380	struct jfs_sb_info *sbi;
2381	struct logsuper *logsuper;
2382	struct logpage *lp;
2383	int lspn;		/* log sequence page number */
2384	struct lrd *lrd_ptr;
2385	int npages = 0;
2386	struct lbuf *bp;
2387
2388	jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
2389		 (long long)logAddress, logSize);
2390
2391	sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);
2392
2393	/* allocate a log buffer */
2394	bp = lbmAllocate(log, 1);
2395
2396	npages = logSize >> sbi->l2nbperpage;
2397
2398	/*
2399	 *	log space:
2400	 *
2401	 * page 0 - reserved;
2402	 * page 1 - log superblock;
2403	 * page 2 - log data page: A SYNC log record is written
2404	 *	    into this page at logform time;
2405	 * pages 3-N - log data page: set to empty log data pages;
2406	 */
2407	/*
2408	 *	init log superblock: log page 1
2409	 */
2410	logsuper = (struct logsuper *) bp->l_ldata;
2411
2412	logsuper->magic = cpu_to_le32(LOGMAGIC);
2413	logsuper->version = cpu_to_le32(LOGVERSION);
2414	logsuper->state = cpu_to_le32(LOGREDONE);
2415	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
2416	logsuper->size = cpu_to_le32(npages);
2417	logsuper->bsize = cpu_to_le32(sbi->bsize);
2418	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
2419	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
2420
2421	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2422	bp->l_blkno = logAddress + sbi->nbperpage;
2423	lbmStartIO(bp);
2424	if ((rc = lbmIOWait(bp, 0)))
2425		goto exit;
2426
2427	/*
2428	 *	init pages 2 to npages-1 as log data pages:
2429	 *
2430	 * log page sequence number (lpsn) initialization:
2431	 *
2432	 * pn:   0     1     2     3                 n-1
2433	 *       +-----+-----+=====+=====+===.....===+=====+
2434	 * lspn:             N-1   0     1           N-2
2435	 *                   <--- N page circular file ---->
2436	 *
2437 * the N (= npages-2) data pages of the log are maintained as
2438 * a circular file for the log records;
2439 * lpsn grows by 1 monotonically as each log page is written
2440 * to the circular file of the log;
2441 * and setLogpage() will not reset the page number even if
2442 * the eor is equal to LOGPHDRSIZE. In order for the binary
2443 * search to still work in the find-log-end process, we have
2444 * to simulate the log wrap situation at log format time.
2445 * The 1st log page written will have the highest lpsn. Then
2446 * the succeeding log pages will have ascending lpsn values
2447 * starting from 0, ..., (N-2).
2448	 */
2449	lp = (struct logpage *) bp->l_ldata;
2450	/*
2451	 * initialize 1st log page to be written: lpsn = N - 1,
2452	 * a SYNCPT log record is written into this page
2453	 */
2454	lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
2455	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
2456
2457	lrd_ptr = (struct lrd *) &lp->data;
2458	lrd_ptr->logtid = 0;
2459	lrd_ptr->backchain = 0;
2460	lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
2461	lrd_ptr->length = 0;
2462	lrd_ptr->log.syncpt.sync = 0;
2463
2464	bp->l_blkno += sbi->nbperpage;
2465	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2466	lbmStartIO(bp);
2467	if ((rc = lbmIOWait(bp, 0)))
2468		goto exit;
2469
2470	/*
2471	 *	initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
2472	 */
2473	for (lspn = 0; lspn < npages - 3; lspn++) {
2474		lp->h.page = lp->t.page = cpu_to_le32(lspn);
2475		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
2476
2477		bp->l_blkno += sbi->nbperpage;
2478		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2479		lbmStartIO(bp);
2480		if ((rc = lbmIOWait(bp, 0)))
2481			goto exit;
2482	}
2483
2484	rc = 0;
2485exit:
2486	/*
2487	 *	finalize log
2488	 */
2489	/* release the buffer */
2490	lbmFree(bp);
2491
2492	return rc;
2493}
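
/*
 * Editorial worked example for the lpsn layout above (hypothetical
 * helper): with npages total log pages there are N = npages - 2 data
 * pages (pn = 2 .. npages-1).  lmLogFormat() labels them as a log
 * that has already wrapped once, so the find-log-end binary search
 * sees exactly one descending step, between pages 2 and 3:
 */
static int lmLogFormat_lpsn_sketch(int npages, int pn)
{
	int N = npages - 2;	/* number of data pages */

	if (pn == 2)		/* first data page: highest lpsn */
		return N - 1;
	return pn - 3;		/* pages 3..npages-1 get lpsn 0..N-2 */
}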
2494
2495#ifdef CONFIG_JFS_STATISTICS
2496static int jfs_lmstats_proc_show(struct seq_file *m, void *v)
2497{
2498	seq_printf(m,
2499		       "JFS Logmgr stats\n"
2500		       "================\n"
2501		       "commits = %d\n"
2502		       "writes submitted = %d\n"
2503		       "writes completed = %d\n"
2504		       "full pages submitted = %d\n"
2505		       "partial pages submitted = %d\n",
2506		       lmStat.commit,
2507		       lmStat.submitted,
2508		       lmStat.pagedone,
2509		       lmStat.full_page,
2510		       lmStat.partial_page);
2511	return 0;
2512}
2513
2514static int jfs_lmstats_proc_open(struct inode *inode, struct file *file)
2515{
2516	return single_open(file, jfs_lmstats_proc_show, NULL);
2517}
2518
2519const struct file_operations jfs_lmstats_proc_fops = {
2520	.open		= jfs_lmstats_proc_open,
2521	.read		= seq_read,
2522	.llseek		= seq_lseek,
2523	.release	= single_release,
2524};
2525#endif /* CONFIG_JFS_STATISTICS */
v6.13.7
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *   Copyright (C) International Business Machines Corp., 2000-2004
   4 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 
 
 
 
 
 
 
 
 
 
 
 
 
 
   5 */
   6
   7/*
   8 *	jfs_logmgr.c: log manager
   9 *
  10 * for related information, see transaction manager (jfs_txnmgr.c), and
  11 * recovery manager (jfs_logredo.c).
  12 *
  13 * note: for detail, RTFS.
  14 *
  15 *	log buffer manager:
  16 * special purpose buffer manager supporting log i/o requirements.
  17 * per log serial pageout of logpage
  18 * queuing i/o requests and redrive i/o at iodone
  19 * maintain current logpage buffer
  20 * no caching since append only
  21 * appropriate jfs buffer cache buffers as needed
  22 *
  23 *	group commit:
  24 * transactions which wrote COMMIT records in the same in-memory
  25 * log page during the pageout of previous/current log page(s) are
  26 * committed together by the pageout of the page.
  27 *
  28 *	TBD lazy commit:
  29 * transactions are committed asynchronously when the log page
  30 * containing it COMMIT is paged out when it becomes full;
  31 *
  32 *	serialization:
  33 * . a per log lock serialize log write.
  34 * . a per log lock serialize group commit.
  35 * . a per log lock serialize log open/close;
  36 *
  37 *	TBD log integrity:
  38 * careful-write (ping-pong) of last logpage to recover from crash
  39 * in overwrite.
  40 * detection of split (out-of-order) write of physical sectors
  41 * of last logpage via timestamp at end of each sector
  42 * with its mirror data array at trailer).
  43 *
  44 *	alternatives:
  45 * lsn - 64-bit monotonically increasing integer vs
  46 * 32-bit lspn and page eor.
  47 */
  48
  49#include <linux/fs.h>
  50#include <linux/blkdev.h>
  51#include <linux/interrupt.h>
  52#include <linux/completion.h>
  53#include <linux/kthread.h>
  54#include <linux/buffer_head.h>		/* for sync_blockdev() */
  55#include <linux/bio.h>
  56#include <linux/freezer.h>
  57#include <linux/export.h>
  58#include <linux/delay.h>
  59#include <linux/mutex.h>
  60#include <linux/seq_file.h>
  61#include <linux/slab.h>
  62#include "jfs_incore.h"
  63#include "jfs_filsys.h"
  64#include "jfs_metapage.h"
  65#include "jfs_superblock.h"
  66#include "jfs_txnmgr.h"
  67#include "jfs_debug.h"
  68
  69
  70/*
  71 * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
  72 */
  73static struct lbuf *log_redrive_list;
  74static DEFINE_SPINLOCK(log_redrive_lock);
  75
  76
  77/*
  78 *	log read/write serialization (per log)
  79 */
  80#define LOG_LOCK_INIT(log)	mutex_init(&(log)->loglock)
  81#define LOG_LOCK(log)		mutex_lock(&((log)->loglock))
  82#define LOG_UNLOCK(log)		mutex_unlock(&((log)->loglock))
  83
  84
  85/*
  86 *	log group commit serialization (per log)
  87 */
  88
  89#define LOGGC_LOCK_INIT(log)	spin_lock_init(&(log)->gclock)
  90#define LOGGC_LOCK(log)		spin_lock_irq(&(log)->gclock)
  91#define LOGGC_UNLOCK(log)	spin_unlock_irq(&(log)->gclock)
  92#define LOGGC_WAKEUP(tblk)	wake_up_all(&(tblk)->gcwait)
  93
  94/*
  95 *	log sync serialization (per log)
  96 */
  97#define	LOGSYNC_DELTA(logsize)		min((logsize)/8, 128*LOGPSIZE)
  98#define	LOGSYNC_BARRIER(logsize)	((logsize)/4)
  99/*
 100#define	LOGSYNC_DELTA(logsize)		min((logsize)/4, 256*LOGPSIZE)
 101#define	LOGSYNC_BARRIER(logsize)	((logsize)/2)
 102*/
 103
 104
 105/*
 106 *	log buffer cache synchronization
 107 */
 108static DEFINE_SPINLOCK(jfsLCacheLock);
 109
 110#define	LCACHE_LOCK(flags)	spin_lock_irqsave(&jfsLCacheLock, flags)
 111#define	LCACHE_UNLOCK(flags)	spin_unlock_irqrestore(&jfsLCacheLock, flags)
 112
 113/*
 114 * See __SLEEP_COND in jfs_locks.h
 115 */
 116#define LCACHE_SLEEP_COND(wq, cond, flags)	\
 117do {						\
 118	if (cond)				\
 119		break;				\
 120	__SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
 121} while (0)
 122
 123#define	LCACHE_WAKEUP(event)	wake_up(event)
 124
 125
 126/*
 127 *	lbuf buffer cache (lCache) control
 128 */
 129/* log buffer manager pageout control (cumulative, inclusive) */
 130#define	lbmREAD		0x0001
 131#define	lbmWRITE	0x0002	/* enqueue at tail of write queue;
 132				 * init pageout if at head of queue;
 133				 */
 134#define	lbmRELEASE	0x0004	/* remove from write queue
 135				 * at completion of pageout;
 136				 * do not free/recycle it yet:
 137				 * caller will free it;
 138				 */
 139#define	lbmSYNC		0x0008	/* do not return to freelist
 140				 * when removed from write queue;
 141				 */
 142#define lbmFREE		0x0010	/* return to freelist
 143				 * at completion of pageout;
 144				 * the buffer may be recycled;
 145				 */
 146#define	lbmDONE		0x0020
 147#define	lbmERROR	0x0040
 148#define lbmGC		0x0080	/* lbmIODone to perform post-GC processing
 149				 * of log page
 150				 */
 151#define lbmDIRECT	0x0100
 152
 153/*
 154 * Global list of active external journals
 155 */
 156static LIST_HEAD(jfs_external_logs);
 157static struct jfs_log *dummy_log;
 158static DEFINE_MUTEX(jfs_log_mutex);
 159
 160/*
 161 * forward references
 162 */
 163static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
 164			 struct lrd * lrd, struct tlock * tlck);
 165
 166static int lmNextPage(struct jfs_log * log);
 167static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
 168			   int activate);
 169
 170static int open_inline_log(struct super_block *sb);
 171static int open_dummy_log(struct super_block *sb);
 172static int lbmLogInit(struct jfs_log * log);
 173static void lbmLogShutdown(struct jfs_log * log);
 174static struct lbuf *lbmAllocate(struct jfs_log * log, int);
 175static void lbmFree(struct lbuf * bp);
 176static void lbmfree(struct lbuf * bp);
 177static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
 178static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
 179static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
 180static int lbmIOWait(struct lbuf * bp, int flag);
 181static bio_end_io_t lbmIODone;
 182static void lbmStartIO(struct lbuf * bp);
 183static void lmGCwrite(struct jfs_log * log, int cant_block);
 184static int lmLogSync(struct jfs_log * log, int hard_sync);
 185
 186
 187
 188/*
 189 *	statistics
 190 */
 191#ifdef CONFIG_JFS_STATISTICS
 192static struct lmStat {
 193	uint commit;		/* # of commit */
 194	uint pagedone;		/* # of page written */
 195	uint submitted;		/* # of pages submitted */
 196	uint full_page;		/* # of full pages submitted */
 197	uint partial_page;	/* # of partial pages submitted */
 198} lmStat;
 199#endif
 200
 201static void write_special_inodes(struct jfs_log *log,
 202				 int (*writer)(struct address_space *))
 203{
 204	struct jfs_sb_info *sbi;
 205
 206	list_for_each_entry(sbi, &log->sb_list, log_list) {
 207		writer(sbi->ipbmap->i_mapping);
 208		writer(sbi->ipimap->i_mapping);
 209		writer(sbi->direct_inode->i_mapping);
 210	}
 211}
 212
 213/*
 214 * NAME:	lmLog()
 215 *
 216 * FUNCTION:	write a log record;
 217 *
 218 * PARAMETER:
 219 *
 220 * RETURN:	lsn - offset to the next log record to write (end-of-log);
 221 *		-1  - error;
 222 *
 223 * note: todo: log error handler
 224 */
 225int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 226	  struct tlock * tlck)
 227{
 228	int lsn;
 229	int diffp, difft;
 230	struct metapage *mp = NULL;
 231	unsigned long flags;
 232
 233	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
 234		 log, tblk, lrd, tlck);
 235
 236	LOG_LOCK(log);
 237
 238	/* log by (out-of-transaction) JFS ? */
 239	if (tblk == NULL)
 240		goto writeRecord;
 241
 242	/* log from page ? */
 243	if (tlck == NULL ||
 244	    tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
 245		goto writeRecord;
 246
 247	/*
 248	 *	initialize/update page/transaction recovery lsn
 249	 */
 250	lsn = log->lsn;
 251
 252	LOGSYNC_LOCK(log, flags);
 253
 254	/*
 255	 * initialize page lsn if first log write of the page
 256	 */
 257	if (mp->lsn == 0) {
 258		mp->log = log;
 259		mp->lsn = lsn;
 260		log->count++;
 261
 262		/* insert page at tail of logsynclist */
 263		list_add_tail(&mp->synclist, &log->synclist);
 264	}
 265
 266	/*
 267	 *	initialize/update lsn of tblock of the page
 268	 *
 269	 * transaction inherits oldest lsn of pages associated
 270	 * with allocation/deallocation of resources (their
 271	 * log records are used to reconstruct allocation map
 272	 * at recovery time: inode for inode allocation map,
 273	 * B+-tree index of extent descriptors for block
 274	 * allocation map);
 275	 * allocation map pages inherit transaction lsn at
 276	 * commit time to allow forwarding log syncpt past log
 277	 * records associated with allocation/deallocation of
 278	 * resources only after persistent map of these map pages
 279	 * have been updated and propagated to home.
 280	 */
 281	/*
 282	 * initialize transaction lsn:
 283	 */
 284	if (tblk->lsn == 0) {
 285		/* inherit lsn of its first page logged */
 286		tblk->lsn = mp->lsn;
 287		log->count++;
 288
 289		/* insert tblock after the page on logsynclist */
 290		list_add(&tblk->synclist, &mp->synclist);
 291	}
 292	/*
 293	 * update transaction lsn:
 294	 */
 295	else {
 296		/* inherit oldest/smallest lsn of page */
 297		logdiff(diffp, mp->lsn, log);
 298		logdiff(difft, tblk->lsn, log);
 299		if (diffp < difft) {
 300			/* update tblock lsn with page lsn */
 301			tblk->lsn = mp->lsn;
 302
 303			/* move tblock after page on logsynclist */
 304			list_move(&tblk->synclist, &mp->synclist);
 305		}
 306	}
 307
 308	LOGSYNC_UNLOCK(log, flags);
 309
 310	/*
 311	 *	write the log record
 312	 */
 313      writeRecord:
 314	lsn = lmWriteRecord(log, tblk, lrd, tlck);
 315
 316	/*
 317	 * forward log syncpt if log reached next syncpt trigger
 318	 */
 319	logdiff(diffp, lsn, log);
 320	if (diffp >= log->nextsync)
 321		lsn = lmLogSync(log, 0);
 322
 323	/* update end-of-log lsn */
 324	log->lsn = lsn;
 325
 326	LOG_UNLOCK(log);
 327
 328	/* return end-of-log address */
 329	return lsn;
 330}
 331
 332/*
 333 * NAME:	lmWriteRecord()
 334 *
 335 * FUNCTION:	move the log record to current log page
 336 *
 337 * PARAMETER:	cd	- commit descriptor
 338 *
 339 * RETURN:	end-of-log address
 340 *
 341 * serialization: LOG_LOCK() held on entry/exit
 342 */
 343static int
 344lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 345	      struct tlock * tlck)
 346{
 347	int lsn = 0;		/* end-of-log address */
 348	struct lbuf *bp;	/* dst log page buffer */
 349	struct logpage *lp;	/* dst log page */
 350	caddr_t dst;		/* destination address in log page */
 351	int dstoffset;		/* end-of-log offset in log page */
 352	int freespace;		/* free space in log page */
 353	caddr_t p;		/* src meta-data page */
 354	caddr_t src;
 355	int srclen;
 356	int nbytes;		/* number of bytes to move */
 357	int i;
 358	int len;
 359	struct linelock *linelock;
 360	struct lv *lv;
 361	struct lvd *lvd;
 362	int l2linesize;
 363
 364	len = 0;
 365
 366	/* retrieve destination log page to write */
 367	bp = (struct lbuf *) log->bp;
 368	lp = (struct logpage *) bp->l_ldata;
 369	dstoffset = log->eor;
 370
 371	/* any log data to write ? */
 372	if (tlck == NULL)
 373		goto moveLrd;
 374
 375	/*
 376	 *	move log record data
 377	 */
 378	/* retrieve source meta-data page to log */
 379	if (tlck->flag & tlckPAGELOCK) {
 380		p = (caddr_t) (tlck->mp->data);
 381		linelock = (struct linelock *) & tlck->lock;
 382	}
 383	/* retrieve source in-memory inode to log */
 384	else if (tlck->flag & tlckINODELOCK) {
 385		if (tlck->type & tlckDTREE)
 386			p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
 387		else
 388			p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
 389		linelock = (struct linelock *) & tlck->lock;
 390	}
 
 
 
 
 
 
 
 
 391	else {
 392		jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
 393		return 0;	/* Probably should trap */
 394	}
 395	l2linesize = linelock->l2linesize;
 396
 397      moveData:
 398	ASSERT(linelock->index <= linelock->maxcnt);
 399
 400	lv = linelock->lv;
 401	for (i = 0; i < linelock->index; i++, lv++) {
 402		if (lv->length == 0)
 403			continue;
 404
 405		/* is page full ? */
 406		if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
 407			/* page become full: move on to next page */
 408			lmNextPage(log);
 409
 410			bp = log->bp;
 411			lp = (struct logpage *) bp->l_ldata;
 412			dstoffset = LOGPHDRSIZE;
 413		}
 414
 415		/*
 416		 * move log vector data
 417		 */
 418		src = (u8 *) p + (lv->offset << l2linesize);
 419		srclen = lv->length << l2linesize;
 420		len += srclen;
 421		while (srclen > 0) {
 422			freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 423			nbytes = min(freespace, srclen);
 424			dst = (caddr_t) lp + dstoffset;
 425			memcpy(dst, src, nbytes);
 426			dstoffset += nbytes;
 427
 428			/* is page not full ? */
 429			if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 430				break;
 431
 432			/* page become full: move on to next page */
 433			lmNextPage(log);
 434
 435			bp = (struct lbuf *) log->bp;
 436			lp = (struct logpage *) bp->l_ldata;
 437			dstoffset = LOGPHDRSIZE;
 438
 439			srclen -= nbytes;
 440			src += nbytes;
 441		}
 442
 443		/*
 444		 * move log vector descriptor
 445		 */
 446		len += 4;
 447		lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
 448		lvd->offset = cpu_to_le16(lv->offset);
 449		lvd->length = cpu_to_le16(lv->length);
 450		dstoffset += 4;
 451		jfs_info("lmWriteRecord: lv offset:%d length:%d",
 452			 lv->offset, lv->length);
 453	}
 454
 455	if ((i = linelock->next)) {
 456		linelock = (struct linelock *) lid_to_tlock(i);
 457		goto moveData;
 458	}
 459
 460	/*
 461	 *	move log record descriptor
 462	 */
 463      moveLrd:
 464	lrd->length = cpu_to_le16(len);
 465
 466	src = (caddr_t) lrd;
 467	srclen = LOGRDSIZE;
 468
 469	while (srclen > 0) {
 470		freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 471		nbytes = min(freespace, srclen);
 472		dst = (caddr_t) lp + dstoffset;
 473		memcpy(dst, src, nbytes);
 474
 475		dstoffset += nbytes;
 476		srclen -= nbytes;
 477
 478		/* are there more to move than freespace of page ? */
 479		if (srclen)
 480			goto pageFull;
 481
 482		/*
 483		 * end of log record descriptor
 484		 */
 485
 486		/* update last log record eor */
 487		log->eor = dstoffset;
 488		bp->l_eor = dstoffset;
 489		lsn = (log->page << L2LOGPSIZE) + dstoffset;
 490
 491		if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
 492			tblk->clsn = lsn;
 493			jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
 494				 bp->l_eor);
 495
 496			INCREMENT(lmStat.commit);	/* # of commit */
 497
 498			/*
 499			 * enqueue tblock for group commit:
 500			 *
 501			 * enqueue tblock of non-trivial/synchronous COMMIT
 502			 * at tail of group commit queue
 503			 * (trivial/asynchronous COMMITs are ignored by
 504			 * group commit.)
 505			 */
 506			LOGGC_LOCK(log);
 507
 508			/* init tblock gc state */
 509			tblk->flag = tblkGC_QUEUE;
 510			tblk->bp = log->bp;
 511			tblk->pn = log->page;
 512			tblk->eor = log->eor;
 513
 514			/* enqueue transaction to commit queue */
 515			list_add_tail(&tblk->cqueue, &log->cqueue);
 516
 517			LOGGC_UNLOCK(log);
 518		}
 519
 520		jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
 521			le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
 522
 523		/* page not full ? */
 524		if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 525			return lsn;
 526
 527	      pageFull:
 528		/* page become full: move on to next page */
 529		lmNextPage(log);
 530
 531		bp = (struct lbuf *) log->bp;
 532		lp = (struct logpage *) bp->l_ldata;
 533		dstoffset = LOGPHDRSIZE;
 534		src += nbytes;
 535	}
 536
 537	return lsn;
 538}
 539
 540
 541/*
 542 * NAME:	lmNextPage()
 543 *
 544 * FUNCTION:	write current page and allocate next page.
 545 *
 546 * PARAMETER:	log
 547 *
 548 * RETURN:	0
 549 *
 550 * serialization: LOG_LOCK() held on entry/exit
 551 */
 552static int lmNextPage(struct jfs_log * log)
 553{
 554	struct logpage *lp;
 555	int lspn;		/* log sequence page number */
 556	int pn;			/* current page number */
 557	struct lbuf *bp;
 558	struct lbuf *nextbp;
 559	struct tblock *tblk;
 560
 561	/* get current log page number and log sequence page number */
 562	pn = log->page;
 563	bp = log->bp;
 564	lp = (struct logpage *) bp->l_ldata;
 565	lspn = le32_to_cpu(lp->h.page);
 566
 567	LOGGC_LOCK(log);
 568
 569	/*
 570	 *	write or queue the full page at the tail of write queue
 571	 */
 572	/* get the tail tblk on commit queue */
 573	if (list_empty(&log->cqueue))
 574		tblk = NULL;
 575	else
 576		tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
 577
 578	/* every tblk who has COMMIT record on the current page,
 579	 * and has not been committed, must be on commit queue
 580	 * since tblk is queued at commit queueu at the time
 581	 * of writing its COMMIT record on the page before
 582	 * page becomes full (even though the tblk thread
 583	 * who wrote COMMIT record may have been suspended
 584	 * currently);
 585	 */
 586
 587	/* is page bound with outstanding tail tblk ? */
 588	if (tblk && tblk->pn == pn) {
 589		/* mark tblk for end-of-page */
 590		tblk->flag |= tblkGC_EOP;
 591
 592		if (log->cflag & logGC_PAGEOUT) {
 593			/* if page is not already on write queue,
 594			 * just enqueue (no lbmWRITE to prevent redrive)
 595			 * buffer to wqueue to ensure correct serial order
 596			 * of the pages since log pages will be added
 597			 * continuously
 598			 */
 599			if (bp->l_wqnext == NULL)
 600				lbmWrite(log, bp, 0, 0);
 601		} else {
 602			/*
 603			 * No current GC leader, initiate group commit
 604			 */
 605			log->cflag |= logGC_PAGEOUT;
 606			lmGCwrite(log, 0);
 607		}
 608	}
 609	/* page is not bound with outstanding tblk:
 610	 * init write or mark it to be redriven (lbmWRITE)
 611	 */
 612	else {
 613		/* finalize the page */
 614		bp->l_ceor = bp->l_eor;
 615		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 616		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
 617	}
 618	LOGGC_UNLOCK(log);
 619
 620	/*
 621	 *	allocate/initialize next page
 622	 */
 623	/* if log wraps, the first data page of log is 2
 624	 * (0 never used, 1 is superblock).
 625	 */
 626	log->page = (pn == log->size - 1) ? 2 : pn + 1;
 627	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */
 628
 629	/* allocate/initialize next log page buffer */
 630	nextbp = lbmAllocate(log, log->page);
 631	nextbp->l_eor = log->eor;
 632	log->bp = nextbp;
 633
 634	/* initialize next log page */
 635	lp = (struct logpage *) nextbp->l_ldata;
 636	lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
 637	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
 638
 639	return 0;
 640}
 641
 642
 643/*
 644 * NAME:	lmGroupCommit()
 645 *
 646 * FUNCTION:	group commit
 647 *	initiate pageout of the pages with COMMIT in the order of
 648 *	page number - redrive pageout of the page at the head of
 649 *	pageout queue until full page has been written.
 650 *
 651 * RETURN:
 652 *
 653 * NOTE:
 654 *	LOGGC_LOCK serializes log group commit queue, and
 655 *	transaction blocks on the commit queue.
 656 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 657 */
 658int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
 659{
 660	int rc = 0;
 661
 662	LOGGC_LOCK(log);
 663
 664	/* group committed already ? */
 665	if (tblk->flag & tblkGC_COMMITTED) {
 666		if (tblk->flag & tblkGC_ERROR)
 667			rc = -EIO;
 668
 669		LOGGC_UNLOCK(log);
 670		return rc;
 671	}
 672	jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
 673
 674	if (tblk->xflag & COMMIT_LAZY)
 675		tblk->flag |= tblkGC_LAZY;
 676
 677	if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
 678	    (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
 679	     || jfs_tlocks_low)) {
 680		/*
 681		 * No pageout in progress
 682		 *
 683		 * start group commit as its group leader.
 684		 */
 685		log->cflag |= logGC_PAGEOUT;
 686
 687		lmGCwrite(log, 0);
 688	}
 689
 690	if (tblk->xflag & COMMIT_LAZY) {
 691		/*
 692		 * Lazy transactions can leave now
 693		 */
 694		LOGGC_UNLOCK(log);
 695		return 0;
 696	}
 697
 698	/* lmGCwrite gives up LOGGC_LOCK, check again */
 699
 700	if (tblk->flag & tblkGC_COMMITTED) {
 701		if (tblk->flag & tblkGC_ERROR)
 702			rc = -EIO;
 703
 704		LOGGC_UNLOCK(log);
 705		return rc;
 706	}
 707
 708	/* upcount transaction waiting for completion
 709	 */
 710	log->gcrtc++;
 711	tblk->flag |= tblkGC_READY;
 712
 713	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
 714		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));
 715
 716	/* removed from commit queue */
 717	if (tblk->flag & tblkGC_ERROR)
 718		rc = -EIO;
 719
 720	LOGGC_UNLOCK(log);
 721	return rc;
 722}
 723
 724/*
 725 * NAME:	lmGCwrite()
 726 *
 727 * FUNCTION:	group commit write
 728 *	initiate write of log page, building a group of all transactions
 729 *	with commit records on that page.
 730 *
 731 * RETURN:	None
 732 *
 733 * NOTE:
 734 *	LOGGC_LOCK must be held by caller.
 735 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 736 */
 737static void lmGCwrite(struct jfs_log * log, int cant_write)
 738{
 739	struct lbuf *bp;
 740	struct logpage *lp;
 741	int gcpn;		/* group commit page number */
 742	struct tblock *tblk;
 743	struct tblock *xtblk = NULL;
 744
 745	/*
 746	 * build the commit group of a log page
 747	 *
 748	 * scan commit queue and make a commit group of all
 749	 * transactions with COMMIT records on the same log page.
 750	 */
 751	/* get the head tblk on the commit queue */
 752	gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
 753
 754	list_for_each_entry(tblk, &log->cqueue, cqueue) {
 755		if (tblk->pn != gcpn)
 756			break;
 757
 758		xtblk = tblk;
 759
 760		/* state transition: (QUEUE, READY) -> COMMIT */
 761		tblk->flag |= tblkGC_COMMIT;
 762	}
 763	tblk = xtblk;		/* last tblk of the page */
 764
 765	/*
 766	 * pageout to commit transactions on the log page.
 767	 */
 768	bp = (struct lbuf *) tblk->bp;
 769	lp = (struct logpage *) bp->l_ldata;
 770	/* is page already full ? */
 771	if (tblk->flag & tblkGC_EOP) {
 772		/* mark page to free at end of group commit of the page */
 773		tblk->flag &= ~tblkGC_EOP;
 774		tblk->flag |= tblkGC_FREE;
 775		bp->l_ceor = bp->l_eor;
 776		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 777		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
 778			 cant_write);
 779		INCREMENT(lmStat.full_page);
 780	}
 781	/* page is not yet full */
 782	else {
 783		bp->l_ceor = tblk->eor;	/* ? bp->l_ceor = bp->l_eor; */
 784		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 785		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
 786		INCREMENT(lmStat.partial_page);
 787	}
 788}
 789
 790/*
 791 * NAME:	lmPostGC()
 792 *
 793 * FUNCTION:	group commit post-processing
 794 *	Processes transactions after their commit records have been written
 795 *	to disk, redriving log I/O if necessary.
 796 *
 797 * RETURN:	None
 798 *
 799 * NOTE:
 800 *	This routine is called a interrupt time by lbmIODone
 801 */
 802static void lmPostGC(struct lbuf * bp)
 803{
 804	unsigned long flags;
 805	struct jfs_log *log = bp->l_log;
 806	struct logpage *lp;
 807	struct tblock *tblk, *temp;
 808
 809	//LOGGC_LOCK(log);
 810	spin_lock_irqsave(&log->gclock, flags);
 811	/*
 812	 * current pageout of group commit completed.
 813	 *
 814	 * remove/wakeup transactions from commit queue who were
 815	 * group committed with the current log page
 816	 */
 817	list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
 818		if (!(tblk->flag & tblkGC_COMMIT))
 819			break;
 820		/* if transaction was marked GC_COMMIT then
 821		 * it has been shipped in the current pageout
 822		 * and made it to disk - it is committed.
 823		 */
 824
 825		if (bp->l_flag & lbmERROR)
 826			tblk->flag |= tblkGC_ERROR;
 827
 828		/* remove it from the commit queue */
 829		list_del(&tblk->cqueue);
 830		tblk->flag &= ~tblkGC_QUEUE;
 831
 832		if (tblk == log->flush_tblk) {
 833			/* we can stop flushing the log now */
 834			clear_bit(log_FLUSH, &log->flag);
 835			log->flush_tblk = NULL;
 836		}
 837
 838		jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
 839			 tblk->flag);
 840
 841		if (!(tblk->xflag & COMMIT_FORCE))
 842			/*
 843			 * Hand tblk over to lazy commit thread
 844			 */
 845			txLazyUnlock(tblk);
 846		else {
 847			/* state transition: COMMIT -> COMMITTED */
 848			tblk->flag |= tblkGC_COMMITTED;
 849
 850			if (tblk->flag & tblkGC_READY)
 851				log->gcrtc--;
 852
 853			LOGGC_WAKEUP(tblk);
 854		}
 855
 856		/* was page full before pageout ?
 857		 * (and this is the last tblk bound with the page)
 858		 */
 859		if (tblk->flag & tblkGC_FREE)
 860			lbmFree(bp);
 861		/* did page become full after pageout ?
 862		 * (and this is the last tblk bound with the page)
 863		 */
 864		else if (tblk->flag & tblkGC_EOP) {
 865			/* finalize the page */
 866			lp = (struct logpage *) bp->l_ldata;
 867			bp->l_ceor = bp->l_eor;
 868			lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
 869			jfs_info("lmPostGC: calling lbmWrite");
 870			lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
 871				 1);
 872		}
 873
 874	}
 875
 876	/* are there any transactions who have entered lnGroupCommit()
 877	 * (whose COMMITs are after that of the last log page written.
 878	 * They are waiting for new group commit (above at (SLEEP 1))
 879	 * or lazy transactions are on a full (queued) log page,
 880	 * select the latest ready transaction as new group leader and
 881	 * wake her up to lead her group.
 882	 */
 883	if ((!list_empty(&log->cqueue)) &&
 884	    ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
 885	     test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
 886		/*
 887		 * Call lmGCwrite with new group leader
 888		 */
 889		lmGCwrite(log, 1);
 890
 891	/* no transaction are ready yet (transactions are only just
 892	 * queued (GC_QUEUE) and not entered for group commit yet).
 893	 * the first transaction entering group commit
 894	 * will elect herself as new group leader.
 895	 */
 896	else
 897		log->cflag &= ~logGC_PAGEOUT;
 898
 899	//LOGGC_UNLOCK(log);
 900	spin_unlock_irqrestore(&log->gclock, flags);
 901	return;
 902}
 903
 904/*
 905 * NAME:	lmLogSync()
 906 *
 907 * FUNCTION:	write log SYNCPT record for specified log
 908 *	if new sync address is available
 909 *	(normally the case if sync() is executed by back-ground
 910 *	process).
 911 *	calculate new value of i_nextsync which determines when
 912 *	this code is called again.
 913 *
 914 * PARAMETERS:	log	- log structure
 915 *		hard_sync - 1 to force all metadata to be written
 916 *
 917 * RETURN:	0
 918 *
 919 * serialization: LOG_LOCK() held on entry/exit
 920 */
 921static int lmLogSync(struct jfs_log * log, int hard_sync)
 922{
 923	int logsize;
 924	int written;		/* written since last syncpt */
 925	int free;		/* free space left available */
 926	int delta;		/* additional delta to write normally */
 927	int more;		/* additional write granted */
 928	struct lrd lrd;
 929	int lsn;
 930	struct logsyncblk *lp;
 931	unsigned long flags;
 932
 933	/* push dirty metapages out to disk */
 934	if (hard_sync)
 935		write_special_inodes(log, filemap_fdatawrite);
 936	else
 937		write_special_inodes(log, filemap_flush);
 938
 939	/*
 940	 *	forward syncpt
 941	 */
 942	/* if last sync is same as last syncpt,
 943	 * invoke sync point forward processing to update sync.
 944	 */
 945
 946	if (log->sync == log->syncpt) {
 947		LOGSYNC_LOCK(log, flags);
 948		if (list_empty(&log->synclist))
 949			log->sync = log->lsn;
 950		else {
 951			lp = list_entry(log->synclist.next,
 952					struct logsyncblk, synclist);
 953			log->sync = lp->lsn;
 954		}
 955		LOGSYNC_UNLOCK(log, flags);
 956
 957	}
 958
 959	/* if sync is different from last syncpt,
 960	 * write a SYNCPT record with syncpt = sync.
 961	 * reset syncpt = sync
 962	 */
 963	if (log->sync != log->syncpt) {
 964		lrd.logtid = 0;
 965		lrd.backchain = 0;
 966		lrd.type = cpu_to_le16(LOG_SYNCPT);
 967		lrd.length = 0;
 968		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
 969		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
 970
 971		log->syncpt = log->sync;
 972	} else
 973		lsn = log->lsn;
 974
 975	/*
 976	 *	setup next syncpt trigger (SWAG)
 977	 */
 978	logsize = log->logsize;
 979
 980	logdiff(written, lsn, log);
 981	free = logsize - written;
 982	delta = LOGSYNC_DELTA(logsize);
 983	more = min(free / 2, delta);
 984	if (more < 2 * LOGPSIZE) {
 985		jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
 986		/*
 987		 *	log wrapping
 988		 *
 989		 * option 1 - panic ? No.!
 990		 * option 2 - shutdown file systems
 991		 *	      associated with log ?
 992		 * option 3 - extend log ?
 993		 * option 4 - second chance
 994		 *
 995		 * mark log wrapped, and continue.
 996		 * when all active transactions are completed,
 997		 * mark log valid for recovery.
 998		 * if crashed during invalid state, log state
 999		 * implies invalid log, forcing fsck().
1000		 */
1001		/* mark log state log wrap in log superblock */
1002		/* log->state = LOGWRAP; */
1003
1004		/* reset sync point computation */
1005		log->syncpt = log->sync = lsn;
1006		log->nextsync = delta;
1007	} else
1008		/* next syncpt trigger = written + more */
1009		log->nextsync = written + more;
1010
1011	/* if number of bytes written from last sync point is more
1012	 * than 1/4 of the log size, stop new transactions from
1013	 * starting until all current transactions are completed
1014	 * by setting syncbarrier flag.
1015	 */
1016	if (!test_bit(log_SYNCBARRIER, &log->flag) &&
1017	    (written > LOGSYNC_BARRIER(logsize)) && log->active) {
1018		set_bit(log_SYNCBARRIER, &log->flag);
1019		jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1020			 log->syncpt);
1021		/*
1022		 * We may have to initiate group commit
1023		 */
1024		jfs_flush_journal(log, 0);
1025	}
1026
1027	return lsn;
1028}
1029
1030/*
1031 * NAME:	jfs_syncpt
1032 *
1033 * FUNCTION:	write log SYNCPT record for specified log
1034 *
1035 * PARAMETERS:	log	  - log structure
1036 *		hard_sync - set to 1 to force metadata to be written
1037 */
1038void jfs_syncpt(struct jfs_log *log, int hard_sync)
1039{	LOG_LOCK(log);
1040	if (!test_bit(log_QUIESCE, &log->flag))
1041		lmLogSync(log, hard_sync);
1042	LOG_UNLOCK(log);
1043}
1044
1045/*
1046 * NAME:	lmLogOpen()
1047 *
1048 * FUNCTION:	open the log on first open;
1049 *	insert filesystem in the active list of the log.
1050 *
1051 * PARAMETER:	ipmnt	- file system mount inode
1052 *		iplog	- log inode (out)
1053 *
1054 * RETURN:
1055 *
1056 * serialization:
1057 */
1058int lmLogOpen(struct super_block *sb)
1059{
1060	int rc;
1061	struct file *bdev_file;
1062	struct jfs_log *log;
1063	struct jfs_sb_info *sbi = JFS_SBI(sb);
1064
1065	if (sbi->flag & JFS_NOINTEGRITY)
1066		return open_dummy_log(sb);
1067
1068	if (sbi->mntflag & JFS_INLINELOG)
1069		return open_inline_log(sb);
1070
1071	mutex_lock(&jfs_log_mutex);
1072	list_for_each_entry(log, &jfs_external_logs, journal_list) {
1073		if (file_bdev(log->bdev_file)->bd_dev == sbi->logdev) {
1074			if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
 
1075				jfs_warn("wrong uuid on JFS journal");
1076				mutex_unlock(&jfs_log_mutex);
1077				return -EINVAL;
1078			}
1079			/*
1080			 * add file system to log active file system list
1081			 */
1082			if ((rc = lmLogFileSystem(log, sbi, 1))) {
1083				mutex_unlock(&jfs_log_mutex);
1084				return rc;
1085			}
1086			goto journal_found;
1087		}
1088	}
1089
1090	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
1091		mutex_unlock(&jfs_log_mutex);
1092		return -ENOMEM;
1093	}
1094	INIT_LIST_HEAD(&log->sb_list);
1095	init_waitqueue_head(&log->syncwait);
1096
1097	/*
1098	 *	external log as separate logical volume
1099	 *
1100	 * file systems to log may have n-to-1 relationship;
1101	 */
1102
1103	bdev_file = bdev_file_open_by_dev(sbi->logdev,
1104			BLK_OPEN_READ | BLK_OPEN_WRITE, log, NULL);
1105	if (IS_ERR(bdev_file)) {
1106		rc = PTR_ERR(bdev_file);
1107		goto free;
1108	}
1109
1110	log->bdev_file = bdev_file;
1111	uuid_copy(&log->uuid, &sbi->loguuid);
1112
1113	/*
1114	 * initialize log:
1115	 */
1116	if ((rc = lmLogInit(log)))
1117		goto close;
1118
1119	list_add(&log->journal_list, &jfs_external_logs);
1120
1121	/*
1122	 * add file system to log active file system list
1123	 */
1124	if ((rc = lmLogFileSystem(log, sbi, 1)))
1125		goto shutdown;
1126
1127journal_found:
1128	LOG_LOCK(log);
1129	list_add(&sbi->log_list, &log->sb_list);
1130	sbi->log = log;
1131	LOG_UNLOCK(log);
1132
1133	mutex_unlock(&jfs_log_mutex);
1134	return 0;
1135
1136	/*
1137	 *	unwind on error
1138	 */
1139      shutdown:		/* unwind lbmLogInit() */
1140	list_del(&log->journal_list);
1141	lbmLogShutdown(log);
1142
1143      close:		/* close external log device */
1144	bdev_fput(bdev_file);
1145
1146      free:		/* free log descriptor */
1147	mutex_unlock(&jfs_log_mutex);
1148	kfree(log);
1149
1150	jfs_warn("lmLogOpen: exit(%d)", rc);
1151	return rc;
1152}
1153
1154static int open_inline_log(struct super_block *sb)
1155{
1156	struct jfs_log *log;
1157	int rc;
1158
1159	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1160		return -ENOMEM;
1161	INIT_LIST_HEAD(&log->sb_list);
1162	init_waitqueue_head(&log->syncwait);
1163
1164	set_bit(log_INLINELOG, &log->flag);
1165	log->bdev_file = sb->s_bdev_file;
1166	log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1167	log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1168	    (L2LOGPSIZE - sb->s_blocksize_bits);
1169	log->l2bsize = sb->s_blocksize_bits;
1170	ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1171
1172	/*
1173	 * initialize log.
1174	 */
1175	if ((rc = lmLogInit(log))) {
1176		kfree(log);
1177		jfs_warn("lmLogOpen: exit(%d)", rc);
1178		return rc;
1179	}
1180
1181	list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
1182	JFS_SBI(sb)->log = log;
1183
1184	return rc;
1185}
1186
1187static int open_dummy_log(struct super_block *sb)
1188{
1189	int rc;
1190
1191	mutex_lock(&jfs_log_mutex);
1192	if (!dummy_log) {
1193		dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
1194		if (!dummy_log) {
1195			mutex_unlock(&jfs_log_mutex);
1196			return -ENOMEM;
1197		}
1198		INIT_LIST_HEAD(&dummy_log->sb_list);
1199		init_waitqueue_head(&dummy_log->syncwait);
1200		dummy_log->no_integrity = 1;
1201		/* Make up some stuff */
1202		dummy_log->base = 0;
1203		dummy_log->size = 1024;
1204		rc = lmLogInit(dummy_log);
1205		if (rc) {
1206			kfree(dummy_log);
1207			dummy_log = NULL;
1208			mutex_unlock(&jfs_log_mutex);
1209			return rc;
1210		}
1211	}
1212
1213	LOG_LOCK(dummy_log);
1214	list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
1215	JFS_SBI(sb)->log = dummy_log;
1216	LOG_UNLOCK(dummy_log);
1217	mutex_unlock(&jfs_log_mutex);
1218
1219	return 0;
1220}
1221
1222/*
1223 * NAME:	lmLogInit()
1224 *
1225 * FUNCTION:	log initialization at first log open.
1226 *
1227 *	logredo() (or logformat()) should have been run previously.
1228 *	initialize the log from log superblock.
1229 *	set the log state in the superblock to LOGMOUNT and
1230 *	write SYNCPT log record.
1231 *
1232 * PARAMETER:	log	- log structure
1233 *
1234 * RETURN:	0	- if ok
1235 *		-EINVAL	- bad log magic number or superblock dirty
1236 *		error returned from logwait()
1237 *
1238 * serialization: single first open thread
1239 */
1240int lmLogInit(struct jfs_log * log)
1241{
1242	int rc = 0;
1243	struct lrd lrd;
1244	struct logsuper *logsuper;
1245	struct lbuf *bpsuper;
1246	struct lbuf *bp;
1247	struct logpage *lp;
1248	int lsn = 0;
1249
1250	jfs_info("lmLogInit: log:0x%p", log);
1251
1252	/* initialize the group commit serialization lock */
1253	LOGGC_LOCK_INIT(log);
1254
1255	/* allocate/initialize the log write serialization lock */
1256	LOG_LOCK_INIT(log);
1257
1258	LOGSYNC_LOCK_INIT(log);
1259
1260	INIT_LIST_HEAD(&log->synclist);
1261
1262	INIT_LIST_HEAD(&log->cqueue);
1263	log->flush_tblk = NULL;
1264
1265	log->count = 0;
1266
1267	/*
1268	 * initialize log i/o
1269	 */
1270	if ((rc = lbmLogInit(log)))
1271		return rc;
1272
1273	if (!test_bit(log_INLINELOG, &log->flag))
1274		log->l2bsize = L2LOGPSIZE;
1275
1276	/* check for disabled journaling to disk */
1277	if (log->no_integrity) {
1278		/*
1279		 * Journal pages will still be filled.  When the time comes
1280		 * to actually do the I/O, the write is not done, and the
1281		 * endio routine is called directly.
1282		 */
1283		bp = lbmAllocate(log , 0);
1284		log->bp = bp;
1285		bp->l_pn = bp->l_eor = 0;
1286	} else {
1287		/*
1288		 * validate log superblock
1289		 */
1290		if ((rc = lbmRead(log, 1, &bpsuper)))
1291			goto errout10;
1292
1293		logsuper = (struct logsuper *) bpsuper->l_ldata;
1294
1295		if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1296			jfs_warn("*** Log Format Error ! ***");
1297			rc = -EINVAL;
1298			goto errout20;
1299		}
1300
1301		/* logredo() should have been run successfully. */
1302		if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1303			jfs_warn("*** Log Is Dirty ! ***");
1304			rc = -EINVAL;
1305			goto errout20;
1306		}
1307
1308		/* initialize log from log superblock */
1309		if (test_bit(log_INLINELOG,&log->flag)) {
1310			if (log->size != le32_to_cpu(logsuper->size)) {
1311				rc = -EINVAL;
1312				goto errout20;
1313			}
1314			jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1315				 log, (unsigned long long)log->base, log->size);
1316		} else {
1317			if (!uuid_equal(&logsuper->uuid, &log->uuid)) {
1318				jfs_warn("wrong uuid on JFS log device");
1319				rc = -EINVAL;
1320				goto errout20;
1321			}
1322			log->size = le32_to_cpu(logsuper->size);
1323			log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1324			jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1325				 log, (unsigned long long)log->base, log->size);
1326		}
1327
1328		log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1329		log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1330
1331		/*
1332		 * initialize for log append write mode
1333		 */
1334		/* establish current/end-of-log page/buffer */
1335		if ((rc = lbmRead(log, log->page, &bp)))
1336			goto errout20;
1337
1338		lp = (struct logpage *) bp->l_ldata;
1339
1340		jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1341			 le32_to_cpu(logsuper->end), log->page, log->eor,
1342			 le16_to_cpu(lp->h.eor));
1343
1344		log->bp = bp;
1345		bp->l_pn = log->page;
1346		bp->l_eor = log->eor;
1347
1348		/* if current page is full, move on to next page */
1349		if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1350			lmNextPage(log);
1351
1352		/*
1353		 * initialize log syncpoint
1354		 */
1355		/*
1356		 * write the first SYNCPT record with syncpoint = 0
1357		 * (i.e., log redo up to HERE !);
1358		 * remove current page from lbm write queue at end of pageout
1359		 * (to write log superblock update), but do not release to
1360		 * freelist;
1361		 */
1362		lrd.logtid = 0;
1363		lrd.backchain = 0;
1364		lrd.type = cpu_to_le16(LOG_SYNCPT);
1365		lrd.length = 0;
1366		lrd.log.syncpt.sync = 0;
1367		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1368		bp = log->bp;
1369		bp->l_ceor = bp->l_eor;
1370		lp = (struct logpage *) bp->l_ldata;
1371		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1372		lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1373		if ((rc = lbmIOWait(bp, 0)))
1374			goto errout30;
1375
1376		/*
1377		 * update/write superblock
1378		 */
1379		logsuper->state = cpu_to_le32(LOGMOUNT);
1380		log->serial = le32_to_cpu(logsuper->serial) + 1;
1381		logsuper->serial = cpu_to_le32(log->serial);
1382		lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1383		if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1384			goto errout30;
1385	}
1386
1387	/* initialize logsync parameters */
1388	log->logsize = (log->size - 2) << L2LOGPSIZE;
1389	log->lsn = lsn;
1390	log->syncpt = lsn;
1391	log->sync = log->syncpt;
1392	log->nextsync = LOGSYNC_DELTA(log->logsize);
1393
1394	jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1395		 log->lsn, log->syncpt, log->sync);
1396
1397	/*
1398	 * initialize for lazy/group commit
1399	 */
1400	log->clsn = lsn;
1401
1402	return 0;
1403
1404	/*
1405	 *	unwind on error
1406	 */
1407      errout30:		/* release log page */
1408	log->wqueue = NULL;
1409	bp->l_wqnext = NULL;
1410	lbmFree(bp);
1411
1412      errout20:		/* release log superblock */
1413	lbmFree(bpsuper);
1414
1415      errout10:		/* unwind lbmLogInit() */
1416	lbmLogShutdown(log);
1417
1418	jfs_warn("lmLogInit: exit(%d)", rc);
1419	return rc;
1420}
1421
1422
1423/*
1424 * NAME:	lmLogClose()
1425 *
1426 * FUNCTION:	remove file system <ipmnt> from active list of log <iplog>
1427 *		and close it on last close.
1428 *
1429 * PARAMETER:	sb	- superblock
1430 *
1431 * RETURN:	errors from subroutines
1432 *
1433 * serialization:
1434 */
1435int lmLogClose(struct super_block *sb)
1436{
1437	struct jfs_sb_info *sbi = JFS_SBI(sb);
1438	struct jfs_log *log = sbi->log;
1439	struct file *bdev_file;
1440	int rc = 0;
1441
1442	jfs_info("lmLogClose: log:0x%p", log);
1443
1444	mutex_lock(&jfs_log_mutex);
1445	LOG_LOCK(log);
1446	list_del(&sbi->log_list);
1447	LOG_UNLOCK(log);
1448	sbi->log = NULL;
1449
1450	/*
1451	 * We need to make sure all of the "written" metapages
1452	 * actually make it to disk
1453	 */
1454	sync_blockdev(sb->s_bdev);
1455
1456	if (test_bit(log_INLINELOG, &log->flag)) {
1457		/*
1458		 *	in-line log in host file system
1459		 */
1460		rc = lmLogShutdown(log);
1461		kfree(log);
1462		goto out;
1463	}
1464
1465	if (!log->no_integrity)
1466		lmLogFileSystem(log, sbi, 0);
1467
1468	if (!list_empty(&log->sb_list))
1469		goto out;
1470
1471	/*
1472	 * TODO: ensure that the dummy_log is in a state to allow
1473	 * lbmLogShutdown to deallocate all the buffers and call
1474	 * kfree against dummy_log.  For now, leave dummy_log & its
1475	 * buffers in memory, and resuse if another no-integrity mount
1476	 * is requested.
1477	 */
1478	if (log->no_integrity)
1479		goto out;
1480
1481	/*
1482	 *	external log as separate logical volume
1483	 */
1484	list_del(&log->journal_list);
1485	bdev_file = log->bdev_file;
1486	rc = lmLogShutdown(log);
1487
1488	bdev_fput(bdev_file);
1489
1490	kfree(log);
1491
1492      out:
1493	mutex_unlock(&jfs_log_mutex);
1494	jfs_info("lmLogClose: exit(%d)", rc);
1495	return rc;
1496}
1497
1498
1499/*
1500 * NAME:	jfs_flush_journal()
1501 *
1502 * FUNCTION:	initiate write of any outstanding transactions to the journal
1503 *		and optionally wait until they are all written to disk
1504 *
1505 *		wait == 0  flush until latest txn is committed, don't wait
1506 *		wait == 1  flush until latest txn is committed, wait
1507 *		wait > 1   flush until all txn's are complete, wait
1508 */
1509void jfs_flush_journal(struct jfs_log *log, int wait)
1510{
1511	int i;
1512	struct tblock *target = NULL;
1513
1514	/* jfs_write_inode may call us during read-only mount */
1515	if (!log)
1516		return;
1517
1518	jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);
1519
1520	LOGGC_LOCK(log);
1521
1522	if (!list_empty(&log->cqueue)) {
1523		/*
1524		 * This ensures that we will keep writing to the journal as long
1525		 * as there are unwritten commit records
1526		 */
1527		target = list_entry(log->cqueue.prev, struct tblock, cqueue);
1528
1529		if (test_bit(log_FLUSH, &log->flag)) {
1530			/*
1531			 * We're already flushing.
1532			 * if flush_tblk is NULL, we are flushing everything,
1533			 * so leave it that way.  Otherwise, update it to the
1534			 * latest transaction
1535			 */
1536			if (log->flush_tblk)
1537				log->flush_tblk = target;
1538		} else {
1539			/* Only flush until latest transaction is committed */
1540			log->flush_tblk = target;
1541			set_bit(log_FLUSH, &log->flag);
1542
1543			/*
1544			 * Initiate I/O on outstanding transactions
1545			 */
1546			if (!(log->cflag & logGC_PAGEOUT)) {
1547				log->cflag |= logGC_PAGEOUT;
1548				lmGCwrite(log, 0);
1549			}
1550		}
1551	}
1552	if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
1553		/* Flush until all activity complete */
1554		set_bit(log_FLUSH, &log->flag);
1555		log->flush_tblk = NULL;
1556	}
1557
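	/*
	 * Open-coded sleep: register on target->gcwait and set the task
	 * state while still holding LOGGC_LOCK, so the wakeup from the
	 * group commit completion path (lmPostGC) cannot slip in between
	 * LOGGC_UNLOCK() and schedule() and be lost.
	 */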
1558	if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
1559		DECLARE_WAITQUEUE(__wait, current);
1560
1561		add_wait_queue(&target->gcwait, &__wait);
1562		set_current_state(TASK_UNINTERRUPTIBLE);
1563		LOGGC_UNLOCK(log);
1564		schedule();
1565		LOGGC_LOCK(log);
1566		remove_wait_queue(&target->gcwait, &__wait);
1567	}
1568	LOGGC_UNLOCK(log);
1569
1570	if (wait < 2)
1571		return;
1572
1573	write_special_inodes(log, filemap_fdatawrite);
1574
1575	/*
1576	 * If there was recent activity, we may need to wait
1577	 * for the lazycommit thread to catch up
1578	 */
1579	if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
1580		for (i = 0; i < 200; i++) {	/* Too much? */
1581			msleep(250);
1582			write_special_inodes(log, filemap_fdatawrite);
1583			if (list_empty(&log->cqueue) &&
1584			    list_empty(&log->synclist))
1585				break;
1586		}
1587	}
1588	assert(list_empty(&log->cqueue));
1589
1590#ifdef CONFIG_JFS_DEBUG
1591	if (!list_empty(&log->synclist)) {
1592		struct logsyncblk *lp;
1593
1594		printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
1595		list_for_each_entry(lp, &log->synclist, synclist) {
1596			if (lp->xflag & COMMIT_PAGE) {
1597				struct metapage *mp = (struct metapage *)lp;
1598				print_hex_dump(KERN_ERR, "metapage: ",
1599					       DUMP_PREFIX_ADDRESS, 16, 4,
1600					       mp, sizeof(struct metapage), 0);
1601				print_hex_dump(KERN_ERR, "page: ",
1602					       DUMP_PREFIX_ADDRESS, 16,
1603					       sizeof(long), mp->folio,
1604					       sizeof(struct page), 0);
1605			} else
1606				print_hex_dump(KERN_ERR, "tblock:",
1607					       DUMP_PREFIX_ADDRESS, 16, 4,
1608					       lp, sizeof(struct tblock), 0);
1609		}
1610	}
1611#else
1612	WARN_ON(!list_empty(&log->synclist));
1613#endif
1614	clear_bit(log_FLUSH, &log->flag);
1615}
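
/*
 * Illustrative sketch (not part of the original file, never called):
 * the three wait levels a jfs_flush_journal() caller can ask for,
 * matching the function comment above.
 */
static inline void __maybe_unused jfs_flush_modes_sketch(struct jfs_log *log)
{
	jfs_flush_journal(log, 0);	/* start I/O on commits, don't wait */
	jfs_flush_journal(log, 1);	/* wait for latest txn to commit */
	jfs_flush_journal(log, 2);	/* drain: wait for all txns to finish */
}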
1616
1617/*
1618 * NAME:	lmLogShutdown()
1619 *
1620 * FUNCTION:	log shutdown at last LogClose().
1621 *
1622 *		write log syncpt record.
1623 *		update super block to set redone flag to 0.
1624 *
1625 * PARAMETER:	log	- log inode
1626 *
1627 * RETURN:	0	- success
1628 *
1629 * serialization: single last close thread
1630 */
1631int lmLogShutdown(struct jfs_log * log)
1632{
1633	int rc;
1634	struct lrd lrd;
1635	int lsn;
1636	struct logsuper *logsuper;
1637	struct lbuf *bpsuper;
1638	struct lbuf *bp;
1639	struct logpage *lp;
1640
1641	jfs_info("lmLogShutdown: log:0x%p", log);
1642
1643	jfs_flush_journal(log, 2);
1644
1645	/*
1646	 * write the last SYNCPT record with syncpoint = 0
1647	 * (i.e., log redo up to HERE !)
1648	 */
1649	lrd.logtid = 0;
1650	lrd.backchain = 0;
1651	lrd.type = cpu_to_le16(LOG_SYNCPT);
1652	lrd.length = 0;
1653	lrd.log.syncpt.sync = 0;
1654
1655	lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1656	bp = log->bp;
1657	lp = (struct logpage *) bp->l_ldata;
1658	lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1659	lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
1660	lbmIOWait(log->bp, lbmFREE);
1661	log->bp = NULL;
1662
1663	/*
1664	 * synchronous update log superblock
1665	 * mark log state as shutdown cleanly
1666	 * (i.e., Log does not need to be replayed).
1667	 */
1668	if ((rc = lbmRead(log, 1, &bpsuper)))
1669		goto out;
1670
1671	logsuper = (struct logsuper *) bpsuper->l_ldata;
1672	logsuper->state = cpu_to_le32(LOGREDONE);
1673	logsuper->end = cpu_to_le32(lsn);
1674	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1675	rc = lbmIOWait(bpsuper, lbmFREE);
1676
1677	jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
1678		 lsn, log->page, log->eor);
1679
1680      out:
1681	/*
1682	 * shutdown per log i/o
1683	 */
1684	lbmLogShutdown(log);
1685
1686	if (rc) {
1687		jfs_warn("lmLogShutdown: exit(%d)", rc);
1688	}
1689	return rc;
1690}
1691
1692
1693/*
1694 * NAME:	lmLogFileSystem()
1695 *
1696 * FUNCTION:	insert (<activate> = true)/remove (<activate> = false)
1697 *	file system into/from log active file system list.
1698 *
1699 * PARAMETER:	log	- pointer to the log
1700 *		sbi	- jfs_sb_info of the file system being
1701 *			  inserted/removed
1702 *		activate - insert/remove device from active list.
1703 *
1704 * RETURN:	0	- success
1705 *		errors returned by lbmRead()/lbmIOWait().
1706 */
1707static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
1708			   int activate)
1709{
1710	int rc = 0;
1711	int i;
1712	struct logsuper *logsuper;
1713	struct lbuf *bpsuper;
1714	uuid_t *uuid = &sbi->uuid;
1715
1716	/*
1717	 * insert/remove file system device to log active file system list.
1718	 */
1719	if ((rc = lbmRead(log, 1, &bpsuper)))
1720		return rc;
1721
1722	logsuper = (struct logsuper *) bpsuper->l_ldata;
1723	if (activate) {
1724		for (i = 0; i < MAX_ACTIVE; i++)
1725			if (uuid_is_null(&logsuper->active[i].uuid)) {
1726				uuid_copy(&logsuper->active[i].uuid, uuid);
1727				sbi->aggregate = i;
1728				break;
1729			}
1730		if (i == MAX_ACTIVE) {
1731			jfs_warn("Too many file systems sharing journal!");
1732			lbmFree(bpsuper);
1733			return -EMFILE;	/* Is there a better rc? */
1734		}
1735	} else {
1736		for (i = 0; i < MAX_ACTIVE; i++)
1737			if (uuid_equal(&logsuper->active[i].uuid, uuid)) {
1738				uuid_copy(&logsuper->active[i].uuid,
1739					  &uuid_null);
1740				break;
1741			}
1742		if (i == MAX_ACTIVE) {
1743			jfs_warn("Somebody stomped on the journal!");
1744			lbmFree(bpsuper);
1745			return -EIO;
1746		}
1747
1748	}
1749
1750	/*
1751	 * synchronous write log superblock:
1752	 *
1753	 * write sidestream bypassing write queue:
1754	 * at file system mount, log super block is updated for
1755	 * activation of the file system before any log record
1756	 * (MOUNT record) of the file system, and at file system
1757	 * unmount, all meta data for the file system has been
1758	 * flushed before log super block is updated for deactivation
1759	 * of the file system.
1760	 */
1761	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1762	rc = lbmIOWait(bpsuper, lbmFREE);
1763
1764	return rc;
1765}
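
/*
 * Illustrative sketch (not part of the original file): count the in-use
 * slots of the active table scanned above.  Each file system sharing
 * this journal occupies one uuid slot, capped at MAX_ACTIVE.
 */
static inline int __maybe_unused active_slots_sketch(struct logsuper *logsuper)
{
	int i, n = 0;

	for (i = 0; i < MAX_ACTIVE; i++)
		if (!uuid_is_null(&logsuper->active[i].uuid))
			n++;	/* slot in use by a mounted file system */
	return n;
}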
1766
1767/*
1768 *		log buffer manager (lbm)
1769 *		------------------------
1770 *
1771 * special purpose buffer manager supporting log i/o requirements.
1772 *
1773 * per log write queue:
1774 * log pageout occurs in serial order through a fifo write queue,
1775 * restricted to a single i/o in progress at any one time.
1776 * the queue is a circular singly-linked list
1777 * (log->wqueue points to the tail, and buffers are linked via the
1778 * bp->l_wqnext field), and
1779 * holds log pages in pageout or waiting for pageout, in serial order.
1780 */
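
/*
 * Sketch of the tail-pointer circular list described above (the real
 * insertion is in lbmWrite() below): log->wqueue points at the tail,
 * and tail->l_wqnext is the head, so both ends are reachable from a
 * single pointer in O(1).
 */
static inline void __maybe_unused wqueue_push_sketch(struct jfs_log *log,
						     struct lbuf *bp)
{
	struct lbuf *tail = log->wqueue;

	if (tail == NULL)
		bp->l_wqnext = bp;		/* lone element points at itself */
	else {
		bp->l_wqnext = tail->l_wqnext;	/* new tail -> old head */
		tail->l_wqnext = bp;		/* old tail -> new tail */
	}
	log->wqueue = bp;			/* advance the tail pointer */
}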
1781
1782/*
1783 *	lbmLogInit()
1784 *
1785 * initialize per log I/O setup at lmLogInit()
1786 */
1787static int lbmLogInit(struct jfs_log * log)
1788{				/* log inode */
1789	int i;
1790	struct lbuf *lbuf;
1791
1792	jfs_info("lbmLogInit: log:0x%p", log);
1793
1794	/* initialize current buffer cursor */
1795	log->bp = NULL;
1796
1797	/* initialize log device write queue */
1798	log->wqueue = NULL;
1799
1800	/*
1801	 * Each log has its own buffer pages allocated to it.  These are
1802	 * not managed by the page cache.  This ensures that a transaction
1803	 * writing to the log does not block trying to allocate a page from
1804	 * the page cache (for the log).  This would be bad, since page
1805	 * allocation waits on the kswapd thread that may be committing inodes
1806	 * which would cause log activity.  Was that clear?  I'm trying to
1807	 * avoid deadlock here.
1808	 */
1809	init_waitqueue_head(&log->free_wait);
1810
1811	log->lbuf_free = NULL;
1812
1813	for (i = 0; i < LOGPAGES;) {
1814		char *buffer;
1815		uint offset;
1816		struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
1817
1818		if (!page)
1819			goto error;
1820		buffer = page_address(page);
1821		for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
1822			lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1823			if (lbuf == NULL) {
1824				if (offset == 0)
1825					__free_page(page);
1826				goto error;
1827			}
1828			if (offset) /* we already have one reference */
1829				get_page(page);
1830			lbuf->l_offset = offset;
1831			lbuf->l_ldata = buffer + offset;
1832			lbuf->l_page = page;
1833			lbuf->l_log = log;
1834			init_waitqueue_head(&lbuf->l_ioevent);
1835
1836			lbuf->l_freelist = log->lbuf_free;
1837			log->lbuf_free = lbuf;
1838			i++;
1839		}
1840	}
1841
1842	return (0);
1843
1844      error:
1845	lbmLogShutdown(log);
1846	return -ENOMEM;
1847}
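
/*
 * Sketch of the carving arithmetic above (assumes LOGPSIZE evenly
 * divides PAGE_SIZE): every LOGPSIZE slice of a page becomes one lbuf
 * holding its own page reference, so lbmLogShutdown() below may simply
 * __free_page() once per lbuf; the page is freed with its last slice.
 */
static inline unsigned int __maybe_unused lbufs_per_page_sketch(void)
{
	return PAGE_SIZE / LOGPSIZE;	/* page refs held per backing page */
}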
1848
1849
1850/*
1851 *	lbmLogShutdown()
1852 *
1853 * finalize per log I/O setup at lmLogShutdown()
1854 */
1855static void lbmLogShutdown(struct jfs_log * log)
1856{
1857	struct lbuf *lbuf;
1858
1859	jfs_info("lbmLogShutdown: log:0x%p", log);
1860
1861	lbuf = log->lbuf_free;
1862	while (lbuf) {
1863		struct lbuf *next = lbuf->l_freelist;
1864		__free_page(lbuf->l_page);
1865		kfree(lbuf);
1866		lbuf = next;
1867	}
1868}
1869
1870
1871/*
1872 *	lbmAllocate()
1873 *
1874 * allocate an empty log buffer
1875 */
1876static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1877{
1878	struct lbuf *bp;
1879	unsigned long flags;
1880
1881	/*
1882	 * recycle from log buffer freelist if any
1883	 */
1884	LCACHE_LOCK(flags);
1885	LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1886	log->lbuf_free = bp->l_freelist;
1887	LCACHE_UNLOCK(flags);
1888
1889	bp->l_flag = 0;
1890
1891	bp->l_wqnext = NULL;
1892	bp->l_freelist = NULL;
1893
1894	bp->l_pn = pn;
1895	bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1896	bp->l_ceor = 0;
1897
1898	return bp;
1899}
1900
1901
1902/*
1903 *	lbmFree()
1904 *
1905 * release a log buffer to freelist
1906 */
1907static void lbmFree(struct lbuf * bp)
1908{
1909	unsigned long flags;
1910
1911	LCACHE_LOCK(flags);
1912
1913	lbmfree(bp);
1914
1915	LCACHE_UNLOCK(flags);
1916}
1917
1918static void lbmfree(struct lbuf * bp)
1919{
1920	struct jfs_log *log = bp->l_log;
1921
1922	assert(bp->l_wqnext == NULL);
1923
1924	/*
1925	 * return the buffer to head of freelist
1926	 */
1927	bp->l_freelist = log->lbuf_free;
1928	log->lbuf_free = bp;
1929
1930	wake_up(&log->free_wait);
1931	return;
1932}
1933
1934
1935/*
1936 * NAME:	lbmRedrive
1937 *
1938 * FUNCTION:	add a log buffer to the log redrive list
1939 *
1940 * PARAMETER:
1941 *	bp	- log buffer
1942 *
1943 * NOTES:
1944 *	Takes log_redrive_lock.
1945 */
1946static inline void lbmRedrive(struct lbuf *bp)
1947{
1948	unsigned long flags;
1949
1950	spin_lock_irqsave(&log_redrive_lock, flags);
1951	bp->l_redrive_next = log_redrive_list;
1952	log_redrive_list = bp;
1953	spin_unlock_irqrestore(&log_redrive_lock, flags);
1954
1955	wake_up_process(jfsIOthread);
1956}
1957
1958
1959/*
1960 *	lbmRead()
1961 */
1962static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1963{
1964	struct bio *bio;
1965	struct lbuf *bp;
1966
1967	/*
1968	 * allocate a log buffer
1969	 */
1970	*bpp = bp = lbmAllocate(log, pn);
1971	jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1972
1973	bp->l_flag |= lbmREAD;
1974
1975	bio = bio_alloc(file_bdev(log->bdev_file), 1, REQ_OP_READ, GFP_NOFS);
1976	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
1977	__bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
1978	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
1979
1980	bio->bi_end_io = lbmIODone;
1981	bio->bi_private = bp;
1982	/* check if journaling to disk has been disabled */
1983	if (log->no_integrity) {
1984		bio->bi_iter.bi_size = 0;
1985		lbmIODone(bio);
1986	} else {
1987		submit_bio(bio);
1988	}
1989
1990	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
1991
1992	return 0;
1993}
1994
1995
1996/*
1997 *	lbmWrite()
1998 *
1999 * buffer at head of pageout queue stays after completion of
2000 * partial-page pageout and redriven by explicit initiation of
2001 * pageout by caller until full-page pageout is completed and
2002 * released.
2003 *
2004 * device driver i/o done redrives pageout of new buffer at
2005 * head of pageout queue when current buffer at head of pageout
2006 * queue is released at the completion of its full-page pageout.
2007 *
2008 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2009 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2010 */
2011static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2012		     int cant_block)
2013{
2014	struct lbuf *tail;
2015	unsigned long flags;
2016
2017	jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
2018
2019	/* map the logical block address to physical block address */
2020	bp->l_blkno =
2021	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2022
2023	LCACHE_LOCK(flags);		/* disable+lock */
2024
2025	/*
2026	 * initialize buffer for device driver
2027	 */
2028	bp->l_flag = flag;
2029
2030	/*
2031	 *	insert bp at tail of write queue associated with log
2032	 *
2033	 * (request is either for bp already/currently at head of queue
2034	 * or new bp to be inserted at tail)
2035	 */
2036	tail = log->wqueue;
2037
2038	/* is buffer not already on write queue ? */
2039	if (bp->l_wqnext == NULL) {
2040		/* insert at tail of wqueue */
2041		if (tail == NULL) {
2042			log->wqueue = bp;
2043			bp->l_wqnext = bp;
2044		} else {
2045			log->wqueue = bp;
2046			bp->l_wqnext = tail->l_wqnext;
2047			tail->l_wqnext = bp;
2048		}
2049
2050		tail = bp;
2051	}
2052
2053	/* is buffer at head of wqueue and for write ? */
2054	if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
2055		LCACHE_UNLOCK(flags);	/* unlock+enable */
2056		return;
2057	}
2058
2059	LCACHE_UNLOCK(flags);	/* unlock+enable */
2060
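	/*
	 * dispatch: an atomic caller hands the buffer to the jfsIO thread;
	 * a synchronous write is issued right here; otherwise LOGGC_LOCK
	 * is dropped across the submission, which may sleep.
	 */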
2061	if (cant_block)
2062		lbmRedrive(bp);
2063	else if (flag & lbmSYNC)
2064		lbmStartIO(bp);
2065	else {
2066		LOGGC_UNLOCK(log);
2067		lbmStartIO(bp);
2068		LOGGC_LOCK(log);
2069	}
2070}
2071
2072
2073/*
2074 *	lbmDirectWrite()
2075 *
2076 * initiate pageout bypassing write queue for sidestream
2077 * (e.g., log superblock) write;
2078 */
2079static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2080{
2081	jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2082		 bp, flag, bp->l_pn);
2083
2084	/*
2085	 * initialize buffer for device driver
2086	 */
2087	bp->l_flag = flag | lbmDIRECT;
2088
2089	/* map the logical block address to physical block address */
2090	bp->l_blkno =
2091	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2092
2093	/*
2094	 *	initiate pageout of the page
2095	 */
2096	lbmStartIO(bp);
2097}
2098
2099
2100/*
2101 * NAME:	lbmStartIO()
2102 *
2103 * FUNCTION:	Interface to DD strategy routine
2104 *
2105 * RETURN:	none
2106 *
2107 * serialization: LCACHE_LOCK() is NOT held during log i/o;
2108 */
2109static void lbmStartIO(struct lbuf * bp)
2110{
2111	struct bio *bio;
2112	struct jfs_log *log = bp->l_log;
2113	struct block_device *bdev = NULL;
2114
2115	jfs_info("lbmStartIO");
2116
2117	if (!log->no_integrity)
2118		bdev = file_bdev(log->bdev_file);
2119
2120	bio = bio_alloc(bdev, 1, REQ_OP_WRITE | REQ_SYNC,
2121			GFP_NOFS);
2122	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2123	__bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
2124	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
2125
2126	bio->bi_end_io = lbmIODone;
2127	bio->bi_private = bp;
2128
2129	/* check if journaling to disk has been disabled */
2130	if (log->no_integrity) {
2131		bio->bi_iter.bi_size = 0;
2132		lbmIODone(bio);
2133	} else {
2134		submit_bio(bio);
2135		INCREMENT(lmStat.submitted);
2136	}
2137}
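
/*
 * Sketch of the sector arithmetic used above (helper name is
 * illustrative): l_blkno counts file-system blocks of 1 << l2bsize
 * bytes, while the block layer addresses 512-byte sectors, hence the
 * shift by (l2bsize - 9).  E.g. with 4K blocks (l2bsize = 12), block 10
 * maps to sector 10 << 3 = 80.
 */
static inline sector_t __maybe_unused log_blk_to_sector_sketch(s64 blkno,
							       int l2bsize)
{
	return blkno << (l2bsize - 9);	/* assumes l2bsize >= 9 */
}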
2138
2139
2140/*
2141 *	lbmIOWait()
2142 */
2143static int lbmIOWait(struct lbuf * bp, int flag)
2144{
2145	unsigned long flags;
2146	int rc = 0;
2147
2148	jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2149
2150	LCACHE_LOCK(flags);		/* disable+lock */
2151
2152	LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2153
2154	rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2155
2156	if (flag & lbmFREE)
2157		lbmfree(bp);
2158
2159	LCACHE_UNLOCK(flags);	/* unlock+enable */
2160
2161	jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2162	return rc;
2163}
2164
2165/*
2166 *	lbmIODone()
2167 *
2168 * executed at INTIODONE level
2169 */
2170static void lbmIODone(struct bio *bio)
2171{
2172	struct lbuf *bp = bio->bi_private;
2173	struct lbuf *nextbp, *tail;
2174	struct jfs_log *log;
2175	unsigned long flags;
2176
2177	/*
2178	 * get back jfs buffer bound to the i/o buffer
2179	 */
2180	jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
2181
2182	LCACHE_LOCK(flags);		/* disable+lock */
2183
2184	bp->l_flag |= lbmDONE;
2185
2186	if (bio->bi_status) {
2187		bp->l_flag |= lbmERROR;
2188
2189		jfs_err("lbmIODone: I/O error in JFS log");
2190	}
2191
2192	bio_put(bio);
2193
2194	/*
2195	 *	pagein completion
2196	 */
2197	if (bp->l_flag & lbmREAD) {
2198		bp->l_flag &= ~lbmREAD;
2199
2200		LCACHE_UNLOCK(flags);	/* unlock+enable */
2201
2202		/* wakeup I/O initiator */
2203		LCACHE_WAKEUP(&bp->l_ioevent);
2204
2205		return;
2206	}
2207
2208	/*
2209	 *	pageout completion
2210	 *
2211	 * the bp at the head of write queue has completed pageout.
2212	 *
2213	 * if single-commit/full-page pageout, remove the current buffer
2214	 * from head of pageout queue, and redrive pageout with
2215	 * the new buffer at head of pageout queue;
2216	 * otherwise, the partial-page pageout buffer stays at
2217	 * the head of pageout queue to be redriven for pageout
2218	 * by lmGroupCommit() until full-page pageout is completed.
2219	 */
2220	bp->l_flag &= ~lbmWRITE;
2221	INCREMENT(lmStat.pagedone);
2222
2223	/* update committed lsn */
2224	log = bp->l_log;
2225	log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
2226
2227	if (bp->l_flag & lbmDIRECT) {
2228		LCACHE_WAKEUP(&bp->l_ioevent);
2229		LCACHE_UNLOCK(flags);
2230		return;
2231	}
2232
2233	tail = log->wqueue;
2234
2235	/* single element queue */
2236	if (bp == tail) {
2237		/* remove head buffer of full-page pageout
2238		 * from log device write queue
2239		 */
2240		if (bp->l_flag & lbmRELEASE) {
2241			log->wqueue = NULL;
2242			bp->l_wqnext = NULL;
2243		}
2244	}
2245	/* multi element queue */
2246	else {
2247		/* remove head buffer of full-page pageout
2248		 * from log device write queue
2249		 */
2250		if (bp->l_flag & lbmRELEASE) {
2251			nextbp = tail->l_wqnext = bp->l_wqnext;
2252			bp->l_wqnext = NULL;
2253
2254			/*
2255			 * redrive pageout of next page at head of write queue:
2256			 * redrive next page without any bound tblk
2257			 * (i.e., page w/o any COMMIT records), or
2258			 * first page of new group commit which has been
2259			 * queued after current page (subsequent pageout
2260			 * is performed synchronously, except page without
2261			 * any COMMITs) by lmGroupCommit() as indicated
2262			 * by lbmWRITE flag;
2263			 */
2264			if (nextbp->l_flag & lbmWRITE) {
2265				/*
2266				 * We can't do the I/O at interrupt time.
2267				 * The jfsIO thread can do it
2268				 */
2269				lbmRedrive(nextbp);
2270			}
2271		}
2272	}
2273
2274	/*
2275	 *	synchronous pageout:
2276	 *
2277	 * buffer has not necessarily been removed from write queue
2278	 * (e.g., synchronous write of partial-page with COMMIT):
2279	 * leave buffer for i/o initiator to dispose
2280	 */
2281	if (bp->l_flag & lbmSYNC) {
2282		LCACHE_UNLOCK(flags);	/* unlock+enable */
2283
2284		/* wakeup I/O initiator */
2285		LCACHE_WAKEUP(&bp->l_ioevent);
2286	}
2287
2288	/*
2289	 *	Group Commit pageout:
2290	 */
2291	else if (bp->l_flag & lbmGC) {
2292		LCACHE_UNLOCK(flags);
2293		lmPostGC(bp);
2294	}
2295
2296	/*
2297	 *	asynchronous pageout:
2298	 *
2299	 * buffer must have been removed from write queue:
2300	 * insert buffer at head of freelist where it can be recycled
2301	 */
2302	else {
2303		assert(bp->l_flag & lbmRELEASE);
2304		assert(bp->l_flag & lbmFREE);
2305		lbmfree(bp);
2306
2307		LCACHE_UNLOCK(flags);	/* unlock+enable */
2308	}
2309}
2310
2311int jfsIOWait(void *arg)
2312{
2313	struct lbuf *bp;
2314
2315	do {
2316		spin_lock_irq(&log_redrive_lock);
2317		while ((bp = log_redrive_list)) {
2318			log_redrive_list = bp->l_redrive_next;
2319			bp->l_redrive_next = NULL;
2320			spin_unlock_irq(&log_redrive_lock);
2321			lbmStartIO(bp);
2322			spin_lock_irq(&log_redrive_lock);
2323		}
2324
2325		if (freezing(current)) {
2326			spin_unlock_irq(&log_redrive_lock);
2327			try_to_freeze();
2328		} else {
2329			set_current_state(TASK_INTERRUPTIBLE);
2330			spin_unlock_irq(&log_redrive_lock);
2331			schedule();
2332		}
2333	} while (!kthread_should_stop());
2334
2335	jfs_info("jfsIOWait being killed!");
2336	return 0;
2337}
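
/*
 * Sketch of how this kthread is wired up (assumption: the real call
 * lives in the module init path, outside this file): lbmRedrive()
 * above wakes whatever task jfsIOthread points at.
 */
static int __maybe_unused start_jfsIO_sketch(void)
{
	struct task_struct *t = kthread_run(jfsIOWait, NULL, "jfsIO");

	if (IS_ERR(t))
		return PTR_ERR(t);
	jfsIOthread = t;
	return 0;
}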
2338
2339/*
2340 * NAME:	lmLogFormat()/jfs_logform()
2341 *
2342 * FUNCTION:	format file system log
2343 *
2344 * PARAMETERS:
2345 *	log	- volume log
2346 *	logAddress - start address of log space in FS block
2347 *	logSize	- length of log space in FS block;
2348 *
2349 * RETURN:	0	- success
2350 *		-EIO	- i/o error
2351 *
2352 * XXX: We're synchronously writing one page at a time.  This needs to
2353 *	be improved by writing multiple pages at once.
2354 */
2355int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2356{
2357	int rc = -EIO;
2358	struct jfs_sb_info *sbi;
2359	struct logsuper *logsuper;
2360	struct logpage *lp;
2361	int lspn;		/* log sequence page number */
2362	struct lrd *lrd_ptr;
2363	int npages = 0;
2364	struct lbuf *bp;
2365
2366	jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
2367		 (long long)logAddress, logSize);
2368
2369	sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);
2370
2371	/* allocate a log buffer */
2372	bp = lbmAllocate(log, 1);
2373
2374	npages = logSize >> sbi->l2nbperpage;
2375
2376	/*
2377	 *	log space:
2378	 *
2379	 * page 0 - reserved;
2380	 * page 1 - log superblock;
2381	 * page 2 - log data page: A SYNC log record is written
2382	 *	    into this page at logform time;
2383	 * pages 3-N - log data page: set to empty log data pages;
2384	 */
2385	/*
2386	 *	init log superblock: log page 1
2387	 */
2388	logsuper = (struct logsuper *) bp->l_ldata;
2389
2390	logsuper->magic = cpu_to_le32(LOGMAGIC);
2391	logsuper->version = cpu_to_le32(LOGVERSION);
2392	logsuper->state = cpu_to_le32(LOGREDONE);
2393	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
2394	logsuper->size = cpu_to_le32(npages);
2395	logsuper->bsize = cpu_to_le32(sbi->bsize);
2396	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
2397	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
2398
2399	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2400	bp->l_blkno = logAddress + sbi->nbperpage;
2401	lbmStartIO(bp);
2402	if ((rc = lbmIOWait(bp, 0)))
2403		goto exit;
2404
2405	/*
2406	 *	init pages 2 to npages-1 as log data pages:
2407	 *
2408	 * log page sequence number (lpsn) initialization:
2409	 *
2410	 * pn:   0     1     2     3                 n-1
2411	 *       +-----+-----+=====+=====+===.....===+=====+
2412	 * lspn:             N-1   0     1           N-2
2413	 *                   <--- N page circular file ---->
2414	 *
2415	 * the N (= npages-2) data pages of the log are maintained as
2416	 * a circular file for the log records;
2417	 * lpsn grows by 1 monotonically as each log page is written
2418	 * to the circular file of the log;
2419	 * and setLogpage() will not reset the page number even if
2420	 * the eor is equal to LOGPHDRSIZE. For the binary search that
2421	 * finds the log end to keep working, we have to simulate the
2422	 * log wrap situation at log format time.
2423	 * The 1st log page written will have the highest lpsn. The
2424	 * succeeding log pages will then carry ascending lpsn values
2425	 * starting from 0, ..., (N-2)
2426	 */
2427	lp = (struct logpage *) bp->l_ldata;
2428	/*
2429	 * initialize 1st log page to be written: lpsn = N - 1;
2430	 * a SYNCPT log record is written into this page
2431	 */
2432	lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
2433	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
2434
2435	lrd_ptr = (struct lrd *) &lp->data;
2436	lrd_ptr->logtid = 0;
2437	lrd_ptr->backchain = 0;
2438	lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
2439	lrd_ptr->length = 0;
2440	lrd_ptr->log.syncpt.sync = 0;
2441
2442	bp->l_blkno += sbi->nbperpage;
2443	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2444	lbmStartIO(bp);
2445	if ((rc = lbmIOWait(bp, 0)))
2446		goto exit;
2447
2448	/*
2449	 *	initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
2450	 */
2451	for (lspn = 0; lspn < npages - 3; lspn++) {
2452		lp->h.page = lp->t.page = cpu_to_le32(lspn);
2453		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
2454
2455		bp->l_blkno += sbi->nbperpage;
2456		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2457		lbmStartIO(bp);
2458		if ((rc = lbmIOWait(bp, 0)))
2459			goto exit;
2460	}
2461
2462	rc = 0;
2463exit:
2464	/*
2465	 *	finalize log
2466	 */
2467	/* release the buffer */
2468	lbmFree(bp);
2469
2470	return rc;
2471}
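
/*
 * Sketch of the lpsn stamping performed above (helper is illustrative
 * only), with N = npages - 2 data pages: page 2 is stamped N - 1 to
 * simulate the wrap point, and page pn (3 <= pn <= npages - 1) is
 * stamped pn - 3, i.e. 0 .. N - 2.
 */
static inline int __maybe_unused format_lpsn_sketch(int pn, int npages)
{
	return (pn == 2) ? npages - 3 : pn - 3;
}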
2472
2473#ifdef CONFIG_JFS_STATISTICS
2474int jfs_lmstats_proc_show(struct seq_file *m, void *v)
2475{
2476	seq_printf(m,
2477		       "JFS Logmgr stats\n"
2478		       "================\n"
2479		       "commits = %d\n"
2480		       "writes submitted = %d\n"
2481		       "writes completed = %d\n"
2482		       "full pages submitted = %d\n"
2483		       "partial pages submitted = %d\n",
2484		       lmStat.commit,
2485		       lmStat.submitted,
2486		       lmStat.pagedone,
2487		       lmStat.full_page,
2488		       lmStat.partial_page);
2489	return 0;
2490}
2491#endif /* CONFIG_JFS_STATISTICS */