Linux Audio

Check our new training course

Real-Time Linux with PREEMPT_RT training

Feb 18-20, 2025
Register
Loading...
v4.17
   1/*
   2 *   Copyright (C) International Business Machines Corp., 2000-2004
   3 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   4 *
   5 *   This program is free software;  you can redistribute it and/or modify
   6 *   it under the terms of the GNU General Public License as published by
   7 *   the Free Software Foundation; either version 2 of the License, or
   8 *   (at your option) any later version.
   9 *
  10 *   This program is distributed in the hope that it will be useful,
  11 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  13 *   the GNU General Public License for more details.
  14 *
  15 *   You should have received a copy of the GNU General Public License
  16 *   along with this program;  if not, write to the Free Software
  17 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18 */
  19
  20/*
  21 *	jfs_logmgr.c: log manager
  22 *
  23 * for related information, see transaction manager (jfs_txnmgr.c), and
  24 * recovery manager (jfs_logredo.c).
  25 *
  26 * note: for detail, RTFS.
  27 *
  28 *	log buffer manager:
  29 * special purpose buffer manager supporting log i/o requirements.
  30 * per log serial pageout of logpage
  31 * queuing i/o requests and redrive i/o at iodone
  32 * maintain current logpage buffer
  33 * no caching since append only
  34 * appropriate jfs buffer cache buffers as needed
  35 *
  36 *	group commit:
  37 * transactions which wrote COMMIT records in the same in-memory
  38 * log page during the pageout of previous/current log page(s) are
  39 * committed together by the pageout of the page.
  40 *
  41 *	TBD lazy commit:
  42 * transactions are committed asynchronously when the log page
  43 * containing it COMMIT is paged out when it becomes full;
  44 *
  45 *	serialization:
  46 * . a per log lock serialize log write.
  47 * . a per log lock serialize group commit.
  48 * . a per log lock serialize log open/close;
  49 *
  50 *	TBD log integrity:
  51 * careful-write (ping-pong) of last logpage to recover from crash
  52 * in overwrite.
  53 * detection of split (out-of-order) write of physical sectors
  54 * of last logpage via timestamp at end of each sector
  55 * with its mirror data array at trailer).
  56 *
  57 *	alternatives:
  58 * lsn - 64-bit monotonically increasing integer vs
  59 * 32-bit lspn and page eor.
  60 */
  61
  62#include <linux/fs.h>
  63#include <linux/blkdev.h>
  64#include <linux/interrupt.h>
  65#include <linux/completion.h>
  66#include <linux/kthread.h>
  67#include <linux/buffer_head.h>		/* for sync_blockdev() */
  68#include <linux/bio.h>
  69#include <linux/freezer.h>
  70#include <linux/export.h>
  71#include <linux/delay.h>
  72#include <linux/mutex.h>
  73#include <linux/seq_file.h>
  74#include <linux/slab.h>
  75#include "jfs_incore.h"
  76#include "jfs_filsys.h"
  77#include "jfs_metapage.h"
  78#include "jfs_superblock.h"
  79#include "jfs_txnmgr.h"
  80#include "jfs_debug.h"
  81
  82
  83/*
  84 * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
  85 */
  86static struct lbuf *log_redrive_list;
  87static DEFINE_SPINLOCK(log_redrive_lock);
  88
  89
  90/*
  91 *	log read/write serialization (per log)
  92 */
  93#define LOG_LOCK_INIT(log)	mutex_init(&(log)->loglock)
  94#define LOG_LOCK(log)		mutex_lock(&((log)->loglock))
  95#define LOG_UNLOCK(log)		mutex_unlock(&((log)->loglock))
  96
  97
  98/*
  99 *	log group commit serialization (per log)
 100 */
 101
 102#define LOGGC_LOCK_INIT(log)	spin_lock_init(&(log)->gclock)
 103#define LOGGC_LOCK(log)		spin_lock_irq(&(log)->gclock)
 104#define LOGGC_UNLOCK(log)	spin_unlock_irq(&(log)->gclock)
 105#define LOGGC_WAKEUP(tblk)	wake_up_all(&(tblk)->gcwait)
 106
 107/*
 108 *	log sync serialization (per log)
 109 */
 110#define	LOGSYNC_DELTA(logsize)		min((logsize)/8, 128*LOGPSIZE)
 111#define	LOGSYNC_BARRIER(logsize)	((logsize)/4)
 112/*
 113#define	LOGSYNC_DELTA(logsize)		min((logsize)/4, 256*LOGPSIZE)
 114#define	LOGSYNC_BARRIER(logsize)	((logsize)/2)
 115*/
 116
 117
 118/*
 119 *	log buffer cache synchronization
 120 */
 121static DEFINE_SPINLOCK(jfsLCacheLock);
 122
 123#define	LCACHE_LOCK(flags)	spin_lock_irqsave(&jfsLCacheLock, flags)
 124#define	LCACHE_UNLOCK(flags)	spin_unlock_irqrestore(&jfsLCacheLock, flags)
 125
 126/*
 127 * See __SLEEP_COND in jfs_locks.h
 128 */
 129#define LCACHE_SLEEP_COND(wq, cond, flags)	\
 130do {						\
 131	if (cond)				\
 132		break;				\
 133	__SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
 134} while (0)
 135
 136#define	LCACHE_WAKEUP(event)	wake_up(event)
 137
 138
 139/*
 140 *	lbuf buffer cache (lCache) control
 141 */
 142/* log buffer manager pageout control (cumulative, inclusive) */
 143#define	lbmREAD		0x0001
 144#define	lbmWRITE	0x0002	/* enqueue at tail of write queue;
 145				 * init pageout if at head of queue;
 146				 */
 147#define	lbmRELEASE	0x0004	/* remove from write queue
 148				 * at completion of pageout;
 149				 * do not free/recycle it yet:
 150				 * caller will free it;
 151				 */
 152#define	lbmSYNC		0x0008	/* do not return to freelist
 153				 * when removed from write queue;
 154				 */
 155#define lbmFREE		0x0010	/* return to freelist
 156				 * at completion of pageout;
 157				 * the buffer may be recycled;
 158				 */
 159#define	lbmDONE		0x0020
 160#define	lbmERROR	0x0040
 161#define lbmGC		0x0080	/* lbmIODone to perform post-GC processing
 162				 * of log page
 163				 */
 164#define lbmDIRECT	0x0100
 165
 166/*
 167 * Global list of active external journals
 168 */
 169static LIST_HEAD(jfs_external_logs);
 170static struct jfs_log *dummy_log;
 171static DEFINE_MUTEX(jfs_log_mutex);
 172
 173/*
 174 * forward references
 175 */
 176static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
 177			 struct lrd * lrd, struct tlock * tlck);
 178
 179static int lmNextPage(struct jfs_log * log);
 180static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
 181			   int activate);
 182
 183static int open_inline_log(struct super_block *sb);
 184static int open_dummy_log(struct super_block *sb);
 185static int lbmLogInit(struct jfs_log * log);
 186static void lbmLogShutdown(struct jfs_log * log);
 187static struct lbuf *lbmAllocate(struct jfs_log * log, int);
 188static void lbmFree(struct lbuf * bp);
 189static void lbmfree(struct lbuf * bp);
 190static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
 191static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
 192static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
 193static int lbmIOWait(struct lbuf * bp, int flag);
 194static bio_end_io_t lbmIODone;
 195static void lbmStartIO(struct lbuf * bp);
 196static void lmGCwrite(struct jfs_log * log, int cant_block);
 197static int lmLogSync(struct jfs_log * log, int hard_sync);
 198
 199
 200
 201/*
 202 *	statistics
 203 */
 204#ifdef CONFIG_JFS_STATISTICS
 205static struct lmStat {
 206	uint commit;		/* # of commit */
 207	uint pagedone;		/* # of page written */
 208	uint submitted;		/* # of pages submitted */
 209	uint full_page;		/* # of full pages submitted */
 210	uint partial_page;	/* # of partial pages submitted */
 211} lmStat;
 212#endif
 213
 214static void write_special_inodes(struct jfs_log *log,
 215				 int (*writer)(struct address_space *))
 216{
 217	struct jfs_sb_info *sbi;
 218
 219	list_for_each_entry(sbi, &log->sb_list, log_list) {
 220		writer(sbi->ipbmap->i_mapping);
 221		writer(sbi->ipimap->i_mapping);
 222		writer(sbi->direct_inode->i_mapping);
 223	}
 224}
 225
 226/*
 227 * NAME:	lmLog()
 228 *
 229 * FUNCTION:	write a log record;
 230 *
 231 * PARAMETER:
 232 *
 233 * RETURN:	lsn - offset to the next log record to write (end-of-log);
 234 *		-1  - error;
 235 *
 236 * note: todo: log error handler
 237 */
 238int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 239	  struct tlock * tlck)
 240{
 241	int lsn;
 242	int diffp, difft;
 243	struct metapage *mp = NULL;
 244	unsigned long flags;
 245
 246	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
 247		 log, tblk, lrd, tlck);
 248
 249	LOG_LOCK(log);
 250
 251	/* log by (out-of-transaction) JFS ? */
 252	if (tblk == NULL)
 253		goto writeRecord;
 254
 255	/* log from page ? */
 256	if (tlck == NULL ||
 257	    tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
 258		goto writeRecord;
 259
 260	/*
 261	 *	initialize/update page/transaction recovery lsn
 262	 */
 263	lsn = log->lsn;
 264
 265	LOGSYNC_LOCK(log, flags);
 266
 267	/*
 268	 * initialize page lsn if first log write of the page
 269	 */
 270	if (mp->lsn == 0) {
 271		mp->log = log;
 272		mp->lsn = lsn;
 273		log->count++;
 274
 275		/* insert page at tail of logsynclist */
 276		list_add_tail(&mp->synclist, &log->synclist);
 277	}
 278
 279	/*
 280	 *	initialize/update lsn of tblock of the page
 281	 *
 282	 * transaction inherits oldest lsn of pages associated
 283	 * with allocation/deallocation of resources (their
 284	 * log records are used to reconstruct allocation map
 285	 * at recovery time: inode for inode allocation map,
 286	 * B+-tree index of extent descriptors for block
 287	 * allocation map);
 288	 * allocation map pages inherit transaction lsn at
 289	 * commit time to allow forwarding log syncpt past log
 290	 * records associated with allocation/deallocation of
 291	 * resources only after persistent map of these map pages
 292	 * have been updated and propagated to home.
 293	 */
 294	/*
 295	 * initialize transaction lsn:
 296	 */
 297	if (tblk->lsn == 0) {
 298		/* inherit lsn of its first page logged */
 299		tblk->lsn = mp->lsn;
 300		log->count++;
 301
 302		/* insert tblock after the page on logsynclist */
 303		list_add(&tblk->synclist, &mp->synclist);
 304	}
 305	/*
 306	 * update transaction lsn:
 307	 */
 308	else {
 309		/* inherit oldest/smallest lsn of page */
 310		logdiff(diffp, mp->lsn, log);
 311		logdiff(difft, tblk->lsn, log);
 312		if (diffp < difft) {
 313			/* update tblock lsn with page lsn */
 314			tblk->lsn = mp->lsn;
 315
 316			/* move tblock after page on logsynclist */
 317			list_move(&tblk->synclist, &mp->synclist);
 318		}
 319	}
 320
 321	LOGSYNC_UNLOCK(log, flags);
 322
 323	/*
 324	 *	write the log record
 325	 */
 326      writeRecord:
 327	lsn = lmWriteRecord(log, tblk, lrd, tlck);
 328
 329	/*
 330	 * forward log syncpt if log reached next syncpt trigger
 331	 */
 332	logdiff(diffp, lsn, log);
 333	if (diffp >= log->nextsync)
 334		lsn = lmLogSync(log, 0);
 335
 336	/* update end-of-log lsn */
 337	log->lsn = lsn;
 338
 339	LOG_UNLOCK(log);
 340
 341	/* return end-of-log address */
 342	return lsn;
 343}
 344
 345/*
 346 * NAME:	lmWriteRecord()
 347 *
 348 * FUNCTION:	move the log record to current log page
 349 *
 350 * PARAMETER:	cd	- commit descriptor
 351 *
 352 * RETURN:	end-of-log address
 353 *
 354 * serialization: LOG_LOCK() held on entry/exit
 355 */
 356static int
 357lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 358	      struct tlock * tlck)
 359{
 360	int lsn = 0;		/* end-of-log address */
 361	struct lbuf *bp;	/* dst log page buffer */
 362	struct logpage *lp;	/* dst log page */
 363	caddr_t dst;		/* destination address in log page */
 364	int dstoffset;		/* end-of-log offset in log page */
 365	int freespace;		/* free space in log page */
 366	caddr_t p;		/* src meta-data page */
 367	caddr_t src;
 368	int srclen;
 369	int nbytes;		/* number of bytes to move */
 370	int i;
 371	int len;
 372	struct linelock *linelock;
 373	struct lv *lv;
 374	struct lvd *lvd;
 375	int l2linesize;
 376
 377	len = 0;
 378
 379	/* retrieve destination log page to write */
 380	bp = (struct lbuf *) log->bp;
 381	lp = (struct logpage *) bp->l_ldata;
 382	dstoffset = log->eor;
 383
 384	/* any log data to write ? */
 385	if (tlck == NULL)
 386		goto moveLrd;
 387
 388	/*
 389	 *	move log record data
 390	 */
 391	/* retrieve source meta-data page to log */
 392	if (tlck->flag & tlckPAGELOCK) {
 393		p = (caddr_t) (tlck->mp->data);
 394		linelock = (struct linelock *) & tlck->lock;
 395	}
 396	/* retrieve source in-memory inode to log */
 397	else if (tlck->flag & tlckINODELOCK) {
 398		if (tlck->type & tlckDTREE)
 399			p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
 400		else
 401			p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
 402		linelock = (struct linelock *) & tlck->lock;
 403	}
 404#ifdef	_JFS_WIP
 405	else if (tlck->flag & tlckINLINELOCK) {
 406
 407		inlinelock = (struct inlinelock *) & tlck;
 408		p = (caddr_t) & inlinelock->pxd;
 409		linelock = (struct linelock *) & tlck;
 410	}
 411#endif				/* _JFS_WIP */
 412	else {
 413		jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
 414		return 0;	/* Probably should trap */
 415	}
 416	l2linesize = linelock->l2linesize;
 417
 418      moveData:
 419	ASSERT(linelock->index <= linelock->maxcnt);
 420
 421	lv = linelock->lv;
 422	for (i = 0; i < linelock->index; i++, lv++) {
 423		if (lv->length == 0)
 424			continue;
 425
 426		/* is page full ? */
 427		if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
 428			/* page become full: move on to next page */
 429			lmNextPage(log);
 430
 431			bp = log->bp;
 432			lp = (struct logpage *) bp->l_ldata;
 433			dstoffset = LOGPHDRSIZE;
 434		}
 435
 436		/*
 437		 * move log vector data
 438		 */
 439		src = (u8 *) p + (lv->offset << l2linesize);
 440		srclen = lv->length << l2linesize;
 441		len += srclen;
 442		while (srclen > 0) {
 443			freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 444			nbytes = min(freespace, srclen);
 445			dst = (caddr_t) lp + dstoffset;
 446			memcpy(dst, src, nbytes);
 447			dstoffset += nbytes;
 448
 449			/* is page not full ? */
 450			if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 451				break;
 452
 453			/* page become full: move on to next page */
 454			lmNextPage(log);
 455
 456			bp = (struct lbuf *) log->bp;
 457			lp = (struct logpage *) bp->l_ldata;
 458			dstoffset = LOGPHDRSIZE;
 459
 460			srclen -= nbytes;
 461			src += nbytes;
 462		}
 463
 464		/*
 465		 * move log vector descriptor
 466		 */
 467		len += 4;
 468		lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
 469		lvd->offset = cpu_to_le16(lv->offset);
 470		lvd->length = cpu_to_le16(lv->length);
 471		dstoffset += 4;
 472		jfs_info("lmWriteRecord: lv offset:%d length:%d",
 473			 lv->offset, lv->length);
 474	}
 475
 476	if ((i = linelock->next)) {
 477		linelock = (struct linelock *) lid_to_tlock(i);
 478		goto moveData;
 479	}
 480
 481	/*
 482	 *	move log record descriptor
 483	 */
 484      moveLrd:
 485	lrd->length = cpu_to_le16(len);
 486
 487	src = (caddr_t) lrd;
 488	srclen = LOGRDSIZE;
 489
 490	while (srclen > 0) {
 491		freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 492		nbytes = min(freespace, srclen);
 493		dst = (caddr_t) lp + dstoffset;
 494		memcpy(dst, src, nbytes);
 495
 496		dstoffset += nbytes;
 497		srclen -= nbytes;
 498
 499		/* are there more to move than freespace of page ? */
 500		if (srclen)
 501			goto pageFull;
 502
 503		/*
 504		 * end of log record descriptor
 505		 */
 506
 507		/* update last log record eor */
 508		log->eor = dstoffset;
 509		bp->l_eor = dstoffset;
 510		lsn = (log->page << L2LOGPSIZE) + dstoffset;
 511
 512		if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
 513			tblk->clsn = lsn;
 514			jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
 515				 bp->l_eor);
 516
 517			INCREMENT(lmStat.commit);	/* # of commit */
 518
 519			/*
 520			 * enqueue tblock for group commit:
 521			 *
 522			 * enqueue tblock of non-trivial/synchronous COMMIT
 523			 * at tail of group commit queue
 524			 * (trivial/asynchronous COMMITs are ignored by
 525			 * group commit.)
 526			 */
 527			LOGGC_LOCK(log);
 528
 529			/* init tblock gc state */
 530			tblk->flag = tblkGC_QUEUE;
 531			tblk->bp = log->bp;
 532			tblk->pn = log->page;
 533			tblk->eor = log->eor;
 534
 535			/* enqueue transaction to commit queue */
 536			list_add_tail(&tblk->cqueue, &log->cqueue);
 537
 538			LOGGC_UNLOCK(log);
 539		}
 540
 541		jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
 542			le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
 543
 544		/* page not full ? */
 545		if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 546			return lsn;
 547
 548	      pageFull:
 549		/* page become full: move on to next page */
 550		lmNextPage(log);
 551
 552		bp = (struct lbuf *) log->bp;
 553		lp = (struct logpage *) bp->l_ldata;
 554		dstoffset = LOGPHDRSIZE;
 555		src += nbytes;
 556	}
 557
 558	return lsn;
 559}
 560
 561
 562/*
 563 * NAME:	lmNextPage()
 564 *
 565 * FUNCTION:	write current page and allocate next page.
 566 *
 567 * PARAMETER:	log
 568 *
 569 * RETURN:	0
 570 *
 571 * serialization: LOG_LOCK() held on entry/exit
 572 */
 573static int lmNextPage(struct jfs_log * log)
 574{
 575	struct logpage *lp;
 576	int lspn;		/* log sequence page number */
 577	int pn;			/* current page number */
 578	struct lbuf *bp;
 579	struct lbuf *nextbp;
 580	struct tblock *tblk;
 581
 582	/* get current log page number and log sequence page number */
 583	pn = log->page;
 584	bp = log->bp;
 585	lp = (struct logpage *) bp->l_ldata;
 586	lspn = le32_to_cpu(lp->h.page);
 587
 588	LOGGC_LOCK(log);
 589
 590	/*
 591	 *	write or queue the full page at the tail of write queue
 592	 */
 593	/* get the tail tblk on commit queue */
 594	if (list_empty(&log->cqueue))
 595		tblk = NULL;
 596	else
 597		tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
 598
 599	/* every tblk who has COMMIT record on the current page,
 600	 * and has not been committed, must be on commit queue
 601	 * since tblk is queued at commit queueu at the time
 602	 * of writing its COMMIT record on the page before
 603	 * page becomes full (even though the tblk thread
 604	 * who wrote COMMIT record may have been suspended
 605	 * currently);
 606	 */
 607
 608	/* is page bound with outstanding tail tblk ? */
 609	if (tblk && tblk->pn == pn) {
 610		/* mark tblk for end-of-page */
 611		tblk->flag |= tblkGC_EOP;
 612
 613		if (log->cflag & logGC_PAGEOUT) {
 614			/* if page is not already on write queue,
 615			 * just enqueue (no lbmWRITE to prevent redrive)
 616			 * buffer to wqueue to ensure correct serial order
 617			 * of the pages since log pages will be added
 618			 * continuously
 619			 */
 620			if (bp->l_wqnext == NULL)
 621				lbmWrite(log, bp, 0, 0);
 622		} else {
 623			/*
 624			 * No current GC leader, initiate group commit
 625			 */
 626			log->cflag |= logGC_PAGEOUT;
 627			lmGCwrite(log, 0);
 628		}
 629	}
 630	/* page is not bound with outstanding tblk:
 631	 * init write or mark it to be redriven (lbmWRITE)
 632	 */
 633	else {
 634		/* finalize the page */
 635		bp->l_ceor = bp->l_eor;
 636		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 637		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
 638	}
 639	LOGGC_UNLOCK(log);
 640
 641	/*
 642	 *	allocate/initialize next page
 643	 */
 644	/* if log wraps, the first data page of log is 2
 645	 * (0 never used, 1 is superblock).
 646	 */
 647	log->page = (pn == log->size - 1) ? 2 : pn + 1;
 648	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */
 649
 650	/* allocate/initialize next log page buffer */
 651	nextbp = lbmAllocate(log, log->page);
 652	nextbp->l_eor = log->eor;
 653	log->bp = nextbp;
 654
 655	/* initialize next log page */
 656	lp = (struct logpage *) nextbp->l_ldata;
 657	lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
 658	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
 659
 660	return 0;
 661}
 662
 663
 664/*
 665 * NAME:	lmGroupCommit()
 666 *
 667 * FUNCTION:	group commit
 668 *	initiate pageout of the pages with COMMIT in the order of
 669 *	page number - redrive pageout of the page at the head of
 670 *	pageout queue until full page has been written.
 671 *
 672 * RETURN:
 673 *
 674 * NOTE:
 675 *	LOGGC_LOCK serializes log group commit queue, and
 676 *	transaction blocks on the commit queue.
 677 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 678 */
 679int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
 680{
 681	int rc = 0;
 682
 683	LOGGC_LOCK(log);
 684
 685	/* group committed already ? */
 686	if (tblk->flag & tblkGC_COMMITTED) {
 687		if (tblk->flag & tblkGC_ERROR)
 688			rc = -EIO;
 689
 690		LOGGC_UNLOCK(log);
 691		return rc;
 692	}
 693	jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
 694
 695	if (tblk->xflag & COMMIT_LAZY)
 696		tblk->flag |= tblkGC_LAZY;
 697
 698	if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
 699	    (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
 700	     || jfs_tlocks_low)) {
 701		/*
 702		 * No pageout in progress
 703		 *
 704		 * start group commit as its group leader.
 705		 */
 706		log->cflag |= logGC_PAGEOUT;
 707
 708		lmGCwrite(log, 0);
 709	}
 710
 711	if (tblk->xflag & COMMIT_LAZY) {
 712		/*
 713		 * Lazy transactions can leave now
 714		 */
 715		LOGGC_UNLOCK(log);
 716		return 0;
 717	}
 718
 719	/* lmGCwrite gives up LOGGC_LOCK, check again */
 720
 721	if (tblk->flag & tblkGC_COMMITTED) {
 722		if (tblk->flag & tblkGC_ERROR)
 723			rc = -EIO;
 724
 725		LOGGC_UNLOCK(log);
 726		return rc;
 727	}
 728
 729	/* upcount transaction waiting for completion
 730	 */
 731	log->gcrtc++;
 732	tblk->flag |= tblkGC_READY;
 733
 734	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
 735		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));
 736
 737	/* removed from commit queue */
 738	if (tblk->flag & tblkGC_ERROR)
 739		rc = -EIO;
 740
 741	LOGGC_UNLOCK(log);
 742	return rc;
 743}
 744
 745/*
 746 * NAME:	lmGCwrite()
 747 *
 748 * FUNCTION:	group commit write
 749 *	initiate write of log page, building a group of all transactions
 750 *	with commit records on that page.
 751 *
 752 * RETURN:	None
 753 *
 754 * NOTE:
 755 *	LOGGC_LOCK must be held by caller.
 756 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 757 */
 758static void lmGCwrite(struct jfs_log * log, int cant_write)
 759{
 760	struct lbuf *bp;
 761	struct logpage *lp;
 762	int gcpn;		/* group commit page number */
 763	struct tblock *tblk;
 764	struct tblock *xtblk = NULL;
 765
 766	/*
 767	 * build the commit group of a log page
 768	 *
 769	 * scan commit queue and make a commit group of all
 770	 * transactions with COMMIT records on the same log page.
 771	 */
 772	/* get the head tblk on the commit queue */
 773	gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
 774
 775	list_for_each_entry(tblk, &log->cqueue, cqueue) {
 776		if (tblk->pn != gcpn)
 777			break;
 778
 779		xtblk = tblk;
 780
 781		/* state transition: (QUEUE, READY) -> COMMIT */
 782		tblk->flag |= tblkGC_COMMIT;
 783	}
 784	tblk = xtblk;		/* last tblk of the page */
 785
 786	/*
 787	 * pageout to commit transactions on the log page.
 788	 */
 789	bp = (struct lbuf *) tblk->bp;
 790	lp = (struct logpage *) bp->l_ldata;
 791	/* is page already full ? */
 792	if (tblk->flag & tblkGC_EOP) {
 793		/* mark page to free at end of group commit of the page */
 794		tblk->flag &= ~tblkGC_EOP;
 795		tblk->flag |= tblkGC_FREE;
 796		bp->l_ceor = bp->l_eor;
 797		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 798		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
 799			 cant_write);
 800		INCREMENT(lmStat.full_page);
 801	}
 802	/* page is not yet full */
 803	else {
 804		bp->l_ceor = tblk->eor;	/* ? bp->l_ceor = bp->l_eor; */
 805		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 806		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
 807		INCREMENT(lmStat.partial_page);
 808	}
 809}
 810
 811/*
 812 * NAME:	lmPostGC()
 813 *
 814 * FUNCTION:	group commit post-processing
 815 *	Processes transactions after their commit records have been written
 816 *	to disk, redriving log I/O if necessary.
 817 *
 818 * RETURN:	None
 819 *
 820 * NOTE:
 821 *	This routine is called a interrupt time by lbmIODone
 822 */
 823static void lmPostGC(struct lbuf * bp)
 824{
 825	unsigned long flags;
 826	struct jfs_log *log = bp->l_log;
 827	struct logpage *lp;
 828	struct tblock *tblk, *temp;
 829
 830	//LOGGC_LOCK(log);
 831	spin_lock_irqsave(&log->gclock, flags);
 832	/*
 833	 * current pageout of group commit completed.
 834	 *
 835	 * remove/wakeup transactions from commit queue who were
 836	 * group committed with the current log page
 837	 */
 838	list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
 839		if (!(tblk->flag & tblkGC_COMMIT))
 840			break;
 841		/* if transaction was marked GC_COMMIT then
 842		 * it has been shipped in the current pageout
 843		 * and made it to disk - it is committed.
 844		 */
 845
 846		if (bp->l_flag & lbmERROR)
 847			tblk->flag |= tblkGC_ERROR;
 848
 849		/* remove it from the commit queue */
 850		list_del(&tblk->cqueue);
 851		tblk->flag &= ~tblkGC_QUEUE;
 852
 853		if (tblk == log->flush_tblk) {
 854			/* we can stop flushing the log now */
 855			clear_bit(log_FLUSH, &log->flag);
 856			log->flush_tblk = NULL;
 857		}
 858
 859		jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
 860			 tblk->flag);
 861
 862		if (!(tblk->xflag & COMMIT_FORCE))
 863			/*
 864			 * Hand tblk over to lazy commit thread
 865			 */
 866			txLazyUnlock(tblk);
 867		else {
 868			/* state transition: COMMIT -> COMMITTED */
 869			tblk->flag |= tblkGC_COMMITTED;
 870
 871			if (tblk->flag & tblkGC_READY)
 872				log->gcrtc--;
 873
 874			LOGGC_WAKEUP(tblk);
 875		}
 876
 877		/* was page full before pageout ?
 878		 * (and this is the last tblk bound with the page)
 879		 */
 880		if (tblk->flag & tblkGC_FREE)
 881			lbmFree(bp);
 882		/* did page become full after pageout ?
 883		 * (and this is the last tblk bound with the page)
 884		 */
 885		else if (tblk->flag & tblkGC_EOP) {
 886			/* finalize the page */
 887			lp = (struct logpage *) bp->l_ldata;
 888			bp->l_ceor = bp->l_eor;
 889			lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
 890			jfs_info("lmPostGC: calling lbmWrite");
 891			lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
 892				 1);
 893		}
 894
 895	}
 896
 897	/* are there any transactions who have entered lnGroupCommit()
 898	 * (whose COMMITs are after that of the last log page written.
 899	 * They are waiting for new group commit (above at (SLEEP 1))
 900	 * or lazy transactions are on a full (queued) log page,
 901	 * select the latest ready transaction as new group leader and
 902	 * wake her up to lead her group.
 903	 */
 904	if ((!list_empty(&log->cqueue)) &&
 905	    ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
 906	     test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
 907		/*
 908		 * Call lmGCwrite with new group leader
 909		 */
 910		lmGCwrite(log, 1);
 911
 912	/* no transaction are ready yet (transactions are only just
 913	 * queued (GC_QUEUE) and not entered for group commit yet).
 914	 * the first transaction entering group commit
 915	 * will elect herself as new group leader.
 916	 */
 917	else
 918		log->cflag &= ~logGC_PAGEOUT;
 919
 920	//LOGGC_UNLOCK(log);
 921	spin_unlock_irqrestore(&log->gclock, flags);
 922	return;
 923}
 924
 925/*
 926 * NAME:	lmLogSync()
 927 *
 928 * FUNCTION:	write log SYNCPT record for specified log
 929 *	if new sync address is available
 930 *	(normally the case if sync() is executed by back-ground
 931 *	process).
 932 *	calculate new value of i_nextsync which determines when
 933 *	this code is called again.
 934 *
 935 * PARAMETERS:	log	- log structure
 936 *		hard_sync - 1 to force all metadata to be written
 937 *
 938 * RETURN:	0
 939 *
 940 * serialization: LOG_LOCK() held on entry/exit
 941 */
 942static int lmLogSync(struct jfs_log * log, int hard_sync)
 943{
 944	int logsize;
 945	int written;		/* written since last syncpt */
 946	int free;		/* free space left available */
 947	int delta;		/* additional delta to write normally */
 948	int more;		/* additional write granted */
 949	struct lrd lrd;
 950	int lsn;
 951	struct logsyncblk *lp;
 952	unsigned long flags;
 953
 954	/* push dirty metapages out to disk */
 955	if (hard_sync)
 956		write_special_inodes(log, filemap_fdatawrite);
 957	else
 958		write_special_inodes(log, filemap_flush);
 959
 960	/*
 961	 *	forward syncpt
 962	 */
 963	/* if last sync is same as last syncpt,
 964	 * invoke sync point forward processing to update sync.
 965	 */
 966
 967	if (log->sync == log->syncpt) {
 968		LOGSYNC_LOCK(log, flags);
 969		if (list_empty(&log->synclist))
 970			log->sync = log->lsn;
 971		else {
 972			lp = list_entry(log->synclist.next,
 973					struct logsyncblk, synclist);
 974			log->sync = lp->lsn;
 975		}
 976		LOGSYNC_UNLOCK(log, flags);
 977
 978	}
 979
 980	/* if sync is different from last syncpt,
 981	 * write a SYNCPT record with syncpt = sync.
 982	 * reset syncpt = sync
 983	 */
 984	if (log->sync != log->syncpt) {
 985		lrd.logtid = 0;
 986		lrd.backchain = 0;
 987		lrd.type = cpu_to_le16(LOG_SYNCPT);
 988		lrd.length = 0;
 989		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
 990		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
 991
 992		log->syncpt = log->sync;
 993	} else
 994		lsn = log->lsn;
 995
 996	/*
 997	 *	setup next syncpt trigger (SWAG)
 998	 */
 999	logsize = log->logsize;
1000
1001	logdiff(written, lsn, log);
1002	free = logsize - written;
1003	delta = LOGSYNC_DELTA(logsize);
1004	more = min(free / 2, delta);
1005	if (more < 2 * LOGPSIZE) {
1006		jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
1007		/*
1008		 *	log wrapping
1009		 *
1010		 * option 1 - panic ? No.!
1011		 * option 2 - shutdown file systems
1012		 *	      associated with log ?
1013		 * option 3 - extend log ?
1014		 * option 4 - second chance
1015		 *
1016		 * mark log wrapped, and continue.
1017		 * when all active transactions are completed,
1018		 * mark log valid for recovery.
1019		 * if crashed during invalid state, log state
1020		 * implies invalid log, forcing fsck().
1021		 */
1022		/* mark log state log wrap in log superblock */
1023		/* log->state = LOGWRAP; */
1024
1025		/* reset sync point computation */
1026		log->syncpt = log->sync = lsn;
1027		log->nextsync = delta;
1028	} else
1029		/* next syncpt trigger = written + more */
1030		log->nextsync = written + more;
1031
1032	/* if number of bytes written from last sync point is more
1033	 * than 1/4 of the log size, stop new transactions from
1034	 * starting until all current transactions are completed
1035	 * by setting syncbarrier flag.
1036	 */
1037	if (!test_bit(log_SYNCBARRIER, &log->flag) &&
1038	    (written > LOGSYNC_BARRIER(logsize)) && log->active) {
1039		set_bit(log_SYNCBARRIER, &log->flag);
1040		jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1041			 log->syncpt);
1042		/*
1043		 * We may have to initiate group commit
1044		 */
1045		jfs_flush_journal(log, 0);
1046	}
1047
1048	return lsn;
1049}
1050
1051/*
1052 * NAME:	jfs_syncpt
1053 *
1054 * FUNCTION:	write log SYNCPT record for specified log
1055 *
1056 * PARAMETERS:	log	  - log structure
1057 *		hard_sync - set to 1 to force metadata to be written
1058 */
1059void jfs_syncpt(struct jfs_log *log, int hard_sync)
1060{	LOG_LOCK(log);
1061	if (!test_bit(log_QUIESCE, &log->flag))
1062		lmLogSync(log, hard_sync);
1063	LOG_UNLOCK(log);
1064}
1065
1066/*
1067 * NAME:	lmLogOpen()
1068 *
1069 * FUNCTION:	open the log on first open;
1070 *	insert filesystem in the active list of the log.
1071 *
1072 * PARAMETER:	ipmnt	- file system mount inode
1073 *		iplog	- log inode (out)
1074 *
1075 * RETURN:
1076 *
1077 * serialization:
1078 */
1079int lmLogOpen(struct super_block *sb)
1080{
1081	int rc;
1082	struct block_device *bdev;
1083	struct jfs_log *log;
1084	struct jfs_sb_info *sbi = JFS_SBI(sb);
1085
1086	if (sbi->flag & JFS_NOINTEGRITY)
1087		return open_dummy_log(sb);
1088
1089	if (sbi->mntflag & JFS_INLINELOG)
1090		return open_inline_log(sb);
1091
1092	mutex_lock(&jfs_log_mutex);
1093	list_for_each_entry(log, &jfs_external_logs, journal_list) {
1094		if (log->bdev->bd_dev == sbi->logdev) {
1095			if (memcmp(log->uuid, sbi->loguuid,
1096				   sizeof(log->uuid))) {
1097				jfs_warn("wrong uuid on JFS journal");
1098				mutex_unlock(&jfs_log_mutex);
1099				return -EINVAL;
1100			}
1101			/*
1102			 * add file system to log active file system list
1103			 */
1104			if ((rc = lmLogFileSystem(log, sbi, 1))) {
1105				mutex_unlock(&jfs_log_mutex);
1106				return rc;
1107			}
1108			goto journal_found;
1109		}
1110	}
1111
1112	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
1113		mutex_unlock(&jfs_log_mutex);
1114		return -ENOMEM;
1115	}
1116	INIT_LIST_HEAD(&log->sb_list);
1117	init_waitqueue_head(&log->syncwait);
1118
1119	/*
1120	 *	external log as separate logical volume
1121	 *
1122	 * file systems to log may have n-to-1 relationship;
1123	 */
1124
1125	bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1126				 log);
1127	if (IS_ERR(bdev)) {
1128		rc = PTR_ERR(bdev);
1129		goto free;
1130	}
1131
1132	log->bdev = bdev;
1133	memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid));
1134
1135	/*
1136	 * initialize log:
1137	 */
1138	if ((rc = lmLogInit(log)))
1139		goto close;
1140
1141	list_add(&log->journal_list, &jfs_external_logs);
1142
1143	/*
1144	 * add file system to log active file system list
1145	 */
1146	if ((rc = lmLogFileSystem(log, sbi, 1)))
1147		goto shutdown;
1148
1149journal_found:
1150	LOG_LOCK(log);
1151	list_add(&sbi->log_list, &log->sb_list);
1152	sbi->log = log;
1153	LOG_UNLOCK(log);
1154
1155	mutex_unlock(&jfs_log_mutex);
1156	return 0;
1157
1158	/*
1159	 *	unwind on error
1160	 */
1161      shutdown:		/* unwind lbmLogInit() */
1162	list_del(&log->journal_list);
1163	lbmLogShutdown(log);
1164
1165      close:		/* close external log device */
1166	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1167
1168      free:		/* free log descriptor */
1169	mutex_unlock(&jfs_log_mutex);
1170	kfree(log);
1171
1172	jfs_warn("lmLogOpen: exit(%d)", rc);
1173	return rc;
1174}
1175
1176static int open_inline_log(struct super_block *sb)
1177{
1178	struct jfs_log *log;
1179	int rc;
1180
1181	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1182		return -ENOMEM;
1183	INIT_LIST_HEAD(&log->sb_list);
1184	init_waitqueue_head(&log->syncwait);
1185
1186	set_bit(log_INLINELOG, &log->flag);
1187	log->bdev = sb->s_bdev;
1188	log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1189	log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1190	    (L2LOGPSIZE - sb->s_blocksize_bits);
1191	log->l2bsize = sb->s_blocksize_bits;
1192	ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1193
1194	/*
1195	 * initialize log.
1196	 */
1197	if ((rc = lmLogInit(log))) {
1198		kfree(log);
1199		jfs_warn("lmLogOpen: exit(%d)", rc);
1200		return rc;
1201	}
1202
1203	list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
1204	JFS_SBI(sb)->log = log;
1205
1206	return rc;
1207}
1208
1209static int open_dummy_log(struct super_block *sb)
1210{
1211	int rc;
1212
1213	mutex_lock(&jfs_log_mutex);
1214	if (!dummy_log) {
1215		dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
1216		if (!dummy_log) {
1217			mutex_unlock(&jfs_log_mutex);
1218			return -ENOMEM;
1219		}
1220		INIT_LIST_HEAD(&dummy_log->sb_list);
1221		init_waitqueue_head(&dummy_log->syncwait);
1222		dummy_log->no_integrity = 1;
1223		/* Make up some stuff */
1224		dummy_log->base = 0;
1225		dummy_log->size = 1024;
1226		rc = lmLogInit(dummy_log);
1227		if (rc) {
1228			kfree(dummy_log);
1229			dummy_log = NULL;
1230			mutex_unlock(&jfs_log_mutex);
1231			return rc;
1232		}
1233	}
1234
1235	LOG_LOCK(dummy_log);
1236	list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
1237	JFS_SBI(sb)->log = dummy_log;
1238	LOG_UNLOCK(dummy_log);
1239	mutex_unlock(&jfs_log_mutex);
1240
1241	return 0;
1242}
1243
1244/*
1245 * NAME:	lmLogInit()
1246 *
1247 * FUNCTION:	log initialization at first log open.
1248 *
1249 *	logredo() (or logformat()) should have been run previously.
1250 *	initialize the log from log superblock.
1251 *	set the log state in the superblock to LOGMOUNT and
1252 *	write SYNCPT log record.
1253 *
1254 * PARAMETER:	log	- log structure
1255 *
1256 * RETURN:	0	- if ok
1257 *		-EINVAL	- bad log magic number or superblock dirty
1258 *		error returned from logwait()
1259 *
1260 * serialization: single first open thread
1261 */
1262int lmLogInit(struct jfs_log * log)
1263{
1264	int rc = 0;
1265	struct lrd lrd;
1266	struct logsuper *logsuper;
1267	struct lbuf *bpsuper;
1268	struct lbuf *bp;
1269	struct logpage *lp;
1270	int lsn = 0;
1271
1272	jfs_info("lmLogInit: log:0x%p", log);
1273
1274	/* initialize the group commit serialization lock */
1275	LOGGC_LOCK_INIT(log);
1276
1277	/* allocate/initialize the log write serialization lock */
1278	LOG_LOCK_INIT(log);
1279
1280	LOGSYNC_LOCK_INIT(log);
1281
1282	INIT_LIST_HEAD(&log->synclist);
1283
1284	INIT_LIST_HEAD(&log->cqueue);
1285	log->flush_tblk = NULL;
1286
1287	log->count = 0;
1288
1289	/*
1290	 * initialize log i/o
1291	 */
1292	if ((rc = lbmLogInit(log)))
1293		return rc;
1294
1295	if (!test_bit(log_INLINELOG, &log->flag))
1296		log->l2bsize = L2LOGPSIZE;
1297
1298	/* check for disabled journaling to disk */
1299	if (log->no_integrity) {
1300		/*
1301		 * Journal pages will still be filled.  When the time comes
1302		 * to actually do the I/O, the write is not done, and the
1303		 * endio routine is called directly.
1304		 */
1305		bp = lbmAllocate(log , 0);
1306		log->bp = bp;
1307		bp->l_pn = bp->l_eor = 0;
1308	} else {
1309		/*
1310		 * validate log superblock
1311		 */
1312		if ((rc = lbmRead(log, 1, &bpsuper)))
1313			goto errout10;
1314
1315		logsuper = (struct logsuper *) bpsuper->l_ldata;
1316
1317		if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1318			jfs_warn("*** Log Format Error ! ***");
1319			rc = -EINVAL;
1320			goto errout20;
1321		}
1322
1323		/* logredo() should have been run successfully. */
1324		if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1325			jfs_warn("*** Log Is Dirty ! ***");
1326			rc = -EINVAL;
1327			goto errout20;
1328		}
1329
1330		/* initialize log from log superblock */
1331		if (test_bit(log_INLINELOG,&log->flag)) {
1332			if (log->size != le32_to_cpu(logsuper->size)) {
1333				rc = -EINVAL;
1334				goto errout20;
1335			}
1336			jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1337				 log, (unsigned long long)log->base, log->size);
 
1338		} else {
1339			if (memcmp(logsuper->uuid, log->uuid, 16)) {
1340				jfs_warn("wrong uuid on JFS log device");
1341				goto errout20;
1342			}
1343			log->size = le32_to_cpu(logsuper->size);
1344			log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1345			jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1346				 log, (unsigned long long)log->base, log->size);
 
1347		}
1348
1349		log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1350		log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1351
1352		/*
1353		 * initialize for log append write mode
1354		 */
1355		/* establish current/end-of-log page/buffer */
1356		if ((rc = lbmRead(log, log->page, &bp)))
1357			goto errout20;
1358
1359		lp = (struct logpage *) bp->l_ldata;
1360
1361		jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1362			 le32_to_cpu(logsuper->end), log->page, log->eor,
1363			 le16_to_cpu(lp->h.eor));
1364
1365		log->bp = bp;
1366		bp->l_pn = log->page;
1367		bp->l_eor = log->eor;
1368
1369		/* if current page is full, move on to next page */
1370		if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1371			lmNextPage(log);
1372
1373		/*
1374		 * initialize log syncpoint
1375		 */
1376		/*
1377		 * write the first SYNCPT record with syncpoint = 0
1378		 * (i.e., log redo up to HERE !);
1379		 * remove current page from lbm write queue at end of pageout
1380		 * (to write log superblock update), but do not release to
1381		 * freelist;
1382		 */
1383		lrd.logtid = 0;
1384		lrd.backchain = 0;
1385		lrd.type = cpu_to_le16(LOG_SYNCPT);
1386		lrd.length = 0;
1387		lrd.log.syncpt.sync = 0;
1388		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1389		bp = log->bp;
1390		bp->l_ceor = bp->l_eor;
1391		lp = (struct logpage *) bp->l_ldata;
1392		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1393		lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1394		if ((rc = lbmIOWait(bp, 0)))
1395			goto errout30;
1396
1397		/*
1398		 * update/write superblock
1399		 */
1400		logsuper->state = cpu_to_le32(LOGMOUNT);
1401		log->serial = le32_to_cpu(logsuper->serial) + 1;
1402		logsuper->serial = cpu_to_le32(log->serial);
1403		lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1404		if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1405			goto errout30;
1406	}
1407
1408	/* initialize logsync parameters */
1409	log->logsize = (log->size - 2) << L2LOGPSIZE;
1410	log->lsn = lsn;
1411	log->syncpt = lsn;
1412	log->sync = log->syncpt;
1413	log->nextsync = LOGSYNC_DELTA(log->logsize);
1414
1415	jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1416		 log->lsn, log->syncpt, log->sync);
1417
1418	/*
1419	 * initialize for lazy/group commit
1420	 */
1421	log->clsn = lsn;
1422
1423	return 0;
1424
1425	/*
1426	 *	unwind on error
1427	 */
1428      errout30:		/* release log page */
1429	log->wqueue = NULL;
1430	bp->l_wqnext = NULL;
1431	lbmFree(bp);
1432
1433      errout20:		/* release log superblock */
1434	lbmFree(bpsuper);
1435
1436      errout10:		/* unwind lbmLogInit() */
1437	lbmLogShutdown(log);
1438
1439	jfs_warn("lmLogInit: exit(%d)", rc);
1440	return rc;
1441}
1442
1443
1444/*
1445 * NAME:	lmLogClose()
1446 *
1447 * FUNCTION:	remove file system <ipmnt> from active list of log <iplog>
1448 *		and close it on last close.
1449 *
1450 * PARAMETER:	sb	- superblock
1451 *
1452 * RETURN:	errors from subroutines
1453 *
1454 * serialization:
1455 */
1456int lmLogClose(struct super_block *sb)
1457{
1458	struct jfs_sb_info *sbi = JFS_SBI(sb);
1459	struct jfs_log *log = sbi->log;
1460	struct block_device *bdev;
1461	int rc = 0;
1462
1463	jfs_info("lmLogClose: log:0x%p", log);
1464
1465	mutex_lock(&jfs_log_mutex);
1466	LOG_LOCK(log);
1467	list_del(&sbi->log_list);
1468	LOG_UNLOCK(log);
1469	sbi->log = NULL;
1470
1471	/*
1472	 * We need to make sure all of the "written" metapages
1473	 * actually make it to disk
1474	 */
1475	sync_blockdev(sb->s_bdev);
1476
1477	if (test_bit(log_INLINELOG, &log->flag)) {
1478		/*
1479		 *	in-line log in host file system
1480		 */
1481		rc = lmLogShutdown(log);
1482		kfree(log);
1483		goto out;
1484	}
1485
1486	if (!log->no_integrity)
1487		lmLogFileSystem(log, sbi, 0);
1488
1489	if (!list_empty(&log->sb_list))
1490		goto out;
1491
1492	/*
1493	 * TODO: ensure that the dummy_log is in a state to allow
1494	 * lbmLogShutdown to deallocate all the buffers and call
1495	 * kfree against dummy_log.  For now, leave dummy_log & its
1496	 * buffers in memory, and resuse if another no-integrity mount
1497	 * is requested.
1498	 */
1499	if (log->no_integrity)
1500		goto out;
1501
1502	/*
1503	 *	external log as separate logical volume
1504	 */
1505	list_del(&log->journal_list);
1506	bdev = log->bdev;
1507	rc = lmLogShutdown(log);
1508
1509	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1510
1511	kfree(log);
1512
1513      out:
1514	mutex_unlock(&jfs_log_mutex);
1515	jfs_info("lmLogClose: exit(%d)", rc);
1516	return rc;
1517}
1518
1519
1520/*
1521 * NAME:	jfs_flush_journal()
1522 *
1523 * FUNCTION:	initiate write of any outstanding transactions to the journal
1524 *		and optionally wait until they are all written to disk
1525 *
1526 *		wait == 0  flush until latest txn is committed, don't wait
1527 *		wait == 1  flush until latest txn is committed, wait
1528 *		wait > 1   flush until all txn's are complete, wait
1529 */
1530void jfs_flush_journal(struct jfs_log *log, int wait)
1531{
1532	int i;
1533	struct tblock *target = NULL;
1534
1535	/* jfs_write_inode may call us during read-only mount */
1536	if (!log)
1537		return;
1538
1539	jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);
1540
1541	LOGGC_LOCK(log);
1542
1543	if (!list_empty(&log->cqueue)) {
1544		/*
1545		 * This ensures that we will keep writing to the journal as long
1546		 * as there are unwritten commit records
1547		 */
1548		target = list_entry(log->cqueue.prev, struct tblock, cqueue);
1549
1550		if (test_bit(log_FLUSH, &log->flag)) {
1551			/*
1552			 * We're already flushing.
1553			 * if flush_tblk is NULL, we are flushing everything,
1554			 * so leave it that way.  Otherwise, update it to the
1555			 * latest transaction
1556			 */
1557			if (log->flush_tblk)
1558				log->flush_tblk = target;
1559		} else {
1560			/* Only flush until latest transaction is committed */
1561			log->flush_tblk = target;
1562			set_bit(log_FLUSH, &log->flag);
1563
1564			/*
1565			 * Initiate I/O on outstanding transactions
1566			 */
1567			if (!(log->cflag & logGC_PAGEOUT)) {
1568				log->cflag |= logGC_PAGEOUT;
1569				lmGCwrite(log, 0);
1570			}
1571		}
1572	}
1573	if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
1574		/* Flush until all activity complete */
1575		set_bit(log_FLUSH, &log->flag);
1576		log->flush_tblk = NULL;
1577	}
1578
1579	if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
1580		DECLARE_WAITQUEUE(__wait, current);
1581
1582		add_wait_queue(&target->gcwait, &__wait);
1583		set_current_state(TASK_UNINTERRUPTIBLE);
1584		LOGGC_UNLOCK(log);
1585		schedule();
1586		LOGGC_LOCK(log);
1587		remove_wait_queue(&target->gcwait, &__wait);
1588	}
1589	LOGGC_UNLOCK(log);
1590
1591	if (wait < 2)
1592		return;
1593
1594	write_special_inodes(log, filemap_fdatawrite);
1595
1596	/*
1597	 * If there was recent activity, we may need to wait
1598	 * for the lazycommit thread to catch up
1599	 */
1600	if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
1601		for (i = 0; i < 200; i++) {	/* Too much? */
1602			msleep(250);
1603			write_special_inodes(log, filemap_fdatawrite);
1604			if (list_empty(&log->cqueue) &&
1605			    list_empty(&log->synclist))
1606				break;
1607		}
1608	}
1609	assert(list_empty(&log->cqueue));
1610
1611#ifdef CONFIG_JFS_DEBUG
1612	if (!list_empty(&log->synclist)) {
1613		struct logsyncblk *lp;
1614
1615		printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
1616		list_for_each_entry(lp, &log->synclist, synclist) {
1617			if (lp->xflag & COMMIT_PAGE) {
1618				struct metapage *mp = (struct metapage *)lp;
1619				print_hex_dump(KERN_ERR, "metapage: ",
1620					       DUMP_PREFIX_ADDRESS, 16, 4,
1621					       mp, sizeof(struct metapage), 0);
1622				print_hex_dump(KERN_ERR, "page: ",
1623					       DUMP_PREFIX_ADDRESS, 16,
1624					       sizeof(long), mp->page,
1625					       sizeof(struct page), 0);
1626			} else
1627				print_hex_dump(KERN_ERR, "tblock:",
1628					       DUMP_PREFIX_ADDRESS, 16, 4,
1629					       lp, sizeof(struct tblock), 0);
1630		}
1631	}
1632#else
1633	WARN_ON(!list_empty(&log->synclist));
1634#endif
1635	clear_bit(log_FLUSH, &log->flag);
1636}
1637
1638/*
1639 * NAME:	lmLogShutdown()
1640 *
1641 * FUNCTION:	log shutdown at last LogClose().
1642 *
1643 *		write log syncpt record.
1644 *		update super block to set redone flag to 0.
1645 *
1646 * PARAMETER:	log	- log inode
1647 *
1648 * RETURN:	0	- success
1649 *
1650 * serialization: single last close thread
1651 */
1652int lmLogShutdown(struct jfs_log * log)
1653{
1654	int rc;
1655	struct lrd lrd;
1656	int lsn;
1657	struct logsuper *logsuper;
1658	struct lbuf *bpsuper;
1659	struct lbuf *bp;
1660	struct logpage *lp;
1661
1662	jfs_info("lmLogShutdown: log:0x%p", log);
1663
1664	jfs_flush_journal(log, 2);
1665
1666	/*
1667	 * write the last SYNCPT record with syncpoint = 0
1668	 * (i.e., log redo up to HERE !)
1669	 */
1670	lrd.logtid = 0;
1671	lrd.backchain = 0;
1672	lrd.type = cpu_to_le16(LOG_SYNCPT);
1673	lrd.length = 0;
1674	lrd.log.syncpt.sync = 0;
1675
1676	lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1677	bp = log->bp;
1678	lp = (struct logpage *) bp->l_ldata;
1679	lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1680	lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
1681	lbmIOWait(log->bp, lbmFREE);
1682	log->bp = NULL;
1683
1684	/*
1685	 * synchronous update log superblock
1686	 * mark log state as shutdown cleanly
1687	 * (i.e., Log does not need to be replayed).
1688	 */
1689	if ((rc = lbmRead(log, 1, &bpsuper)))
1690		goto out;
1691
1692	logsuper = (struct logsuper *) bpsuper->l_ldata;
1693	logsuper->state = cpu_to_le32(LOGREDONE);
1694	logsuper->end = cpu_to_le32(lsn);
1695	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1696	rc = lbmIOWait(bpsuper, lbmFREE);
1697
1698	jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
1699		 lsn, log->page, log->eor);
1700
1701      out:
1702	/*
1703	 * shutdown per log i/o
1704	 */
1705	lbmLogShutdown(log);
1706
1707	if (rc) {
1708		jfs_warn("lmLogShutdown: exit(%d)", rc);
1709	}
1710	return rc;
1711}
1712
1713
1714/*
1715 * NAME:	lmLogFileSystem()
1716 *
1717 * FUNCTION:	insert (<activate> = true)/remove (<activate> = false)
1718 *	file system into/from log active file system list.
1719 *
1720 * PARAMETE:	log	- pointer to logs inode.
1721 *		fsdev	- kdev_t of filesystem.
1722 *		serial	- pointer to returned log serial number
1723 *		activate - insert/remove device from active list.
1724 *
1725 * RETURN:	0	- success
1726 *		errors returned by vms_iowait().
1727 */
1728static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
1729			   int activate)
1730{
1731	int rc = 0;
1732	int i;
1733	struct logsuper *logsuper;
1734	struct lbuf *bpsuper;
1735	char *uuid = sbi->uuid;
1736
1737	/*
1738	 * insert/remove file system device to log active file system list.
1739	 */
1740	if ((rc = lbmRead(log, 1, &bpsuper)))
1741		return rc;
1742
1743	logsuper = (struct logsuper *) bpsuper->l_ldata;
1744	if (activate) {
1745		for (i = 0; i < MAX_ACTIVE; i++)
1746			if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) {
1747				memcpy(logsuper->active[i].uuid, uuid, 16);
1748				sbi->aggregate = i;
1749				break;
1750			}
1751		if (i == MAX_ACTIVE) {
1752			jfs_warn("Too many file systems sharing journal!");
1753			lbmFree(bpsuper);
1754			return -EMFILE;	/* Is there a better rc? */
1755		}
1756	} else {
1757		for (i = 0; i < MAX_ACTIVE; i++)
1758			if (!memcmp(logsuper->active[i].uuid, uuid, 16)) {
1759				memcpy(logsuper->active[i].uuid, NULL_UUID, 16);
1760				break;
1761			}
1762		if (i == MAX_ACTIVE) {
1763			jfs_warn("Somebody stomped on the journal!");
1764			lbmFree(bpsuper);
1765			return -EIO;
1766		}
1767
1768	}
1769
1770	/*
1771	 * synchronous write log superblock:
1772	 *
1773	 * write sidestream bypassing write queue:
1774	 * at file system mount, log super block is updated for
1775	 * activation of the file system before any log record
1776	 * (MOUNT record) of the file system, and at file system
1777	 * unmount, all meta data for the file system has been
1778	 * flushed before log super block is updated for deactivation
1779	 * of the file system.
1780	 */
1781	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1782	rc = lbmIOWait(bpsuper, lbmFREE);
1783
1784	return rc;
1785}
1786
1787/*
1788 *		log buffer manager (lbm)
1789 *		------------------------
1790 *
1791 * special purpose buffer manager supporting log i/o requirements.
1792 *
1793 * per log write queue:
1794 * log pageout occurs in serial order by fifo write queue and
1795 * restricting to a single i/o in pregress at any one time.
1796 * a circular singly-linked list
1797 * (log->wrqueue points to the tail, and buffers are linked via
1798 * bp->wrqueue field), and
1799 * maintains log page in pageout ot waiting for pageout in serial pageout.
1800 */
1801
1802/*
1803 *	lbmLogInit()
1804 *
1805 * initialize per log I/O setup at lmLogInit()
1806 */
1807static int lbmLogInit(struct jfs_log * log)
1808{				/* log inode */
1809	int i;
1810	struct lbuf *lbuf;
1811
1812	jfs_info("lbmLogInit: log:0x%p", log);
1813
1814	/* initialize current buffer cursor */
1815	log->bp = NULL;
1816
1817	/* initialize log device write queue */
1818	log->wqueue = NULL;
1819
1820	/*
1821	 * Each log has its own buffer pages allocated to it.  These are
1822	 * not managed by the page cache.  This ensures that a transaction
1823	 * writing to the log does not block trying to allocate a page from
1824	 * the page cache (for the log).  This would be bad, since page
1825	 * allocation waits on the kswapd thread that may be committing inodes
1826	 * which would cause log activity.  Was that clear?  I'm trying to
1827	 * avoid deadlock here.
1828	 */
1829	init_waitqueue_head(&log->free_wait);
1830
1831	log->lbuf_free = NULL;
1832
1833	for (i = 0; i < LOGPAGES;) {
1834		char *buffer;
1835		uint offset;
1836		struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
1837
1838		if (!page)
1839			goto error;
1840		buffer = page_address(page);
1841		for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
1842			lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1843			if (lbuf == NULL) {
1844				if (offset == 0)
1845					__free_page(page);
1846				goto error;
1847			}
1848			if (offset) /* we already have one reference */
1849				get_page(page);
1850			lbuf->l_offset = offset;
1851			lbuf->l_ldata = buffer + offset;
1852			lbuf->l_page = page;
1853			lbuf->l_log = log;
1854			init_waitqueue_head(&lbuf->l_ioevent);
1855
1856			lbuf->l_freelist = log->lbuf_free;
1857			log->lbuf_free = lbuf;
1858			i++;
1859		}
1860	}
1861
1862	return (0);
1863
1864      error:
1865	lbmLogShutdown(log);
1866	return -ENOMEM;
1867}
1868
1869
1870/*
1871 *	lbmLogShutdown()
1872 *
1873 * finalize per log I/O setup at lmLogShutdown()
1874 */
1875static void lbmLogShutdown(struct jfs_log * log)
1876{
1877	struct lbuf *lbuf;
1878
1879	jfs_info("lbmLogShutdown: log:0x%p", log);
1880
1881	lbuf = log->lbuf_free;
1882	while (lbuf) {
1883		struct lbuf *next = lbuf->l_freelist;
1884		__free_page(lbuf->l_page);
1885		kfree(lbuf);
1886		lbuf = next;
1887	}
1888}
1889
1890
1891/*
1892 *	lbmAllocate()
1893 *
1894 * allocate an empty log buffer
1895 */
1896static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1897{
1898	struct lbuf *bp;
1899	unsigned long flags;
1900
1901	/*
1902	 * recycle from log buffer freelist if any
1903	 */
1904	LCACHE_LOCK(flags);
1905	LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1906	log->lbuf_free = bp->l_freelist;
1907	LCACHE_UNLOCK(flags);
1908
1909	bp->l_flag = 0;
1910
1911	bp->l_wqnext = NULL;
1912	bp->l_freelist = NULL;
1913
1914	bp->l_pn = pn;
1915	bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1916	bp->l_ceor = 0;
1917
1918	return bp;
1919}
1920
1921
1922/*
1923 *	lbmFree()
1924 *
1925 * release a log buffer to freelist
1926 */
1927static void lbmFree(struct lbuf * bp)
1928{
1929	unsigned long flags;
1930
1931	LCACHE_LOCK(flags);
1932
1933	lbmfree(bp);
1934
1935	LCACHE_UNLOCK(flags);
1936}
1937
1938static void lbmfree(struct lbuf * bp)
1939{
1940	struct jfs_log *log = bp->l_log;
1941
1942	assert(bp->l_wqnext == NULL);
1943
1944	/*
1945	 * return the buffer to head of freelist
1946	 */
1947	bp->l_freelist = log->lbuf_free;
1948	log->lbuf_free = bp;
1949
1950	wake_up(&log->free_wait);
1951	return;
1952}
1953
1954
1955/*
1956 * NAME:	lbmRedrive
1957 *
1958 * FUNCTION:	add a log buffer to the log redrive list
1959 *
1960 * PARAMETER:
1961 *	bp	- log buffer
1962 *
1963 * NOTES:
1964 *	Takes log_redrive_lock.
1965 */
1966static inline void lbmRedrive(struct lbuf *bp)
1967{
1968	unsigned long flags;
1969
1970	spin_lock_irqsave(&log_redrive_lock, flags);
1971	bp->l_redrive_next = log_redrive_list;
1972	log_redrive_list = bp;
1973	spin_unlock_irqrestore(&log_redrive_lock, flags);
1974
1975	wake_up_process(jfsIOthread);
1976}
1977
1978
1979/*
1980 *	lbmRead()
1981 */
1982static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1983{
1984	struct bio *bio;
1985	struct lbuf *bp;
1986
1987	/*
1988	 * allocate a log buffer
1989	 */
1990	*bpp = bp = lbmAllocate(log, pn);
1991	jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1992
1993	bp->l_flag |= lbmREAD;
1994
1995	bio = bio_alloc(GFP_NOFS, 1);
1996
1997	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
1998	bio_set_dev(bio, log->bdev);
1999
2000	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
2001	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
2002
2003	bio->bi_end_io = lbmIODone;
2004	bio->bi_private = bp;
2005	bio->bi_opf = REQ_OP_READ;
2006	/*check if journaling to disk has been disabled*/
2007	if (log->no_integrity) {
2008		bio->bi_iter.bi_size = 0;
2009		lbmIODone(bio);
2010	} else {
2011		submit_bio(bio);
2012	}
2013
2014	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
2015
2016	return 0;
2017}
2018
2019
2020/*
2021 *	lbmWrite()
2022 *
2023 * the buffer at the head of the pageout queue stays there after
2024 * completion of a partial-page pageout and is redriven, by explicit
2025 * initiation of pageout by the caller, until its full-page pageout
2026 * completes and the buffer is released.
2027 *
2028 * the device driver i/o-done handler redrives pageout of the new
2029 * buffer at the head of the pageout queue when the current head
2030 * buffer is released at the completion of its full-page pageout.
2031 *
2032 * LOGGC_LOCK() serializes calls to lbmWrite() from lmNextPage() and lmGroupCommit().
2033 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2034 */
2035static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2036		     int cant_block)
2037{
2038	struct lbuf *tail;
2039	unsigned long flags;
2040
2041	jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
2042
2043	/* map the logical block address to physical block address */
2044	bp->l_blkno =
2045	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2046
2047	LCACHE_LOCK(flags);		/* disable+lock */
2048
2049	/*
2050	 * initialize buffer for device driver
2051	 */
2052	bp->l_flag = flag;
2053
2054	/*
2055	 *	insert bp at tail of write queue associated with log
2056	 *
2057	 * (request is either for bp already/currently at head of queue
2058	 * or new bp to be inserted at tail)
2059	 */
2060	tail = log->wqueue;
2061
2062	/* is buffer not already on write queue ? */
2063	if (bp->l_wqnext == NULL) {
2064		/* insert at tail of wqueue */
2065		if (tail == NULL) {
2066			log->wqueue = bp;
2067			bp->l_wqnext = bp;
2068		} else {
2069			log->wqueue = bp;
2070			bp->l_wqnext = tail->l_wqnext;
2071			tail->l_wqnext = bp;
2072		}
2073
2074		tail = bp;
2075	}
2076
2077	/* is buffer at head of wqueue and for write ? */
2078	if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
2079		LCACHE_UNLOCK(flags);	/* unlock+enable */
2080		return;
2081	}
2082
2083	LCACHE_UNLOCK(flags);	/* unlock+enable */
2084
2085	if (cant_block)
2086		lbmRedrive(bp);
2087	else if (flag & lbmSYNC)
2088		lbmStartIO(bp);
2089	else {
2090		LOGGC_UNLOCK(log);
2091		lbmStartIO(bp);
2092		LOGGC_LOCK(log);
2093	}
2094}
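/*
 * Shape of the write queue built above (illustration): log->wqueue
 * points at the tail buffer and the list is circular, so
 * tail->l_wqnext is the head.  With buffers queued in order A, B, C:
 *
 *	log->wqueue --> C --> A --> B --> C	(l_wqnext links)
 *
 * hence the head test (bp == tail->l_wqnext); pageout is initiated
 * only for the head buffer, and only when lbmWRITE is set.
 */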
2095
2096
2097/*
2098 *	lbmDirectWrite()
2099 *
2100 * initiate pageout, bypassing the write queue, for a sidestream
2101 * write (e.g., the log superblock);
2102 */
2103static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2104{
2105	jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2106		 bp, flag, bp->l_pn);
2107
2108	/*
2109	 * initialize buffer for device driver
2110	 */
2111	bp->l_flag = flag | lbmDIRECT;
2112
2113	/* map the logical block address to physical block address */
2114	bp->l_blkno =
2115	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2116
2117	/*
2118	 *	initiate pageout of the page
2119	 */
2120	lbmStartIO(bp);
2121}
2122
2123
2124/*
2125 * NAME:	lbmStartIO()
2126 *
2127 * FUNCTION:	Interface to DD strategy routine
2128 *
2129 * RETURN:	none
2130 *
2131 * serialization: LCACHE_LOCK() is NOT held during log i/o;
2132 */
2133static void lbmStartIO(struct lbuf * bp)
2134{
2135	struct bio *bio;
2136	struct jfs_log *log = bp->l_log;
2137
2138	jfs_info("lbmStartIO");
2139
2140	bio = bio_alloc(GFP_NOFS, 1);
2141	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2142	bio_set_dev(bio, log->bdev);
2143
2144	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
2145	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
2146
2147	bio->bi_end_io = lbmIODone;
2148	bio->bi_private = bp;
2149	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
2150
2151	/* check if journaling to disk has been disabled */
2152	if (log->no_integrity) {
2153		bio->bi_iter.bi_size = 0;
2154		lbmIODone(bio);
2155	} else {
2156		submit_bio(bio);
2157		INCREMENT(lmStat.submitted);
2158	}
2159}
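/*
 * For illustration: bi_sector is in 512-byte units, so the block
 * number above is converted by shifting left by (l2bsize - 9).  For
 * an inline log on a 4KB-block file system (l2bsize = 12), l_blkno =
 * 0x20 becomes sector 0x20 << 3 = 0x100.
 */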
2160
2161
2162/*
2163 *	lbmIOWait()
2164 */
2165static int lbmIOWait(struct lbuf * bp, int flag)
2166{
2167	unsigned long flags;
2168	int rc = 0;
2169
2170	jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2171
2172	LCACHE_LOCK(flags);		/* disable+lock */
2173
2174	LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2175
2176	rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2177
2178	if (flag & lbmFREE)
2179		lbmfree(bp);
2180
2181	LCACHE_UNLOCK(flags);	/* unlock+enable */
2182
2183	jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2184	return rc;
2185}
2186
2187/*
2188 *	lbmIODone()
2189 *
2190 * executed at i/o-completion (interrupt) time (historically, INTIODONE level)
2191 */
2192static void lbmIODone(struct bio *bio)
2193{
2194	struct lbuf *bp = bio->bi_private;
2195	struct lbuf *nextbp, *tail;
2196	struct jfs_log *log;
2197	unsigned long flags;
2198
2199	/*
2200	 * get back jfs buffer bound to the i/o buffer
2201	 */
2202	jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
2203
2204	LCACHE_LOCK(flags);		/* disable+lock */
2205
2206	bp->l_flag |= lbmDONE;
2207
2208	if (bio->bi_status) {
2209		bp->l_flag |= lbmERROR;
2210
2211		jfs_err("lbmIODone: I/O error in JFS log");
2212	}
2213
2214	bio_put(bio);
2215
2216	/*
2217	 *	pagein completion
2218	 */
2219	if (bp->l_flag & lbmREAD) {
2220		bp->l_flag &= ~lbmREAD;
2221
2222		LCACHE_UNLOCK(flags);	/* unlock+enable */
2223
2224		/* wakeup I/O initiator */
2225		LCACHE_WAKEUP(&bp->l_ioevent);
2226
2227		return;
2228	}
2229
2230	/*
2231	 *	pageout completion
2232	 *
2233	 * the bp at the head of write queue has completed pageout.
2234	 *
2235	 * if single-commit/full-page pageout, remove the current buffer
2236	 * from head of pageout queue, and redrive pageout with
2237	 * the new buffer at head of pageout queue;
2238	 * otherwise, the partial-page pageout buffer stays at
2239	 * the head of pageout queue to be redriven for pageout
2240	 * by lmGroupCommit() until full-page pageout is completed.
2241	 */
2242	bp->l_flag &= ~lbmWRITE;
2243	INCREMENT(lmStat.pagedone);
2244
2245	/* update committed lsn */
2246	log = bp->l_log;
2247	log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
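	/*
	 * e.g., with L2LOGPSIZE = 12, page 5 with commit-eor 0x58 yields
	 * clsn = (5 << 12) + 0x58 = 0x5058, the log-relative byte address
	 * of the end of the last committed record.
	 */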
2248
2249	if (bp->l_flag & lbmDIRECT) {
2250		LCACHE_WAKEUP(&bp->l_ioevent);
2251		LCACHE_UNLOCK(flags);
2252		return;
2253	}
2254
2255	tail = log->wqueue;
2256
2257	/* single element queue */
2258	if (bp == tail) {
2259		/* remove head buffer of full-page pageout
2260		 * from log device write queue
2261		 */
2262		if (bp->l_flag & lbmRELEASE) {
2263			log->wqueue = NULL;
2264			bp->l_wqnext = NULL;
2265		}
2266	}
2267	/* multi element queue */
2268	else {
2269		/* remove head buffer of full-page pageout
2270		 * from log device write queue
2271		 */
2272		if (bp->l_flag & lbmRELEASE) {
2273			nextbp = tail->l_wqnext = bp->l_wqnext;
2274			bp->l_wqnext = NULL;
2275
2276			/*
2277			 * redrive pageout of the next page at the head of the
2278			 * write queue: either a page without any bound tblk
2279			 * (i.e., a page with no COMMIT records), or the first
2280			 * page of a new group commit which lmGroupCommit()
2281			 * queued behind the current page, as indicated by the
2282			 * lbmWRITE flag (subsequent pageout is performed
2283			 * synchronously, except for pages without any
2284			 * COMMITs);
2285			 */
2286			if (nextbp->l_flag & lbmWRITE) {
2287				/*
2288				 * We can't do the I/O at interrupt time.
2289				 * The jfsIO thread can do it
2290				 */
2291				lbmRedrive(nextbp);
2292			}
2293		}
2294	}
2295
2296	/*
2297	 *	synchronous pageout:
2298	 *
2299	 * buffer has not necessarily been removed from write queue
2300	 * (e.g., synchronous write of partial-page with COMMIT):
2301	 * leave buffer for i/o initiator to dispose
2302	 */
2303	if (bp->l_flag & lbmSYNC) {
2304		LCACHE_UNLOCK(flags);	/* unlock+enable */
2305
2306		/* wakeup I/O initiator */
2307		LCACHE_WAKEUP(&bp->l_ioevent);
2308	}
2309
2310	/*
2311	 *	Group Commit pageout:
2312	 */
2313	else if (bp->l_flag & lbmGC) {
2314		LCACHE_UNLOCK(flags);
2315		lmPostGC(bp);
2316	}
2317
2318	/*
2319	 *	asynchronous pageout:
2320	 *
2321	 * buffer must have been removed from write queue:
2322	 * insert buffer at head of freelist where it can be recycled
2323	 */
2324	else {
2325		assert(bp->l_flag & lbmRELEASE);
2326		assert(bp->l_flag & lbmFREE);
2327		lbmfree(bp);
2328
2329		LCACHE_UNLOCK(flags);	/* unlock+enable */
2330	}
2331}
2332
2333int jfsIOWait(void *arg)
2334{
2335	struct lbuf *bp;
2336
2337	do {
2338		spin_lock_irq(&log_redrive_lock);
2339		while ((bp = log_redrive_list)) {
2340			log_redrive_list = bp->l_redrive_next;
2341			bp->l_redrive_next = NULL;
2342			spin_unlock_irq(&log_redrive_lock);
2343			lbmStartIO(bp);
2344			spin_lock_irq(&log_redrive_lock);
2345		}
2346
2347		if (freezing(current)) {
2348			spin_unlock_irq(&log_redrive_lock);
2349			try_to_freeze();
2350		} else {
2351			set_current_state(TASK_INTERRUPTIBLE);
2352			spin_unlock_irq(&log_redrive_lock);
2353			schedule();
2354		}
2355	} while (!kthread_should_stop());
2356
2357	jfs_info("jfsIOWait being killed!");
2358	return 0;
2359}
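/*
 * jfsIOWait() is the body of the jfsIO kernel thread: lbmIODone() runs
 * in bio-completion (interrupt) context where it must not submit new
 * I/O, so lbmRedrive() chains the buffer onto log_redrive_list and
 * wakes this thread, which re-issues the write via lbmStartIO() from
 * process context.
 */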
2360
2361/*
2362 * NAME:	lmLogFormat()/jfs_logform()
2363 *
2364 * FUNCTION:	format file system log
2365 *
2366 * PARAMETERS:
2367 *	log	- volume log
2368 *	logAddress - start address of log space in FS block
2369 *	logSize	- length of log space in FS block;
2370 *
2371 * RETURN:	0	- success
2372 *		-EIO	- i/o error
2373 *
2374 * XXX: We're synchronously writing one page at a time.  This needs to
2375 *	be improved by writing multiple pages at once.
2376 */
2377int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2378{
2379	int rc = -EIO;
2380	struct jfs_sb_info *sbi;
2381	struct logsuper *logsuper;
2382	struct logpage *lp;
2383	int lspn;		/* log sequence page number */
2384	struct lrd *lrd_ptr;
2385	int npages = 0;
2386	struct lbuf *bp;
2387
2388	jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
2389		 (long long)logAddress, logSize);
2390
2391	sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);
2392
2393	/* allocate a log buffer */
2394	bp = lbmAllocate(log, 1);
2395
2396	npages = logSize >> sbi->l2nbperpage;
2397
2398	/*
2399	 *	log space:
2400	 *
2401	 * page 0 - reserved;
2402	 * page 1 - log superblock;
2403	 * page 2 - log data page: A SYNC log record is written
2404	 *	    into this page at logform time;
2405	 * pages 3-N - log data page: set to empty log data pages;
2406	 */
2407	/*
2408	 *	init log superblock: log page 1
2409	 */
2410	logsuper = (struct logsuper *) bp->l_ldata;
2411
2412	logsuper->magic = cpu_to_le32(LOGMAGIC);
2413	logsuper->version = cpu_to_le32(LOGVERSION);
2414	logsuper->state = cpu_to_le32(LOGREDONE);
2415	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
2416	logsuper->size = cpu_to_le32(npages);
2417	logsuper->bsize = cpu_to_le32(sbi->bsize);
2418	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
2419	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
2420
2421	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2422	bp->l_blkno = logAddress + sbi->nbperpage;
2423	lbmStartIO(bp);
2424	if ((rc = lbmIOWait(bp, 0)))
2425		goto exit;
2426
2427	/*
2428	 *	init pages 2 to npages-1 as log data pages:
2429	 *
2430	 * log page sequence number (lspn) initialization:
2431	 *
2432	 * pn:   0     1     2     3                 n-1
2433	 *       +-----+-----+=====+=====+===.....===+=====+
2434	 * lspn:             N-1   0     1           N-2
2435	 *                   <--- N page circular file ---->
2436	 *
2437	 * the N (= npages-2) data pages of the log are maintained as
2438	 * a circular file for the log records;
2439	 * the lspn grows by 1 monotonically as each log page is written
2440	 * to the circular file of the log;
2441	 * and setLogpage() will not reset the page number even if
2442	 * the eor is equal to LOGPHDRSIZE. In order for the binary search
2443	 * to still work in the find-log-end process, we have to simulate
2444	 * the log-wrap situation at log format time.
2445	 * The 1st log page written will have the highest lspn. Then
2446	 * the succeeding log pages will have lspn values in ascending
2447	 * order, from 0 through (N-2)
2448	 */
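	/*
	 * Worked example (illustrative): formatting a log of logSize =
	 * 8192 blocks on a 512-byte-block file system (l2nbperpage = 3)
	 * gives npages = 8192 >> 3 = 1024, hence N = 1022 data pages;
	 * the first data page (pn = 2) is written with lspn = N - 1 =
	 * 1021, and pages 3..1023 with lspn = 0..1020, simulating a log
	 * that has already wrapped once.
	 */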
2449	lp = (struct logpage *) bp->l_ldata;
2450	/*
2451	 * initialize the 1st log page to be written: lspn = N - 1;
2452	 * a SYNCPT log record is written to this page
2453	 */
2454	lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
2455	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
2456
2457	lrd_ptr = (struct lrd *) &lp->data;
2458	lrd_ptr->logtid = 0;
2459	lrd_ptr->backchain = 0;
2460	lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
2461	lrd_ptr->length = 0;
2462	lrd_ptr->log.syncpt.sync = 0;
2463
2464	bp->l_blkno += sbi->nbperpage;
2465	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2466	lbmStartIO(bp);
2467	if ((rc = lbmIOWait(bp, 0)))
2468		goto exit;
2469
2470	/*
2471	 *	initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
2472	 */
2473	for (lspn = 0; lspn < npages - 3; lspn++) {
2474		lp->h.page = lp->t.page = cpu_to_le32(lspn);
2475		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
2476
2477		bp->l_blkno += sbi->nbperpage;
2478		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2479		lbmStartIO(bp);
2480		if ((rc = lbmIOWait(bp, 0)))
2481			goto exit;
2482	}
2483
2484	rc = 0;
2485exit:
2486	/*
2487	 *	finalize log
2488	 */
2489	/* release the buffer */
2490	lbmFree(bp);
2491
2492	return rc;
2493}
2494
2495#ifdef CONFIG_JFS_STATISTICS
2496static int jfs_lmstats_proc_show(struct seq_file *m, void *v)
2497{
2498	seq_printf(m,
2499		       "JFS Logmgr stats\n"
2500		       "================\n"
2501		       "commits = %d\n"
2502		       "writes submitted = %d\n"
2503		       "writes completed = %d\n"
2504		       "full pages submitted = %d\n"
2505		       "partial pages submitted = %d\n",
2506		       lmStat.commit,
2507		       lmStat.submitted,
2508		       lmStat.pagedone,
2509		       lmStat.full_page,
2510		       lmStat.partial_page);
2511	return 0;
2512}
2513
2514static int jfs_lmstats_proc_open(struct inode *inode, struct file *file)
2515{
2516	return single_open(file, jfs_lmstats_proc_show, NULL);
2517}
2518
2519	const struct file_operations jfs_lmstats_proc_fops = {
2520	.open		= jfs_lmstats_proc_open,
2521	.read		= seq_read,
2522	.llseek		= seq_lseek,
2523	.release	= single_release,
2524};
2525#endif /* CONFIG_JFS_STATISTICS */
v4.6
   1/*
   2 *   Copyright (C) International Business Machines Corp., 2000-2004
   3 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   4 *
   5 *   This program is free software;  you can redistribute it and/or modify
   6 *   it under the terms of the GNU General Public License as published by
   7 *   the Free Software Foundation; either version 2 of the License, or
   8 *   (at your option) any later version.
   9 *
  10 *   This program is distributed in the hope that it will be useful,
  11 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  13 *   the GNU General Public License for more details.
  14 *
  15 *   You should have received a copy of the GNU General Public License
  16 *   along with this program;  if not, write to the Free Software
  17 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18 */
  19
  20/*
  21 *	jfs_logmgr.c: log manager
  22 *
  23 * for related information, see transaction manager (jfs_txnmgr.c), and
  24 * recovery manager (jfs_logredo.c).
  25 *
  26 * note: for detail, RTFS.
  27 *
  28 *	log buffer manager:
  29 * special purpose buffer manager supporting log i/o requirements.
  30 * per log serial pageout of logpage
  31 * queuing i/o requests and redrive i/o at iodone
  32 * maintain current logpage buffer
  33 * no caching since append only
  34 * appropriate jfs buffer cache buffers as needed
  35 *
  36 *	group commit:
  37 * transactions which wrote COMMIT records in the same in-memory
  38 * log page during the pageout of previous/current log page(s) are
  39 * committed together by the pageout of the page.
  40 *
  41 *	TBD lazy commit:
  42 * transactions are committed asynchronously when the log page
  43 * containing it COMMIT is paged out when it becomes full;
  44 *
  45 *	serialization:
  46 * . a per log lock serialize log write.
  47 * . a per log lock serialize group commit.
  48 * . a per log lock serialize log open/close;
  49 *
  50 *	TBD log integrity:
  51 * careful-write (ping-pong) of last logpage to recover from crash
  52 * in overwrite.
  53 * detection of split (out-of-order) write of physical sectors
  54 * of last logpage via timestamp at end of each sector
  55 * with its mirror data array at trailer).
  56 *
  57 *	alternatives:
  58 * lsn - 64-bit monotonically increasing integer vs
  59 * 32-bit lspn and page eor.
  60 */
  61
  62#include <linux/fs.h>
  63#include <linux/blkdev.h>
  64#include <linux/interrupt.h>
  65#include <linux/completion.h>
  66#include <linux/kthread.h>
  67#include <linux/buffer_head.h>		/* for sync_blockdev() */
  68#include <linux/bio.h>
  69#include <linux/freezer.h>
  70#include <linux/export.h>
  71#include <linux/delay.h>
  72#include <linux/mutex.h>
  73#include <linux/seq_file.h>
  74#include <linux/slab.h>
  75#include "jfs_incore.h"
  76#include "jfs_filsys.h"
  77#include "jfs_metapage.h"
  78#include "jfs_superblock.h"
  79#include "jfs_txnmgr.h"
  80#include "jfs_debug.h"
  81
  82
  83/*
  84 * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
  85 */
  86static struct lbuf *log_redrive_list;
  87static DEFINE_SPINLOCK(log_redrive_lock);
  88
  89
  90/*
  91 *	log read/write serialization (per log)
  92 */
  93#define LOG_LOCK_INIT(log)	mutex_init(&(log)->loglock)
  94#define LOG_LOCK(log)		mutex_lock(&((log)->loglock))
  95#define LOG_UNLOCK(log)		mutex_unlock(&((log)->loglock))
  96
  97
  98/*
  99 *	log group commit serialization (per log)
 100 */
 101
 102#define LOGGC_LOCK_INIT(log)	spin_lock_init(&(log)->gclock)
 103#define LOGGC_LOCK(log)		spin_lock_irq(&(log)->gclock)
 104#define LOGGC_UNLOCK(log)	spin_unlock_irq(&(log)->gclock)
 105#define LOGGC_WAKEUP(tblk)	wake_up_all(&(tblk)->gcwait)
 106
 107/*
 108 *	log sync serialization (per log)
 109 */
 110#define	LOGSYNC_DELTA(logsize)		min((logsize)/8, 128*LOGPSIZE)
 111#define	LOGSYNC_BARRIER(logsize)	((logsize)/4)
 112/*
 113#define	LOGSYNC_DELTA(logsize)		min((logsize)/4, 256*LOGPSIZE)
 114#define	LOGSYNC_BARRIER(logsize)	((logsize)/2)
 115*/
 116
 117
 118/*
 119 *	log buffer cache synchronization
 120 */
 121static DEFINE_SPINLOCK(jfsLCacheLock);
 122
 123#define	LCACHE_LOCK(flags)	spin_lock_irqsave(&jfsLCacheLock, flags)
 124#define	LCACHE_UNLOCK(flags)	spin_unlock_irqrestore(&jfsLCacheLock, flags)
 125
 126/*
 127 * See __SLEEP_COND in jfs_locks.h
 128 */
 129#define LCACHE_SLEEP_COND(wq, cond, flags)	\
 130do {						\
 131	if (cond)				\
 132		break;				\
 133	__SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
 134} while (0)
 135
 136#define	LCACHE_WAKEUP(event)	wake_up(event)
 137
 138
 139/*
 140 *	lbuf buffer cache (lCache) control
 141 */
 142/* log buffer manager pageout control (cumulative, inclusive) */
 143#define	lbmREAD		0x0001
 144#define	lbmWRITE	0x0002	/* enqueue at tail of write queue;
 145				 * init pageout if at head of queue;
 146				 */
 147#define	lbmRELEASE	0x0004	/* remove from write queue
 148				 * at completion of pageout;
 149				 * do not free/recycle it yet:
 150				 * caller will free it;
 151				 */
 152#define	lbmSYNC		0x0008	/* do not return to freelist
 153				 * when removed from write queue;
 154				 */
 155#define lbmFREE		0x0010	/* return to freelist
 156				 * at completion of pageout;
 157				 * the buffer may be recycled;
 158				 */
 159#define	lbmDONE		0x0020
 160#define	lbmERROR	0x0040
 161#define lbmGC		0x0080	/* lbmIODone to perform post-GC processing
 162				 * of log page
 163				 */
 164#define lbmDIRECT	0x0100
 165
 166/*
 167 * Global list of active external journals
 168 */
 169static LIST_HEAD(jfs_external_logs);
 170static struct jfs_log *dummy_log;
 171static DEFINE_MUTEX(jfs_log_mutex);
 172
 173/*
 174 * forward references
 175 */
 176static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
 177			 struct lrd * lrd, struct tlock * tlck);
 178
 179static int lmNextPage(struct jfs_log * log);
 180static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
 181			   int activate);
 182
 183static int open_inline_log(struct super_block *sb);
 184static int open_dummy_log(struct super_block *sb);
 185static int lbmLogInit(struct jfs_log * log);
 186static void lbmLogShutdown(struct jfs_log * log);
 187static struct lbuf *lbmAllocate(struct jfs_log * log, int);
 188static void lbmFree(struct lbuf * bp);
 189static void lbmfree(struct lbuf * bp);
 190static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
 191static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
 192static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
 193static int lbmIOWait(struct lbuf * bp, int flag);
 194static bio_end_io_t lbmIODone;
 195static void lbmStartIO(struct lbuf * bp);
 196static void lmGCwrite(struct jfs_log * log, int cant_block);
 197static int lmLogSync(struct jfs_log * log, int hard_sync);
 198
 199
 200
 201/*
 202 *	statistics
 203 */
 204#ifdef CONFIG_JFS_STATISTICS
 205static struct lmStat {
 206	uint commit;		/* # of commit */
 207	uint pagedone;		/* # of page written */
 208	uint submitted;		/* # of pages submitted */
 209	uint full_page;		/* # of full pages submitted */
 210	uint partial_page;	/* # of partial pages submitted */
 211} lmStat;
 212#endif
 213
 214static void write_special_inodes(struct jfs_log *log,
 215				 int (*writer)(struct address_space *))
 216{
 217	struct jfs_sb_info *sbi;
 218
 219	list_for_each_entry(sbi, &log->sb_list, log_list) {
 220		writer(sbi->ipbmap->i_mapping);
 221		writer(sbi->ipimap->i_mapping);
 222		writer(sbi->direct_inode->i_mapping);
 223	}
 224}
 225
 226/*
 227 * NAME:	lmLog()
 228 *
 229 * FUNCTION:	write a log record;
 230 *
 231 * PARAMETER:
 232 *
 233 * RETURN:	lsn - offset to the next log record to write (end-of-log);
 234 *		-1  - error;
 235 *
 236 * note: todo: log error handler
 237 */
 238int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 239	  struct tlock * tlck)
 240{
 241	int lsn;
 242	int diffp, difft;
 243	struct metapage *mp = NULL;
 244	unsigned long flags;
 245
 246	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
 247		 log, tblk, lrd, tlck);
 248
 249	LOG_LOCK(log);
 250
 251	/* log by (out-of-transaction) JFS ? */
 252	if (tblk == NULL)
 253		goto writeRecord;
 254
 255	/* log from page ? */
 256	if (tlck == NULL ||
 257	    tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
 258		goto writeRecord;
 259
 260	/*
 261	 *	initialize/update page/transaction recovery lsn
 262	 */
 263	lsn = log->lsn;
 264
 265	LOGSYNC_LOCK(log, flags);
 266
 267	/*
 268	 * initialize page lsn if first log write of the page
 269	 */
 270	if (mp->lsn == 0) {
 271		mp->log = log;
 272		mp->lsn = lsn;
 273		log->count++;
 274
 275		/* insert page at tail of logsynclist */
 276		list_add_tail(&mp->synclist, &log->synclist);
 277	}
 278
 279	/*
 280	 *	initialize/update lsn of tblock of the page
 281	 *
 282	 * transaction inherits oldest lsn of pages associated
 283	 * with allocation/deallocation of resources (their
 284	 * log records are used to reconstruct allocation map
 285	 * at recovery time: inode for inode allocation map,
 286	 * B+-tree index of extent descriptors for block
 287	 * allocation map);
 288	 * allocation map pages inherit transaction lsn at
 289	 * commit time to allow forwarding log syncpt past log
 290	 * records associated with allocation/deallocation of
 291	 * resources only after persistent map of these map pages
 292	 * have been updated and propagated to home.
 293	 */
 294	/*
 295	 * initialize transaction lsn:
 296	 */
 297	if (tblk->lsn == 0) {
 298		/* inherit lsn of its first page logged */
 299		tblk->lsn = mp->lsn;
 300		log->count++;
 301
 302		/* insert tblock after the page on logsynclist */
 303		list_add(&tblk->synclist, &mp->synclist);
 304	}
 305	/*
 306	 * update transaction lsn:
 307	 */
 308	else {
 309		/* inherit oldest/smallest lsn of page */
 310		logdiff(diffp, mp->lsn, log);
 311		logdiff(difft, tblk->lsn, log);
 312		if (diffp < difft) {
 313			/* update tblock lsn with page lsn */
 314			tblk->lsn = mp->lsn;
 315
 316			/* move tblock after page on logsynclist */
 317			list_move(&tblk->synclist, &mp->synclist);
 318		}
 319	}
 320
 321	LOGSYNC_UNLOCK(log, flags);
 322
 323	/*
 324	 *	write the log record
 325	 */
 326      writeRecord:
 327	lsn = lmWriteRecord(log, tblk, lrd, tlck);
 328
 329	/*
 330	 * forward log syncpt if log reached next syncpt trigger
 331	 */
 332	logdiff(diffp, lsn, log);
 333	if (diffp >= log->nextsync)
 334		lsn = lmLogSync(log, 0);
 335
 336	/* update end-of-log lsn */
 337	log->lsn = lsn;
 338
 339	LOG_UNLOCK(log);
 340
 341	/* return end-of-log address */
 342	return lsn;
 343}
 344
 345/*
 346 * NAME:	lmWriteRecord()
 347 *
 348 * FUNCTION:	move the log record to current log page
 349 *
 350 * PARAMETER:	cd	- commit descriptor
 351 *
 352 * RETURN:	end-of-log address
 353 *
 354 * serialization: LOG_LOCK() held on entry/exit
 355 */
 356static int
 357lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 358	      struct tlock * tlck)
 359{
 360	int lsn = 0;		/* end-of-log address */
 361	struct lbuf *bp;	/* dst log page buffer */
 362	struct logpage *lp;	/* dst log page */
 363	caddr_t dst;		/* destination address in log page */
 364	int dstoffset;		/* end-of-log offset in log page */
 365	int freespace;		/* free space in log page */
 366	caddr_t p;		/* src meta-data page */
 367	caddr_t src;
 368	int srclen;
 369	int nbytes;		/* number of bytes to move */
 370	int i;
 371	int len;
 372	struct linelock *linelock;
 373	struct lv *lv;
 374	struct lvd *lvd;
 375	int l2linesize;
 376
 377	len = 0;
 378
 379	/* retrieve destination log page to write */
 380	bp = (struct lbuf *) log->bp;
 381	lp = (struct logpage *) bp->l_ldata;
 382	dstoffset = log->eor;
 383
 384	/* any log data to write ? */
 385	if (tlck == NULL)
 386		goto moveLrd;
 387
 388	/*
 389	 *	move log record data
 390	 */
 391	/* retrieve source meta-data page to log */
 392	if (tlck->flag & tlckPAGELOCK) {
 393		p = (caddr_t) (tlck->mp->data);
 394		linelock = (struct linelock *) & tlck->lock;
 395	}
 396	/* retrieve source in-memory inode to log */
 397	else if (tlck->flag & tlckINODELOCK) {
 398		if (tlck->type & tlckDTREE)
 399			p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
 400		else
 401			p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
 402		linelock = (struct linelock *) & tlck->lock;
 403	}
 404#ifdef	_JFS_WIP
 405	else if (tlck->flag & tlckINLINELOCK) {
 406
 407		inlinelock = (struct inlinelock *) & tlck;
 408		p = (caddr_t) & inlinelock->pxd;
 409		linelock = (struct linelock *) & tlck;
 410	}
 411#endif				/* _JFS_WIP */
 412	else {
 413		jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
 414		return 0;	/* Probably should trap */
 415	}
 416	l2linesize = linelock->l2linesize;
 417
 418      moveData:
 419	ASSERT(linelock->index <= linelock->maxcnt);
 420
 421	lv = linelock->lv;
 422	for (i = 0; i < linelock->index; i++, lv++) {
 423		if (lv->length == 0)
 424			continue;
 425
 426		/* is page full ? */
 427		if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
 428			/* page become full: move on to next page */
 429			lmNextPage(log);
 430
 431			bp = log->bp;
 432			lp = (struct logpage *) bp->l_ldata;
 433			dstoffset = LOGPHDRSIZE;
 434		}
 435
 436		/*
 437		 * move log vector data
 438		 */
 439		src = (u8 *) p + (lv->offset << l2linesize);
 440		srclen = lv->length << l2linesize;
 441		len += srclen;
 442		while (srclen > 0) {
 443			freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 444			nbytes = min(freespace, srclen);
 445			dst = (caddr_t) lp + dstoffset;
 446			memcpy(dst, src, nbytes);
 447			dstoffset += nbytes;
 448
 449			/* is page not full ? */
 450			if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 451				break;
 452
 453			/* page become full: move on to next page */
 454			lmNextPage(log);
 455
 456			bp = (struct lbuf *) log->bp;
 457			lp = (struct logpage *) bp->l_ldata;
 458			dstoffset = LOGPHDRSIZE;
 459
 460			srclen -= nbytes;
 461			src += nbytes;
 462		}
 463
 464		/*
 465		 * move log vector descriptor
 466		 */
 467		len += 4;
 468		lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
 469		lvd->offset = cpu_to_le16(lv->offset);
 470		lvd->length = cpu_to_le16(lv->length);
 471		dstoffset += 4;
 472		jfs_info("lmWriteRecord: lv offset:%d length:%d",
 473			 lv->offset, lv->length);
 474	}
 475
 476	if ((i = linelock->next)) {
 477		linelock = (struct linelock *) lid_to_tlock(i);
 478		goto moveData;
 479	}
 480
 481	/*
 482	 *	move log record descriptor
 483	 */
 484      moveLrd:
 485	lrd->length = cpu_to_le16(len);
 486
 487	src = (caddr_t) lrd;
 488	srclen = LOGRDSIZE;
 489
 490	while (srclen > 0) {
 491		freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 492		nbytes = min(freespace, srclen);
 493		dst = (caddr_t) lp + dstoffset;
 494		memcpy(dst, src, nbytes);
 495
 496		dstoffset += nbytes;
 497		srclen -= nbytes;
 498
 499		/* are there more to move than freespace of page ? */
 500		if (srclen)
 501			goto pageFull;
 502
 503		/*
 504		 * end of log record descriptor
 505		 */
 506
 507		/* update last log record eor */
 508		log->eor = dstoffset;
 509		bp->l_eor = dstoffset;
 510		lsn = (log->page << L2LOGPSIZE) + dstoffset;
 511
 512		if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
 513			tblk->clsn = lsn;
 514			jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
 515				 bp->l_eor);
 516
 517			INCREMENT(lmStat.commit);	/* # of commit */
 518
 519			/*
 520			 * enqueue tblock for group commit:
 521			 *
 522			 * enqueue tblock of non-trivial/synchronous COMMIT
 523			 * at tail of group commit queue
 524			 * (trivial/asynchronous COMMITs are ignored by
 525			 * group commit.)
 526			 */
 527			LOGGC_LOCK(log);
 528
 529			/* init tblock gc state */
 530			tblk->flag = tblkGC_QUEUE;
 531			tblk->bp = log->bp;
 532			tblk->pn = log->page;
 533			tblk->eor = log->eor;
 534
 535			/* enqueue transaction to commit queue */
 536			list_add_tail(&tblk->cqueue, &log->cqueue);
 537
 538			LOGGC_UNLOCK(log);
 539		}
 540
 541		jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
 542			le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
 543
 544		/* page not full ? */
 545		if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 546			return lsn;
 547
 548	      pageFull:
 549		/* page become full: move on to next page */
 550		lmNextPage(log);
 551
 552		bp = (struct lbuf *) log->bp;
 553		lp = (struct logpage *) bp->l_ldata;
 554		dstoffset = LOGPHDRSIZE;
 555		src += nbytes;
 556	}
 557
 558	return lsn;
 559}
 560
 561
 562/*
 563 * NAME:	lmNextPage()
 564 *
 565 * FUNCTION:	write current page and allocate next page.
 566 *
 567 * PARAMETER:	log
 568 *
 569 * RETURN:	0
 570 *
 571 * serialization: LOG_LOCK() held on entry/exit
 572 */
 573static int lmNextPage(struct jfs_log * log)
 574{
 575	struct logpage *lp;
 576	int lspn;		/* log sequence page number */
 577	int pn;			/* current page number */
 578	struct lbuf *bp;
 579	struct lbuf *nextbp;
 580	struct tblock *tblk;
 581
 582	/* get current log page number and log sequence page number */
 583	pn = log->page;
 584	bp = log->bp;
 585	lp = (struct logpage *) bp->l_ldata;
 586	lspn = le32_to_cpu(lp->h.page);
 587
 588	LOGGC_LOCK(log);
 589
 590	/*
 591	 *	write or queue the full page at the tail of write queue
 592	 */
 593	/* get the tail tblk on commit queue */
 594	if (list_empty(&log->cqueue))
 595		tblk = NULL;
 596	else
 597		tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
 598
 599	/* every tblk who has COMMIT record on the current page,
 600	 * and has not been committed, must be on commit queue
 601	 * since tblk is queued at commit queueu at the time
 602	 * of writing its COMMIT record on the page before
 603	 * page becomes full (even though the tblk thread
 604	 * who wrote COMMIT record may have been suspended
 605	 * currently);
 606	 */
 607
 608	/* is page bound with outstanding tail tblk ? */
 609	if (tblk && tblk->pn == pn) {
 610		/* mark tblk for end-of-page */
 611		tblk->flag |= tblkGC_EOP;
 612
 613		if (log->cflag & logGC_PAGEOUT) {
 614			/* if page is not already on write queue,
 615			 * just enqueue (no lbmWRITE to prevent redrive)
 616			 * buffer to wqueue to ensure correct serial order
 617			 * of the pages since log pages will be added
 618			 * continuously
 619			 */
 620			if (bp->l_wqnext == NULL)
 621				lbmWrite(log, bp, 0, 0);
 622		} else {
 623			/*
 624			 * No current GC leader, initiate group commit
 625			 */
 626			log->cflag |= logGC_PAGEOUT;
 627			lmGCwrite(log, 0);
 628		}
 629	}
 630	/* page is not bound with outstanding tblk:
 631	 * init write or mark it to be redriven (lbmWRITE)
 632	 */
 633	else {
 634		/* finalize the page */
 635		bp->l_ceor = bp->l_eor;
 636		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 637		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
 638	}
 639	LOGGC_UNLOCK(log);
 640
 641	/*
 642	 *	allocate/initialize next page
 643	 */
 644	/* if log wraps, the first data page of log is 2
 645	 * (0 never used, 1 is superblock).
 646	 */
 647	log->page = (pn == log->size - 1) ? 2 : pn + 1;
 648	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */
 649
 650	/* allocate/initialize next log page buffer */
 651	nextbp = lbmAllocate(log, log->page);
 652	nextbp->l_eor = log->eor;
 653	log->bp = nextbp;
 654
 655	/* initialize next log page */
 656	lp = (struct logpage *) nextbp->l_ldata;
 657	lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
 658	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
 659
 660	return 0;
 661}
 662
 663
 664/*
 665 * NAME:	lmGroupCommit()
 666 *
 667 * FUNCTION:	group commit
 668 *	initiate pageout of the pages with COMMIT in the order of
 669 *	page number - redrive pageout of the page at the head of
 670 *	pageout queue until full page has been written.
 671 *
 672 * RETURN:
 673 *
 674 * NOTE:
 675 *	LOGGC_LOCK serializes log group commit queue, and
 676 *	transaction blocks on the commit queue.
 677 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 678 */
 679int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
 680{
 681	int rc = 0;
 682
 683	LOGGC_LOCK(log);
 684
 685	/* group committed already ? */
 686	if (tblk->flag & tblkGC_COMMITTED) {
 687		if (tblk->flag & tblkGC_ERROR)
 688			rc = -EIO;
 689
 690		LOGGC_UNLOCK(log);
 691		return rc;
 692	}
 693	jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
 694
 695	if (tblk->xflag & COMMIT_LAZY)
 696		tblk->flag |= tblkGC_LAZY;
 697
 698	if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
 699	    (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
 700	     || jfs_tlocks_low)) {
 701		/*
 702		 * No pageout in progress
 703		 *
 704		 * start group commit as its group leader.
 705		 */
 706		log->cflag |= logGC_PAGEOUT;
 707
 708		lmGCwrite(log, 0);
 709	}
 710
 711	if (tblk->xflag & COMMIT_LAZY) {
 712		/*
 713		 * Lazy transactions can leave now
 714		 */
 715		LOGGC_UNLOCK(log);
 716		return 0;
 717	}
 718
 719	/* lmGCwrite gives up LOGGC_LOCK, check again */
 720
 721	if (tblk->flag & tblkGC_COMMITTED) {
 722		if (tblk->flag & tblkGC_ERROR)
 723			rc = -EIO;
 724
 725		LOGGC_UNLOCK(log);
 726		return rc;
 727	}
 728
 729	/* upcount transaction waiting for completion
 730	 */
 731	log->gcrtc++;
 732	tblk->flag |= tblkGC_READY;
 733
 734	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
 735		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));
 736
 737	/* removed from commit queue */
 738	if (tblk->flag & tblkGC_ERROR)
 739		rc = -EIO;
 740
 741	LOGGC_UNLOCK(log);
 742	return rc;
 743}
 744
 745/*
 746 * NAME:	lmGCwrite()
 747 *
 748 * FUNCTION:	group commit write
 749 *	initiate write of log page, building a group of all transactions
 750 *	with commit records on that page.
 751 *
 752 * RETURN:	None
 753 *
 754 * NOTE:
 755 *	LOGGC_LOCK must be held by caller.
 756 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 757 */
 758static void lmGCwrite(struct jfs_log * log, int cant_write)
 759{
 760	struct lbuf *bp;
 761	struct logpage *lp;
 762	int gcpn;		/* group commit page number */
 763	struct tblock *tblk;
 764	struct tblock *xtblk = NULL;
 765
 766	/*
 767	 * build the commit group of a log page
 768	 *
 769	 * scan commit queue and make a commit group of all
 770	 * transactions with COMMIT records on the same log page.
 771	 */
 772	/* get the head tblk on the commit queue */
 773	gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
 774
 775	list_for_each_entry(tblk, &log->cqueue, cqueue) {
 776		if (tblk->pn != gcpn)
 777			break;
 778
 779		xtblk = tblk;
 780
 781		/* state transition: (QUEUE, READY) -> COMMIT */
 782		tblk->flag |= tblkGC_COMMIT;
 783	}
 784	tblk = xtblk;		/* last tblk of the page */
 785
 786	/*
 787	 * pageout to commit transactions on the log page.
 788	 */
 789	bp = (struct lbuf *) tblk->bp;
 790	lp = (struct logpage *) bp->l_ldata;
 791	/* is page already full ? */
 792	if (tblk->flag & tblkGC_EOP) {
 793		/* mark page to free at end of group commit of the page */
 794		tblk->flag &= ~tblkGC_EOP;
 795		tblk->flag |= tblkGC_FREE;
 796		bp->l_ceor = bp->l_eor;
 797		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 798		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
 799			 cant_write);
 800		INCREMENT(lmStat.full_page);
 801	}
 802	/* page is not yet full */
 803	else {
 804		bp->l_ceor = tblk->eor;	/* ? bp->l_ceor = bp->l_eor; */
 805		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 806		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
 807		INCREMENT(lmStat.partial_page);
 808	}
 809}
 810
 811/*
 812 * NAME:	lmPostGC()
 813 *
 814 * FUNCTION:	group commit post-processing
 815 *	Processes transactions after their commit records have been written
 816 *	to disk, redriving log I/O if necessary.
 817 *
 818 * RETURN:	None
 819 *
 820 * NOTE:
 821 *	This routine is called a interrupt time by lbmIODone
 822 */
 823static void lmPostGC(struct lbuf * bp)
 824{
 825	unsigned long flags;
 826	struct jfs_log *log = bp->l_log;
 827	struct logpage *lp;
 828	struct tblock *tblk, *temp;
 829
 830	//LOGGC_LOCK(log);
 831	spin_lock_irqsave(&log->gclock, flags);
 832	/*
 833	 * current pageout of group commit completed.
 834	 *
 835	 * remove/wakeup transactions from commit queue who were
 836	 * group committed with the current log page
 837	 */
 838	list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
 839		if (!(tblk->flag & tblkGC_COMMIT))
 840			break;
 841		/* if transaction was marked GC_COMMIT then
 842		 * it has been shipped in the current pageout
 843		 * and made it to disk - it is committed.
 844		 */
 845
 846		if (bp->l_flag & lbmERROR)
 847			tblk->flag |= tblkGC_ERROR;
 848
 849		/* remove it from the commit queue */
 850		list_del(&tblk->cqueue);
 851		tblk->flag &= ~tblkGC_QUEUE;
 852
 853		if (tblk == log->flush_tblk) {
 854			/* we can stop flushing the log now */
 855			clear_bit(log_FLUSH, &log->flag);
 856			log->flush_tblk = NULL;
 857		}
 858
 859		jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
 860			 tblk->flag);
 861
 862		if (!(tblk->xflag & COMMIT_FORCE))
 863			/*
 864			 * Hand tblk over to lazy commit thread
 865			 */
 866			txLazyUnlock(tblk);
 867		else {
 868			/* state transition: COMMIT -> COMMITTED */
 869			tblk->flag |= tblkGC_COMMITTED;
 870
 871			if (tblk->flag & tblkGC_READY)
 872				log->gcrtc--;
 873
 874			LOGGC_WAKEUP(tblk);
 875		}
 876
 877		/* was page full before pageout ?
 878		 * (and this is the last tblk bound with the page)
 879		 */
 880		if (tblk->flag & tblkGC_FREE)
 881			lbmFree(bp);
 882		/* did page become full after pageout ?
 883		 * (and this is the last tblk bound with the page)
 884		 */
 885		else if (tblk->flag & tblkGC_EOP) {
 886			/* finalize the page */
 887			lp = (struct logpage *) bp->l_ldata;
 888			bp->l_ceor = bp->l_eor;
 889			lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
 890			jfs_info("lmPostGC: calling lbmWrite");
 891			lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
 892				 1);
 893		}
 894
 895	}
 896
 897	/* are there any transactions who have entered lnGroupCommit()
 898	 * (whose COMMITs are after that of the last log page written.
 899	 * They are waiting for new group commit (above at (SLEEP 1))
 900	 * or lazy transactions are on a full (queued) log page,
 901	 * select the latest ready transaction as new group leader and
 902	 * wake her up to lead her group.
 903	 */
 904	if ((!list_empty(&log->cqueue)) &&
 905	    ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
 906	     test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
 907		/*
 908		 * Call lmGCwrite with new group leader
 909		 */
 910		lmGCwrite(log, 1);
 911
 912	/* no transaction are ready yet (transactions are only just
 913	 * queued (GC_QUEUE) and not entered for group commit yet).
 914	 * the first transaction entering group commit
 915	 * will elect herself as new group leader.
 916	 */
 917	else
 918		log->cflag &= ~logGC_PAGEOUT;
 919
 920	//LOGGC_UNLOCK(log);
 921	spin_unlock_irqrestore(&log->gclock, flags);
 922	return;
 923}
 924
 925/*
 926 * NAME:	lmLogSync()
 927 *
 928 * FUNCTION:	write log SYNCPT record for specified log
 929 *	if new sync address is available
 930 *	(normally the case if sync() is executed by back-ground
 931 *	process).
 932 *	calculate new value of i_nextsync which determines when
 933 *	this code is called again.
 934 *
 935 * PARAMETERS:	log	- log structure
 936 *		hard_sync - 1 to force all metadata to be written
 937 *
 938 * RETURN:	0
 939 *
 940 * serialization: LOG_LOCK() held on entry/exit
 941 */
 942static int lmLogSync(struct jfs_log * log, int hard_sync)
 943{
 944	int logsize;
 945	int written;		/* written since last syncpt */
 946	int free;		/* free space left available */
 947	int delta;		/* additional delta to write normally */
 948	int more;		/* additional write granted */
 949	struct lrd lrd;
 950	int lsn;
 951	struct logsyncblk *lp;
 952	unsigned long flags;
 953
 954	/* push dirty metapages out to disk */
 955	if (hard_sync)
 956		write_special_inodes(log, filemap_fdatawrite);
 957	else
 958		write_special_inodes(log, filemap_flush);
 959
 960	/*
 961	 *	forward syncpt
 962	 */
 963	/* if last sync is same as last syncpt,
 964	 * invoke sync point forward processing to update sync.
 965	 */
 966
 967	if (log->sync == log->syncpt) {
 968		LOGSYNC_LOCK(log, flags);
 969		if (list_empty(&log->synclist))
 970			log->sync = log->lsn;
 971		else {
 972			lp = list_entry(log->synclist.next,
 973					struct logsyncblk, synclist);
 974			log->sync = lp->lsn;
 975		}
 976		LOGSYNC_UNLOCK(log, flags);
 977
 978	}
 979
 980	/* if sync is different from last syncpt,
 981	 * write a SYNCPT record with syncpt = sync.
 982	 * reset syncpt = sync
 983	 */
 984	if (log->sync != log->syncpt) {
 985		lrd.logtid = 0;
 986		lrd.backchain = 0;
 987		lrd.type = cpu_to_le16(LOG_SYNCPT);
 988		lrd.length = 0;
 989		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
 990		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
 991
 992		log->syncpt = log->sync;
 993	} else
 994		lsn = log->lsn;
 995
 996	/*
 997	 *	setup next syncpt trigger (SWAG)
 998	 */
 999	logsize = log->logsize;
1000
1001	logdiff(written, lsn, log);
1002	free = logsize - written;
1003	delta = LOGSYNC_DELTA(logsize);
1004	more = min(free / 2, delta);
1005	if (more < 2 * LOGPSIZE) {
1006		jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
1007		/*
1008		 *	log wrapping
1009		 *
1010		 * option 1 - panic ? No.!
1011		 * option 2 - shutdown file systems
1012		 *	      associated with log ?
1013		 * option 3 - extend log ?
1014		 * option 4 - second chance
1015		 *
1016		 * mark log wrapped, and continue.
1017		 * when all active transactions are completed,
1018		 * mark log valid for recovery.
1019		 * if crashed during invalid state, log state
1020		 * implies invalid log, forcing fsck().
1021		 */
1022		/* mark log state log wrap in log superblock */
1023		/* log->state = LOGWRAP; */
1024
1025		/* reset sync point computation */
1026		log->syncpt = log->sync = lsn;
1027		log->nextsync = delta;
1028	} else
1029		/* next syncpt trigger = written + more */
1030		log->nextsync = written + more;
1031
1032	/* if number of bytes written from last sync point is more
1033	 * than 1/4 of the log size, stop new transactions from
1034	 * starting until all current transactions are completed
1035	 * by setting syncbarrier flag.
1036	 */
1037	if (!test_bit(log_SYNCBARRIER, &log->flag) &&
1038	    (written > LOGSYNC_BARRIER(logsize)) && log->active) {
1039		set_bit(log_SYNCBARRIER, &log->flag);
1040		jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1041			 log->syncpt);
1042		/*
1043		 * We may have to initiate group commit
1044		 */
1045		jfs_flush_journal(log, 0);
1046	}
1047
1048	return lsn;
1049}
1050
1051/*
1052 * NAME:	jfs_syncpt
1053 *
1054 * FUNCTION:	write log SYNCPT record for specified log
1055 *
1056 * PARAMETERS:	log	  - log structure
1057 *		hard_sync - set to 1 to force metadata to be written
1058 */
1059void jfs_syncpt(struct jfs_log *log, int hard_sync)
1060{	LOG_LOCK(log);
1061	if (!test_bit(log_QUIESCE, &log->flag))
1062		lmLogSync(log, hard_sync);
1063	LOG_UNLOCK(log);
1064}
1065
1066/*
1067 * NAME:	lmLogOpen()
1068 *
1069 * FUNCTION:	open the log on first open;
1070 *	insert filesystem in the active list of the log.
1071 *
1072 * PARAMETER:	ipmnt	- file system mount inode
1073 *		iplog	- log inode (out)
1074 *
1075 * RETURN:
1076 *
1077 * serialization:
1078 */
1079int lmLogOpen(struct super_block *sb)
1080{
1081	int rc;
1082	struct block_device *bdev;
1083	struct jfs_log *log;
1084	struct jfs_sb_info *sbi = JFS_SBI(sb);
1085
1086	if (sbi->flag & JFS_NOINTEGRITY)
1087		return open_dummy_log(sb);
1088
1089	if (sbi->mntflag & JFS_INLINELOG)
1090		return open_inline_log(sb);
1091
1092	mutex_lock(&jfs_log_mutex);
1093	list_for_each_entry(log, &jfs_external_logs, journal_list) {
1094		if (log->bdev->bd_dev == sbi->logdev) {
1095			if (memcmp(log->uuid, sbi->loguuid,
1096				   sizeof(log->uuid))) {
1097				jfs_warn("wrong uuid on JFS journal\n");
1098				mutex_unlock(&jfs_log_mutex);
1099				return -EINVAL;
1100			}
1101			/*
1102			 * add file system to log active file system list
1103			 */
1104			if ((rc = lmLogFileSystem(log, sbi, 1))) {
1105				mutex_unlock(&jfs_log_mutex);
1106				return rc;
1107			}
1108			goto journal_found;
1109		}
1110	}
1111
1112	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
1113		mutex_unlock(&jfs_log_mutex);
1114		return -ENOMEM;
1115	}
1116	INIT_LIST_HEAD(&log->sb_list);
1117	init_waitqueue_head(&log->syncwait);
1118
1119	/*
1120	 *	external log as separate logical volume
1121	 *
1122	 * file systems to log may have n-to-1 relationship;
1123	 */
1124
1125	bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1126				 log);
1127	if (IS_ERR(bdev)) {
1128		rc = PTR_ERR(bdev);
1129		goto free;
1130	}
1131
1132	log->bdev = bdev;
1133	memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid));
1134
1135	/*
1136	 * initialize log:
1137	 */
1138	if ((rc = lmLogInit(log)))
1139		goto close;
1140
1141	list_add(&log->journal_list, &jfs_external_logs);
1142
1143	/*
1144	 * add file system to log active file system list
1145	 */
1146	if ((rc = lmLogFileSystem(log, sbi, 1)))
1147		goto shutdown;
1148
1149journal_found:
1150	LOG_LOCK(log);
1151	list_add(&sbi->log_list, &log->sb_list);
1152	sbi->log = log;
1153	LOG_UNLOCK(log);
1154
1155	mutex_unlock(&jfs_log_mutex);
1156	return 0;
1157
1158	/*
1159	 *	unwind on error
1160	 */
1161      shutdown:		/* unwind lbmLogInit() */
1162	list_del(&log->journal_list);
1163	lbmLogShutdown(log);
1164
1165      close:		/* close external log device */
1166	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1167
1168      free:		/* free log descriptor */
1169	mutex_unlock(&jfs_log_mutex);
1170	kfree(log);
1171
1172	jfs_warn("lmLogOpen: exit(%d)", rc);
1173	return rc;
1174}
1175
1176static int open_inline_log(struct super_block *sb)
1177{
1178	struct jfs_log *log;
1179	int rc;
1180
1181	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1182		return -ENOMEM;
1183	INIT_LIST_HEAD(&log->sb_list);
1184	init_waitqueue_head(&log->syncwait);
1185
1186	set_bit(log_INLINELOG, &log->flag);
1187	log->bdev = sb->s_bdev;
1188	log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1189	log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1190	    (L2LOGPSIZE - sb->s_blocksize_bits);
1191	log->l2bsize = sb->s_blocksize_bits;
1192	ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1193
1194	/*
1195	 * initialize log.
1196	 */
1197	if ((rc = lmLogInit(log))) {
1198		kfree(log);
1199		jfs_warn("lmLogOpen: exit(%d)", rc);
1200		return rc;
1201	}
1202
1203	list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
1204	JFS_SBI(sb)->log = log;
1205
1206	return rc;
1207}
1208
1209static int open_dummy_log(struct super_block *sb)
1210{
1211	int rc;
1212
1213	mutex_lock(&jfs_log_mutex);
1214	if (!dummy_log) {
1215		dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
1216		if (!dummy_log) {
1217			mutex_unlock(&jfs_log_mutex);
1218			return -ENOMEM;
1219		}
1220		INIT_LIST_HEAD(&dummy_log->sb_list);
1221		init_waitqueue_head(&dummy_log->syncwait);
1222		dummy_log->no_integrity = 1;
1223		/* Make up some stuff */
1224		dummy_log->base = 0;
1225		dummy_log->size = 1024;
1226		rc = lmLogInit(dummy_log);
1227		if (rc) {
1228			kfree(dummy_log);
1229			dummy_log = NULL;
1230			mutex_unlock(&jfs_log_mutex);
1231			return rc;
1232		}
1233	}
1234
1235	LOG_LOCK(dummy_log);
1236	list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
1237	JFS_SBI(sb)->log = dummy_log;
1238	LOG_UNLOCK(dummy_log);
1239	mutex_unlock(&jfs_log_mutex);
1240
1241	return 0;
1242}
1243
1244/*
1245 * NAME:	lmLogInit()
1246 *
1247 * FUNCTION:	log initialization at first log open.
1248 *
1249 *	logredo() (or logformat()) should have been run previously.
1250 *	initialize the log from log superblock.
1251 *	set the log state in the superblock to LOGMOUNT and
1252 *	write SYNCPT log record.
1253 *
1254 * PARAMETER:	log	- log structure
1255 *
1256 * RETURN:	0	- if ok
1257 *		-EINVAL	- bad log magic number or superblock dirty
1258 *		error returned from logwait()
1259 *
1260 * serialization: single first open thread
1261 */
1262int lmLogInit(struct jfs_log * log)
1263{
1264	int rc = 0;
1265	struct lrd lrd;
1266	struct logsuper *logsuper;
1267	struct lbuf *bpsuper;
1268	struct lbuf *bp;
1269	struct logpage *lp;
1270	int lsn = 0;
1271
1272	jfs_info("lmLogInit: log:0x%p", log);
1273
1274	/* initialize the group commit serialization lock */
1275	LOGGC_LOCK_INIT(log);
1276
1277	/* allocate/initialize the log write serialization lock */
1278	LOG_LOCK_INIT(log);
1279
1280	LOGSYNC_LOCK_INIT(log);
1281
1282	INIT_LIST_HEAD(&log->synclist);
1283
1284	INIT_LIST_HEAD(&log->cqueue);
1285	log->flush_tblk = NULL;
1286
1287	log->count = 0;
1288
1289	/*
1290	 * initialize log i/o
1291	 */
1292	if ((rc = lbmLogInit(log)))
1293		return rc;
1294
1295	if (!test_bit(log_INLINELOG, &log->flag))
1296		log->l2bsize = L2LOGPSIZE;
1297
1298	/* check for disabled journaling to disk */
1299	if (log->no_integrity) {
1300		/*
1301		 * Journal pages will still be filled.  When the time comes
1302		 * to actually do the I/O, the write is not done, and the
1303		 * endio routine is called directly.
1304		 */
1305		bp = lbmAllocate(log , 0);
1306		log->bp = bp;
1307		bp->l_pn = bp->l_eor = 0;
1308	} else {
1309		/*
1310		 * validate log superblock
1311		 */
1312		if ((rc = lbmRead(log, 1, &bpsuper)))
1313			goto errout10;
1314
1315		logsuper = (struct logsuper *) bpsuper->l_ldata;
1316
1317		if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1318			jfs_warn("*** Log Format Error ! ***");
1319			rc = -EINVAL;
1320			goto errout20;
1321		}
1322
1323		/* logredo() should have been run successfully. */
1324		if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1325			jfs_warn("*** Log Is Dirty ! ***");
1326			rc = -EINVAL;
1327			goto errout20;
1328		}
1329
1330		/* initialize log from log superblock */
1331		if (test_bit(log_INLINELOG,&log->flag)) {
1332			if (log->size != le32_to_cpu(logsuper->size)) {
1333				rc = -EINVAL;
1334				goto errout20;
1335			}
1336			jfs_info("lmLogInit: inline log:0x%p base:0x%Lx "
1337				 "size:0x%x", log,
1338				 (unsigned long long) log->base, log->size);
1339		} else {
1340			if (memcmp(logsuper->uuid, log->uuid, 16)) {
1341				jfs_warn("wrong uuid on JFS log device");
1342				goto errout20;
1343			}
1344			log->size = le32_to_cpu(logsuper->size);
1345			log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1346			jfs_info("lmLogInit: external log:0x%p base:0x%Lx "
1347				 "size:0x%x", log,
1348				 (unsigned long long) log->base, log->size);
1349		}
1350
1351		log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1352		log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1353
1354		/*
1355		 * initialize for log append write mode
1356		 */
1357		/* establish current/end-of-log page/buffer */
1358		if ((rc = lbmRead(log, log->page, &bp)))
1359			goto errout20;
1360
1361		lp = (struct logpage *) bp->l_ldata;
1362
1363		jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1364			 le32_to_cpu(logsuper->end), log->page, log->eor,
1365			 le16_to_cpu(lp->h.eor));
1366
1367		log->bp = bp;
1368		bp->l_pn = log->page;
1369		bp->l_eor = log->eor;
1370
1371		/* if current page is full, move on to next page */
1372		if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1373			lmNextPage(log);
1374
1375		/*
1376		 * initialize log syncpoint
1377		 */
1378		/*
1379		 * write the first SYNCPT record with syncpoint = 0
1380		 * (i.e., log redo up to HERE !);
1381		 * remove current page from lbm write queue at end of pageout
1382		 * (to write log superblock update), but do not release to
1383		 * freelist;
1384		 */
1385		lrd.logtid = 0;
1386		lrd.backchain = 0;
1387		lrd.type = cpu_to_le16(LOG_SYNCPT);
1388		lrd.length = 0;
1389		lrd.log.syncpt.sync = 0;
1390		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1391		bp = log->bp;
1392		bp->l_ceor = bp->l_eor;
1393		lp = (struct logpage *) bp->l_ldata;
1394		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1395		lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1396		if ((rc = lbmIOWait(bp, 0)))
1397			goto errout30;
1398
1399		/*
1400		 * update/write superblock
1401		 */
1402		logsuper->state = cpu_to_le32(LOGMOUNT);
1403		log->serial = le32_to_cpu(logsuper->serial) + 1;
1404		logsuper->serial = cpu_to_le32(log->serial);
1405		lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1406		if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1407			goto errout30;
1408	}
1409
1410	/* initialize logsync parameters */
1411	log->logsize = (log->size - 2) << L2LOGPSIZE;
1412	log->lsn = lsn;
1413	log->syncpt = lsn;
1414	log->sync = log->syncpt;
1415	log->nextsync = LOGSYNC_DELTA(log->logsize);
1416
1417	jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1418		 log->lsn, log->syncpt, log->sync);
1419
1420	/*
1421	 * initialize for lazy/group commit
1422	 */
1423	log->clsn = lsn;
1424
1425	return 0;
1426
1427	/*
1428	 *	unwind on error
1429	 */
1430      errout30:		/* release log page */
1431	log->wqueue = NULL;
1432	bp->l_wqnext = NULL;
1433	lbmFree(bp);
1434
1435      errout20:		/* release log superblock */
1436	lbmFree(bpsuper);
1437
1438      errout10:		/* unwind lbmLogInit() */
1439	lbmLogShutdown(log);
1440
1441	jfs_warn("lmLogInit: exit(%d)", rc);
1442	return rc;
1443}
1444
1445
1446/*
1447 * NAME:	lmLogClose()
1448 *
1449 * FUNCTION:	remove file system <ipmnt> from active list of log <iplog>
1450 *		and close it on last close.
1451 *
1452 * PARAMETER:	sb	- superblock
1453 *
1454 * RETURN:	errors from subroutines
1455 *
1456 * serialization:
1457 */
1458int lmLogClose(struct super_block *sb)
1459{
1460	struct jfs_sb_info *sbi = JFS_SBI(sb);
1461	struct jfs_log *log = sbi->log;
1462	struct block_device *bdev;
1463	int rc = 0;
1464
1465	jfs_info("lmLogClose: log:0x%p", log);
1466
1467	mutex_lock(&jfs_log_mutex);
1468	LOG_LOCK(log);
1469	list_del(&sbi->log_list);
1470	LOG_UNLOCK(log);
1471	sbi->log = NULL;
1472
1473	/*
1474	 * We need to make sure all of the "written" metapages
1475	 * actually make it to disk
1476	 */
1477	sync_blockdev(sb->s_bdev);
1478
1479	if (test_bit(log_INLINELOG, &log->flag)) {
1480		/*
1481		 *	in-line log in host file system
1482		 */
1483		rc = lmLogShutdown(log);
1484		kfree(log);
1485		goto out;
1486	}
1487
1488	if (!log->no_integrity)
1489		lmLogFileSystem(log, sbi, 0);
1490
1491	if (!list_empty(&log->sb_list))
1492		goto out;
1493
1494	/*
1495	 * TODO: ensure that the dummy_log is in a state to allow
1496	 * lbmLogShutdown to deallocate all the buffers and call
1497	 * kfree against dummy_log.  For now, leave dummy_log & its
1498	 * buffers in memory, and resuse if another no-integrity mount
1499	 * is requested.
1500	 */
1501	if (log->no_integrity)
1502		goto out;
1503
1504	/*
1505	 *	external log as separate logical volume
1506	 */
1507	list_del(&log->journal_list);
1508	bdev = log->bdev;
1509	rc = lmLogShutdown(log);
1510
1511	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1512
1513	kfree(log);
1514
1515      out:
1516	mutex_unlock(&jfs_log_mutex);
1517	jfs_info("lmLogClose: exit(%d)", rc);
1518	return rc;
1519}
1520
1521
1522/*
1523 * NAME:	jfs_flush_journal()
1524 *
1525 * FUNCTION:	initiate write of any outstanding transactions to the journal
1526 *		and optionally wait until they are all written to disk
1527 *
1528 *		wait == 0  flush until latest txn is committed, don't wait
1529 *		wait == 1  flush until latest txn is committed, wait
1530 *		wait > 1   flush until all txns are complete, wait
1531 */
1532void jfs_flush_journal(struct jfs_log *log, int wait)
1533{
1534	int i;
1535	struct tblock *target = NULL;
1536
1537	/* jfs_write_inode may call us during read-only mount */
1538	if (!log)
1539		return;
1540
1541	jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);
1542
1543	LOGGC_LOCK(log);
1544
1545	if (!list_empty(&log->cqueue)) {
1546		/*
1547		 * This ensures that we will keep writing to the journal as long
1548		 * as there are unwritten commit records
1549		 */
1550		target = list_entry(log->cqueue.prev, struct tblock, cqueue);
1551
1552		if (test_bit(log_FLUSH, &log->flag)) {
1553			/*
1554			 * We're already flushing.
1555			 * if flush_tblk is NULL, we are flushing everything,
1556			 * so leave it that way.  Otherwise, update it to the
1557			 * latest transaction
1558			 */
1559			if (log->flush_tblk)
1560				log->flush_tblk = target;
1561		} else {
1562			/* Only flush until latest transaction is committed */
1563			log->flush_tblk = target;
1564			set_bit(log_FLUSH, &log->flag);
1565
1566			/*
1567			 * Initiate I/O on outstanding transactions
1568			 */
1569			if (!(log->cflag & logGC_PAGEOUT)) {
1570				log->cflag |= logGC_PAGEOUT;
1571				lmGCwrite(log, 0);
1572			}
1573		}
1574	}
1575	if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
1576		/* Flush until all activity complete */
1577		set_bit(log_FLUSH, &log->flag);
1578		log->flush_tblk = NULL;
1579	}
1580
1581	if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
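		/*
		 * open-coded sleep: queue ourselves on the target tblock's
		 * gcwait and drop LOGGC_LOCK while sleeping so group commit
		 * can make progress; completion of the group commit for
		 * this tblock wakes gcwait
		 */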
1582		DECLARE_WAITQUEUE(__wait, current);
1583
1584		add_wait_queue(&target->gcwait, &__wait);
1585		set_current_state(TASK_UNINTERRUPTIBLE);
1586		LOGGC_UNLOCK(log);
1587		schedule();
1588		LOGGC_LOCK(log);
1589		remove_wait_queue(&target->gcwait, &__wait);
1590	}
1591	LOGGC_UNLOCK(log);
1592
1593	if (wait < 2)
1594		return;
1595
1596	write_special_inodes(log, filemap_fdatawrite);
1597
1598	/*
1599	 * If there was recent activity, we may need to wait
1600	 * for the lazycommit thread to catch up
1601	 */
1602	if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
1603		for (i = 0; i < 200; i++) {	/* Too much? */
1604			msleep(250);
1605			write_special_inodes(log, filemap_fdatawrite);
1606			if (list_empty(&log->cqueue) &&
1607			    list_empty(&log->synclist))
1608				break;
1609		}
1610	}
1611	assert(list_empty(&log->cqueue));
1612
1613#ifdef CONFIG_JFS_DEBUG
1614	if (!list_empty(&log->synclist)) {
1615		struct logsyncblk *lp;
1616
1617		printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
1618		list_for_each_entry(lp, &log->synclist, synclist) {
1619			if (lp->xflag & COMMIT_PAGE) {
1620				struct metapage *mp = (struct metapage *)lp;
1621				print_hex_dump(KERN_ERR, "metapage: ",
1622					       DUMP_PREFIX_ADDRESS, 16, 4,
1623					       mp, sizeof(struct metapage), 0);
1624				print_hex_dump(KERN_ERR, "page: ",
1625					       DUMP_PREFIX_ADDRESS, 16,
1626					       sizeof(long), mp->page,
1627					       sizeof(struct page), 0);
1628			} else
1629				print_hex_dump(KERN_ERR, "tblock:",
1630					       DUMP_PREFIX_ADDRESS, 16, 4,
1631					       lp, sizeof(struct tblock), 0);
1632		}
1633	}
1634#else
1635	WARN_ON(!list_empty(&log->synclist));
1636#endif
1637	clear_bit(log_FLUSH, &log->flag);
1638}
1639
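/*
 * A minimal usage sketch (illustrative only, not code from this file)
 * of the three wait levels accepted by jfs_flush_journal():
 *
 *	jfs_flush_journal(log, 0);	start I/O, return immediately
 *	jfs_flush_journal(log, 1);	wait until the latest txn commits
 *	jfs_flush_journal(log, 2);	drain cqueue and synclist completely
 *
 * lmLogShutdown() below uses the strongest form, wait = 2.
 */
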
1640/*
1641 * NAME:	lmLogShutdown()
1642 *
1643 * FUNCTION:	log shutdown at last LogClose().
1644 *
1645 *		write log syncpt record.
1646 *		update super block to set redone flag to 0.
1647 *
1648 * PARAMETER:	log	- log inode
1649 *
1650 * RETURN:	0	- success
1651 *
1652 * serialization: single last close thread
1653 */
1654int lmLogShutdown(struct jfs_log * log)
1655{
1656	int rc;
1657	struct lrd lrd;
1658	int lsn;
1659	struct logsuper *logsuper;
1660	struct lbuf *bpsuper;
1661	struct lbuf *bp;
1662	struct logpage *lp;
1663
1664	jfs_info("lmLogShutdown: log:0x%p", log);
1665
1666	jfs_flush_journal(log, 2);
1667
1668	/*
1669	 * write the last SYNCPT record with syncpoint = 0
1670	 * (i.e., log redo up to HERE !)
1671	 */
1672	lrd.logtid = 0;
1673	lrd.backchain = 0;
1674	lrd.type = cpu_to_le16(LOG_SYNCPT);
1675	lrd.length = 0;
1676	lrd.log.syncpt.sync = 0;
1677
1678	lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1679	bp = log->bp;
1680	lp = (struct logpage *) bp->l_ldata;
1681	lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1682	lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
1683	lbmIOWait(log->bp, lbmFREE);
1684	log->bp = NULL;
1685
1686	/*
1687	 * synchronous update log superblock
1688	 * mark log state as shutdown cleanly
1689	 * (i.e., Log does not need to be replayed).
1690	 */
1691	if ((rc = lbmRead(log, 1, &bpsuper)))
1692		goto out;
1693
1694	logsuper = (struct logsuper *) bpsuper->l_ldata;
1695	logsuper->state = cpu_to_le32(LOGREDONE);
1696	logsuper->end = cpu_to_le32(lsn);
1697	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1698	rc = lbmIOWait(bpsuper, lbmFREE);
1699
1700	jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
1701		 lsn, log->page, log->eor);
1702
1703      out:
1704	/*
1705	 * shutdown per log i/o
1706	 */
1707	lbmLogShutdown(log);
1708
1709	if (rc) {
1710		jfs_warn("lmLogShutdown: exit(%d)", rc);
1711	}
1712	return rc;
1713}
1714
1715
1716/*
1717 * NAME:	lmLogFileSystem()
1718 *
1719 * FUNCTION:	insert (<activate> = true)/remove (<activate> = false)
1720 *	file system into/from log active file system list.
1721 *
1722 * PARAMETERS:	log	- pointer to the log.
1723 *		sbi	- superblock info of the file system being
1724 *			  inserted/removed.
1725 *		activate - insert/remove device from active list.
1726 *
1727 * RETURN:	0	- success
1728 *		errors returned by lbmIOWait().
1729 */
1730static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
1731			   int activate)
1732{
1733	int rc = 0;
1734	int i;
1735	struct logsuper *logsuper;
1736	struct lbuf *bpsuper;
1737	char *uuid = sbi->uuid;
1738
1739	/*
1740	 * insert/remove file system device to log active file system list.
1741	 */
1742	if ((rc = lbmRead(log, 1, &bpsuper)))
1743		return rc;
1744
1745	logsuper = (struct logsuper *) bpsuper->l_ldata;
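	/*
	 * the log superblock keeps a fixed table of MAX_ACTIVE uuid
	 * slots: a mount claims the first free (NULL_UUID) slot below,
	 * and an unmount clears the slot matching its own uuid
	 */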
1746	if (activate) {
1747		for (i = 0; i < MAX_ACTIVE; i++)
1748			if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) {
1749				memcpy(logsuper->active[i].uuid, uuid, 16);
1750				sbi->aggregate = i;
1751				break;
1752			}
1753		if (i == MAX_ACTIVE) {
1754			jfs_warn("Too many file systems sharing journal!");
1755			lbmFree(bpsuper);
1756			return -EMFILE;	/* Is there a better rc? */
1757		}
1758	} else {
1759		for (i = 0; i < MAX_ACTIVE; i++)
1760			if (!memcmp(logsuper->active[i].uuid, uuid, 16)) {
1761				memcpy(logsuper->active[i].uuid, NULL_UUID, 16);
1762				break;
1763			}
1764		if (i == MAX_ACTIVE) {
1765			jfs_warn("Somebody stomped on the journal!");
1766			lbmFree(bpsuper);
1767			return -EIO;
1768		}
1769
1770	}
1771
1772	/*
1773	 * synchronous write log superblock:
1774	 *
1775	 * write sidestream bypassing write queue:
1776	 * at file system mount, log super block is updated for
1777	 * activation of the file system before any log record
1778	 * (MOUNT record) of the file system, and at file system
1779	 * unmount, all meta data for the file system has been
1780	 * flushed before log super block is updated for deactivation
1781	 * of the file system.
1782	 */
1783	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1784	rc = lbmIOWait(bpsuper, lbmFREE);
1785
1786	return rc;
1787}
1788
1789/*
1790 *		log buffer manager (lbm)
1791 *		------------------------
1792 *
1793 * special purpose buffer manager supporting log i/o requirements.
1794 *
1795 * per log write queue:
1796 * log pageout occurs in serial order via a fifo write queue,
1797 * restricting i/o to a single request in progress at any one time.
1798 * the queue is a circular singly-linked list
1799 * (log->wqueue points to the tail, and buffers are linked via
1800 * the bp->l_wqnext field), and it
1802 */
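
/*
 * A minimal sketch (illustrative, mirroring the insert in lbmWrite()
 * below) of the tail-pointer circular list behind log->wqueue; the
 * tail's l_wqnext points back at the head, so both ends are reachable
 * in O(1):
 *
 *	if (tail == NULL) {		empty queue: bp links to itself
 *		log->wqueue = bp;
 *		bp->l_wqnext = bp;
 *	} else {			append: new tail inherits the
 *		log->wqueue = bp;	old head as its successor
 *		bp->l_wqnext = tail->l_wqnext;
 *		tail->l_wqnext = bp;
 *	}
 */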
1803
1804/*
1805 *	lbmLogInit()
1806 *
1807 * initialize per log I/O setup at lmLogInit()
1808 */
1809static int lbmLogInit(struct jfs_log * log)
1810{				/* log inode */
1811	int i;
1812	struct lbuf *lbuf;
1813
1814	jfs_info("lbmLogInit: log:0x%p", log);
1815
1816	/* initialize current buffer cursor */
1817	log->bp = NULL;
1818
1819	/* initialize log device write queue */
1820	log->wqueue = NULL;
1821
1822	/*
1823	 * Each log has its own buffer pages allocated to it.  These are
1824	 * not managed by the page cache.  This ensures that a transaction
1825	 * writing to the log does not block trying to allocate a page from
1826	 * the page cache (for the log).  This would be bad, since page
1827	 * allocation waits on the kswapd thread that may be committing inodes
1828	 * which would cause log activity.  Was that clear?  I'm trying to
1829	 * avoid deadlock here.
1830	 */
1831	init_waitqueue_head(&log->free_wait);
1832
1833	log->lbuf_free = NULL;
1834
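	/*
	 * carve each page into PAGE_SIZE / LOGPSIZE log buffers; every
	 * lbuf holds its own reference on the backing page (the first
	 * keeps the alloc_page() reference, the rest call get_page()),
	 * so lbmLogShutdown() can __free_page() once per lbuf
	 */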
1835	for (i = 0; i < LOGPAGES;) {
1836		char *buffer;
1837		uint offset;
1838		struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
1839
1840		if (!page)
1841			goto error;
1842		buffer = page_address(page);
1843		for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
1844			lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1845			if (lbuf == NULL) {
1846				if (offset == 0)
1847					__free_page(page);
1848				goto error;
1849			}
1850			if (offset) /* we already have one reference */
1851				get_page(page);
1852			lbuf->l_offset = offset;
1853			lbuf->l_ldata = buffer + offset;
1854			lbuf->l_page = page;
1855			lbuf->l_log = log;
1856			init_waitqueue_head(&lbuf->l_ioevent);
1857
1858			lbuf->l_freelist = log->lbuf_free;
1859			log->lbuf_free = lbuf;
1860			i++;
1861		}
1862	}
1863
1864	return 0;
1865
1866      error:
1867	lbmLogShutdown(log);
1868	return -ENOMEM;
1869}
1870
1871
1872/*
1873 *	lbmLogShutdown()
1874 *
1875 * finalize per log I/O setup at lmLogShutdown()
1876 */
1877static void lbmLogShutdown(struct jfs_log * log)
1878{
1879	struct lbuf *lbuf;
1880
1881	jfs_info("lbmLogShutdown: log:0x%p", log);
1882
1883	lbuf = log->lbuf_free;
1884	while (lbuf) {
1885		struct lbuf *next = lbuf->l_freelist;
1886		__free_page(lbuf->l_page);
1887		kfree(lbuf);
1888		lbuf = next;
1889	}
1890}
1891
1892
1893/*
1894 *	lbmAllocate()
1895 *
1896 * allocate an empty log buffer
1897 */
1898static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1899{
1900	struct lbuf *bp;
1901	unsigned long flags;
1902
1903	/*
1904	 * recycle from log buffer freelist if any
1905	 */
1906	LCACHE_LOCK(flags);
1907	LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1908	log->lbuf_free = bp->l_freelist;
1909	LCACHE_UNLOCK(flags);
1910
1911	bp->l_flag = 0;
1912
1913	bp->l_wqnext = NULL;
1914	bp->l_freelist = NULL;
1915
1916	bp->l_pn = pn;
1917	bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1918	bp->l_ceor = 0;
1919
1920	return bp;
1921}
1922
1923
1924/*
1925 *	lbmFree()
1926 *
1927 * release a log buffer to freelist
1928 */
1929static void lbmFree(struct lbuf * bp)
1930{
1931	unsigned long flags;
1932
1933	LCACHE_LOCK(flags);
1934
1935	lbmfree(bp);
1936
1937	LCACHE_UNLOCK(flags);
1938}
1939
1940static void lbmfree(struct lbuf * bp)
1941{
1942	struct jfs_log *log = bp->l_log;
1943
1944	assert(bp->l_wqnext == NULL);
1945
1946	/*
1947	 * return the buffer to head of freelist
1948	 */
1949	bp->l_freelist = log->lbuf_free;
1950	log->lbuf_free = bp;
1951
1952	wake_up(&log->free_wait);
1953	return;
1954}
1955
1956
1957/*
1958 * NAME:	lbmRedrive
1959 *
1960 * FUNCTION:	add a log buffer to the log redrive list
1961 *
1962 * PARAMETER:
1963 *	bp	- log buffer
1964 *
1965 * NOTES:
1966 *	Takes log_redrive_lock.
1967 */
1968static inline void lbmRedrive(struct lbuf *bp)
1969{
1970	unsigned long flags;
1971
1972	spin_lock_irqsave(&log_redrive_lock, flags);
1973	bp->l_redrive_next = log_redrive_list;
1974	log_redrive_list = bp;
1975	spin_unlock_irqrestore(&log_redrive_lock, flags);
1976
1977	wake_up_process(jfsIOthread);
1978}
1979
1980
1981/*
1982 *	lbmRead()
1983 */
1984static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1985{
1986	struct bio *bio;
1987	struct lbuf *bp;
1988
1989	/*
1990	 * allocate a log buffer
1991	 */
1992	*bpp = bp = lbmAllocate(log, pn);
1993	jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1994
1995	bp->l_flag |= lbmREAD;
1996
1997	bio = bio_alloc(GFP_NOFS, 1);
1998
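	/* convert the filesystem block number to 512-byte sector units */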
1999	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2000	bio->bi_bdev = log->bdev;
2001
2002	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
2003	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
2004
2005	bio->bi_end_io = lbmIODone;
2006	bio->bi_private = bp;
2007	/*check if journaling to disk has been disabled*/
2008	if (log->no_integrity) {
2009		bio->bi_iter.bi_size = 0;
2010		lbmIODone(bio);
2011	} else {
2012		submit_bio(READ_SYNC, bio);
2013	}
2014
2015	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
2016
2017	return 0;
2018}
2019
2020
2021/*
2022 *	lbmWrite()
2023 *
2024 * buffer at head of pageout queue stays after completion of
2025 * partial-page pageout and redriven by explicit initiation of
2026 * pageout by caller until full-page pageout is completed and
2027 * released.
2028 *
2029 * device driver i/o done redrives pageout of new buffer at
2030 * head of pageout queue when current buffer at head of pageout
2031 * queue is released at the completion of its full-page pageout.
2032 *
2033 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2034 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2035 */
2036static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2037		     int cant_block)
2038{
2039	struct lbuf *tail;
2040	unsigned long flags;
2041
2042	jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
2043
2044	/* map the logical block address to physical block address */
2045	bp->l_blkno =
2046	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2047
2048	LCACHE_LOCK(flags);		/* disable+lock */
2049
2050	/*
2051	 * initialize buffer for device driver
2052	 */
2053	bp->l_flag = flag;
2054
2055	/*
2056	 *	insert bp at tail of write queue associated with log
2057	 *
2058	 * (request is either for bp already/currently at head of queue
2059	 * or new bp to be inserted at tail)
2060	 */
2061	tail = log->wqueue;
2062
2063	/* is buffer not already on write queue ? */
2064	if (bp->l_wqnext == NULL) {
2065		/* insert at tail of wqueue */
2066		if (tail == NULL) {
2067			log->wqueue = bp;
2068			bp->l_wqnext = bp;
2069		} else {
2070			log->wqueue = bp;
2071			bp->l_wqnext = tail->l_wqnext;
2072			tail->l_wqnext = bp;
2073		}
2074
2075		tail = bp;
2076	}
2077
2078	/* is buffer at head of wqueue and for write ? */
2079	if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
2080		LCACHE_UNLOCK(flags);	/* unlock+enable */
2081		return;
2082	}
2083
2084	LCACHE_UNLOCK(flags);	/* unlock+enable */
2085
2086	if (cant_block)
2087		lbmRedrive(bp);
2088	else if (flag & lbmSYNC)
2089		lbmStartIO(bp);
2090	else {
2091		LOGGC_UNLOCK(log);
2092		lbmStartIO(bp);
2093		LOGGC_LOCK(log);
2094	}
2095}
2096
2097
2098/*
2099 *	lbmDirectWrite()
2100 *
2101 * initiate pageout bypassing write queue for sidestream
2102 * (e.g., log superblock) write;
2103 */
2104static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2105{
2106	jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2107		 bp, flag, bp->l_pn);
2108
2109	/*
2110	 * initialize buffer for device driver
2111	 */
2112	bp->l_flag = flag | lbmDIRECT;
2113
2114	/* map the logical block address to physical block address */
2115	bp->l_blkno =
2116	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2117
2118	/*
2119	 *	initiate pageout of the page
2120	 */
2121	lbmStartIO(bp);
2122}
2123
2124
2125/*
2126 * NAME:	lbmStartIO()
2127 *
2128 * FUNCTION:	Interface to DD strategy routine
2129 *
2130 * RETURN:	none
2131 *
2132 * serialization: LCACHE_LOCK() is NOT held during log i/o;
2133 */
2134static void lbmStartIO(struct lbuf * bp)
2135{
2136	struct bio *bio;
2137	struct jfs_log *log = bp->l_log;
2138
2139	jfs_info("lbmStartIO");
2140
2141	bio = bio_alloc(GFP_NOFS, 1);
2142	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2143	bio->bi_bdev = log->bdev;
2144
2145	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
2146	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
2147
2148	bio->bi_end_io = lbmIODone;
2149	bio->bi_private = bp;
2150
2151	/* check if journaling to disk has been disabled */
2152	if (log->no_integrity) {
2153		bio->bi_iter.bi_size = 0;
2154		lbmIODone(bio);
2155	} else {
2156		submit_bio(WRITE_SYNC, bio);
2157		INCREMENT(lmStat.submitted);
2158	}
2159}
2160
2161
2162/*
2163 *	lbmIOWait()
2164 */
2165static int lbmIOWait(struct lbuf * bp, int flag)
2166{
2167	unsigned long flags;
2168	int rc = 0;
2169
2170	jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2171
2172	LCACHE_LOCK(flags);		/* disable+lock */
2173
2174	LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2175
2176	rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2177
2178	if (flag & lbmFREE)
2179		lbmfree(bp);
2180
2181	LCACHE_UNLOCK(flags);	/* unlock+enable */
2182
2183	jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2184	return rc;
2185}
2186
2187/*
2188 *	lbmIODone()
2189 *
2190 * bio end_io callback: executed at I/O completion (interrupt) time
2191 */
2192static void lbmIODone(struct bio *bio)
2193{
2194	struct lbuf *bp = bio->bi_private;
2195	struct lbuf *nextbp, *tail;
2196	struct jfs_log *log;
2197	unsigned long flags;
2198
2199	/*
2200	 * get back jfs buffer bound to the i/o buffer
2201	 */
2202	jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
2203
2204	LCACHE_LOCK(flags);		/* disable+lock */
2205
2206	bp->l_flag |= lbmDONE;
2207
2208	if (bio->bi_error) {
2209		bp->l_flag |= lbmERROR;
2210
2211		jfs_err("lbmIODone: I/O error in JFS log");
2212	}
2213
2214	bio_put(bio);
2215
2216	/*
2217	 *	pagein completion
2218	 */
2219	if (bp->l_flag & lbmREAD) {
2220		bp->l_flag &= ~lbmREAD;
2221
2222		LCACHE_UNLOCK(flags);	/* unlock+enable */
2223
2224		/* wakeup I/O initiator */
2225		LCACHE_WAKEUP(&bp->l_ioevent);
2226
2227		return;
2228	}
2229
2230	/*
2231	 *	pageout completion
2232	 *
2233	 * the bp at the head of write queue has completed pageout.
2234	 *
2235	 * if single-commit/full-page pageout, remove the current buffer
2236	 * from head of pageout queue, and redrive pageout with
2237	 * the new buffer at head of pageout queue;
2238	 * otherwise, the partial-page pageout buffer stays at
2239	 * the head of pageout queue to be redriven for pageout
2240	 * by lmGroupCommit() until full-page pageout is completed.
2241	 */
2242	bp->l_flag &= ~lbmWRITE;
2243	INCREMENT(lmStat.pagedone);
2244
2245	/* update committed lsn */
2246	log = bp->l_log;
2247	log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
2248
2249	if (bp->l_flag & lbmDIRECT) {
2250		LCACHE_WAKEUP(&bp->l_ioevent);
2251		LCACHE_UNLOCK(flags);
2252		return;
2253	}
2254
2255	tail = log->wqueue;
2256
2257	/* single element queue */
2258	if (bp == tail) {
2259		/* remove head buffer of full-page pageout
2260		 * from log device write queue
2261		 */
2262		if (bp->l_flag & lbmRELEASE) {
2263			log->wqueue = NULL;
2264			bp->l_wqnext = NULL;
2265		}
2266	}
2267	/* multi element queue */
2268	else {
2269		/* remove head buffer of full-page pageout
2270		 * from log device write queue
2271		 */
2272		if (bp->l_flag & lbmRELEASE) {
2273			nextbp = tail->l_wqnext = bp->l_wqnext;
2274			bp->l_wqnext = NULL;
2275
2276			/*
2277			 * redrive pageout of next page at head of write queue:
2278			 * redrive next page without any bound tblk
2279			 * (i.e., page w/o any COMMIT records), or
2280			 * first page of new group commit which has been
2281			 * queued after current page (subsequent pageout
2282			 * is performed synchronously, except page without
2283			 * any COMMITs) by lmGroupCommit() as indicated
2284			 * by lbmWRITE flag;
2285			 */
2286			if (nextbp->l_flag & lbmWRITE) {
2287				/*
2288				 * We can't do the I/O at interrupt time.
2289				 * The jfsIO thread can do it
2290				 */
2291				lbmRedrive(nextbp);
2292			}
2293		}
2294	}
2295
2296	/*
2297	 *	synchronous pageout:
2298	 *
2299	 * buffer has not necessarily been removed from write queue
2300	 * (e.g., synchronous write of partial-page with COMMIT):
2301	 * leave buffer for i/o initiator to dispose
2302	 */
2303	if (bp->l_flag & lbmSYNC) {
2304		LCACHE_UNLOCK(flags);	/* unlock+enable */
2305
2306		/* wakeup I/O initiator */
2307		LCACHE_WAKEUP(&bp->l_ioevent);
2308	}
2309
2310	/*
2311	 *	Group Commit pageout:
2312	 */
2313	else if (bp->l_flag & lbmGC) {
2314		LCACHE_UNLOCK(flags);
2315		lmPostGC(bp);
2316	}
2317
2318	/*
2319	 *	asynchronous pageout:
2320	 *
2321	 * buffer must have been removed from write queue:
2322	 * insert buffer at head of freelist where it can be recycled
2323	 */
2324	else {
2325		assert(bp->l_flag & lbmRELEASE);
2326		assert(bp->l_flag & lbmFREE);
2327		lbmfree(bp);
2328
2329		LCACHE_UNLOCK(flags);	/* unlock+enable */
2330	}
2331}
2332
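/*
 * jfsIOWait(): main loop of the jfsIO kernel thread.  Drains the
 * log_redrive_list, restarting I/O on each queued lbuf via
 * lbmStartIO(), then sleeps until lbmRedrive() queues more work and
 * wakes the thread.
 */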
2333int jfsIOWait(void *arg)
2334{
2335	struct lbuf *bp;
2336
2337	do {
2338		spin_lock_irq(&log_redrive_lock);
2339		while ((bp = log_redrive_list)) {
2340			log_redrive_list = bp->l_redrive_next;
2341			bp->l_redrive_next = NULL;
2342			spin_unlock_irq(&log_redrive_lock);
2343			lbmStartIO(bp);
2344			spin_lock_irq(&log_redrive_lock);
2345		}
2346
2347		if (freezing(current)) {
2348			spin_unlock_irq(&log_redrive_lock);
2349			try_to_freeze();
2350		} else {
2351			set_current_state(TASK_INTERRUPTIBLE);
2352			spin_unlock_irq(&log_redrive_lock);
2353			schedule();
2354		}
2355	} while (!kthread_should_stop());
2356
2357	jfs_info("jfsIOWait being killed!");
2358	return 0;
2359}
2360
2361/*
2362 * NAME:	lmLogFormat()/jfs_logform()
2363 *
2364 * FUNCTION:	format file system log
2365 *
2366 * PARAMETERS:
2367 *	log	- volume log
2368 *	logAddress - start address of log space, in FS blocks
2369 *	logSize	- length of log space, in FS blocks
2370 *
2371 * RETURN:	0	- success
2372 *		-EIO	- i/o error
2373 *
2374 * XXX: We're synchronously writing one page at a time.  This needs to
2375 *	be improved by writing multiple pages at once.
2376 */
2377int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2378{
2379	int rc = -EIO;
2380	struct jfs_sb_info *sbi;
2381	struct logsuper *logsuper;
2382	struct logpage *lp;
2383	int lspn;		/* log sequence page number */
2384	struct lrd *lrd_ptr;
2385	int npages = 0;
2386	struct lbuf *bp;
2387
2388	jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
2389		 (long long)logAddress, logSize);
2390
2391	sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);
2392
2393	/* allocate a log buffer */
2394	bp = lbmAllocate(log, 1);
2395
2396	npages = logSize >> sbi->l2nbperpage;
2397
2398	/*
2399	 *	log space:
2400	 *
2401	 * page 0 - reserved;
2402	 * page 1 - log superblock;
2403	 * page 2 - log data page: A SYNC log record is written
2404	 *	    into this page at logform time;
2405	 * pages 3-N - log data page: set to empty log data pages;
2406	 */
2407	/*
2408	 *	init log superblock: log page 1
2409	 */
2410	logsuper = (struct logsuper *) bp->l_ldata;
2411
2412	logsuper->magic = cpu_to_le32(LOGMAGIC);
2413	logsuper->version = cpu_to_le32(LOGVERSION);
2414	logsuper->state = cpu_to_le32(LOGREDONE);
2415	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
2416	logsuper->size = cpu_to_le32(npages);
2417	logsuper->bsize = cpu_to_le32(sbi->bsize);
2418	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
2419	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
2420
2421	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2422	bp->l_blkno = logAddress + sbi->nbperpage;
2423	lbmStartIO(bp);
2424	if ((rc = lbmIOWait(bp, 0)))
2425		goto exit;
2426
2427	/*
2428	 *	init pages 2 to npages-1 as log data pages:
2429	 *
2430	 * log page sequence number (lspn) initialization:
2431	 *
2432	 * pn:   0     1     2     3                 n-1
2433	 *       +-----+-----+=====+=====+===.....===+=====+
2434	 * lspn:             N-1   0     1           N-2
2435	 *                   <--- N page circular file ---->
2436	 *
2437	 * the N (= npages-2) data pages of the log are maintained as
2438	 * a circular file for the log records;
2439	 * lspn grows by 1 monotonically as each log page is written
2440	 * to the circular file of the log;
2441	 * setLogpage() will not reset the page number even if
2442	 * the eor is equal to LOGPHDRSIZE.  In order for the binary search
2443	 * to still work in the find-log-end process, we have to simulate
2444	 * the log wrap situation at log format time.
2445	 * The 1st log page written will have the highest lspn; the
2446	 * succeeding log pages will then have lspn values in ascending
2447	 * order from 0 to (N-2).
2448	 */
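	/*
	 * worked example (illustrative): for a 4 MB log of 4 KB pages,
	 * npages = 1024 and N = npages - 2 = 1022 data pages; the first
	 * data page written below gets lspn = N - 1 = 1021, and the
	 * loop further below stamps lspn = 0, 1, ..., N - 2 = 1020
	 */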
2449	lp = (struct logpage *) bp->l_ldata;
2450	/*
2451	 * initialize the 1st log page to be written: lspn = N - 1;
2452	 * a SYNCPT log record is written into this page
2453	 */
2454	lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
2455	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
2456
2457	lrd_ptr = (struct lrd *) &lp->data;
2458	lrd_ptr->logtid = 0;
2459	lrd_ptr->backchain = 0;
2460	lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
2461	lrd_ptr->length = 0;
2462	lrd_ptr->log.syncpt.sync = 0;
2463
2464	bp->l_blkno += sbi->nbperpage;
2465	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2466	lbmStartIO(bp);
2467	if ((rc = lbmIOWait(bp, 0)))
2468		goto exit;
2469
2470	/*
2471	 *	initialize succeeding log pages: lspn = 0, 1, ..., (N-2)
2472	 */
2473	for (lspn = 0; lspn < npages - 3; lspn++) {
2474		lp->h.page = lp->t.page = cpu_to_le32(lspn);
2475		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
2476
2477		bp->l_blkno += sbi->nbperpage;
2478		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2479		lbmStartIO(bp);
2480		if ((rc = lbmIOWait(bp, 0)))
2481			goto exit;
2482	}
2483
2484	rc = 0;
2485exit:
2486	/*
2487	 *	finalize log
2488	 */
2489	/* release the buffer */
2490	lbmFree(bp);
2491
2492	return rc;
2493}
2494
2495#ifdef CONFIG_JFS_STATISTICS
2496static int jfs_lmstats_proc_show(struct seq_file *m, void *v)
2497{
2498	seq_printf(m,
2499		       "JFS Logmgr stats\n"
2500		       "================\n"
2501		       "commits = %d\n"
2502		       "writes submitted = %d\n"
2503		       "writes completed = %d\n"
2504		       "full pages submitted = %d\n"
2505		       "partial pages submitted = %d\n",
2506		       lmStat.commit,
2507		       lmStat.submitted,
2508		       lmStat.pagedone,
2509		       lmStat.full_page,
2510		       lmStat.partial_page);
2511	return 0;
2512}
2513
2514static int jfs_lmstats_proc_open(struct inode *inode, struct file *file)
2515{
2516	return single_open(file, jfs_lmstats_proc_show, NULL);
2517}
2518
2519const struct file_operations jfs_lmstats_proc_fops = {
2520	.owner		= THIS_MODULE,
2521	.open		= jfs_lmstats_proc_open,
2522	.read		= seq_read,
2523	.llseek		= seq_lseek,
2524	.release	= single_release,
2525};
2526#endif /* CONFIG_JFS_STATISTICS */