   1/*
   2 *   Copyright (C) International Business Machines Corp., 2000-2004
   3 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   4 *
   5 *   This program is free software;  you can redistribute it and/or modify
   6 *   it under the terms of the GNU General Public License as published by
   7 *   the Free Software Foundation; either version 2 of the License, or
   8 *   (at your option) any later version.
   9 *
  10 *   This program is distributed in the hope that it will be useful,
  11 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  13 *   the GNU General Public License for more details.
  14 *
  15 *   You should have received a copy of the GNU General Public License
  16 *   along with this program;  if not, write to the Free Software
  17 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18 */
  19
  20/*
  21 *	jfs_logmgr.c: log manager
  22 *
  23 * for related information, see transaction manager (jfs_txnmgr.c), and
  24 * recovery manager (jfs_logredo.c).
  25 *
  26 * note: for detail, RTFS.
  27 *
  28 *	log buffer manager:
  29 * special purpose buffer manager supporting log i/o requirements.
  30 * per log serial pageout of logpage
  31 * queuing i/o requests and redrive i/o at iodone
  32 * maintain current logpage buffer
  33 * no caching since append only
  34 * appropriate jfs buffer cache buffers as needed
  35 *
  36 *	group commit:
  37 * transactions which wrote COMMIT records in the same in-memory
  38 * log page during the pageout of previous/current log page(s) are
  39 * committed together by the pageout of the page.
  40 *
  41 *	TBD lazy commit:
  42 * transactions are committed asynchronously when the log page
  43 * containing its COMMIT record is paged out as it becomes full;
  44 *
  45 *	serialization:
  46 * . a per log lock serializes log write.
  47 * . a per log lock serializes group commit.
  48 * . a per log lock serializes log open/close;
  49 *
  50 *	TBD log integrity:
  51 * careful-write (ping-pong) of last logpage to recover from crash
  52 * in overwrite.
  53 * detection of split (out-of-order) write of physical sectors
  54 * of last logpage via timestamp at end of each sector
  55 * (with its mirror data array at the trailer).
  56 *
  57 *	alternatives:
  58 * lsn - 64-bit monotonically increasing integer vs
  59 * 32-bit lspn and page eor.
  60 */
  61
  62#include <linux/fs.h>
  63#include <linux/blkdev.h>
  64#include <linux/interrupt.h>
  65#include <linux/completion.h>
  66#include <linux/kthread.h>
  67#include <linux/buffer_head.h>		/* for sync_blockdev() */
  68#include <linux/bio.h>
  69#include <linux/freezer.h>
  70#include <linux/export.h>
  71#include <linux/delay.h>
  72#include <linux/mutex.h>
  73#include <linux/seq_file.h>
  74#include <linux/slab.h>
  75#include "jfs_incore.h"
  76#include "jfs_filsys.h"
  77#include "jfs_metapage.h"
  78#include "jfs_superblock.h"
  79#include "jfs_txnmgr.h"
  80#include "jfs_debug.h"
  81
  82
  83/*
  84 * lbufs ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
  85 */
  86static struct lbuf *log_redrive_list;
  87static DEFINE_SPINLOCK(log_redrive_lock);
  88
  89
  90/*
  91 *	log read/write serialization (per log)
  92 */
  93#define LOG_LOCK_INIT(log)	mutex_init(&(log)->loglock)
  94#define LOG_LOCK(log)		mutex_lock(&((log)->loglock))
  95#define LOG_UNLOCK(log)		mutex_unlock(&((log)->loglock))
  96
  97
  98/*
  99 *	log group commit serialization (per log)
 100 */
 101
 102#define LOGGC_LOCK_INIT(log)	spin_lock_init(&(log)->gclock)
 103#define LOGGC_LOCK(log)		spin_lock_irq(&(log)->gclock)
 104#define LOGGC_UNLOCK(log)	spin_unlock_irq(&(log)->gclock)
 105#define LOGGC_WAKEUP(tblk)	wake_up_all(&(tblk)->gcwait)
 106
 107/*
 108 *	log sync serialization (per log)
 109 */
 110#define	LOGSYNC_DELTA(logsize)		min((logsize)/8, 128*LOGPSIZE)
 111#define	LOGSYNC_BARRIER(logsize)	((logsize)/4)
 112/*
 113#define	LOGSYNC_DELTA(logsize)		min((logsize)/4, 256*LOGPSIZE)
 114#define	LOGSYNC_BARRIER(logsize)	((logsize)/2)
 115*/
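    /*
     * worked example (assuming the 4 KiB LOGPSIZE log page size from
     * jfs_filsys.h): for a 32 MiB log, LOGSYNC_DELTA yields
     * min(4 MiB, 512 KiB) = 512 KiB between syncpts, and
     * LOGSYNC_BARRIER allows 8 MiB of unsynced log before new
     * transactions are stalled (see lmLogSync()).
     */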
 116
 117
 118/*
 119 *	log buffer cache synchronization
 120 */
 121static DEFINE_SPINLOCK(jfsLCacheLock);
 122
 123#define	LCACHE_LOCK(flags)	spin_lock_irqsave(&jfsLCacheLock, flags)
 124#define	LCACHE_UNLOCK(flags)	spin_unlock_irqrestore(&jfsLCacheLock, flags)
 125
 126/*
 127 * See __SLEEP_COND in jfs_locks.h
 128 */
 129#define LCACHE_SLEEP_COND(wq, cond, flags)	\
 130do {						\
 131	if (cond)				\
 132		break;				\
 133	__SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
 134} while (0)
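    /*
     * if the condition does not hold, __SLEEP_COND drops the lcache
     * lock, sleeps on the wait queue, then reacquires the lock and
     * retests, so the caller sees the condition true with the lock
     * still held on return.
     */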
 135
 136#define	LCACHE_WAKEUP(event)	wake_up(event)
 137
 138
 139/*
 140 *	lbuf buffer cache (lCache) control
 141 */
 142/* log buffer manager pageout control (cumulative, inclusive) */
 143#define	lbmREAD		0x0001
 144#define	lbmWRITE	0x0002	/* enqueue at tail of write queue;
 145				 * init pageout if at head of queue;
 146				 */
 147#define	lbmRELEASE	0x0004	/* remove from write queue
 148				 * at completion of pageout;
 149				 * do not free/recycle it yet:
 150				 * caller will free it;
 151				 */
 152#define	lbmSYNC		0x0008	/* do not return to freelist
 153				 * when removed from write queue;
 154				 */
 155#define lbmFREE		0x0010	/* return to freelist
 156				 * at completion of pageout;
 157				 * the buffer may be recycled;
 158				 */
 159#define	lbmDONE		0x0020
 160#define	lbmERROR	0x0040
 161#define lbmGC		0x0080	/* lbmIODone to perform post-GC processing
 162				 * of log page
 163				 */
 164#define lbmDIRECT	0x0100
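    /*
     * typical combinations used below: a full log page goes out with
     * lbmWRITE | lbmRELEASE | lbmFREE (write, dequeue at completion,
     * recycle); group commit adds lbmGC so that lbmIODone invokes
     * lmPostGC(); the log superblock is written with lbmWRITE |
     * lbmRELEASE | lbmSYNC via lbmDirectWrite(), bypassing the write
     * queue.
     */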
 165
 166/*
 167 * Global list of active external journals
 168 */
 169static LIST_HEAD(jfs_external_logs);
 170static struct jfs_log *dummy_log = NULL;
 171static DEFINE_MUTEX(jfs_log_mutex);
 172
 173/*
 174 * forward references
 175 */
 176static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
 177			 struct lrd * lrd, struct tlock * tlck);
 178
 179static int lmNextPage(struct jfs_log * log);
 180static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
 181			   int activate);
 182
 183static int open_inline_log(struct super_block *sb);
 184static int open_dummy_log(struct super_block *sb);
 185static int lbmLogInit(struct jfs_log * log);
 186static void lbmLogShutdown(struct jfs_log * log);
 187static struct lbuf *lbmAllocate(struct jfs_log * log, int);
 188static void lbmFree(struct lbuf * bp);
 189static void lbmfree(struct lbuf * bp);
 190static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
 191static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
 192static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
 193static int lbmIOWait(struct lbuf * bp, int flag);
 194static bio_end_io_t lbmIODone;
 195static void lbmStartIO(struct lbuf * bp);
 196static void lmGCwrite(struct jfs_log * log, int cant_block);
 197static int lmLogSync(struct jfs_log * log, int hard_sync);
 198
 199
 200
 201/*
 202 *	statistics
 203 */
 204#ifdef CONFIG_JFS_STATISTICS
 205static struct lmStat {
 206	uint commit;		/* # of commit */
 207	uint pagedone;		/* # of page written */
 208	uint submitted;		/* # of pages submitted */
 209	uint full_page;		/* # of full pages submitted */
 210	uint partial_page;	/* # of partial pages submitted */
 211} lmStat;
 212#endif
 213
 214static void write_special_inodes(struct jfs_log *log,
 215				 int (*writer)(struct address_space *))
 216{
 217	struct jfs_sb_info *sbi;
 218
 219	list_for_each_entry(sbi, &log->sb_list, log_list) {
 220		writer(sbi->ipbmap->i_mapping);
 221		writer(sbi->ipimap->i_mapping);
 222		writer(sbi->direct_inode->i_mapping);
 223	}
 224}
 225
 226/*
 227 * NAME:	lmLog()
 228 *
 229 * FUNCTION:	write a log record;
 230 *
 231 * PARAMETER:
 232 *
 233 * RETURN:	lsn - offset to the next log record to write (end-of-log);
 234 *		-1  - error;
 235 *
 236 * note: todo: log error handler
 237 */
 238int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 239	  struct tlock * tlck)
 240{
 241	int lsn;
 242	int diffp, difft;
 243	struct metapage *mp = NULL;
 244	unsigned long flags;
 245
 246	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
 247		 log, tblk, lrd, tlck);
 248
 249	LOG_LOCK(log);
 250
 251	/* log by (out-of-transaction) JFS ? */
 252	if (tblk == NULL)
 253		goto writeRecord;
 254
 255	/* log from page ? */
 256	if (tlck == NULL ||
 257	    tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
 258		goto writeRecord;
 259
 260	/*
 261	 *	initialize/update page/transaction recovery lsn
 262	 */
 263	lsn = log->lsn;
 264
 265	LOGSYNC_LOCK(log, flags);
 266
 267	/*
 268	 * initialize page lsn if first log write of the page
 269	 */
 270	if (mp->lsn == 0) {
 271		mp->log = log;
 272		mp->lsn = lsn;
 273		log->count++;
 274
 275		/* insert page at tail of logsynclist */
 276		list_add_tail(&mp->synclist, &log->synclist);
 277	}
 278
 279	/*
 280	 *	initialize/update lsn of tblock of the page
 281	 *
 282	 * transaction inherits oldest lsn of pages associated
 283	 * with allocation/deallocation of resources (their
 284	 * log records are used to reconstruct allocation map
 285	 * at recovery time: inode for inode allocation map,
 286	 * B+-tree index of extent descriptors for block
 287	 * allocation map);
 288	 * allocation map pages inherit transaction lsn at
 289	 * commit time to allow forwarding log syncpt past log
 290	 * records associated with allocation/deallocation of
 291	 * resources only after persistent map of these map pages
 292	 * have been updated and propagated to home.
 293	 */
 294	/*
 295	 * initialize transaction lsn:
 296	 */
 297	if (tblk->lsn == 0) {
 298		/* inherit lsn of its first page logged */
 299		tblk->lsn = mp->lsn;
 300		log->count++;
 301
 302		/* insert tblock after the page on logsynclist */
 303		list_add(&tblk->synclist, &mp->synclist);
 304	}
 305	/*
 306	 * update transaction lsn:
 307	 */
 308	else {
 309		/* inherit oldest/smallest lsn of page */
 310		logdiff(diffp, mp->lsn, log);
 311		logdiff(difft, tblk->lsn, log);
 312		if (diffp < difft) {
 313			/* update tblock lsn with page lsn */
 314			tblk->lsn = mp->lsn;
 315
 316			/* move tblock after page on logsynclist */
 317			list_move(&tblk->synclist, &mp->synclist);
 318		}
 319	}
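    	/*
    	 * e.g., if the page was first logged at lsn 100 and the
    	 * transaction at lsn 150, the tblock inherits lsn 100 and
    	 * stays behind the page on the logsynclist, so the syncpt
    	 * cannot be forwarded past the page's oldest record.
    	 */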
 320
 321	LOGSYNC_UNLOCK(log, flags);
 322
 323	/*
 324	 *	write the log record
 325	 */
 326      writeRecord:
 327	lsn = lmWriteRecord(log, tblk, lrd, tlck);
 328
 329	/*
 330	 * forward log syncpt if log reached next syncpt trigger
 331	 */
 332	logdiff(diffp, lsn, log);
 333	if (diffp >= log->nextsync)
 334		lsn = lmLogSync(log, 0);
 335
 336	/* update end-of-log lsn */
 337	log->lsn = lsn;
 338
 339	LOG_UNLOCK(log);
 340
 341	/* return end-of-log address */
 342	return lsn;
 343}
 344
 345/*
 346 * NAME:	lmWriteRecord()
 347 *
 348 * FUNCTION:	move the log record to current log page
 349 *
 350 * PARAMETER:	cd	- commit descriptor
 351 *
 352 * RETURN:	end-of-log address
 353 *
 354 * serialization: LOG_LOCK() held on entry/exit
 355 */
 356static int
 357lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 358	      struct tlock * tlck)
 359{
 360	int lsn = 0;		/* end-of-log address */
 361	struct lbuf *bp;	/* dst log page buffer */
 362	struct logpage *lp;	/* dst log page */
 363	caddr_t dst;		/* destination address in log page */
 364	int dstoffset;		/* end-of-log offset in log page */
 365	int freespace;		/* free space in log page */
 366	caddr_t p;		/* src meta-data page */
 367	caddr_t src;
 368	int srclen;
 369	int nbytes;		/* number of bytes to move */
 370	int i;
 371	int len;
 372	struct linelock *linelock;
 373	struct lv *lv;
 374	struct lvd *lvd;
 375	int l2linesize;
 376
 377	len = 0;
 378
 379	/* retrieve destination log page to write */
 380	bp = (struct lbuf *) log->bp;
 381	lp = (struct logpage *) bp->l_ldata;
 382	dstoffset = log->eor;
 383
 384	/* any log data to write ? */
 385	if (tlck == NULL)
 386		goto moveLrd;
 387
 388	/*
 389	 *	move log record data
 390	 */
 391	/* retrieve source meta-data page to log */
 392	if (tlck->flag & tlckPAGELOCK) {
 393		p = (caddr_t) (tlck->mp->data);
 394		linelock = (struct linelock *) & tlck->lock;
 395	}
 396	/* retrieve source in-memory inode to log */
 397	else if (tlck->flag & tlckINODELOCK) {
 398		if (tlck->type & tlckDTREE)
 399			p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
 400		else
 401			p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
 402		linelock = (struct linelock *) & tlck->lock;
 403	}
 404#ifdef	_JFS_WIP
 405	else if (tlck->flag & tlckINLINELOCK) {
 406
 407		inlinelock = (struct inlinelock *) & tlck;
 408		p = (caddr_t) & inlinelock->pxd;
 409		linelock = (struct linelock *) & tlck;
 410	}
 411#endif				/* _JFS_WIP */
 412	else {
 413		jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
 414		return 0;	/* Probably should trap */
 415	}
 416	l2linesize = linelock->l2linesize;
 417
 418      moveData:
 419	ASSERT(linelock->index <= linelock->maxcnt);
 420
 421	lv = linelock->lv;
 422	for (i = 0; i < linelock->index; i++, lv++) {
 423		if (lv->length == 0)
 424			continue;
 425
 426		/* is page full ? */
 427		if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
 428			/* page becomes full: move on to next page */
 429			lmNextPage(log);
 430
 431			bp = log->bp;
 432			lp = (struct logpage *) bp->l_ldata;
 433			dstoffset = LOGPHDRSIZE;
 434		}
 435
 436		/*
 437		 * move log vector data
 438		 */
 439		src = (u8 *) p + (lv->offset << l2linesize);
 440		srclen = lv->length << l2linesize;
 441		len += srclen;
 442		while (srclen > 0) {
 443			freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 444			nbytes = min(freespace, srclen);
 445			dst = (caddr_t) lp + dstoffset;
 446			memcpy(dst, src, nbytes);
 447			dstoffset += nbytes;
 448
 449			/* is page not full ? */
 450			if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 451				break;
 452
 453			/* page becomes full: move on to next page */
 454			lmNextPage(log);
 455
 456			bp = (struct lbuf *) log->bp;
 457			lp = (struct logpage *) bp->l_ldata;
 458			dstoffset = LOGPHDRSIZE;
 459
 460			srclen -= nbytes;
 461			src += nbytes;
 462		}
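    		/*
    		 * a log vector larger than the space left on the
    		 * current page is thus copied in chunks: each pass
    		 * fills the page up to LOGPSIZE - LOGPTLRSIZE, then
    		 * lmNextPage() advances to a fresh page whose data
    		 * area begins at LOGPHDRSIZE.
    		 */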
 463
 464		/*
 465		 * move log vector descriptor
 466		 */
 467		len += 4;
 468		lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
 469		lvd->offset = cpu_to_le16(lv->offset);
 470		lvd->length = cpu_to_le16(lv->length);
 471		dstoffset += 4;
 472		jfs_info("lmWriteRecord: lv offset:%d length:%d",
 473			 lv->offset, lv->length);
 474	}
 475
 476	if ((i = linelock->next)) {
 477		linelock = (struct linelock *) lid_to_tlock(i);
 478		goto moveData;
 479	}
 480
 481	/*
 482	 *	move log record descriptor
 483	 */
 484      moveLrd:
 485	lrd->length = cpu_to_le16(len);
 486
 487	src = (caddr_t) lrd;
 488	srclen = LOGRDSIZE;
 489
 490	while (srclen > 0) {
 491		freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 492		nbytes = min(freespace, srclen);
 493		dst = (caddr_t) lp + dstoffset;
 494		memcpy(dst, src, nbytes);
 495
 496		dstoffset += nbytes;
 497		srclen -= nbytes;
 498
 499		/* is there more to move than the page's free space ? */
 500		if (srclen)
 501			goto pageFull;
 502
 503		/*
 504		 * end of log record descriptor
 505		 */
 506
 507		/* update last log record eor */
 508		log->eor = dstoffset;
 509		bp->l_eor = dstoffset;
 510		lsn = (log->page << L2LOGPSIZE) + dstoffset;
 511
 512		if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
 513			tblk->clsn = lsn;
 514			jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
 515				 bp->l_eor);
 516
 517			INCREMENT(lmStat.commit);	/* # of commit */
 518
 519			/*
 520			 * enqueue tblock for group commit:
 521			 *
 522			 * enqueue tblock of non-trivial/synchronous COMMIT
 523			 * at tail of group commit queue
 524			 * (trivial/asynchronous COMMITs are ignored by
 525			 * group commit.)
 526			 */
 527			LOGGC_LOCK(log);
 528
 529			/* init tblock gc state */
 530			tblk->flag = tblkGC_QUEUE;
 531			tblk->bp = log->bp;
 532			tblk->pn = log->page;
 533			tblk->eor = log->eor;
 534
 535			/* enqueue transaction to commit queue */
 536			list_add_tail(&tblk->cqueue, &log->cqueue);
 537
 538			LOGGC_UNLOCK(log);
 539		}
 540
 541		jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
 542			le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
 543
 544		/* page not full ? */
 545		if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 546			return lsn;
 547
 548	      pageFull:
 549		/* page becomes full: move on to next page */
 550		lmNextPage(log);
 551
 552		bp = (struct lbuf *) log->bp;
 553		lp = (struct logpage *) bp->l_ldata;
 554		dstoffset = LOGPHDRSIZE;
 555		src += nbytes;
 556	}
 557
 558	return lsn;
 559}
 560
 561
 562/*
 563 * NAME:	lmNextPage()
 564 *
 565 * FUNCTION:	write current page and allocate next page.
 566 *
 567 * PARAMETER:	log
 568 *
 569 * RETURN:	0
 570 *
 571 * serialization: LOG_LOCK() held on entry/exit
 572 */
 573static int lmNextPage(struct jfs_log * log)
 574{
 575	struct logpage *lp;
 576	int lspn;		/* log sequence page number */
 577	int pn;			/* current page number */
 578	struct lbuf *bp;
 579	struct lbuf *nextbp;
 580	struct tblock *tblk;
 581
 582	/* get current log page number and log sequence page number */
 583	pn = log->page;
 584	bp = log->bp;
 585	lp = (struct logpage *) bp->l_ldata;
 586	lspn = le32_to_cpu(lp->h.page);
 587
 588	LOGGC_LOCK(log);
 589
 590	/*
 591	 *	write or queue the full page at the tail of write queue
 592	 */
 593	/* get the tail tblk on commit queue */
 594	if (list_empty(&log->cqueue))
 595		tblk = NULL;
 596	else
 597		tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
 598
 599	/* every tblk that has a COMMIT record on the current page,
 600	 * and has not been committed, must be on the commit queue,
 601	 * since a tblk is queued at the commit queue at the time
 602	 * of writing its COMMIT record on the page, before the
 603	 * page becomes full (even though the tblk thread
 604	 * that wrote the COMMIT record may currently be
 605	 * suspended);
 606	 */
 607
 608	/* is page bound with outstanding tail tblk ? */
 609	if (tblk && tblk->pn == pn) {
 610		/* mark tblk for end-of-page */
 611		tblk->flag |= tblkGC_EOP;
 612
 613		if (log->cflag & logGC_PAGEOUT) {
 614			/* if page is not already on write queue,
 615			 * just enqueue (no lbmWRITE to prevent redrive)
 616			 * buffer to wqueue to ensure correct serial order
 617			 * of the pages since log pages will be added
 618			 * continuously
 619			 */
 620			if (bp->l_wqnext == NULL)
 621				lbmWrite(log, bp, 0, 0);
 622		} else {
 623			/*
 624			 * No current GC leader, initiate group commit
 625			 */
 626			log->cflag |= logGC_PAGEOUT;
 627			lmGCwrite(log, 0);
 628		}
 629	}
 630	/* page is not bound with outstanding tblk:
 631	 * init write or mark it to be redriven (lbmWRITE)
 632	 */
 633	else {
 634		/* finalize the page */
 635		bp->l_ceor = bp->l_eor;
 636		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 637		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
 638	}
 639	LOGGC_UNLOCK(log);
 640
 641	/*
 642	 *	allocate/initialize next page
 643	 */
 644	/* if log wraps, the first data page of log is 2
 645	 * (0 never used, 1 is superblock).
 646	 */
 647	log->page = (pn == log->size - 1) ? 2 : pn + 1;
 648	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */
 649
 650	/* allocate/initialize next log page buffer */
 651	nextbp = lbmAllocate(log, log->page);
 652	nextbp->l_eor = log->eor;
 653	log->bp = nextbp;
 654
 655	/* initialize next log page */
 656	lp = (struct logpage *) nextbp->l_ldata;
 657	lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
 658	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
 659
 660	return 0;
 661}
 662
 663
 664/*
 665 * NAME:	lmGroupCommit()
 666 *
 667 * FUNCTION:	group commit
 668 *	initiate pageout of the pages with COMMIT in the order of
 669 *	page number - redrive pageout of the page at the head of
 670 *	pageout queue until full page has been written.
 671 *
 672 * RETURN:
 673 *
 674 * NOTE:
 675 *	LOGGC_LOCK serializes log group commit queue, and
 676 *	transaction blocks on the commit queue.
 677 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 678 */
 679int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
 680{
 681	int rc = 0;
 682
 683	LOGGC_LOCK(log);
 684
 685	/* group committed already ? */
 686	if (tblk->flag & tblkGC_COMMITTED) {
 687		if (tblk->flag & tblkGC_ERROR)
 688			rc = -EIO;
 689
 690		LOGGC_UNLOCK(log);
 691		return rc;
 692	}
 693	jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
 694
 695	if (tblk->xflag & COMMIT_LAZY)
 696		tblk->flag |= tblkGC_LAZY;
 697
 698	if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
 699	    (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
 700	     || jfs_tlocks_low)) {
 701		/*
 702		 * No pageout in progress
 703		 *
 704		 * start group commit as its group leader.
 705		 */
 706		log->cflag |= logGC_PAGEOUT;
 707
 708		lmGCwrite(log, 0);
 709	}
 710
 711	if (tblk->xflag & COMMIT_LAZY) {
 712		/*
 713		 * Lazy transactions can leave now
 714		 */
 715		LOGGC_UNLOCK(log);
 716		return 0;
 717	}
 718
 719	/* lmGCwrite gives up LOGGC_LOCK, check again */
 720
 721	if (tblk->flag & tblkGC_COMMITTED) {
 722		if (tblk->flag & tblkGC_ERROR)
 723			rc = -EIO;
 724
 725		LOGGC_UNLOCK(log);
 726		return rc;
 727	}
 728
 729	/* upcount transaction waiting for completion
 730	 */
 731	log->gcrtc++;
 732	tblk->flag |= tblkGC_READY;
 733
 734	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
 735		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));
 736
 737	/* removed from commit queue */
 738	if (tblk->flag & tblkGC_ERROR)
 739		rc = -EIO;
 740
 741	LOGGC_UNLOCK(log);
 742	return rc;
 743}
 744
 745/*
 746 * NAME:	lmGCwrite()
 747 *
 748 * FUNCTION:	group commit write
 749 *	initiate write of log page, building a group of all transactions
 750 *	with commit records on that page.
 751 *
 752 * RETURN:	None
 753 *
 754 * NOTE:
 755 *	LOGGC_LOCK must be held by caller.
 756 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 757 */
 758static void lmGCwrite(struct jfs_log * log, int cant_write)
 759{
 760	struct lbuf *bp;
 761	struct logpage *lp;
 762	int gcpn;		/* group commit page number */
 763	struct tblock *tblk;
 764	struct tblock *xtblk = NULL;
 765
 766	/*
 767	 * build the commit group of a log page
 768	 *
 769	 * scan commit queue and make a commit group of all
 770	 * transactions with COMMIT records on the same log page.
 771	 */
 772	/* get the head tblk on the commit queue */
 773	gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
 774
 775	list_for_each_entry(tblk, &log->cqueue, cqueue) {
 776		if (tblk->pn != gcpn)
 777			break;
 778
 779		xtblk = tblk;
 780
 781		/* state transition: (QUEUE, READY) -> COMMIT */
 782		tblk->flag |= tblkGC_COMMIT;
 783	}
 784	tblk = xtblk;		/* last tblk of the page */
 785
 786	/*
 787	 * pageout to commit transactions on the log page.
 788	 */
 789	bp = (struct lbuf *) tblk->bp;
 790	lp = (struct logpage *) bp->l_ldata;
 791	/* is page already full ? */
 792	if (tblk->flag & tblkGC_EOP) {
 793		/* mark page to free at end of group commit of the page */
 794		tblk->flag &= ~tblkGC_EOP;
 795		tblk->flag |= tblkGC_FREE;
 796		bp->l_ceor = bp->l_eor;
 797		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 798		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
 799			 cant_write);
 800		INCREMENT(lmStat.full_page);
 801	}
 802	/* page is not yet full */
 803	else {
 804		bp->l_ceor = tblk->eor;	/* ? bp->l_ceor = bp->l_eor; */
 805		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 806		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
 807		INCREMENT(lmStat.partial_page);
 808	}
 809}
 810
 811/*
 812 * NAME:	lmPostGC()
 813 *
 814 * FUNCTION:	group commit post-processing
 815 *	Processes transactions after their commit records have been written
 816 *	to disk, redriving log I/O if necessary.
 817 *
 818 * RETURN:	None
 819 *
 820 * NOTE:
 821 *	This routine is called at interrupt time by lbmIODone
 822 */
 823static void lmPostGC(struct lbuf * bp)
 824{
 825	unsigned long flags;
 826	struct jfs_log *log = bp->l_log;
 827	struct logpage *lp;
 828	struct tblock *tblk, *temp;
 829
 830	//LOGGC_LOCK(log);
 831	spin_lock_irqsave(&log->gclock, flags);
 832	/*
 833	 * current pageout of group commit completed.
 834	 *
 835	 * remove/wakeup transactions from commit queue who were
 836	 * group committed with the current log page
 837	 */
 838	list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
 839		if (!(tblk->flag & tblkGC_COMMIT))
 840			break;
 841		/* if transaction was marked GC_COMMIT then
 842		 * it has been shipped in the current pageout
 843		 * and made it to disk - it is committed.
 844		 */
 845
 846		if (bp->l_flag & lbmERROR)
 847			tblk->flag |= tblkGC_ERROR;
 848
 849		/* remove it from the commit queue */
 850		list_del(&tblk->cqueue);
 851		tblk->flag &= ~tblkGC_QUEUE;
 852
 853		if (tblk == log->flush_tblk) {
 854			/* we can stop flushing the log now */
 855			clear_bit(log_FLUSH, &log->flag);
 856			log->flush_tblk = NULL;
 857		}
 858
 859		jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
 860			 tblk->flag);
 861
 862		if (!(tblk->xflag & COMMIT_FORCE))
 863			/*
 864			 * Hand tblk over to lazy commit thread
 865			 */
 866			txLazyUnlock(tblk);
 867		else {
 868			/* state transition: COMMIT -> COMMITTED */
 869			tblk->flag |= tblkGC_COMMITTED;
 870
 871			if (tblk->flag & tblkGC_READY)
 872				log->gcrtc--;
 873
 874			LOGGC_WAKEUP(tblk);
 875		}
 876
 877		/* was page full before pageout ?
 878		 * (and this is the last tblk bound with the page)
 879		 */
 880		if (tblk->flag & tblkGC_FREE)
 881			lbmFree(bp);
 882		/* did page become full after pageout ?
 883		 * (and this is the last tblk bound with the page)
 884		 */
 885		else if (tblk->flag & tblkGC_EOP) {
 886			/* finalize the page */
 887			lp = (struct logpage *) bp->l_ldata;
 888			bp->l_ceor = bp->l_eor;
 889			lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
 890			jfs_info("lmPostGC: calling lbmWrite");
 891			lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
 892				 1);
 893		}
 894
 895	}
 896
 897	/* if there are transactions that have entered lmGroupCommit()
 898	 * (whose COMMITs are after that of the last log page written)
 899	 * and are waiting for a new group commit (above at (SLEEP 1)),
 900	 * or lazy transactions are on a full (queued) log page,
 901	 * select the latest ready transaction as new group leader and
 902	 * wake her up to lead her group.
 903	 */
 904	if ((!list_empty(&log->cqueue)) &&
 905	    ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
 906	     test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
 907		/*
 908		 * Call lmGCwrite with new group leader
 909		 */
 910		lmGCwrite(log, 1);
 911
 912	/* no transactions are ready yet (transactions are only just
 913	 * queued (GC_QUEUE) and not entered for group commit yet).
 914	 * the first transaction entering group commit
 915	 * will elect herself as new group leader.
 916	 */
 917	else
 918		log->cflag &= ~logGC_PAGEOUT;
 919
 920	//LOGGC_UNLOCK(log);
 921	spin_unlock_irqrestore(&log->gclock, flags);
 922	return;
 923}
 924
 925/*
 926 * NAME:	lmLogSync()
 927 *
 928 * FUNCTION:	write log SYNCPT record for specified log
 929 *	if new sync address is available
 930 *	(normally the case if sync() is executed by a background
 931 *	process).
 932 *	calculate new value of log->nextsync which determines when
 933 *	this code is called again.
 934 *
 935 * PARAMETERS:	log	- log structure
 936 *		hard_sync - 1 to force all metadata to be written
 937 *
 938 * RETURN:	lsn	- end-of-log address
 939 *
 940 * serialization: LOG_LOCK() held on entry/exit
 941 */
 942static int lmLogSync(struct jfs_log * log, int hard_sync)
 943{
 944	int logsize;
 945	int written;		/* written since last syncpt */
 946	int free;		/* free space left available */
 947	int delta;		/* additional delta to write normally */
 948	int more;		/* additional write granted */
 949	struct lrd lrd;
 950	int lsn;
 951	struct logsyncblk *lp;
 952	unsigned long flags;
 953
 954	/* push dirty metapages out to disk */
 955	if (hard_sync)
 956		write_special_inodes(log, filemap_fdatawrite);
 957	else
 958		write_special_inodes(log, filemap_flush);
 959
 960	/*
 961	 *	forward syncpt
 962	 */
 963	/* if last sync is same as last syncpt,
 964	 * invoke sync point forward processing to update sync.
 965	 */
 966
 967	if (log->sync == log->syncpt) {
 968		LOGSYNC_LOCK(log, flags);
 969		if (list_empty(&log->synclist))
 970			log->sync = log->lsn;
 971		else {
 972			lp = list_entry(log->synclist.next,
 973					struct logsyncblk, synclist);
 974			log->sync = lp->lsn;
 975		}
 976		LOGSYNC_UNLOCK(log, flags);
 977
 978	}
 979
 980	/* if sync is different from last syncpt,
 981	 * write a SYNCPT record with syncpt = sync.
 982	 * reset syncpt = sync
 983	 */
 984	if (log->sync != log->syncpt) {
 985		lrd.logtid = 0;
 986		lrd.backchain = 0;
 987		lrd.type = cpu_to_le16(LOG_SYNCPT);
 988		lrd.length = 0;
 989		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
 990		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
 991
 992		log->syncpt = log->sync;
 993	} else
 994		lsn = log->lsn;
 995
 996	/*
 997	 *	setup next syncpt trigger (SWAG)
 998	 */
 999	logsize = log->logsize;
1000
1001	logdiff(written, lsn, log);
1002	free = logsize - written;
1003	delta = LOGSYNC_DELTA(logsize);
1004	more = min(free / 2, delta);
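    	/*
    	 * the next syncpt fires after 'more' additional bytes are
    	 * written; capping 'more' at half the remaining free space
    	 * keeps the trigger ahead of log wrap, and less than two
    	 * pages of headroom is treated as wrap below.
    	 */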
1005	if (more < 2 * LOGPSIZE) {
1006		jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
1007		/*
1008		 *	log wrapping
1009		 *
1010		 * option 1 - panic ? No!
1011		 * option 2 - shutdown file systems
1012		 *	      associated with log ?
1013		 * option 3 - extend log ?
1014		 * option 4 - second chance
1015		 *
1016		 * mark log wrapped, and continue.
1017		 * when all active transactions are completed,
1018		 * mark log valid for recovery.
1019		 * if crashed during invalid state, log state
1020		 * implies invalid log, forcing fsck().
1021		 */
1022		/* mark log state log wrap in log superblock */
1023		/* log->state = LOGWRAP; */
1024
1025		/* reset sync point computation */
1026		log->syncpt = log->sync = lsn;
1027		log->nextsync = delta;
1028	} else
1029		/* next syncpt trigger = written + more */
1030		log->nextsync = written + more;
1031
1032	/* if number of bytes written from last sync point is more
1033	 * than 1/4 of the log size, stop new transactions from
1034	 * starting until all current transactions are completed
1035	 * by setting syncbarrier flag.
1036	 */
1037	if (!test_bit(log_SYNCBARRIER, &log->flag) &&
1038	    (written > LOGSYNC_BARRIER(logsize)) && log->active) {
1039		set_bit(log_SYNCBARRIER, &log->flag);
1040		jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1041			 log->syncpt);
1042		/*
1043		 * We may have to initiate group commit
1044		 */
1045		jfs_flush_journal(log, 0);
1046	}
1047
1048	return lsn;
1049}
1050
1051/*
1052 * NAME:	jfs_syncpt
1053 *
1054 * FUNCTION:	write log SYNCPT record for specified log
1055 *
1056 * PARAMETERS:	log	  - log structure
1057 *		hard_sync - set to 1 to force metadata to be written
1058 */
1059void jfs_syncpt(struct jfs_log *log, int hard_sync)
1060{
    	LOG_LOCK(log);
1061	if (!test_bit(log_QUIESCE, &log->flag))
1062		lmLogSync(log, hard_sync);
1063	LOG_UNLOCK(log);
1064}
1065
1066/*
1067 * NAME:	lmLogOpen()
1068 *
1069 * FUNCTION:	open the log on first open;
1070 *	insert filesystem in the active list of the log.
1071 *
1072 * PARAMETER:	ipmnt	- file system mount inode
1073 *		iplog	- log inode (out)
1074 *
1075 * RETURN:
1076 *
1077 * serialization:
1078 */
1079int lmLogOpen(struct super_block *sb)
1080{
1081	int rc;
1082	struct block_device *bdev;
1083	struct jfs_log *log;
1084	struct jfs_sb_info *sbi = JFS_SBI(sb);
1085
1086	if (sbi->flag & JFS_NOINTEGRITY)
1087		return open_dummy_log(sb);
1088
1089	if (sbi->mntflag & JFS_INLINELOG)
1090		return open_inline_log(sb);
1091
1092	mutex_lock(&jfs_log_mutex);
1093	list_for_each_entry(log, &jfs_external_logs, journal_list) {
1094		if (log->bdev->bd_dev == sbi->logdev) {
1095			if (memcmp(log->uuid, sbi->loguuid,
1096				   sizeof(log->uuid))) {
1097				jfs_warn("wrong uuid on JFS journal\n");
1098				mutex_unlock(&jfs_log_mutex);
1099				return -EINVAL;
1100			}
1101			/*
1102			 * add file system to log active file system list
1103			 */
1104			if ((rc = lmLogFileSystem(log, sbi, 1))) {
1105				mutex_unlock(&jfs_log_mutex);
1106				return rc;
1107			}
1108			goto journal_found;
1109		}
1110	}
1111
1112	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
1113		mutex_unlock(&jfs_log_mutex);
1114		return -ENOMEM;
1115	}
1116	INIT_LIST_HEAD(&log->sb_list);
1117	init_waitqueue_head(&log->syncwait);
1118
1119	/*
1120	 *	external log as separate logical volume
1121	 *
1122	 * file systems to log may have n-to-1 relationship;
1123	 */
1124
1125	bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1126				 log);
1127	if (IS_ERR(bdev)) {
1128		rc = PTR_ERR(bdev);
1129		goto free;
1130	}
1131
1132	log->bdev = bdev;
1133	memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid));
1134
1135	/*
1136	 * initialize log:
1137	 */
1138	if ((rc = lmLogInit(log)))
1139		goto close;
1140
1141	list_add(&log->journal_list, &jfs_external_logs);
1142
1143	/*
1144	 * add file system to log active file system list
1145	 */
1146	if ((rc = lmLogFileSystem(log, sbi, 1)))
1147		goto shutdown;
1148
1149journal_found:
1150	LOG_LOCK(log);
1151	list_add(&sbi->log_list, &log->sb_list);
1152	sbi->log = log;
1153	LOG_UNLOCK(log);
1154
1155	mutex_unlock(&jfs_log_mutex);
1156	return 0;
1157
1158	/*
1159	 *	unwind on error
1160	 */
1161      shutdown:		/* unwind lbmLogInit() */
1162	list_del(&log->journal_list);
1163	lbmLogShutdown(log);
1164
1165      close:		/* close external log device */
1166	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1167
1168      free:		/* free log descriptor */
1169	mutex_unlock(&jfs_log_mutex);
1170	kfree(log);
1171
1172	jfs_warn("lmLogOpen: exit(%d)", rc);
1173	return rc;
1174}
1175
1176static int open_inline_log(struct super_block *sb)
1177{
1178	struct jfs_log *log;
1179	int rc;
1180
1181	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1182		return -ENOMEM;
1183	INIT_LIST_HEAD(&log->sb_list);
1184	init_waitqueue_head(&log->syncwait);
1185
1186	set_bit(log_INLINELOG, &log->flag);
1187	log->bdev = sb->s_bdev;
1188	log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1189	log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1190	    (L2LOGPSIZE - sb->s_blocksize_bits);
1191	log->l2bsize = sb->s_blocksize_bits;
1192	ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1193
1194	/*
1195	 * initialize log.
1196	 */
1197	if ((rc = lmLogInit(log))) {
1198		kfree(log);
1199		jfs_warn("lmLogOpen: exit(%d)", rc);
1200		return rc;
1201	}
1202
1203	list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
1204	JFS_SBI(sb)->log = log;
1205
1206	return rc;
1207}
1208
1209static int open_dummy_log(struct super_block *sb)
1210{
1211	int rc;
1212
1213	mutex_lock(&jfs_log_mutex);
1214	if (!dummy_log) {
1215		dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
1216		if (!dummy_log) {
1217			mutex_unlock(&jfs_log_mutex);
1218			return -ENOMEM;
1219		}
1220		INIT_LIST_HEAD(&dummy_log->sb_list);
1221		init_waitqueue_head(&dummy_log->syncwait);
1222		dummy_log->no_integrity = 1;
1223		/* Make up some stuff */
1224		dummy_log->base = 0;
1225		dummy_log->size = 1024;
1226		rc = lmLogInit(dummy_log);
1227		if (rc) {
1228			kfree(dummy_log);
1229			dummy_log = NULL;
1230			mutex_unlock(&jfs_log_mutex);
1231			return rc;
1232		}
1233	}
1234
1235	LOG_LOCK(dummy_log);
1236	list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
1237	JFS_SBI(sb)->log = dummy_log;
1238	LOG_UNLOCK(dummy_log);
1239	mutex_unlock(&jfs_log_mutex);
1240
1241	return 0;
1242}
1243
1244/*
1245 * NAME:	lmLogInit()
1246 *
1247 * FUNCTION:	log initialization at first log open.
1248 *
1249 *	logredo() (or logformat()) should have been run previously.
1250 *	initialize the log from log superblock.
1251 *	set the log state in the superblock to LOGMOUNT and
1252 *	write SYNCPT log record.
1253 *
1254 * PARAMETER:	log	- log structure
1255 *
1256 * RETURN:	0	- if ok
1257 *		-EINVAL	- bad log magic number or superblock dirty
1258 *		error returned from logwait()
1259 *
1260 * serialization: single first open thread
1261 */
1262int lmLogInit(struct jfs_log * log)
1263{
1264	int rc = 0;
1265	struct lrd lrd;
1266	struct logsuper *logsuper;
1267	struct lbuf *bpsuper;
1268	struct lbuf *bp;
1269	struct logpage *lp;
1270	int lsn = 0;
1271
1272	jfs_info("lmLogInit: log:0x%p", log);
1273
1274	/* initialize the group commit serialization lock */
1275	LOGGC_LOCK_INIT(log);
1276
1277	/* allocate/initialize the log write serialization lock */
1278	LOG_LOCK_INIT(log);
1279
1280	LOGSYNC_LOCK_INIT(log);
1281
1282	INIT_LIST_HEAD(&log->synclist);
1283
1284	INIT_LIST_HEAD(&log->cqueue);
1285	log->flush_tblk = NULL;
1286
1287	log->count = 0;
1288
1289	/*
1290	 * initialize log i/o
1291	 */
1292	if ((rc = lbmLogInit(log)))
1293		return rc;
1294
1295	if (!test_bit(log_INLINELOG, &log->flag))
1296		log->l2bsize = L2LOGPSIZE;
1297
1298	/* check for disabled journaling to disk */
1299	if (log->no_integrity) {
1300		/*
1301		 * Journal pages will still be filled.  When the time comes
1302		 * to actually do the I/O, the write is not done, and the
1303		 * endio routine is called directly.
1304		 */
1305		bp = lbmAllocate(log , 0);
1306		log->bp = bp;
1307		bp->l_pn = bp->l_eor = 0;
1308	} else {
1309		/*
1310		 * validate log superblock
1311		 */
1312		if ((rc = lbmRead(log, 1, &bpsuper)))
1313			goto errout10;
1314
1315		logsuper = (struct logsuper *) bpsuper->l_ldata;
1316
1317		if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1318			jfs_warn("*** Log Format Error ! ***");
1319			rc = -EINVAL;
1320			goto errout20;
1321		}
1322
1323		/* logredo() should have been run successfully. */
1324		if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1325			jfs_warn("*** Log Is Dirty ! ***");
1326			rc = -EINVAL;
1327			goto errout20;
1328		}
1329
1330		/* initialize log from log superblock */
1331		if (test_bit(log_INLINELOG,&log->flag)) {
1332			if (log->size != le32_to_cpu(logsuper->size)) {
1333				rc = -EINVAL;
1334				goto errout20;
1335			}
1336			jfs_info("lmLogInit: inline log:0x%p base:0x%Lx "
1337				 "size:0x%x", log,
1338				 (unsigned long long) log->base, log->size);
1339		} else {
1340			if (memcmp(logsuper->uuid, log->uuid, 16)) {
1341				jfs_warn("wrong uuid on JFS log device");
    				rc = -EINVAL;
1342				goto errout20;
1343			}
1344			log->size = le32_to_cpu(logsuper->size);
1345			log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1346			jfs_info("lmLogInit: external log:0x%p base:0x%Lx "
1347				 "size:0x%x", log,
1348				 (unsigned long long) log->base, log->size);
1349		}
1350
1351		log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1352		log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1353
1354		/*
1355		 * initialize for log append write mode
1356		 */
1357		/* establish current/end-of-log page/buffer */
1358		if ((rc = lbmRead(log, log->page, &bp)))
1359			goto errout20;
1360
1361		lp = (struct logpage *) bp->l_ldata;
1362
1363		jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1364			 le32_to_cpu(logsuper->end), log->page, log->eor,
1365			 le16_to_cpu(lp->h.eor));
1366
1367		log->bp = bp;
1368		bp->l_pn = log->page;
1369		bp->l_eor = log->eor;
1370
1371		/* if current page is full, move on to next page */
1372		if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1373			lmNextPage(log);
1374
1375		/*
1376		 * initialize log syncpoint
1377		 */
1378		/*
1379		 * write the first SYNCPT record with syncpoint = 0
1380		 * (i.e., log redo up to HERE !);
1381		 * remove current page from lbm write queue at end of pageout
1382		 * (to write log superblock update), but do not release to
1383		 * freelist;
1384		 */
1385		lrd.logtid = 0;
1386		lrd.backchain = 0;
1387		lrd.type = cpu_to_le16(LOG_SYNCPT);
1388		lrd.length = 0;
1389		lrd.log.syncpt.sync = 0;
1390		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1391		bp = log->bp;
1392		bp->l_ceor = bp->l_eor;
1393		lp = (struct logpage *) bp->l_ldata;
1394		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1395		lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1396		if ((rc = lbmIOWait(bp, 0)))
1397			goto errout30;
1398
1399		/*
1400		 * update/write superblock
1401		 */
1402		logsuper->state = cpu_to_le32(LOGMOUNT);
1403		log->serial = le32_to_cpu(logsuper->serial) + 1;
1404		logsuper->serial = cpu_to_le32(log->serial);
1405		lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1406		if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1407			goto errout30;
1408	}
1409
1410	/* initialize logsync parameters */
1411	log->logsize = (log->size - 2) << L2LOGPSIZE;
1412	log->lsn = lsn;
1413	log->syncpt = lsn;
1414	log->sync = log->syncpt;
1415	log->nextsync = LOGSYNC_DELTA(log->logsize);
1416
1417	jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1418		 log->lsn, log->syncpt, log->sync);
1419
1420	/*
1421	 * initialize for lazy/group commit
1422	 */
1423	log->clsn = lsn;
1424
1425	return 0;
1426
1427	/*
1428	 *	unwind on error
1429	 */
1430      errout30:		/* release log page */
1431	log->wqueue = NULL;
1432	bp->l_wqnext = NULL;
1433	lbmFree(bp);
1434
1435      errout20:		/* release log superblock */
1436	lbmFree(bpsuper);
1437
1438      errout10:		/* unwind lbmLogInit() */
1439	lbmLogShutdown(log);
1440
1441	jfs_warn("lmLogInit: exit(%d)", rc);
1442	return rc;
1443}
1444
1445
1446/*
1447 * NAME:	lmLogClose()
1448 *
1449 * FUNCTION:	remove file system <ipmnt> from active list of log <iplog>
1450 *		and close it on last close.
1451 *
1452 * PARAMETER:	sb	- superblock
1453 *
1454 * RETURN:	errors from subroutines
1455 *
1456 * serialization:
1457 */
1458int lmLogClose(struct super_block *sb)
1459{
1460	struct jfs_sb_info *sbi = JFS_SBI(sb);
1461	struct jfs_log *log = sbi->log;
1462	struct block_device *bdev;
1463	int rc = 0;
1464
1465	jfs_info("lmLogClose: log:0x%p", log);
1466
1467	mutex_lock(&jfs_log_mutex);
1468	LOG_LOCK(log);
1469	list_del(&sbi->log_list);
1470	LOG_UNLOCK(log);
1471	sbi->log = NULL;
1472
1473	/*
1474	 * We need to make sure all of the "written" metapages
1475	 * actually make it to disk
1476	 */
1477	sync_blockdev(sb->s_bdev);
1478
1479	if (test_bit(log_INLINELOG, &log->flag)) {
1480		/*
1481		 *	in-line log in host file system
1482		 */
1483		rc = lmLogShutdown(log);
1484		kfree(log);
1485		goto out;
1486	}
1487
1488	if (!log->no_integrity)
1489		lmLogFileSystem(log, sbi, 0);
1490
1491	if (!list_empty(&log->sb_list))
1492		goto out;
1493
1494	/*
1495	 * TODO: ensure that the dummy_log is in a state to allow
1496	 * lbmLogShutdown to deallocate all the buffers and call
1497	 * kfree against dummy_log.  For now, leave dummy_log & its
1498	 * buffers in memory, and reuse if another no-integrity mount
1499	 * is requested.
1500	 */
1501	if (log->no_integrity)
1502		goto out;
1503
1504	/*
1505	 *	external log as separate logical volume
1506	 */
1507	list_del(&log->journal_list);
1508	bdev = log->bdev;
1509	rc = lmLogShutdown(log);
1510
1511	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1512
1513	kfree(log);
1514
1515      out:
1516	mutex_unlock(&jfs_log_mutex);
1517	jfs_info("lmLogClose: exit(%d)", rc);
1518	return rc;
1519}
1520
1521
1522/*
1523 * NAME:	jfs_flush_journal()
1524 *
1525 * FUNCTION:	initiate write of any outstanding transactions to the journal
1526 *		and optionally wait until they are all written to disk
1527 *
1528 *		wait == 0  flush until latest txn is committed, don't wait
1529 *		wait == 1  flush until latest txn is committed, wait
1530 *		wait > 1   flush until all txn's are complete, wait
1531 */
1532void jfs_flush_journal(struct jfs_log *log, int wait)
1533{
1534	int i;
1535	struct tblock *target = NULL;
1536
1537	/* jfs_write_inode may call us during read-only mount */
1538	if (!log)
1539		return;
1540
1541	jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);
1542
1543	LOGGC_LOCK(log);
1544
1545	if (!list_empty(&log->cqueue)) {
1546		/*
1547		 * This ensures that we will keep writing to the journal as long
1548		 * as there are unwritten commit records
1549		 */
1550		target = list_entry(log->cqueue.prev, struct tblock, cqueue);
1551
1552		if (test_bit(log_FLUSH, &log->flag)) {
1553			/*
1554			 * We're already flushing.
1555			 * if flush_tblk is NULL, we are flushing everything,
1556			 * so leave it that way.  Otherwise, update it to the
1557			 * latest transaction
1558			 */
1559			if (log->flush_tblk)
1560				log->flush_tblk = target;
1561		} else {
1562			/* Only flush until latest transaction is committed */
1563			log->flush_tblk = target;
1564			set_bit(log_FLUSH, &log->flag);
1565
1566			/*
1567			 * Initiate I/O on outstanding transactions
1568			 */
1569			if (!(log->cflag & logGC_PAGEOUT)) {
1570				log->cflag |= logGC_PAGEOUT;
1571				lmGCwrite(log, 0);
1572			}
1573		}
1574	}
1575	if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
1576		/* Flush until all activity complete */
1577		set_bit(log_FLUSH, &log->flag);
1578		log->flush_tblk = NULL;
1579	}
1580
1581	if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
1582		DECLARE_WAITQUEUE(__wait, current);
1583
1584		add_wait_queue(&target->gcwait, &__wait);
1585		set_current_state(TASK_UNINTERRUPTIBLE);
1586		LOGGC_UNLOCK(log);
1587		schedule();
1588		__set_current_state(TASK_RUNNING);
1589		LOGGC_LOCK(log);
1590		remove_wait_queue(&target->gcwait, &__wait);
1591	}
1592	LOGGC_UNLOCK(log);
1593
1594	if (wait < 2)
1595		return;
1596
1597	write_special_inodes(log, filemap_fdatawrite);
1598
1599	/*
1600	 * If there was recent activity, we may need to wait
1601	 * for the lazycommit thread to catch up
1602	 */
1603	if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
1604		for (i = 0; i < 200; i++) {	/* Too much? */
1605			msleep(250);
1606			write_special_inodes(log, filemap_fdatawrite);
1607			if (list_empty(&log->cqueue) &&
1608			    list_empty(&log->synclist))
1609				break;
1610		}
1611	}
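    	/* the loop above polls for up to ~50 seconds (200 * 250 ms) */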
1612	assert(list_empty(&log->cqueue));
1613
1614#ifdef CONFIG_JFS_DEBUG
1615	if (!list_empty(&log->synclist)) {
1616		struct logsyncblk *lp;
1617
1618		printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
1619		list_for_each_entry(lp, &log->synclist, synclist) {
1620			if (lp->xflag & COMMIT_PAGE) {
1621				struct metapage *mp = (struct metapage *)lp;
1622				print_hex_dump(KERN_ERR, "metapage: ",
1623					       DUMP_PREFIX_ADDRESS, 16, 4,
1624					       mp, sizeof(struct metapage), 0);
1625				print_hex_dump(KERN_ERR, "page: ",
1626					       DUMP_PREFIX_ADDRESS, 16,
1627					       sizeof(long), mp->page,
1628					       sizeof(struct page), 0);
1629			} else
1630				print_hex_dump(KERN_ERR, "tblock:",
1631					       DUMP_PREFIX_ADDRESS, 16, 4,
1632					       lp, sizeof(struct tblock), 0);
1633		}
1634	}
1635#else
1636	WARN_ON(!list_empty(&log->synclist));
1637#endif
1638	clear_bit(log_FLUSH, &log->flag);
1639}
1640
1641/*
1642 * NAME:	lmLogShutdown()
1643 *
1644 * FUNCTION:	log shutdown at last LogClose().
1645 *
1646 *		write log syncpt record.
1647 *		update super block to set redone flag to 0.
1648 *
1649 * PARAMETER:	log	- log inode
1650 *
1651 * RETURN:	0	- success
1652 *
1653 * serialization: single last close thread
1654 */
1655int lmLogShutdown(struct jfs_log * log)
1656{
1657	int rc;
1658	struct lrd lrd;
1659	int lsn;
1660	struct logsuper *logsuper;
1661	struct lbuf *bpsuper;
1662	struct lbuf *bp;
1663	struct logpage *lp;
1664
1665	jfs_info("lmLogShutdown: log:0x%p", log);
1666
1667	jfs_flush_journal(log, 2);
1668
1669	/*
1670	 * write the last SYNCPT record with syncpoint = 0
1671	 * (i.e., log redo up to HERE !)
1672	 */
1673	lrd.logtid = 0;
1674	lrd.backchain = 0;
1675	lrd.type = cpu_to_le16(LOG_SYNCPT);
1676	lrd.length = 0;
1677	lrd.log.syncpt.sync = 0;
1678
1679	lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1680	bp = log->bp;
1681	lp = (struct logpage *) bp->l_ldata;
1682	lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1683	lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
1684	lbmIOWait(log->bp, lbmFREE);
1685	log->bp = NULL;
1686
1687	/*
1688	 * synchronous update log superblock
1689	 * mark log state as shutdown cleanly
1690	 * (i.e., Log does not need to be replayed).
1691	 */
1692	if ((rc = lbmRead(log, 1, &bpsuper)))
1693		goto out;
1694
1695	logsuper = (struct logsuper *) bpsuper->l_ldata;
1696	logsuper->state = cpu_to_le32(LOGREDONE);
1697	logsuper->end = cpu_to_le32(lsn);
1698	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1699	rc = lbmIOWait(bpsuper, lbmFREE);
1700
1701	jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
1702		 lsn, log->page, log->eor);
1703
1704      out:
1705	/*
1706	 * shutdown per log i/o
1707	 */
1708	lbmLogShutdown(log);
1709
1710	if (rc) {
1711		jfs_warn("lmLogShutdown: exit(%d)", rc);
1712	}
1713	return rc;
1714}
1715
1716
1717/*
1718 * NAME:	lmLogFileSystem()
1719 *
1720 * FUNCTION:	insert (<activate> = true)/remove (<activate> = false)
1721 *	file system into/from log active file system list.
1722 *
1723 * PARAMETER:	log	- pointer to log's inode.
1724 *		fsdev	- kdev_t of filesystem.
1725 *		serial	- pointer to returned log serial number
1726 *		activate - insert/remove device from active list.
1727 *
1728 * RETURN:	0	- success
1729 *		errors returned by vms_iowait().
1730 */
1731static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
1732			   int activate)
1733{
1734	int rc = 0;
1735	int i;
1736	struct logsuper *logsuper;
1737	struct lbuf *bpsuper;
1738	char *uuid = sbi->uuid;
1739
1740	/*
1741	 * insert/remove file system device to log active file system list.
1742	 */
1743	if ((rc = lbmRead(log, 1, &bpsuper)))
1744		return rc;
1745
1746	logsuper = (struct logsuper *) bpsuper->l_ldata;
1747	if (activate) {
1748		for (i = 0; i < MAX_ACTIVE; i++)
1749			if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) {
1750				memcpy(logsuper->active[i].uuid, uuid, 16);
1751				sbi->aggregate = i;
1752				break;
1753			}
1754		if (i == MAX_ACTIVE) {
1755			jfs_warn("Too many file systems sharing journal!");
1756			lbmFree(bpsuper);
1757			return -EMFILE;	/* Is there a better rc? */
1758		}
1759	} else {
1760		for (i = 0; i < MAX_ACTIVE; i++)
1761			if (!memcmp(logsuper->active[i].uuid, uuid, 16)) {
1762				memcpy(logsuper->active[i].uuid, NULL_UUID, 16);
1763				break;
1764			}
1765		if (i == MAX_ACTIVE) {
1766			jfs_warn("Somebody stomped on the journal!");
1767			lbmFree(bpsuper);
1768			return -EIO;
1769		}
1770
1771	}
1772
1773	/*
1774	 * synchronous write log superblock:
1775	 *
1776	 * write sidestream bypassing write queue:
1777	 * at file system mount, log super block is updated for
1778	 * activation of the file system before any log record
1779	 * (MOUNT record) of the file system, and at file system
1780	 * unmount, all meta data for the file system has been
1781	 * flushed before log super block is updated for deactivation
1782	 * of the file system.
1783	 */
1784	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1785	rc = lbmIOWait(bpsuper, lbmFREE);
1786
1787	return rc;
1788}
1789
1790/*
1791 *		log buffer manager (lbm)
1792 *		------------------------
1793 *
1794 * special purpose buffer manager supporting log i/o requirements.
1795 *
1796 * per log write queue:
1797 * log pageout occurs in serial order via a fifo write queue,
1798 * restricted to a single i/o in progress at any one time:
1799 * a circular singly-linked list
1800 * (log->wqueue points to the tail, and buffers are linked via
1801 * the bp->l_wqnext field) maintains the log pages in pageout
1802 * or waiting for pageout, in serial pageout order.
1803 */
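    /*
     * write queue shape (managed by lbmWrite()/lbmIODone()):
     *
     *	log->wqueue --> [tail] --l_wqnext--> [head] --> ... --> [tail]
     *
     * log->wqueue points at the tail buffer, the head (the page
     * currently being paged out) is tail->l_wqnext, and the list is
     * circular.
     */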
1804
1805/*
1806 *	lbmLogInit()
1807 *
1808 * initialize per log I/O setup at lmLogInit()
1809 */
1810static int lbmLogInit(struct jfs_log * log)
1811{				/* log inode */
1812	int i;
1813	struct lbuf *lbuf;
1814
1815	jfs_info("lbmLogInit: log:0x%p", log);
1816
1817	/* initialize current buffer cursor */
1818	log->bp = NULL;
1819
1820	/* initialize log device write queue */
1821	log->wqueue = NULL;
1822
1823	/*
1824	 * Each log has its own buffer pages allocated to it.  These are
1825	 * not managed by the page cache.  This ensures that a transaction
1826	 * writing to the log does not block trying to allocate a page from
1827	 * the page cache (for the log).  This would be bad, since page
1828	 * allocation waits on the kswapd thread that may be committing inodes
1829	 * which would cause log activity.  Was that clear?  I'm trying to
1830	 * avoid deadlock here.
1831	 */
1832	init_waitqueue_head(&log->free_wait);
1833
1834	log->lbuf_free = NULL;
1835
1836	for (i = 0; i < LOGPAGES;) {
1837		char *buffer;
1838		uint offset;
1839		struct page *page;
1840
1841		buffer = (char *) get_zeroed_page(GFP_KERNEL);
1842		if (buffer == NULL)
1843			goto error;
1844		page = virt_to_page(buffer);
1845		for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
1846			lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1847			if (lbuf == NULL) {
1848				if (offset == 0)
1849					free_page((unsigned long) buffer);
1850				goto error;
1851			}
1852			if (offset) /* we already have one reference */
1853				get_page(page);
1854			lbuf->l_offset = offset;
1855			lbuf->l_ldata = buffer + offset;
1856			lbuf->l_page = page;
1857			lbuf->l_log = log;
1858			init_waitqueue_head(&lbuf->l_ioevent);
1859
1860			lbuf->l_freelist = log->lbuf_free;
1861			log->lbuf_free = lbuf;
1862			i++;
1863		}
1864	}
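    	/*
    	 * each zeroed page above is carved into PAGE_SIZE / LOGPSIZE
    	 * lbufs sharing the page (get_page() takes the extra
    	 * references); with the common 4 KiB page size and 4 KiB
    	 * LOGPSIZE that is one lbuf per page.
    	 */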
1865
1866	return (0);
1867
1868      error:
1869	lbmLogShutdown(log);
1870	return -ENOMEM;
1871}
1872
1873
1874/*
1875 *	lbmLogShutdown()
1876 *
1877 * finalize per log I/O setup at lmLogShutdown()
1878 */
1879static void lbmLogShutdown(struct jfs_log * log)
1880{
1881	struct lbuf *lbuf;
1882
1883	jfs_info("lbmLogShutdown: log:0x%p", log);
1884
1885	lbuf = log->lbuf_free;
1886	while (lbuf) {
1887		struct lbuf *next = lbuf->l_freelist;
1888		__free_page(lbuf->l_page);
1889		kfree(lbuf);
1890		lbuf = next;
1891	}
1892}
1893
1894
1895/*
1896 *	lbmAllocate()
1897 *
1898 * allocate an empty log buffer
1899 */
1900static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1901{
1902	struct lbuf *bp;
1903	unsigned long flags;
1904
1905	/*
1906	 * recycle from log buffer freelist if any
1907	 */
1908	LCACHE_LOCK(flags);
1909	LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1910	log->lbuf_free = bp->l_freelist;
1911	LCACHE_UNLOCK(flags);
1912
1913	bp->l_flag = 0;
1914
1915	bp->l_wqnext = NULL;
1916	bp->l_freelist = NULL;
1917
1918	bp->l_pn = pn;
1919	bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1920	bp->l_ceor = 0;
1921
1922	return bp;
1923}
1924
1925
1926/*
1927 *	lbmFree()
1928 *
1929 * release a log buffer to freelist
1930 */
1931static void lbmFree(struct lbuf * bp)
1932{
1933	unsigned long flags;
1934
1935	LCACHE_LOCK(flags);
1936
1937	lbmfree(bp);
1938
1939	LCACHE_UNLOCK(flags);
1940}
1941
1942static void lbmfree(struct lbuf * bp)
1943{
1944	struct jfs_log *log = bp->l_log;
1945
1946	assert(bp->l_wqnext == NULL);
1947
1948	/*
1949	 * return the buffer to head of freelist
1950	 */
1951	bp->l_freelist = log->lbuf_free;
1952	log->lbuf_free = bp;
1953
1954	wake_up(&log->free_wait);
1955	return;
1956}
1957
1958
1959/*
1960 * NAME:	lbmRedrive
1961 *
1962 * FUNCTION:	add a log buffer to the log redrive list
1963 *
1964 * PARAMETER:
1965 *	bp	- log buffer
1966 *
1967 * NOTES:
1968 *	Takes log_redrive_lock.
1969 */
1970static inline void lbmRedrive(struct lbuf *bp)
1971{
1972	unsigned long flags;
1973
1974	spin_lock_irqsave(&log_redrive_lock, flags);
1975	bp->l_redrive_next = log_redrive_list;
1976	log_redrive_list = bp;
1977	spin_unlock_irqrestore(&log_redrive_lock, flags);
1978
1979	wake_up_process(jfsIOthread);
1980}
1981
1982
1983/*
1984 *	lbmRead()
1985 */
1986static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1987{
1988	struct bio *bio;
1989	struct lbuf *bp;
1990
1991	/*
1992	 * allocate a log buffer
1993	 */
1994	*bpp = bp = lbmAllocate(log, pn);
1995	jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1996
1997	bp->l_flag |= lbmREAD;
1998
1999	bio = bio_alloc(GFP_NOFS, 1);
2000
2001	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2002	bio->bi_bdev = log->bdev;
2003	bio->bi_io_vec[0].bv_page = bp->l_page;
2004	bio->bi_io_vec[0].bv_len = LOGPSIZE;
2005	bio->bi_io_vec[0].bv_offset = bp->l_offset;
2006
2007	bio->bi_vcnt = 1;
2008	bio->bi_iter.bi_size = LOGPSIZE;
2009
2010	bio->bi_end_io = lbmIODone;
2011	bio->bi_private = bp;
2012	/* check if journaling to disk has been disabled */
2013	if (log->no_integrity) {
2014		bio->bi_iter.bi_size = 0;
2015		lbmIODone(bio, 0);
2016	} else {
2017		submit_bio(READ_SYNC, bio);
2018	}
2019
2020	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
2021
2022	return 0;
2023}
2024
2025
2026/*
2027 *	lbmWrite()
2028 *
2029 * the buffer at the head of the pageout queue stays there after a
2030 * partial-page pageout completes, and is redriven by the caller's
2031 * explicit initiation of pageout until the full-page pageout has
2032 * completed and the buffer is released.
2033 *
2034 * device driver i/o done redrives pageout of new buffer at
2035 * head of pageout queue when current buffer at head of pageout
2036 * queue is released at the completion of its full-page pageout.
2037 *
2038 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2039 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2040 */
2041static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2042		     int cant_block)
2043{
2044	struct lbuf *tail;
2045	unsigned long flags;
2046
2047	jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
2048
2049	/* map the logical block address to physical block address */
2050	bp->l_blkno =
2051	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
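	/*
	 * Worked example (illustrative values): JFS log pages are
	 * LOGPSIZE bytes (4 KiB, L2LOGPSIZE = 12), so on a 512-byte
	 * block device (l2bsize = 9) each page spans 8 blocks and
	 * page 3 maps to log->base + (3 << 3) = log->base + 24.
	 */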
2052
2053	LCACHE_LOCK(flags);		/* disable+lock */
2054
2055	/*
2056	 * initialize buffer for device driver
2057	 */
2058	bp->l_flag = flag;
2059
2060	/*
2061	 *	insert bp at tail of write queue associated with log
2062	 *
2063	 * (request is either for bp already/currently at head of queue
2064	 * or new bp to be inserted at tail)
2065	 */
2066	tail = log->wqueue;
2067
2068	/* is buffer not already on write queue ? */
2069	if (bp->l_wqnext == NULL) {
2070		/* insert at tail of wqueue */
2071		if (tail == NULL) {
2072			log->wqueue = bp;
2073			bp->l_wqnext = bp;
2074		} else {
2075			log->wqueue = bp;
2076			bp->l_wqnext = tail->l_wqnext;
2077			tail->l_wqnext = bp;
2078		}
2079
2080		tail = bp;
2081	}
2082
2083	/* is buffer at head of wqueue and for write ? */
2084	if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
2085		LCACHE_UNLOCK(flags);	/* unlock+enable */
2086		return;
2087	}
2088
2089	LCACHE_UNLOCK(flags);	/* unlock+enable */
2090
2091	if (cant_block)
2092		lbmRedrive(bp);
2093	else if (flag & lbmSYNC)
2094		lbmStartIO(bp);
2095	else {
2096		LOGGC_UNLOCK(log);
2097		lbmStartIO(bp);
2098		LOGGC_LOCK(log);
2099	}
2100}
2101
2102
2103/*
2104 *	lbmDirectWrite()
2105 *
2106 * initiate pageout bypassing write queue for sidestream
2107 * (e.g., log superblock) write;
2108 */
2109static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2110{
2111	jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2112		 bp, flag, bp->l_pn);
2113
2114	/*
2115	 * initialize buffer for device driver
2116	 */
2117	bp->l_flag = flag | lbmDIRECT;
2118
2119	/* map the logical block address to physical block address */
2120	bp->l_blkno =
2121	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2122
2123	/*
2124	 *	initiate pageout of the page
2125	 */
2126	lbmStartIO(bp);
2127}
2128
2129
2130/*
2131 * NAME:	lbmStartIO()
2132 *
2133 * FUNCTION:	Interface to DD strategy routine
2134 *
2135 * RETURN:	none
2136 *
2137 * serialization: LCACHE_LOCK() is NOT held during log i/o;
2138 */
2139static void lbmStartIO(struct lbuf * bp)
2140{
2141	struct bio *bio;
2142	struct jfs_log *log = bp->l_log;
2143
2144	jfs_info("lbmStartIO\n");
2145
2146	bio = bio_alloc(GFP_NOFS, 1);
2147	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2148	bio->bi_bdev = log->bdev;
2149	bio->bi_io_vec[0].bv_page = bp->l_page;
2150	bio->bi_io_vec[0].bv_len = LOGPSIZE;
2151	bio->bi_io_vec[0].bv_offset = bp->l_offset;
2152
2153	bio->bi_vcnt = 1;
2154	bio->bi_iter.bi_size = LOGPSIZE;
2155
2156	bio->bi_end_io = lbmIODone;
2157	bio->bi_private = bp;
2158
2159	/* check if journaling to disk has been disabled */
2160	if (log->no_integrity) {
2161		bio->bi_iter.bi_size = 0;
2162		lbmIODone(bio, 0);
2163	} else {
2164		submit_bio(WRITE_SYNC, bio);
2165		INCREMENT(lmStat.submitted);
2166	}
2167}
2168
2169
2170/*
2171 *	lbmIOWait()
2172 */
2173static int lbmIOWait(struct lbuf * bp, int flag)
2174{
2175	unsigned long flags;
2176	int rc = 0;
2177
2178	jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2179
2180	LCACHE_LOCK(flags);		/* disable+lock */
2181
2182	LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2183
2184	rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2185
2186	if (flag & lbmFREE)
2187		lbmfree(bp);
2188
2189	LCACHE_UNLOCK(flags);	/* unlock+enable */
2190
2191	jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2192	return rc;
2193}
2194
2195/*
2196 *	lbmIODone()
2197 *
2198 * executed at INTIODONE level
2199 */
2200static void lbmIODone(struct bio *bio, int error)
2201{
2202	struct lbuf *bp = bio->bi_private;
2203	struct lbuf *nextbp, *tail;
2204	struct jfs_log *log;
2205	unsigned long flags;
2206
2207	/*
2208	 * get back jfs buffer bound to the i/o buffer
2209	 */
2210	jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
2211
2212	LCACHE_LOCK(flags);		/* disable+lock */
2213
2214	bp->l_flag |= lbmDONE;
2215
2216	if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
2217		bp->l_flag |= lbmERROR;
2218
2219		jfs_err("lbmIODone: I/O error in JFS log");
2220	}
2221
2222	bio_put(bio);
2223
2224	/*
2225	 *	pagein completion
2226	 */
2227	if (bp->l_flag & lbmREAD) {
2228		bp->l_flag &= ~lbmREAD;
2229
2230		LCACHE_UNLOCK(flags);	/* unlock+enable */
2231
2232		/* wakeup I/O initiator */
2233		LCACHE_WAKEUP(&bp->l_ioevent);
2234
2235		return;
2236	}
2237
2238	/*
2239	 *	pageout completion
2240	 *
2241	 * the bp at the head of write queue has completed pageout.
2242	 *
2243	 * if single-commit/full-page pageout, remove the current buffer
2244	 * from head of pageout queue, and redrive pageout with
2245	 * the new buffer at head of pageout queue;
2246	 * otherwise, the partial-page pageout buffer stays at
2247	 * the head of pageout queue to be redriven for pageout
2248	 * by lmGroupCommit() until full-page pageout is completed.
2249	 */
2250	bp->l_flag &= ~lbmWRITE;
2251	INCREMENT(lmStat.pagedone);
2252
2253	/* update committed lsn */
2254	log = bp->l_log;
2255	log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
2256
2257	if (bp->l_flag & lbmDIRECT) {
2258		LCACHE_WAKEUP(&bp->l_ioevent);
2259		LCACHE_UNLOCK(flags);
2260		return;
2261	}
2262
2263	tail = log->wqueue;
2264
2265	/* single element queue */
2266	if (bp == tail) {
2267		/* remove head buffer of full-page pageout
2268		 * from log device write queue
2269		 */
2270		if (bp->l_flag & lbmRELEASE) {
2271			log->wqueue = NULL;
2272			bp->l_wqnext = NULL;
2273		}
2274	}
2275	/* multi element queue */
2276	else {
2277		/* remove head buffer of full-page pageout
2278		 * from log device write queue
2279		 */
2280		if (bp->l_flag & lbmRELEASE) {
2281			nextbp = tail->l_wqnext = bp->l_wqnext;
2282			bp->l_wqnext = NULL;
2283
2284			/*
2285			 * redrive pageout of the next page at the head of the
2286			 * write queue: either a page without any bound tblk
2287			 * (i.e., a page w/o any COMMIT records), or the
2288			 * first page of a new group commit that has been
2289			 * queued after the current page (subsequent pageout
2290			 * is performed synchronously, except for pages without
2291			 * any COMMITs) by lmGroupCommit(), as indicated
2292			 * by the lbmWRITE flag;
2293			 */
2294			if (nextbp->l_flag & lbmWRITE) {
2295				/*
2296				 * We can't do the I/O at interrupt time.
2297				 * The jfsIO thread can do it
2298				 */
2299				lbmRedrive(nextbp);
2300			}
2301		}
2302	}
2303
2304	/*
2305	 *	synchronous pageout:
2306	 *
2307	 * buffer has not necessarily been removed from write queue
2308	 * (e.g., synchronous write of partial-page with COMMIT):
2309	 * leave buffer for i/o initiator to dispose
2310	 */
2311	if (bp->l_flag & lbmSYNC) {
2312		LCACHE_UNLOCK(flags);	/* unlock+enable */
2313
2314		/* wakeup I/O initiator */
2315		LCACHE_WAKEUP(&bp->l_ioevent);
2316	}
2317
2318	/*
2319	 *	Group Commit pageout:
2320	 */
2321	else if (bp->l_flag & lbmGC) {
2322		LCACHE_UNLOCK(flags);
2323		lmPostGC(bp);
2324	}
2325
2326	/*
2327	 *	asynchronous pageout:
2328	 *
2329	 * buffer must have been removed from write queue:
2330	 * insert buffer at head of freelist where it can be recycled
2331	 */
2332	else {
2333		assert(bp->l_flag & lbmRELEASE);
2334		assert(bp->l_flag & lbmFREE);
2335		lbmfree(bp);
2336
2337		LCACHE_UNLOCK(flags);	/* unlock+enable */
2338	}
2339}
2340
2341int jfsIOWait(void *arg)
2342{
2343	struct lbuf *bp;
2344
2345	do {
2346		spin_lock_irq(&log_redrive_lock);
2347		while ((bp = log_redrive_list)) {
2348			log_redrive_list = bp->l_redrive_next;
2349			bp->l_redrive_next = NULL;
2350			spin_unlock_irq(&log_redrive_lock);
2351			lbmStartIO(bp);
2352			spin_lock_irq(&log_redrive_lock);
2353		}
2354
2355		if (freezing(current)) {
2356			spin_unlock_irq(&log_redrive_lock);
2357			try_to_freeze();
2358		} else {
2359			set_current_state(TASK_INTERRUPTIBLE);
2360			spin_unlock_irq(&log_redrive_lock);
2361			schedule();
2362			__set_current_state(TASK_RUNNING);
2363		}
2364	} while (!kthread_should_stop());
2365
2366	jfs_info("jfsIOWait being killed!");
2367	return 0;
2368}
2369
2370/*
2371 * NAME:	lmLogFormat()/jfs_logform()
2372 *
2373 * FUNCTION:	format file system log
2374 *
2375 * PARAMETERS:
2376 *	log	- volume log
2377 *	logAddress - start address of log space in FS block
2378 *	logSize	- length of log space in FS block;
2379 *
2380 * RETURN:	0	- success
2381 *		-EIO	- i/o error
2382 *
2383 * XXX: We're synchronously writing one page at a time.  This needs to
2384 *	be improved by writing multiple pages at once.
2385 */
2386int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2387{
2388	int rc = -EIO;
2389	struct jfs_sb_info *sbi;
2390	struct logsuper *logsuper;
2391	struct logpage *lp;
2392	int lspn;		/* log sequence page number */
2393	struct lrd *lrd_ptr;
2394	int npages = 0;
2395	struct lbuf *bp;
2396
2397	jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
2398		 (long long)logAddress, logSize);
2399
2400	sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);
2401
2402	/* allocate a log buffer */
2403	bp = lbmAllocate(log, 1);
2404
2405	npages = logSize >> sbi->l2nbperpage;
2406
2407	/*
2408	 *	log space:
2409	 *
2410	 * page 0 - reserved;
2411	 * page 1 - log superblock;
2412 * page 2 - log data page: a SYNCPT log record is written
2413	 *	    into this page at logform time;
2414	 * pages 3-N - log data page: set to empty log data pages;
2415	 */
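	/*
	 * Illustrative arithmetic (assumed values): with 4 KiB log
	 * pages on a 512-byte-block file system, l2nbperpage = 3
	 * (8 blocks per page), so logSize = 128 FS blocks gives
	 * npages = 128 >> 3 = 16, i.e. N = 14 usable data pages.
	 */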
2416	/*
2417	 *	init log superblock: log page 1
2418	 */
2419	logsuper = (struct logsuper *) bp->l_ldata;
2420
2421	logsuper->magic = cpu_to_le32(LOGMAGIC);
2422	logsuper->version = cpu_to_le32(LOGVERSION);
2423	logsuper->state = cpu_to_le32(LOGREDONE);
2424	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
2425	logsuper->size = cpu_to_le32(npages);
2426	logsuper->bsize = cpu_to_le32(sbi->bsize);
2427	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
2428	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
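	/*
	 * Note (added): "end" is the byte offset of the first free log
	 * byte: it skips page 0 (reserved), page 1 (this superblock),
	 * and the page header plus the single SYNCPT record written
	 * into data page 2 below.
	 */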
2429
2430	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2431	bp->l_blkno = logAddress + sbi->nbperpage;
2432	lbmStartIO(bp);
2433	if ((rc = lbmIOWait(bp, 0)))
2434		goto exit;
2435
2436	/*
2437	 *	init pages 2 to npages-1 as log data pages:
2438	 *
2439	 * log page sequence number (lpsn) initialization:
2440	 *
2441	 * pn:   0     1     2     3                 n-1
2442	 *       +-----+-----+=====+=====+===.....===+=====+
2443	 * lspn:             N-1   0     1           N-2
2444	 *                   <--- N page circular file ---->
2445	 *
2446	 * the N (= npages-2) data pages of the log are maintained as
2447	 * a circular file for the log records;
2448	 * lpsn grows by 1 monotonically as each log page is written
2449	 * to the circular file of the log;
2450	 * and setLogpage() will not reset the page number even if
2451	 * the eor is equal to LOGPHDRSIZE. In order for the binary
2452	 * search to still work in the find-log-end process, we have to
2453	 * simulate the log wrap situation at log format time.
2454	 * The 1st log page written will have the highest lpsn. The
2455	 * succeeding log pages will then have ascending lpsn values,
2456	 * starting from 0, ..., (N-2).
2457	 */
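	/*
	 * Worked example (illustrative): for npages = 16, the N = 14
	 * data pages occupy pn 2..15.  The first page written (pn 2)
	 * gets lpsn N - 1 = 13; the loop below then assigns lpsn 0..12
	 * to pn 3..15, so the find-log-end binary search sees exactly
	 * one simulated wrap point.
	 */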
2458	lp = (struct logpage *) bp->l_ldata;
2459	/*
2460	 * initialize 1st log page to be written: lpsn = N - 1,
2461	 * a SYNCPT log record is written to this page
2462	 */
2463	lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
2464	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
2465
2466	lrd_ptr = (struct lrd *) &lp->data;
2467	lrd_ptr->logtid = 0;
2468	lrd_ptr->backchain = 0;
2469	lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
2470	lrd_ptr->length = 0;
2471	lrd_ptr->log.syncpt.sync = 0;
2472
2473	bp->l_blkno += sbi->nbperpage;
2474	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2475	lbmStartIO(bp);
2476	if ((rc = lbmIOWait(bp, 0)))
2477		goto exit;
2478
2479	/*
2480	 *	initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
2481	 */
2482	for (lspn = 0; lspn < npages - 3; lspn++) {
2483		lp->h.page = lp->t.page = cpu_to_le32(lspn);
2484		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
2485
2486		bp->l_blkno += sbi->nbperpage;
2487		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2488		lbmStartIO(bp);
2489		if ((rc = lbmIOWait(bp, 0)))
2490			goto exit;
2491	}
2492
2493	rc = 0;
2494exit:
2495	/*
2496	 *	finalize log
2497	 */
2498	/* release the buffer */
2499	lbmFree(bp);
2500
2501	return rc;
2502}
2503
2504#ifdef CONFIG_JFS_STATISTICS
2505static int jfs_lmstats_proc_show(struct seq_file *m, void *v)
2506{
2507	seq_printf(m,
2508		       "JFS Logmgr stats\n"
2509		       "================\n"
2510		       "commits = %d\n"
2511		       "writes submitted = %d\n"
2512		       "writes completed = %d\n"
2513		       "full pages submitted = %d\n"
2514		       "partial pages submitted = %d\n",
2515		       lmStat.commit,
2516		       lmStat.submitted,
2517		       lmStat.pagedone,
2518		       lmStat.full_page,
2519		       lmStat.partial_page);
2520	return 0;
2521}
2522
2523static int jfs_lmstats_proc_open(struct inode *inode, struct file *file)
2524{
2525	return single_open(file, jfs_lmstats_proc_show, NULL);
2526}
2527
2528const struct file_operations jfs_lmstats_proc_fops = {
2529	.owner		= THIS_MODULE,
2530	.open		= jfs_lmstats_proc_open,
2531	.read		= seq_read,
2532	.llseek		= seq_lseek,
2533	.release	= single_release,
2534};
2535#endif /* CONFIG_JFS_STATISTICS */
v5.14.15
  82#define LOG_UNLOCK(log)		mutex_unlock(&((log)->loglock))
  83
  84
  85/*
  86 *	log group commit serialization (per log)
  87 */
  88
  89#define LOGGC_LOCK_INIT(log)	spin_lock_init(&(log)->gclock)
  90#define LOGGC_LOCK(log)		spin_lock_irq(&(log)->gclock)
  91#define LOGGC_UNLOCK(log)	spin_unlock_irq(&(log)->gclock)
  92#define LOGGC_WAKEUP(tblk)	wake_up_all(&(tblk)->gcwait)
  93
  94/*
  95 *	log sync serialization (per log)
  96 */
  97#define	LOGSYNC_DELTA(logsize)		min((logsize)/8, 128*LOGPSIZE)
  98#define	LOGSYNC_BARRIER(logsize)	((logsize)/4)
  99/*
 100#define	LOGSYNC_DELTA(logsize)		min((logsize)/4, 256*LOGPSIZE)
 101#define	LOGSYNC_BARRIER(logsize)	((logsize)/2)
 102*/
 103
 104
 105/*
 106 *	log buffer cache synchronization
 107 */
 108static DEFINE_SPINLOCK(jfsLCacheLock);
 109
 110#define	LCACHE_LOCK(flags)	spin_lock_irqsave(&jfsLCacheLock, flags)
 111#define	LCACHE_UNLOCK(flags)	spin_unlock_irqrestore(&jfsLCacheLock, flags)
 112
 113/*
 114 * See __SLEEP_COND in jfs_locks.h
 115 */
 116#define LCACHE_SLEEP_COND(wq, cond, flags)	\
 117do {						\
 118	if (cond)				\
 119		break;				\
 120	__SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
 121} while (0)
 122
 123#define	LCACHE_WAKEUP(event)	wake_up(event)
 124
 125
 126/*
 127 *	lbuf buffer cache (lCache) control
 128 */
 129/* log buffer manager pageout control (cumulative, inclusive) */
 130#define	lbmREAD		0x0001
 131#define	lbmWRITE	0x0002	/* enqueue at tail of write queue;
 132				 * init pageout if at head of queue;
 133				 */
 134#define	lbmRELEASE	0x0004	/* remove from write queue
 135				 * at completion of pageout;
 136				 * do not free/recycle it yet:
 137				 * caller will free it;
 138				 */
 139#define	lbmSYNC		0x0008	/* do not return to freelist
 140				 * when removed from write queue;
 141				 */
 142#define lbmFREE		0x0010	/* return to freelist
 143				 * at completion of pageout;
 144				 * the buffer may be recycled;
 145				 */
 146#define	lbmDONE		0x0020
 147#define	lbmERROR	0x0040
 148#define lbmGC		0x0080	/* lbmIODone to perform post-GC processing
 149				 * of log page
 150				 */
 151#define lbmDIRECT	0x0100
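
/*
 * For orientation (editor's summary; the combinations below are taken
 * from the call sites later in this file):
 *	lbmWRITE | lbmRELEASE | lbmFREE	 - full-page asynchronous
 *					   pageout; recycled at iodone
 *	lbmWRITE | lbmSYNC		 - synchronous pageout; caller
 *					   waits via lbmIOWait()
 *	lbmWRITE | lbmGC		 - group-commit pageout; iodone
 *					   runs lmPostGC()
 *	lbmWRITE | lbmSYNC | lbmDIRECT	 - sidestream write (log
 *					   superblock, log format)
 */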
 152
 153/*
 154 * Global list of active external journals
 155 */
 156static LIST_HEAD(jfs_external_logs);
 157static struct jfs_log *dummy_log;
 158static DEFINE_MUTEX(jfs_log_mutex);
 159
 160/*
 161 * forward references
 162 */
 163static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
 164			 struct lrd * lrd, struct tlock * tlck);
 165
 166static int lmNextPage(struct jfs_log * log);
 167static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
 168			   int activate);
 169
 170static int open_inline_log(struct super_block *sb);
 171static int open_dummy_log(struct super_block *sb);
 172static int lbmLogInit(struct jfs_log * log);
 173static void lbmLogShutdown(struct jfs_log * log);
 174static struct lbuf *lbmAllocate(struct jfs_log * log, int);
 175static void lbmFree(struct lbuf * bp);
 176static void lbmfree(struct lbuf * bp);
 177static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
 178static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
 179static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
 180static int lbmIOWait(struct lbuf * bp, int flag);
 181static bio_end_io_t lbmIODone;
 182static void lbmStartIO(struct lbuf * bp);
 183static void lmGCwrite(struct jfs_log * log, int cant_block);
 184static int lmLogSync(struct jfs_log * log, int hard_sync);
 185
 186
 187
 188/*
 189 *	statistics
 190 */
 191#ifdef CONFIG_JFS_STATISTICS
 192static struct lmStat {
 193	uint commit;		/* # of commit */
 194	uint pagedone;		/* # of page written */
 195	uint submitted;		/* # of pages submitted */
 196	uint full_page;		/* # of full pages submitted */
 197	uint partial_page;	/* # of partial pages submitted */
 198} lmStat;
 199#endif
 200
 201static void write_special_inodes(struct jfs_log *log,
 202				 int (*writer)(struct address_space *))
 203{
 204	struct jfs_sb_info *sbi;
 205
 206	list_for_each_entry(sbi, &log->sb_list, log_list) {
 207		writer(sbi->ipbmap->i_mapping);
 208		writer(sbi->ipimap->i_mapping);
 209		writer(sbi->direct_inode->i_mapping);
 210	}
 211}
 212
 213/*
 214 * NAME:	lmLog()
 215 *
 216 * FUNCTION:	write a log record;
 217 *
 218 * PARAMETER:
 219 *
 220 * RETURN:	lsn - offset to the next log record to write (end-of-log);
 221 *		-1  - error;
 222 *
 223 * note: todo: log error handler
 224 */
 225int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 226	  struct tlock * tlck)
 227{
 228	int lsn;
 229	int diffp, difft;
 230	struct metapage *mp = NULL;
 231	unsigned long flags;
 232
 233	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
 234		 log, tblk, lrd, tlck);
 235
 236	LOG_LOCK(log);
 237
 238	/* log by (out-of-transaction) JFS ? */
 239	if (tblk == NULL)
 240		goto writeRecord;
 241
 242	/* log from page ? */
 243	if (tlck == NULL ||
 244	    tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
 245		goto writeRecord;
 246
 247	/*
 248	 *	initialize/update page/transaction recovery lsn
 249	 */
 250	lsn = log->lsn;
 251
 252	LOGSYNC_LOCK(log, flags);
 253
 254	/*
 255	 * initialize page lsn if first log write of the page
 256	 */
 257	if (mp->lsn == 0) {
 258		mp->log = log;
 259		mp->lsn = lsn;
 260		log->count++;
 261
 262		/* insert page at tail of logsynclist */
 263		list_add_tail(&mp->synclist, &log->synclist);
 264	}
 265
 266	/*
 267	 *	initialize/update lsn of tblock of the page
 268	 *
 269	 * transaction inherits oldest lsn of pages associated
 270	 * with allocation/deallocation of resources (their
 271	 * log records are used to reconstruct allocation map
 272	 * at recovery time: inode for inode allocation map,
 273	 * B+-tree index of extent descriptors for block
 274	 * allocation map);
 275	 * allocation map pages inherit transaction lsn at
 276	 * commit time to allow forwarding log syncpt past log
 277	 * records associated with allocation/deallocation of
 278	 * resources only after the persistent map of these map pages
 279	 * has been updated and propagated to home.
 280	 */
 281	/*
 282	 * initialize transaction lsn:
 283	 */
 284	if (tblk->lsn == 0) {
 285		/* inherit lsn of its first page logged */
 286		tblk->lsn = mp->lsn;
 287		log->count++;
 288
 289		/* insert tblock after the page on logsynclist */
 290		list_add(&tblk->synclist, &mp->synclist);
 291	}
 292	/*
 293	 * update transaction lsn:
 294	 */
 295	else {
 296		/* inherit oldest/smallest lsn of page */
 297		logdiff(diffp, mp->lsn, log);
 298		logdiff(difft, tblk->lsn, log);
 299		if (diffp < difft) {
 300			/* update tblock lsn with page lsn */
 301			tblk->lsn = mp->lsn;
 302
 303			/* move tblock after page on logsynclist */
 304			list_move(&tblk->synclist, &mp->synclist);
 305		}
 306	}
 307
 308	LOGSYNC_UNLOCK(log, flags);
 309
 310	/*
 311	 *	write the log record
 312	 */
 313      writeRecord:
 314	lsn = lmWriteRecord(log, tblk, lrd, tlck);
 315
 316	/*
 317	 * forward log syncpt if log reached next syncpt trigger
 318	 */
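	/*
	 * Clarifying note (assumes logdiff() as defined in
	 * jfs_logmgr.h): logdiff(d, lsn, log) yields the cyclic
	 * distance d = lsn - log->syncpt, wrapped by log->logsize, so
	 * the test below fires once roughly nextsync bytes of log have
	 * been written since the last sync point.
	 */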
 319	logdiff(diffp, lsn, log);
 320	if (diffp >= log->nextsync)
 321		lsn = lmLogSync(log, 0);
 322
 323	/* update end-of-log lsn */
 324	log->lsn = lsn;
 325
 326	LOG_UNLOCK(log);
 327
 328	/* return end-of-log address */
 329	return lsn;
 330}
 331
 332/*
 333 * NAME:	lmWriteRecord()
 334 *
 335 * FUNCTION:	move the log record to current log page
 336 *
 337 * PARAMETERS:	log, tblk, lrd, tlck - as for lmLog()
 338 *
 339 * RETURN:	end-of-log address
 340 *
 341 * serialization: LOG_LOCK() held on entry/exit
 342 */
 343static int
 344lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 345	      struct tlock * tlck)
 346{
 347	int lsn = 0;		/* end-of-log address */
 348	struct lbuf *bp;	/* dst log page buffer */
 349	struct logpage *lp;	/* dst log page */
 350	caddr_t dst;		/* destination address in log page */
 351	int dstoffset;		/* end-of-log offset in log page */
 352	int freespace;		/* free space in log page */
 353	caddr_t p;		/* src meta-data page */
 354	caddr_t src;
 355	int srclen;
 356	int nbytes;		/* number of bytes to move */
 357	int i;
 358	int len;
 359	struct linelock *linelock;
 360	struct lv *lv;
 361	struct lvd *lvd;
 362	int l2linesize;
 363
 364	len = 0;
 365
 366	/* retrieve destination log page to write */
 367	bp = (struct lbuf *) log->bp;
 368	lp = (struct logpage *) bp->l_ldata;
 369	dstoffset = log->eor;
 370
 371	/* any log data to write ? */
 372	if (tlck == NULL)
 373		goto moveLrd;
 374
 375	/*
 376	 *	move log record data
 377	 */
 378	/* retrieve source meta-data page to log */
 379	if (tlck->flag & tlckPAGELOCK) {
 380		p = (caddr_t) (tlck->mp->data);
 381		linelock = (struct linelock *) & tlck->lock;
 382	}
 383	/* retrieve source in-memory inode to log */
 384	else if (tlck->flag & tlckINODELOCK) {
 385		if (tlck->type & tlckDTREE)
 386			p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
 387		else
 388			p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
 389		linelock = (struct linelock *) & tlck->lock;
 390	}
 391#ifdef	_JFS_WIP
 392	else if (tlck->flag & tlckINLINELOCK) {
 393
 394		inlinelock = (struct inlinelock *) & tlck;
 395		p = (caddr_t) & inlinelock->pxd;
 396		linelock = (struct linelock *) & tlck;
 397	}
 398#endif				/* _JFS_WIP */
 399	else {
 400		jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
 401		return 0;	/* Probably should trap */
 402	}
 403	l2linesize = linelock->l2linesize;
 404
 405      moveData:
 406	ASSERT(linelock->index <= linelock->maxcnt);
 407
 408	lv = linelock->lv;
 409	for (i = 0; i < linelock->index; i++, lv++) {
 410		if (lv->length == 0)
 411			continue;
 412
 413		/* is page full ? */
 414		if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
 415			/* page has become full: move on to next page */
 416			lmNextPage(log);
 417
 418			bp = log->bp;
 419			lp = (struct logpage *) bp->l_ldata;
 420			dstoffset = LOGPHDRSIZE;
 421		}
 422
 423		/*
 424		 * move log vector data
 425		 */
 426		src = (u8 *) p + (lv->offset << l2linesize);
 427		srclen = lv->length << l2linesize;
 428		len += srclen;
 429		while (srclen > 0) {
 430			freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 431			nbytes = min(freespace, srclen);
 432			dst = (caddr_t) lp + dstoffset;
 433			memcpy(dst, src, nbytes);
 434			dstoffset += nbytes;
 435
 436			/* is page not full ? */
 437			if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 438				break;
 439
 440			/* page has become full: move on to next page */
 441			lmNextPage(log);
 442
 443			bp = (struct lbuf *) log->bp;
 444			lp = (struct logpage *) bp->l_ldata;
 445			dstoffset = LOGPHDRSIZE;
 446
 447			srclen -= nbytes;
 448			src += nbytes;
 449		}
 450
 451		/*
 452		 * move log vector descriptor
 453		 */
 454		len += 4;
 455		lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
 456		lvd->offset = cpu_to_le16(lv->offset);
 457		lvd->length = cpu_to_le16(lv->length);
 458		dstoffset += 4;
 459		jfs_info("lmWriteRecord: lv offset:%d length:%d",
 460			 lv->offset, lv->length);
 461	}
 462
 463	if ((i = linelock->next)) {
 464		linelock = (struct linelock *) lid_to_tlock(i);
 465		goto moveData;
 466	}
 467
 468	/*
 469	 *	move log record descriptor
 470	 */
 471      moveLrd:
 472	lrd->length = cpu_to_le16(len);
 473
 474	src = (caddr_t) lrd;
 475	srclen = LOGRDSIZE;
 476
 477	while (srclen > 0) {
 478		freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 479		nbytes = min(freespace, srclen);
 480		dst = (caddr_t) lp + dstoffset;
 481		memcpy(dst, src, nbytes);
 482
 483		dstoffset += nbytes;
 484		srclen -= nbytes;
 485
 486		/* are there more to move than freespace of page ? */
 487		if (srclen)
 488			goto pageFull;
 489
 490		/*
 491		 * end of log record descriptor
 492		 */
 493
 494		/* update last log record eor */
 495		log->eor = dstoffset;
 496		bp->l_eor = dstoffset;
 497		lsn = (log->page << L2LOGPSIZE) + dstoffset;
 498
 499		if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
 500			tblk->clsn = lsn;
 501			jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
 502				 bp->l_eor);
 503
 504			INCREMENT(lmStat.commit);	/* # of commit */
 505
 506			/*
 507			 * enqueue tblock for group commit:
 508			 *
 509			 * enqueue tblock of non-trivial/synchronous COMMIT
 510			 * at tail of group commit queue
 511			 * (trivial/asynchronous COMMITs are ignored by
 512			 * group commit.)
 513			 */
 514			LOGGC_LOCK(log);
 515
 516			/* init tblock gc state */
 517			tblk->flag = tblkGC_QUEUE;
 518			tblk->bp = log->bp;
 519			tblk->pn = log->page;
 520			tblk->eor = log->eor;
 521
 522			/* enqueue transaction to commit queue */
 523			list_add_tail(&tblk->cqueue, &log->cqueue);
 524
 525			LOGGC_UNLOCK(log);
 526		}
 527
 528		jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
 529			le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
 530
 531		/* page not full ? */
 532		if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 533			return lsn;
 534
 535	      pageFull:
 536		/* page has become full: move on to next page */
 537		lmNextPage(log);
 538
 539		bp = (struct lbuf *) log->bp;
 540		lp = (struct logpage *) bp->l_ldata;
 541		dstoffset = LOGPHDRSIZE;
 542		src += nbytes;
 543	}
 544
 545	return lsn;
 546}
 547
 548
 549/*
 550 * NAME:	lmNextPage()
 551 *
 552 * FUNCTION:	write current page and allocate next page.
 553 *
 554 * PARAMETER:	log
 555 *
 556 * RETURN:	0
 557 *
 558 * serialization: LOG_LOCK() held on entry/exit
 559 */
 560static int lmNextPage(struct jfs_log * log)
 561{
 562	struct logpage *lp;
 563	int lspn;		/* log sequence page number */
 564	int pn;			/* current page number */
 565	struct lbuf *bp;
 566	struct lbuf *nextbp;
 567	struct tblock *tblk;
 568
 569	/* get current log page number and log sequence page number */
 570	pn = log->page;
 571	bp = log->bp;
 572	lp = (struct logpage *) bp->l_ldata;
 573	lspn = le32_to_cpu(lp->h.page);
 574
 575	LOGGC_LOCK(log);
 576
 577	/*
 578	 *	write or queue the full page at the tail of write queue
 579	 */
 580	/* get the tail tblk on commit queue */
 581	if (list_empty(&log->cqueue))
 582		tblk = NULL;
 583	else
 584		tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
 585
 586	/* every tblk that has a COMMIT record on the current page,
 587	 * and has not been committed, must be on the commit queue,
 588	 * since a tblk is queued on the commit queue at the time
 589	 * its COMMIT record is written to the page, before the
 590	 * page becomes full (even though the tblk thread that
 591	 * wrote the COMMIT record may currently be
 592	 * suspended);
 593	 */
 594
 595	/* is page bound with outstanding tail tblk ? */
 596	if (tblk && tblk->pn == pn) {
 597		/* mark tblk for end-of-page */
 598		tblk->flag |= tblkGC_EOP;
 599
 600		if (log->cflag & logGC_PAGEOUT) {
 601			/* if page is not already on write queue,
 602			 * just enqueue (no lbmWRITE to prevent redrive)
 603			 * buffer to wqueue to ensure correct serial order
 604			 * of the pages since log pages will be added
 605			 * continuously
 606			 */
 607			if (bp->l_wqnext == NULL)
 608				lbmWrite(log, bp, 0, 0);
 609		} else {
 610			/*
 611			 * No current GC leader, initiate group commit
 612			 */
 613			log->cflag |= logGC_PAGEOUT;
 614			lmGCwrite(log, 0);
 615		}
 616	}
 617	/* page is not bound with outstanding tblk:
 618	 * init write or mark it to be redriven (lbmWRITE)
 619	 */
 620	else {
 621		/* finalize the page */
 622		bp->l_ceor = bp->l_eor;
 623		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 624		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
 625	}
 626	LOGGC_UNLOCK(log);
 627
 628	/*
 629	 *	allocate/initialize next page
 630	 */
 631	/* if log wraps, the first data page of log is 2
 632	 * (0 never used, 1 is superblock).
 633	 */
 634	log->page = (pn == log->size - 1) ? 2 : pn + 1;
 635	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */
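	/*
	 * Illustrative example: with log->size = 16, data pages advance
	 * 2, 3, ..., 15 and then wrap back to 2, matching the circular
	 * file laid out by lmLogFormat().
	 */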
 636
 637	/* allocate/initialize next log page buffer */
 638	nextbp = lbmAllocate(log, log->page);
 639	nextbp->l_eor = log->eor;
 640	log->bp = nextbp;
 641
 642	/* initialize next log page */
 643	lp = (struct logpage *) nextbp->l_ldata;
 644	lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
 645	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
 646
 647	return 0;
 648}
 649
 650
 651/*
 652 * NAME:	lmGroupCommit()
 653 *
 654 * FUNCTION:	group commit
 655 *	initiate pageout of the pages with COMMIT in the order of
 656 *	page number - redrive pageout of the page at the head of
 657 *	pageout queue until full page has been written.
 658 *
 659 * RETURN:
 660 *
 661 * NOTE:
 662 *	LOGGC_LOCK serializes log group commit queue, and
 663 *	transaction blocks on the commit queue.
 664 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 665 */
 666int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
 667{
 668	int rc = 0;
 669
 670	LOGGC_LOCK(log);
 671
 672	/* group committed already ? */
 673	if (tblk->flag & tblkGC_COMMITTED) {
 674		if (tblk->flag & tblkGC_ERROR)
 675			rc = -EIO;
 676
 677		LOGGC_UNLOCK(log);
 678		return rc;
 679	}
 680	jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
 681
 682	if (tblk->xflag & COMMIT_LAZY)
 683		tblk->flag |= tblkGC_LAZY;
 684
 685	if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
 686	    (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
 687	     || jfs_tlocks_low)) {
 688		/*
 689		 * No pageout in progress
 690		 *
 691		 * start group commit as its group leader.
 692		 */
 693		log->cflag |= logGC_PAGEOUT;
 694
 695		lmGCwrite(log, 0);
 696	}
 697
 698	if (tblk->xflag & COMMIT_LAZY) {
 699		/*
 700		 * Lazy transactions can leave now
 701		 */
 702		LOGGC_UNLOCK(log);
 703		return 0;
 704	}
 705
 706	/* lmGCwrite gives up LOGGC_LOCK, check again */
 707
 708	if (tblk->flag & tblkGC_COMMITTED) {
 709		if (tblk->flag & tblkGC_ERROR)
 710			rc = -EIO;
 711
 712		LOGGC_UNLOCK(log);
 713		return rc;
 714	}
 715
 716	/* upcount transaction waiting for completion
 717	 */
 718	log->gcrtc++;
 719	tblk->flag |= tblkGC_READY;
 720
 721	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
 722		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));
 723
 724	/* removed from commit queue */
 725	if (tblk->flag & tblkGC_ERROR)
 726		rc = -EIO;
 727
 728	LOGGC_UNLOCK(log);
 729	return rc;
 730}
 731
 732/*
 733 * NAME:	lmGCwrite()
 734 *
 735 * FUNCTION:	group commit write
 736 *	initiate write of log page, building a group of all transactions
 737 *	with commit records on that page.
 738 *
 739 * RETURN:	None
 740 *
 741 * NOTE:
 742 *	LOGGC_LOCK must be held by caller.
 743 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 744 */
 745static void lmGCwrite(struct jfs_log * log, int cant_write)
 746{
 747	struct lbuf *bp;
 748	struct logpage *lp;
 749	int gcpn;		/* group commit page number */
 750	struct tblock *tblk;
 751	struct tblock *xtblk = NULL;
 752
 753	/*
 754	 * build the commit group of a log page
 755	 *
 756	 * scan commit queue and make a commit group of all
 757	 * transactions with COMMIT records on the same log page.
 758	 */
 759	/* get the head tblk on the commit queue */
 760	gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
 761
 762	list_for_each_entry(tblk, &log->cqueue, cqueue) {
 763		if (tblk->pn != gcpn)
 764			break;
 765
 766		xtblk = tblk;
 767
 768		/* state transition: (QUEUE, READY) -> COMMIT */
 769		tblk->flag |= tblkGC_COMMIT;
 770	}
 771	tblk = xtblk;		/* last tblk of the page */
 772
 773	/*
 774	 * pageout to commit transactions on the log page.
 775	 */
 776	bp = (struct lbuf *) tblk->bp;
 777	lp = (struct logpage *) bp->l_ldata;
 778	/* is page already full ? */
 779	if (tblk->flag & tblkGC_EOP) {
 780		/* mark page to free at end of group commit of the page */
 781		tblk->flag &= ~tblkGC_EOP;
 782		tblk->flag |= tblkGC_FREE;
 783		bp->l_ceor = bp->l_eor;
 784		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 785		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
 786			 cant_write);
 787		INCREMENT(lmStat.full_page);
 788	}
 789	/* page is not yet full */
 790	else {
 791		bp->l_ceor = tblk->eor;	/* ? bp->l_ceor = bp->l_eor; */
 792		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 793		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
 794		INCREMENT(lmStat.partial_page);
 795	}
 796}
 797
 798/*
 799 * NAME:	lmPostGC()
 800 *
 801 * FUNCTION:	group commit post-processing
 802 *	Processes transactions after their commit records have been written
 803 *	to disk, redriving log I/O if necessary.
 804 *
 805 * RETURN:	None
 806 *
 807 * NOTE:
 808 *	This routine is called at interrupt time by lbmIODone
 809 */
 810static void lmPostGC(struct lbuf * bp)
 811{
 812	unsigned long flags;
 813	struct jfs_log *log = bp->l_log;
 814	struct logpage *lp;
 815	struct tblock *tblk, *temp;
 816
 817	//LOGGC_LOCK(log);
 818	spin_lock_irqsave(&log->gclock, flags);
 819	/*
 820	 * current pageout of group commit completed.
 821	 *
 822	 * remove/wakeup transactions from commit queue who were
 823	 * group committed with the current log page
 824	 */
 825	list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
 826		if (!(tblk->flag & tblkGC_COMMIT))
 827			break;
 828		/* if transaction was marked GC_COMMIT then
 829		 * it has been shipped in the current pageout
 830		 * and made it to disk - it is committed.
 831		 */
 832
 833		if (bp->l_flag & lbmERROR)
 834			tblk->flag |= tblkGC_ERROR;
 835
 836		/* remove it from the commit queue */
 837		list_del(&tblk->cqueue);
 838		tblk->flag &= ~tblkGC_QUEUE;
 839
 840		if (tblk == log->flush_tblk) {
 841			/* we can stop flushing the log now */
 842			clear_bit(log_FLUSH, &log->flag);
 843			log->flush_tblk = NULL;
 844		}
 845
 846		jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
 847			 tblk->flag);
 848
 849		if (!(tblk->xflag & COMMIT_FORCE))
 850			/*
 851			 * Hand tblk over to lazy commit thread
 852			 */
 853			txLazyUnlock(tblk);
 854		else {
 855			/* state transition: COMMIT -> COMMITTED */
 856			tblk->flag |= tblkGC_COMMITTED;
 857
 858			if (tblk->flag & tblkGC_READY)
 859				log->gcrtc--;
 860
 861			LOGGC_WAKEUP(tblk);
 862		}
 863
 864		/* was page full before pageout ?
 865		 * (and this is the last tblk bound with the page)
 866		 */
 867		if (tblk->flag & tblkGC_FREE)
 868			lbmFree(bp);
 869		/* did page become full after pageout ?
 870		 * (and this is the last tblk bound with the page)
 871		 */
 872		else if (tblk->flag & tblkGC_EOP) {
 873			/* finalize the page */
 874			lp = (struct logpage *) bp->l_ldata;
 875			bp->l_ceor = bp->l_eor;
 876			lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
 877			jfs_info("lmPostGC: calling lbmWrite");
 878			lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
 879				 1);
 880		}
 881
 882	}
 883
 884	/* are there any transactions that have entered lmGroupCommit()
 885	 * (whose COMMITs are after that of the last log page written)?
 886	 * they are waiting for a new group commit (above at (SLEEP 1)),
 887	 * or lazy transactions are on a full (queued) log page;
 888	 * select the latest ready transaction as the new group leader
 889	 * and wake it up to lead its group.
 890	 */
 891	if ((!list_empty(&log->cqueue)) &&
 892	    ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
 893	     test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
 894		/*
 895		 * Call lmGCwrite with new group leader
 896		 */
 897		lmGCwrite(log, 1);
 898
 899	/* no transactions are ready yet (transactions are only just
 900	 * queued (GC_QUEUE) and have not entered group commit yet).
 901	 * the first transaction entering group commit
 902	 * will elect itself as the new group leader.
 903	 */
 904	else
 905		log->cflag &= ~logGC_PAGEOUT;
 906
 907	//LOGGC_UNLOCK(log);
 908	spin_unlock_irqrestore(&log->gclock, flags);
 909	return;
 910}
 911
 912/*
 913 * NAME:	lmLogSync()
 914 *
 915 * FUNCTION:	write log SYNCPT record for specified log
 916 *	if new sync address is available
 917 *	(normally the case if sync() is executed by a background
 918 *	process).
 919 *	calculate a new value of i_nextsync, which determines when
 920 *	this code is called again.
 921 *
 922 * PARAMETERS:	log	- log structure
 923 *		hard_sync - 1 to force all metadata to be written
 924 *
 925 * RETURN:	lsn	- end-of-log address
 926 *
 927 * serialization: LOG_LOCK() held on entry/exit
 928 */
 929static int lmLogSync(struct jfs_log * log, int hard_sync)
 930{
 931	int logsize;
 932	int written;		/* written since last syncpt */
 933	int free;		/* free space left available */
 934	int delta;		/* additional delta to write normally */
 935	int more;		/* additional write granted */
 936	struct lrd lrd;
 937	int lsn;
 938	struct logsyncblk *lp;
 939	unsigned long flags;
 940
 941	/* push dirty metapages out to disk */
 942	if (hard_sync)
 943		write_special_inodes(log, filemap_fdatawrite);
 944	else
 945		write_special_inodes(log, filemap_flush);
 946
 947	/*
 948	 *	forward syncpt
 949	 */
 950	/* if last sync is same as last syncpt,
 951	 * invoke sync point forward processing to update sync.
 952	 */
 953
 954	if (log->sync == log->syncpt) {
 955		LOGSYNC_LOCK(log, flags);
 956		if (list_empty(&log->synclist))
 957			log->sync = log->lsn;
 958		else {
 959			lp = list_entry(log->synclist.next,
 960					struct logsyncblk, synclist);
 961			log->sync = lp->lsn;
 962		}
 963		LOGSYNC_UNLOCK(log, flags);
 964
 965	}
 966
 967	/* if sync is different from last syncpt,
 968	 * write a SYNCPT record with syncpt = sync.
 969	 * reset syncpt = sync
 970	 */
 971	if (log->sync != log->syncpt) {
 972		lrd.logtid = 0;
 973		lrd.backchain = 0;
 974		lrd.type = cpu_to_le16(LOG_SYNCPT);
 975		lrd.length = 0;
 976		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
 977		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
 978
 979		log->syncpt = log->sync;
 980	} else
 981		lsn = log->lsn;
 982
 983	/*
 984	 *	setup next syncpt trigger (SWAG)
 985	 */
 986	logsize = log->logsize;
 987
 988	logdiff(written, lsn, log);
 989	free = logsize - written;
 990	delta = LOGSYNC_DELTA(logsize);
 991	more = min(free / 2, delta);
 992	if (more < 2 * LOGPSIZE) {
 993		jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
 994		/*
 995		 *	log wrapping
 996		 *
 997		 * option 1 - panic ? No!
 998		 * option 2 - shutdown file systems
 999		 *	      associated with log ?
1000		 * option 3 - extend log ?
1001		 * option 4 - second chance
1002		 *
1003		 * mark log wrapped, and continue.
1004		 * when all active transactions are completed,
1005		 * mark log valid for recovery.
1006		 * if crashed during invalid state, log state
1007		 * implies invalid log, forcing fsck().
1008		 */
1009		/* mark log state log wrap in log superblock */
1010		/* log->state = LOGWRAP; */
1011
1012		/* reset sync point computation */
1013		log->syncpt = log->sync = lsn;
1014		log->nextsync = delta;
1015	} else
1016		/* next syncpt trigger = written + more */
1017		log->nextsync = written + more;
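	/*
	 * Worked example (illustrative numbers, 4 KiB log pages):
	 * logsize = 32 MiB and written = 4 MiB give free = 28 MiB,
	 * delta = min(32 MiB / 8, 128 * LOGPSIZE) = 512 KiB and
	 * more = min(28 MiB / 2, 512 KiB) = 512 KiB, so the next
	 * syncpt triggers after written + more = 4.5 MiB.
	 */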
1018
1019	/* if number of bytes written from last sync point is more
1020	 * than 1/4 of the log size, stop new transactions from
1021	 * starting until all current transactions are completed
1022	 * by setting syncbarrier flag.
1023	 */
1024	if (!test_bit(log_SYNCBARRIER, &log->flag) &&
1025	    (written > LOGSYNC_BARRIER(logsize)) && log->active) {
1026		set_bit(log_SYNCBARRIER, &log->flag);
1027		jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1028			 log->syncpt);
1029		/*
1030		 * We may have to initiate group commit
1031		 */
1032		jfs_flush_journal(log, 0);
1033	}
1034
1035	return lsn;
1036}
1037
1038/*
1039 * NAME:	jfs_syncpt
1040 *
1041 * FUNCTION:	write log SYNCPT record for specified log
1042 *
1043 * PARAMETERS:	log	  - log structure
1044 *		hard_sync - set to 1 to force metadata to be written
1045 */
1046void jfs_syncpt(struct jfs_log *log, int hard_sync)
1047{
	LOG_LOCK(log);
1048	if (!test_bit(log_QUIESCE, &log->flag))
1049		lmLogSync(log, hard_sync);
1050	LOG_UNLOCK(log);
1051}
1052
1053/*
1054 * NAME:	lmLogOpen()
1055 *
1056 * FUNCTION:	open the log on first open;
1057 *	insert filesystem in the active list of the log.
1058 *
1059 * PARAMETER:	ipmnt	- file system mount inode
1060 *		iplog	- log inode (out)
1061 *
1062 * RETURN:
1063 *
1064 * serialization:
1065 */
1066int lmLogOpen(struct super_block *sb)
1067{
1068	int rc;
1069	struct block_device *bdev;
1070	struct jfs_log *log;
1071	struct jfs_sb_info *sbi = JFS_SBI(sb);
1072
1073	if (sbi->flag & JFS_NOINTEGRITY)
1074		return open_dummy_log(sb);
1075
1076	if (sbi->mntflag & JFS_INLINELOG)
1077		return open_inline_log(sb);
1078
1079	mutex_lock(&jfs_log_mutex);
1080	list_for_each_entry(log, &jfs_external_logs, journal_list) {
1081		if (log->bdev->bd_dev == sbi->logdev) {
1082			if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
1083				jfs_warn("wrong uuid on JFS journal");
1084				mutex_unlock(&jfs_log_mutex);
1085				return -EINVAL;
1086			}
1087			/*
1088			 * add file system to log active file system list
1089			 */
1090			if ((rc = lmLogFileSystem(log, sbi, 1))) {
1091				mutex_unlock(&jfs_log_mutex);
1092				return rc;
1093			}
1094			goto journal_found;
1095		}
1096	}
1097
1098	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
1099		mutex_unlock(&jfs_log_mutex);
1100		return -ENOMEM;
1101	}
1102	INIT_LIST_HEAD(&log->sb_list);
1103	init_waitqueue_head(&log->syncwait);
1104
1105	/*
1106	 *	external log as separate logical volume
1107	 *
1108	 * file systems to log may have n-to-1 relationship;
1109	 */
1110
1111	bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1112				 log);
1113	if (IS_ERR(bdev)) {
1114		rc = PTR_ERR(bdev);
1115		goto free;
1116	}
1117
1118	log->bdev = bdev;
1119	uuid_copy(&log->uuid, &sbi->loguuid);
1120
1121	/*
1122	 * initialize log:
1123	 */
1124	if ((rc = lmLogInit(log)))
1125		goto close;
1126
1127	list_add(&log->journal_list, &jfs_external_logs);
1128
1129	/*
1130	 * add file system to log active file system list
1131	 */
1132	if ((rc = lmLogFileSystem(log, sbi, 1)))
1133		goto shutdown;
1134
1135journal_found:
1136	LOG_LOCK(log);
1137	list_add(&sbi->log_list, &log->sb_list);
1138	sbi->log = log;
1139	LOG_UNLOCK(log);
1140
1141	mutex_unlock(&jfs_log_mutex);
1142	return 0;
1143
1144	/*
1145	 *	unwind on error
1146	 */
1147      shutdown:		/* unwind lbmLogInit() */
1148	list_del(&log->journal_list);
1149	lbmLogShutdown(log);
1150
1151      close:		/* close external log device */
1152	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1153
1154      free:		/* free log descriptor */
1155	mutex_unlock(&jfs_log_mutex);
1156	kfree(log);
1157
1158	jfs_warn("lmLogOpen: exit(%d)", rc);
1159	return rc;
1160}
1161
1162static int open_inline_log(struct super_block *sb)
1163{
1164	struct jfs_log *log;
1165	int rc;
1166
1167	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1168		return -ENOMEM;
1169	INIT_LIST_HEAD(&log->sb_list);
1170	init_waitqueue_head(&log->syncwait);
1171
1172	set_bit(log_INLINELOG, &log->flag);
1173	log->bdev = sb->s_bdev;
1174	log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1175	log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1176	    (L2LOGPSIZE - sb->s_blocksize_bits);
1177	log->l2bsize = sb->s_blocksize_bits;
1178	ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1179
1180	/*
1181	 * initialize log.
1182	 */
1183	if ((rc = lmLogInit(log))) {
1184		kfree(log);
1185		jfs_warn("lmLogOpen: exit(%d)", rc);
1186		return rc;
1187	}
1188
1189	list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
1190	JFS_SBI(sb)->log = log;
1191
1192	return rc;
1193}
1194
1195static int open_dummy_log(struct super_block *sb)
1196{
1197	int rc;
1198
1199	mutex_lock(&jfs_log_mutex);
1200	if (!dummy_log) {
1201		dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
1202		if (!dummy_log) {
1203			mutex_unlock(&jfs_log_mutex);
1204			return -ENOMEM;
1205		}
1206		INIT_LIST_HEAD(&dummy_log->sb_list);
1207		init_waitqueue_head(&dummy_log->syncwait);
1208		dummy_log->no_integrity = 1;
1209		/* Make up some stuff */
1210		dummy_log->base = 0;
1211		dummy_log->size = 1024;
1212		rc = lmLogInit(dummy_log);
1213		if (rc) {
1214			kfree(dummy_log);
1215			dummy_log = NULL;
1216			mutex_unlock(&jfs_log_mutex);
1217			return rc;
1218		}
1219	}
1220
1221	LOG_LOCK(dummy_log);
1222	list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
1223	JFS_SBI(sb)->log = dummy_log;
1224	LOG_UNLOCK(dummy_log);
1225	mutex_unlock(&jfs_log_mutex);
1226
1227	return 0;
1228}
1229
1230/*
1231 * NAME:	lmLogInit()
1232 *
1233 * FUNCTION:	log initialization at first log open.
1234 *
1235 *	logredo() (or logformat()) should have been run previously.
1236 *	initialize the log from log superblock.
1237 *	set the log state in the superblock to LOGMOUNT and
1238 *	write SYNCPT log record.
1239 *
1240 * PARAMETER:	log	- log structure
1241 *
1242 * RETURN:	0	- if ok
1243 *		-EINVAL	- bad log magic number or superblock dirty
1244 *		error returned from logwait()
1245 *
1246 * serialization: single first open thread
1247 */
1248int lmLogInit(struct jfs_log * log)
1249{
1250	int rc = 0;
1251	struct lrd lrd;
1252	struct logsuper *logsuper;
1253	struct lbuf *bpsuper;
1254	struct lbuf *bp;
1255	struct logpage *lp;
1256	int lsn = 0;
1257
1258	jfs_info("lmLogInit: log:0x%p", log);
1259
1260	/* initialize the group commit serialization lock */
1261	LOGGC_LOCK_INIT(log);
1262
1263	/* allocate/initialize the log write serialization lock */
1264	LOG_LOCK_INIT(log);
1265
1266	LOGSYNC_LOCK_INIT(log);
1267
1268	INIT_LIST_HEAD(&log->synclist);
1269
1270	INIT_LIST_HEAD(&log->cqueue);
1271	log->flush_tblk = NULL;
1272
1273	log->count = 0;
1274
1275	/*
1276	 * initialize log i/o
1277	 */
1278	if ((rc = lbmLogInit(log)))
1279		return rc;
1280
1281	if (!test_bit(log_INLINELOG, &log->flag))
1282		log->l2bsize = L2LOGPSIZE;
1283
1284	/* check for disabled journaling to disk */
1285	if (log->no_integrity) {
1286		/*
1287		 * Journal pages will still be filled.  When the time comes
1288		 * to actually do the I/O, the write is not done, and the
1289		 * endio routine is called directly.
1290		 */
1291		bp = lbmAllocate(log , 0);
1292		log->bp = bp;
1293		bp->l_pn = bp->l_eor = 0;
1294	} else {
1295		/*
1296		 * validate log superblock
1297		 */
1298		if ((rc = lbmRead(log, 1, &bpsuper)))
1299			goto errout10;
1300
1301		logsuper = (struct logsuper *) bpsuper->l_ldata;
1302
1303		if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1304			jfs_warn("*** Log Format Error ! ***");
1305			rc = -EINVAL;
1306			goto errout20;
1307		}
1308
1309		/* logredo() should have been run successfully. */
1310		if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1311			jfs_warn("*** Log Is Dirty ! ***");
1312			rc = -EINVAL;
1313			goto errout20;
1314		}
1315
1316		/* initialize log from log superblock */
1317		if (test_bit(log_INLINELOG,&log->flag)) {
1318			if (log->size != le32_to_cpu(logsuper->size)) {
1319				rc = -EINVAL;
1320				goto errout20;
1321			}
1322			jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1323				 log, (unsigned long long)log->base, log->size);
1324		} else {
1325			if (!uuid_equal(&logsuper->uuid, &log->uuid)) {
1326				jfs_warn("wrong uuid on JFS log device");
1327				rc = -EINVAL;
1328				goto errout20;
1329			}
1330			log->size = le32_to_cpu(logsuper->size);
1331			log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1332			jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1333				 log, (unsigned long long)log->base, log->size);
1334		}
1335
1336		log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1337		log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
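		/*
		 * Worked example (illustrative, assuming 4 KiB pages):
		 * logsuper->end = 0x2038 gives page = 0x2038 / 4096 = 2
		 * and eor = 0x2038 - 2 * 4096 = 0x38, i.e. appending
		 * resumes just past the last record of data page 2.
		 */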
1338
1339		/*
1340		 * initialize for log append write mode
1341		 */
1342		/* establish current/end-of-log page/buffer */
1343		if ((rc = lbmRead(log, log->page, &bp)))
1344			goto errout20;
1345
1346		lp = (struct logpage *) bp->l_ldata;
1347
1348		jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1349			 le32_to_cpu(logsuper->end), log->page, log->eor,
1350			 le16_to_cpu(lp->h.eor));
1351
1352		log->bp = bp;
1353		bp->l_pn = log->page;
1354		bp->l_eor = log->eor;
1355
1356		/* if current page is full, move on to next page */
1357		if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1358			lmNextPage(log);
1359
1360		/*
1361		 * initialize log syncpoint
1362		 */
1363		/*
1364		 * write the first SYNCPT record with syncpoint = 0
1365		 * (i.e., log redo up to HERE !);
1366		 * remove current page from lbm write queue at end of pageout
1367		 * (to write log superblock update), but do not release to
1368		 * freelist;
1369		 */
1370		lrd.logtid = 0;
1371		lrd.backchain = 0;
1372		lrd.type = cpu_to_le16(LOG_SYNCPT);
1373		lrd.length = 0;
1374		lrd.log.syncpt.sync = 0;
1375		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1376		bp = log->bp;
1377		bp->l_ceor = bp->l_eor;
1378		lp = (struct logpage *) bp->l_ldata;
1379		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1380		lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1381		if ((rc = lbmIOWait(bp, 0)))
1382			goto errout30;
1383
1384		/*
1385		 * update/write superblock
1386		 */
1387		logsuper->state = cpu_to_le32(LOGMOUNT);
1388		log->serial = le32_to_cpu(logsuper->serial) + 1;
1389		logsuper->serial = cpu_to_le32(log->serial);
1390		lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1391		if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1392			goto errout30;
1393	}
1394
1395	/* initialize logsync parameters */
1396	log->logsize = (log->size - 2) << L2LOGPSIZE;
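	/*
	 * Note (added): usable log space excludes page 0 (reserved)
	 * and page 1 (log superblock); e.g. a 16-page log yields
	 * (16 - 2) << L2LOGPSIZE = 14 * 4 KiB = 56 KiB (illustrative).
	 */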
1397	log->lsn = lsn;
1398	log->syncpt = lsn;
1399	log->sync = log->syncpt;
1400	log->nextsync = LOGSYNC_DELTA(log->logsize);
1401
1402	jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1403		 log->lsn, log->syncpt, log->sync);
1404
1405	/*
1406	 * initialize for lazy/group commit
1407	 */
1408	log->clsn = lsn;
1409
1410	return 0;
1411
1412	/*
1413	 *	unwind on error
1414	 */
1415      errout30:		/* release log page */
1416	log->wqueue = NULL;
1417	bp->l_wqnext = NULL;
1418	lbmFree(bp);
1419
1420      errout20:		/* release log superblock */
1421	lbmFree(bpsuper);
1422
1423      errout10:		/* unwind lbmLogInit() */
1424	lbmLogShutdown(log);
1425
1426	jfs_warn("lmLogInit: exit(%d)", rc);
1427	return rc;
1428}
1429
1430
1431/*
1432 * NAME:	lmLogClose()
1433 *
1434 * FUNCTION:	remove file system <ipmnt> from active list of log <iplog>
1435 *		and close it on last close.
1436 *
1437 * PARAMETER:	sb	- superblock
1438 *
1439 * RETURN:	errors from subroutines
1440 *
1441 * serialization:
1442 */
1443int lmLogClose(struct super_block *sb)
1444{
1445	struct jfs_sb_info *sbi = JFS_SBI(sb);
1446	struct jfs_log *log = sbi->log;
1447	struct block_device *bdev;
1448	int rc = 0;
1449
1450	jfs_info("lmLogClose: log:0x%p", log);
1451
1452	mutex_lock(&jfs_log_mutex);
1453	LOG_LOCK(log);
1454	list_del(&sbi->log_list);
1455	LOG_UNLOCK(log);
1456	sbi->log = NULL;
1457
1458	/*
1459	 * We need to make sure all of the "written" metapages
1460	 * actually make it to disk
1461	 */
1462	sync_blockdev(sb->s_bdev);
1463
1464	if (test_bit(log_INLINELOG, &log->flag)) {
1465		/*
1466		 *	in-line log in host file system
1467		 */
1468		rc = lmLogShutdown(log);
1469		kfree(log);
1470		goto out;
1471	}
1472
1473	if (!log->no_integrity)
1474		lmLogFileSystem(log, sbi, 0);
1475
1476	if (!list_empty(&log->sb_list))
1477		goto out;
1478
1479	/*
1480	 * TODO: ensure that the dummy_log is in a state to allow
1481	 * lbmLogShutdown to deallocate all the buffers and call
1482	 * kfree against dummy_log.  For now, leave dummy_log & its
1483	 * buffers in memory, and resuse if another no-integrity mount
1484 * buffers in memory, and reuse them if another no-integrity mount
1485	 */
1486	if (log->no_integrity)
1487		goto out;
1488
1489	/*
1490	 *	external log as separate logical volume
1491	 */
1492	list_del(&log->journal_list);
1493	bdev = log->bdev;
1494	rc = lmLogShutdown(log);
1495
1496	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1497
1498	kfree(log);
1499
1500      out:
1501	mutex_unlock(&jfs_log_mutex);
1502	jfs_info("lmLogClose: exit(%d)", rc);
1503	return rc;
1504}
1505
1506
1507/*
1508 * NAME:	jfs_flush_journal()
1509 *
1510 * FUNCTION:	initiate write of any outstanding transactions to the journal
1511 *		and optionally wait until they are all written to disk
1512 *
1513 *		wait == 0  flush until latest txn is committed, don't wait
1514 *		wait == 1  flush until latest txn is committed, wait
1515 *		wait > 1   flush until all txn's are complete, wait
1516 */
1517void jfs_flush_journal(struct jfs_log *log, int wait)
1518{
1519	int i;
1520	struct tblock *target = NULL;
1521
1522	/* jfs_write_inode may call us during read-only mount */
1523	if (!log)
1524		return;
1525
1526	jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);
1527
1528	LOGGC_LOCK(log);
1529
1530	if (!list_empty(&log->cqueue)) {
1531		/*
1532		 * This ensures that we will keep writing to the journal as long
1533		 * as there are unwritten commit records
1534		 */
1535		target = list_entry(log->cqueue.prev, struct tblock, cqueue);
1536
1537		if (test_bit(log_FLUSH, &log->flag)) {
1538			/*
1539			 * We're already flushing.
1540			 * if flush_tblk is NULL, we are flushing everything,
1541			 * so leave it that way.  Otherwise, update it to the
1542			 * latest transaction
1543			 */
1544			if (log->flush_tblk)
1545				log->flush_tblk = target;
1546		} else {
1547			/* Only flush until latest transaction is committed */
1548			log->flush_tblk = target;
1549			set_bit(log_FLUSH, &log->flag);
1550
1551			/*
1552			 * Initiate I/O on outstanding transactions
1553			 */
1554			if (!(log->cflag & logGC_PAGEOUT)) {
1555				log->cflag |= logGC_PAGEOUT;
1556				lmGCwrite(log, 0);
1557			}
1558		}
1559	}
1560	if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
1561		/* Flush until all activity complete */
1562		set_bit(log_FLUSH, &log->flag);
1563		log->flush_tblk = NULL;
1564	}
1565
1566	if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
1567		DECLARE_WAITQUEUE(__wait, current);
1568
1569		add_wait_queue(&target->gcwait, &__wait);
1570		set_current_state(TASK_UNINTERRUPTIBLE);
1571		LOGGC_UNLOCK(log);
1572		schedule();
1573		LOGGC_LOCK(log);
1574		remove_wait_queue(&target->gcwait, &__wait);
1575	}
1576	LOGGC_UNLOCK(log);
1577
1578	if (wait < 2)
1579		return;
1580
1581	write_special_inodes(log, filemap_fdatawrite);
1582
1583	/*
1584	 * If there was recent activity, we may need to wait
1585	 * for the lazycommit thread to catch up
1586	 */
1587	if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
1588		for (i = 0; i < 200; i++) {	/* Too much? */
1589			msleep(250);
1590			write_special_inodes(log, filemap_fdatawrite);
1591			if (list_empty(&log->cqueue) &&
1592			    list_empty(&log->synclist))
1593				break;
1594		}
1595	}
1596	assert(list_empty(&log->cqueue));
1597
1598#ifdef CONFIG_JFS_DEBUG
1599	if (!list_empty(&log->synclist)) {
1600		struct logsyncblk *lp;
1601
1602		printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
1603		list_for_each_entry(lp, &log->synclist, synclist) {
1604			if (lp->xflag & COMMIT_PAGE) {
1605				struct metapage *mp = (struct metapage *)lp;
1606				print_hex_dump(KERN_ERR, "metapage: ",
1607					       DUMP_PREFIX_ADDRESS, 16, 4,
1608					       mp, sizeof(struct metapage), 0);
1609				print_hex_dump(KERN_ERR, "page: ",
1610					       DUMP_PREFIX_ADDRESS, 16,
1611					       sizeof(long), mp->page,
1612					       sizeof(struct page), 0);
1613			} else
1614				print_hex_dump(KERN_ERR, "tblock:",
1615					       DUMP_PREFIX_ADDRESS, 16, 4,
1616					       lp, sizeof(struct tblock), 0);
1617		}
1618	}
1619#else
1620	WARN_ON(!list_empty(&log->synclist));
1621#endif
1622	clear_bit(log_FLUSH, &log->flag);
1623}
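
/*
 * Usage sketch (illustrative, not an exhaustive list of call sites):
 * a caller that must have every transaction on disk, as lmLogShutdown()
 * below does, calls
 *
 *	jfs_flush_journal(log, 2);
 *
 * while wait == 1 only waits for the latest committed transaction and
 * wait == 0 just initiates the flush without waiting.
 */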
1624
1625/*
1626 * NAME:	lmLogShutdown()
1627 *
1628 * FUNCTION:	log shutdown at last LogClose().
1629 *
1630 *		write log syncpt record.
1631 *		update super block state to LOGREDONE (log needs no replay).
1632 *
1633 * PARAMETER:	log	- log inode
1634 *
1635 * RETURN:	0	- success
1636 *
1637 * serialization: single last close thread
1638 */
1639int lmLogShutdown(struct jfs_log * log)
1640{
1641	int rc;
1642	struct lrd lrd;
1643	int lsn;
1644	struct logsuper *logsuper;
1645	struct lbuf *bpsuper;
1646	struct lbuf *bp;
1647	struct logpage *lp;
1648
1649	jfs_info("lmLogShutdown: log:0x%p", log);
1650
1651	jfs_flush_journal(log, 2);
1652
1653	/*
1654	 * write the last SYNCPT record with syncpoint = 0
1655	 * (i.e., log redo up to HERE !)
1656	 */
1657	lrd.logtid = 0;
1658	lrd.backchain = 0;
1659	lrd.type = cpu_to_le16(LOG_SYNCPT);
1660	lrd.length = 0;
1661	lrd.log.syncpt.sync = 0;
1662
1663	lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1664	bp = log->bp;
1665	lp = (struct logpage *) bp->l_ldata;
1666	lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1667	lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
1668	lbmIOWait(log->bp, lbmFREE);
1669	log->bp = NULL;
1670
1671	/*
1672	 * synchronous update log superblock
1673	 * mark log state as shutdown cleanly
1674	 * (i.e., Log does not need to be replayed).
1675	 */
1676	if ((rc = lbmRead(log, 1, &bpsuper)))
1677		goto out;
1678
1679	logsuper = (struct logsuper *) bpsuper->l_ldata;
1680	logsuper->state = cpu_to_le32(LOGREDONE);
1681	logsuper->end = cpu_to_le32(lsn);
1682	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1683	rc = lbmIOWait(bpsuper, lbmFREE);
1684
1685	jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
1686		 lsn, log->page, log->eor);
1687
1688      out:
1689	/*
1690	 * shutdown per log i/o
1691	 */
1692	lbmLogShutdown(log);
1693
1694	if (rc) {
1695		jfs_warn("lmLogShutdown: exit(%d)", rc);
1696	}
1697	return rc;
1698}
1699
1700
1701/*
1702 * NAME:	lmLogFileSystem()
1703 *
1704 * FUNCTION:	insert (<activate> = true)/remove (<activate> = false)
1705 *	file system into/from log active file system list.
1706 *
1707 * PARAMETER:	log	- pointer to the log
1708 *		sbi	- jfs_sb_info of the file system being
1709 *			  inserted/removed
1710 *		activate - insert (true)/remove (false) from active list.
1711 *
1712 * RETURN:	0	- success
1713 *		errors returned by lbmIOWait().
1714 */
1715static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
1716			   int activate)
1717{
1718	int rc = 0;
1719	int i;
1720	struct logsuper *logsuper;
1721	struct lbuf *bpsuper;
1722	uuid_t *uuid = &sbi->uuid;
1723
1724	/*
1725	 * insert/remove file system device to log active file system list.
1726	 */
1727	if ((rc = lbmRead(log, 1, &bpsuper)))
1728		return rc;
1729
1730	logsuper = (struct logsuper *) bpsuper->l_ldata;
1731	if (activate) {
1732		for (i = 0; i < MAX_ACTIVE; i++)
1733			if (uuid_is_null(&logsuper->active[i].uuid)) {
1734				uuid_copy(&logsuper->active[i].uuid, uuid);
1735				sbi->aggregate = i;
1736				break;
1737			}
1738		if (i == MAX_ACTIVE) {
1739			jfs_warn("Too many file systems sharing journal!");
1740			lbmFree(bpsuper);
1741			return -EMFILE;	/* Is there a better rc? */
1742		}
1743	} else {
1744		for (i = 0; i < MAX_ACTIVE; i++)
1745			if (uuid_equal(&logsuper->active[i].uuid, uuid)) {
1746				uuid_copy(&logsuper->active[i].uuid,
1747					  &uuid_null);
1748				break;
1749			}
1750		if (i == MAX_ACTIVE) {
1751			jfs_warn("Somebody stomped on the journal!");
1752			lbmFree(bpsuper);
1753			return -EIO;
1754		}
1755
1756	}
1757
1758	/*
1759	 * synchronous write log superblock:
1760	 *
1761	 * write sidestream bypassing write queue:
1762	 * at file system mount, log super block is updated for
1763	 * activation of the file system before any log record
1764	 * (MOUNT record) of the file system, and at file system
1765	 * unmount, all meta data for the file system has been
1766	 * flushed before log super block is updated for deactivation
1767	 * of the file system.
1768	 */
1769	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1770	rc = lbmIOWait(bpsuper, lbmFREE);
1771
1772	return rc;
1773}
1774
1775/*
1776 *		log buffer manager (lbm)
1777 *		------------------------
1778 *
1779 * special purpose buffer manager supporting log i/o requirements.
1780 *
1781 * per log write queue:
1782 * log pageout occurs in serial order through a fifo write queue,
1783 * restricting i/o to a single request in progress at any one time.
1784 * the queue is a circular singly-linked list
1785 * (log->wqueue points to the tail, and buffers are linked via
1786 * the bp->l_wqnext field), and it
1787 * holds log pages in pageout or waiting for pageout, in serial order.
1788 */
1789
1790/*
1791 *	lbmLogInit()
1792 *
1793 * initialize per log I/O setup at lmLogInit()
1794 */
1795static int lbmLogInit(struct jfs_log * log)
1796{				/* log inode */
1797	int i;
1798	struct lbuf *lbuf;
1799
1800	jfs_info("lbmLogInit: log:0x%p", log);
1801
1802	/* initialize current buffer cursor */
1803	log->bp = NULL;
1804
1805	/* initialize log device write queue */
1806	log->wqueue = NULL;
1807
1808	/*
1809	 * Each log has its own buffer pages allocated to it.  These are
1810	 * not managed by the page cache.  This ensures that a transaction
1811	 * writing to the log does not block trying to allocate a page from
1812	 * the page cache (for the log).  This would be bad, since page
1813	 * allocation waits on the kswapd thread that may be committing inodes
1814	 * which would cause log activity.  Was that clear?  I'm trying to
1815	 * avoid deadlock here.
1816	 */
1817	init_waitqueue_head(&log->free_wait);
1818
1819	log->lbuf_free = NULL;
1820
1821	for (i = 0; i < LOGPAGES;) {
1822		char *buffer;
1823		uint offset;
1824		struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
1825
1826		if (!page)
1827			goto error;
1828		buffer = page_address(page);
1829		for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
1830			lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1831			if (lbuf == NULL) {
1832				if (offset == 0)
1833					__free_page(page);
1834				goto error;
1835			}
1836			if (offset) /* we already have one reference */
1837				get_page(page);
1838			lbuf->l_offset = offset;
1839			lbuf->l_ldata = buffer + offset;
1840			lbuf->l_page = page;
1841			lbuf->l_log = log;
1842			init_waitqueue_head(&lbuf->l_ioevent);
1843
1844			lbuf->l_freelist = log->lbuf_free;
1845			log->lbuf_free = lbuf;
1846			i++;
1847		}
1848	}
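
	/*
	 * Illustrative: with the common PAGE_SIZE == LOGPSIZE == 4096,
	 * the loop above carves exactly one lbuf out of each page; a
	 * 64K-page configuration would carve 16 lbufs out of each page,
	 * taking one page reference per lbuf so the page is only freed
	 * when the last of its lbufs is freed in lbmLogShutdown().
	 */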
1849
1850	return (0);
1851
1852      error:
1853	lbmLogShutdown(log);
1854	return -ENOMEM;
1855}
1856
1857
1858/*
1859 *	lbmLogShutdown()
1860 *
1861 * finalize per log I/O setup at lmLogShutdown()
1862 */
1863static void lbmLogShutdown(struct jfs_log * log)
1864{
1865	struct lbuf *lbuf;
1866
1867	jfs_info("lbmLogShutdown: log:0x%p", log);
1868
1869	lbuf = log->lbuf_free;
1870	while (lbuf) {
1871		struct lbuf *next = lbuf->l_freelist;
1872		__free_page(lbuf->l_page);
1873		kfree(lbuf);
1874		lbuf = next;
1875	}
1876}
1877
1878
1879/*
1880 *	lbmAllocate()
1881 *
1882 * allocate an empty log buffer
1883 */
1884static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1885{
1886	struct lbuf *bp;
1887	unsigned long flags;
1888
1889	/*
1890	 * recycle from log buffer freelist if any
1891	 */
1892	LCACHE_LOCK(flags);
1893	LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1894	log->lbuf_free = bp->l_freelist;
1895	LCACHE_UNLOCK(flags);
1896
1897	bp->l_flag = 0;
1898
1899	bp->l_wqnext = NULL;
1900	bp->l_freelist = NULL;
1901
1902	bp->l_pn = pn;
1903	bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
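	/*
	 * Worked example of the mapping above (hypothetical numbers):
	 * with 4K log pages (L2LOGPSIZE == 12) on a 512-byte-block log
	 * device (l2bsize == 9), page pn == 2 lands at block
	 * base + (2 << 3) = base + 16; on a 4K-block device the shift
	 * is zero and page pn lands at base + pn.
	 */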
1904	bp->l_ceor = 0;
1905
1906	return bp;
1907}
1908
1909
1910/*
1911 *	lbmFree()
1912 *
1913 * release a log buffer to freelist
1914 */
1915static void lbmFree(struct lbuf * bp)
1916{
1917	unsigned long flags;
1918
1919	LCACHE_LOCK(flags);
1920
1921	lbmfree(bp);
1922
1923	LCACHE_UNLOCK(flags);
1924}
1925
1926static void lbmfree(struct lbuf * bp)
1927{
1928	struct jfs_log *log = bp->l_log;
1929
1930	assert(bp->l_wqnext == NULL);
1931
1932	/*
1933	 * return the buffer to head of freelist
1934	 */
1935	bp->l_freelist = log->lbuf_free;
1936	log->lbuf_free = bp;
1937
1938	wake_up(&log->free_wait);
1939	return;
1940}
1941
1942
1943/*
1944 * NAME:	lbmRedrive
1945 *
1946 * FUNCTION:	add a log buffer to the log redrive list
1947 *
1948 * PARAMETER:
1949 *	bp	- log buffer
1950 *
1951 * NOTES:
1952 *	Takes log_redrive_lock.
1953 */
1954static inline void lbmRedrive(struct lbuf *bp)
1955{
1956	unsigned long flags;
1957
1958	spin_lock_irqsave(&log_redrive_lock, flags);
1959	bp->l_redrive_next = log_redrive_list;
1960	log_redrive_list = bp;
1961	spin_unlock_irqrestore(&log_redrive_lock, flags);
1962
1963	wake_up_process(jfsIOthread);
1964}
1965
1966
1967/*
1968 *	lbmRead()
1969 */
1970static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1971{
1972	struct bio *bio;
1973	struct lbuf *bp;
1974
1975	/*
1976	 * allocate a log buffer
1977	 */
1978	*bpp = bp = lbmAllocate(log, pn);
1979	jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1980
1981	bp->l_flag |= lbmREAD;
1982
1983	bio = bio_alloc(GFP_NOFS, 1);
1984
1985	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
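	/*
	 * bi_sector counts 512-byte units, so the shift above converts
	 * device blocks to sectors: e.g. on a 4K-block log device
	 * (l2bsize == 12), block n becomes sector n << 3.
	 */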
1986	bio_set_dev(bio, log->bdev);
1987
1988	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
1989	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
1990
1991	bio->bi_end_io = lbmIODone;
1992	bio->bi_private = bp;
1993	bio->bi_opf = REQ_OP_READ;
1994	/*check if journaling to disk has been disabled*/
1995	if (log->no_integrity) {
1996		bio->bi_iter.bi_size = 0;
1997		lbmIODone(bio);
1998	} else {
1999		submit_bio(bio);
2000	}
2001
2002	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
2003
2004	return 0;
2005}
2006
2007
2008/*
2009 *	lbmWrite()
2010 *
2011 * buffer at head of pageout queue stays after completion of
2012 * partial-page pageout and redriven by explicit initiation of
2013 * pageout by caller until full-page pageout is completed and
2014 * released.
2015 *
2016 * device driver i/o done redrives pageout of new buffer at
2017 * head of pageout queue when current buffer at head of pageout
2018 * queue is released at the completion of its full-page pageout.
2019 *
2020 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2021 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2022 */
2023static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2024		     int cant_block)
2025{
2026	struct lbuf *tail;
2027	unsigned long flags;
2028
2029	jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
2030
2031	/* map the logical block address to physical block address */
2032	bp->l_blkno =
2033	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2034
2035	LCACHE_LOCK(flags);		/* disable+lock */
2036
2037	/*
2038	 * initialize buffer for device driver
2039	 */
2040	bp->l_flag = flag;
2041
2042	/*
2043	 *	insert bp at tail of write queue associated with log
2044	 *
2045	 * (request is either for bp already/currently at head of queue
2046	 * or new bp to be inserted at tail)
2047	 */
2048	tail = log->wqueue;
2049
2050	/* is buffer not already on write queue ? */
2051	if (bp->l_wqnext == NULL) {
2052		/* insert at tail of wqueue */
2053		if (tail == NULL) {
2054			log->wqueue = bp;
2055			bp->l_wqnext = bp;
2056		} else {
2057			log->wqueue = bp;
2058			bp->l_wqnext = tail->l_wqnext;
2059			tail->l_wqnext = bp;
2060		}
2061
2062		tail = bp;
2063	}
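
	/*
	 * Illustrative queue shape: log->wqueue always points at the
	 * tail and tail->l_wqnext at the head, so after queuing buffers
	 * A, B, C in that order the links are
	 * wqueue -> C, C -> A, A -> B, B -> C (circular).
	 */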
2064
2065	/* is buffer at head of wqueue and for write ? */
2066	if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
2067		LCACHE_UNLOCK(flags);	/* unlock+enable */
2068		return;
2069	}
2070
2071	LCACHE_UNLOCK(flags);	/* unlock+enable */
2072
2073	if (cant_block)
2074		lbmRedrive(bp);
2075	else if (flag & lbmSYNC)
2076		lbmStartIO(bp);
2077	else {
2078		LOGGC_UNLOCK(log);
2079		lbmStartIO(bp);
2080		LOGGC_LOCK(log);
2081	}
2082}
2083
2084
2085/*
2086 *	lbmDirectWrite()
2087 *
2088 * initiate pageout bypassing write queue for sidestream
2089 * (e.g., log superblock) write;
2090 */
2091static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2092{
2093	jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2094		 bp, flag, bp->l_pn);
2095
2096	/*
2097	 * initialize buffer for device driver
2098	 */
2099	bp->l_flag = flag | lbmDIRECT;
2100
2101	/* map the logical block address to physical block address */
2102	bp->l_blkno =
2103	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2104
2105	/*
2106	 *	initiate pageout of the page
2107	 */
2108	lbmStartIO(bp);
2109}
2110
2111
2112/*
2113 * NAME:	lbmStartIO()
2114 *
2115 * FUNCTION:	Interface to DD strategy routine
2116 *
2117 * RETURN:	none
2118 *
2119 * serialization: LCACHE_LOCK() is NOT held during log i/o;
2120 */
2121static void lbmStartIO(struct lbuf * bp)
2122{
2123	struct bio *bio;
2124	struct jfs_log *log = bp->l_log;
2125
2126	jfs_info("lbmStartIO");
2127
2128	bio = bio_alloc(GFP_NOFS, 1);
2129	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2130	bio_set_dev(bio, log->bdev);
2131
2132	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
2133	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
2134
2135	bio->bi_end_io = lbmIODone;
2136	bio->bi_private = bp;
2137	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
2138
2139	/* check if journaling to disk has been disabled */
2140	if (log->no_integrity) {
2141		bio->bi_iter.bi_size = 0;
2142		lbmIODone(bio);
2143	} else {
2144		submit_bio(bio);
2145		INCREMENT(lmStat.submitted);
2146	}
2147}
2148
2149
2150/*
2151 *	lbmIOWait()
2152 */
2153static int lbmIOWait(struct lbuf * bp, int flag)
2154{
2155	unsigned long flags;
2156	int rc = 0;
2157
2158	jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2159
2160	LCACHE_LOCK(flags);		/* disable+lock */
2161
2162	LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2163
2164	rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2165
2166	if (flag & lbmFREE)
2167		lbmfree(bp);
2168
2169	LCACHE_UNLOCK(flags);	/* unlock+enable */
2170
2171	jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2172	return rc;
2173}
2174
2175/*
2176 *	lbmIODone()
2177 *
2178 * executed at INTIODONE level
2179 */
2180static void lbmIODone(struct bio *bio)
2181{
2182	struct lbuf *bp = bio->bi_private;
2183	struct lbuf *nextbp, *tail;
2184	struct jfs_log *log;
2185	unsigned long flags;
2186
2187	/*
2188	 * get back jfs buffer bound to the i/o buffer
2189	 */
2190	jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
2191
2192	LCACHE_LOCK(flags);		/* disable+lock */
2193
2194	bp->l_flag |= lbmDONE;
2195
2196	if (bio->bi_status) {
2197		bp->l_flag |= lbmERROR;
2198
2199		jfs_err("lbmIODone: I/O error in JFS log");
2200	}
2201
2202	bio_put(bio);
2203
2204	/*
2205	 *	pagein completion
2206	 */
2207	if (bp->l_flag & lbmREAD) {
2208		bp->l_flag &= ~lbmREAD;
2209
2210		LCACHE_UNLOCK(flags);	/* unlock+enable */
2211
2212		/* wakeup I/O initiator */
2213		LCACHE_WAKEUP(&bp->l_ioevent);
2214
2215		return;
2216	}
2217
2218	/*
2219	 *	pageout completion
2220	 *
2221	 * the bp at the head of write queue has completed pageout.
2222	 *
2223	 * if single-commit/full-page pageout, remove the current buffer
2224	 * from head of pageout queue, and redrive pageout with
2225	 * the new buffer at head of pageout queue;
2226	 * otherwise, the partial-page pageout buffer stays at
2227	 * the head of pageout queue to be redriven for pageout
2228	 * by lmGroupCommit() until full-page pageout is completed.
2229	 */
2230	bp->l_flag &= ~lbmWRITE;
2231	INCREMENT(lmStat.pagedone);
2232
2233	/* update committed lsn */
2234	log = bp->l_log;
2235	log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
2236
2237	if (bp->l_flag & lbmDIRECT) {
2238		LCACHE_WAKEUP(&bp->l_ioevent);
2239		LCACHE_UNLOCK(flags);
2240		return;
2241	}
2242
2243	tail = log->wqueue;
2244
2245	/* single element queue */
2246	if (bp == tail) {
2247		/* remove head buffer of full-page pageout
2248		 * from log device write queue
2249		 */
2250		if (bp->l_flag & lbmRELEASE) {
2251			log->wqueue = NULL;
2252			bp->l_wqnext = NULL;
2253		}
2254	}
2255	/* multi element queue */
2256	else {
2257		/* remove head buffer of full-page pageout
2258		 * from log device write queue
2259		 */
2260		if (bp->l_flag & lbmRELEASE) {
2261			nextbp = tail->l_wqnext = bp->l_wqnext;
2262			bp->l_wqnext = NULL;
2263
2264			/*
2265			 * redrive pageout of next page at head of write queue:
2266			 * the next page is redriven if it has no bound tblk
2267			 * (i.e., a page without any COMMIT records), or if it
2268			 * is the first page of a new group commit queued
2269			 * behind the current page by lmGroupCommit()
2270			 * (subsequent pageout is performed synchronously,
2271			 * except for pages without any COMMITs), as
2272			 * indicated by the lbmWRITE flag;
2273			 */
2274			if (nextbp->l_flag & lbmWRITE) {
2275				/*
2276				 * We can't do the I/O at interrupt time.
2277				 * The jfsIO thread can do it
2278				 */
2279				lbmRedrive(nextbp);
2280			}
2281		}
2282	}
2283
2284	/*
2285	 *	synchronous pageout:
2286	 *
2287	 * buffer has not necessarily been removed from write queue
2288	 * (e.g., synchronous write of partial-page with COMMIT):
2289	 * leave buffer for i/o initiator to dispose
2290	 */
2291	if (bp->l_flag & lbmSYNC) {
2292		LCACHE_UNLOCK(flags);	/* unlock+enable */
2293
2294		/* wakeup I/O initiator */
2295		LCACHE_WAKEUP(&bp->l_ioevent);
2296	}
2297
2298	/*
2299	 *	Group Commit pageout:
2300	 */
2301	else if (bp->l_flag & lbmGC) {
2302		LCACHE_UNLOCK(flags);
2303		lmPostGC(bp);
2304	}
2305
2306	/*
2307	 *	asynchronous pageout:
2308	 *
2309	 * buffer must have been removed from write queue:
2310	 * insert buffer at head of freelist where it can be recycled
2311	 */
2312	else {
2313		assert(bp->l_flag & lbmRELEASE);
2314		assert(bp->l_flag & lbmFREE);
2315		lbmfree(bp);
2316
2317		LCACHE_UNLOCK(flags);	/* unlock+enable */
2318	}
2319}
2320
2321int jfsIOWait(void *arg)
2322{
2323	struct lbuf *bp;
2324
2325	do {
2326		spin_lock_irq(&log_redrive_lock);
2327		while ((bp = log_redrive_list)) {
2328			log_redrive_list = bp->l_redrive_next;
2329			bp->l_redrive_next = NULL;
2330			spin_unlock_irq(&log_redrive_lock);
2331			lbmStartIO(bp);
2332			spin_lock_irq(&log_redrive_lock);
2333		}
2334
2335		if (freezing(current)) {
2336			spin_unlock_irq(&log_redrive_lock);
2337			try_to_freeze();
2338		} else {
2339			set_current_state(TASK_INTERRUPTIBLE);
2340			spin_unlock_irq(&log_redrive_lock);
2341			schedule();
2342		}
2343	} while (!kthread_should_stop());
2344
2345	jfs_info("jfsIOWait being killed!");
2346	return 0;
2347}
2348
2349/*
2350 * NAME:	lmLogFormat()/jfs_logform()
2351 *
2352 * FUNCTION:	format file system log
2353 *
2354 * PARAMETERS:
2355 *	log	- volume log
2356 *	logAddress - start address of log space in FS block
2357 *	logSize	- length of log space in FS block;
2358 *
2359 * RETURN:	0	- success
2360 *		-EIO	- i/o error
2361 *
2362 * XXX: We're synchronously writing one page at a time.  This needs to
2363 *	be improved by writing multiple pages at once.
2364 */
2365int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2366{
2367	int rc = -EIO;
2368	struct jfs_sb_info *sbi;
2369	struct logsuper *logsuper;
2370	struct logpage *lp;
2371	int lspn;		/* log sequence page number */
2372	struct lrd *lrd_ptr;
2373	int npages = 0;
2374	struct lbuf *bp;
2375
2376	jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
2377		 (long long)logAddress, logSize);
2378
2379	sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);
2380
2381	/* allocate a log buffer */
2382	bp = lbmAllocate(log, 1);
2383
2384	npages = logSize >> sbi->l2nbperpage;
2385
2386	/*
2387	 *	log space:
2388	 *
2389	 * page 0 - reserved;
2390	 * page 1 - log superblock;
2391	 * page 2 - log data page: a SYNCPT log record is written
2392	 *	    into this page at logform time;
2393	 * pages 3-N - log data page: set to empty log data pages;
2394	 */
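	/*
	 * Illustrative layout (hypothetical numbers): for logSize =
	 * 16384 FS blocks of 512 bytes (l2nbperpage == 3), npages =
	 * 16384 >> 3 = 2048 log pages; pages 2..2047 then form the
	 * N = 2046 page circular data area described below.
	 */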
2395	/*
2396	 *	init log superblock: log page 1
2397	 */
2398	logsuper = (struct logsuper *) bp->l_ldata;
2399
2400	logsuper->magic = cpu_to_le32(LOGMAGIC);
2401	logsuper->version = cpu_to_le32(LOGVERSION);
2402	logsuper->state = cpu_to_le32(LOGREDONE);
2403	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
2404	logsuper->size = cpu_to_le32(npages);
2405	logsuper->bsize = cpu_to_le32(sbi->bsize);
2406	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
2407	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
2408
2409	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2410	bp->l_blkno = logAddress + sbi->nbperpage;
2411	lbmStartIO(bp);
2412	if ((rc = lbmIOWait(bp, 0)))
2413		goto exit;
2414
2415	/*
2416	 *	init pages 2 to npages-1 as log data pages:
2417	 *
2418	 * log page sequence number (lpsn) initialization:
2419	 *
2420	 * pn:   0     1     2     3                 n-1
2421	 *       +-----+-----+=====+=====+===.....===+=====+
2422	 * lspn:             N-1   0     1           N-2
2423	 *                   <--- N page circular file ---->
2424	 *
2425	 * the N (= npages-2) data pages of the log are maintained as
2426	 * a circular file for the log records;
2427	 * lpsn grows by 1 monotonically as each log page is written
2428	 * to the circular file of the log;
2429	 * and setLogpage() will not reset the page number even if
2430	 * the eor is equal to LOGPHDRSIZE. In order for the binary search
2431	 * to still work when finding the log end, we have to simulate the
2432	 * log wrap situation at log format time.
2433	 * The 1st log page written will have the highest lpsn. The
2434	 * succeeding log pages will then have lpsns in ascending order,
2435	 * starting from 0, ..., (N-2).
2436	 */
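	/*
	 * Worked example of the numbering above: for npages = 8, the
	 * N = 6 data pages occupy pn 2..7; pn 2 (written first, below)
	 * gets the highest lpsn, N - 1 = 5, and the loop further down
	 * assigns lpsn 0..4 to pn 3..7, simulating a log that has
	 * already wrapped once.
	 */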
2437	lp = (struct logpage *) bp->l_ldata;
2438	/*
2439	 * initialize 1st log page to be written: lpsn = N - 1,
2440	 * a SYNCPT log record is written into this page
2441	 */
2442	lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
2443	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
2444
2445	lrd_ptr = (struct lrd *) &lp->data;
2446	lrd_ptr->logtid = 0;
2447	lrd_ptr->backchain = 0;
2448	lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
2449	lrd_ptr->length = 0;
2450	lrd_ptr->log.syncpt.sync = 0;
2451
2452	bp->l_blkno += sbi->nbperpage;
2453	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2454	lbmStartIO(bp);
2455	if ((rc = lbmIOWait(bp, 0)))
2456		goto exit;
2457
2458	/*
2459	 *	initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
2460	 */
2461	for (lspn = 0; lspn < npages - 3; lspn++) {
2462		lp->h.page = lp->t.page = cpu_to_le32(lspn);
2463		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
2464
2465		bp->l_blkno += sbi->nbperpage;
2466		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2467		lbmStartIO(bp);
2468		if ((rc = lbmIOWait(bp, 0)))
2469			goto exit;
2470	}
2471
2472	rc = 0;
2473exit:
2474	/*
2475	 *	finalize log
2476	 */
2477	/* release the buffer */
2478	lbmFree(bp);
2479
2480	return rc;
2481}
2482
2483#ifdef CONFIG_JFS_STATISTICS
2484int jfs_lmstats_proc_show(struct seq_file *m, void *v)
2485{
2486	seq_printf(m,
2487		       "JFS Logmgr stats\n"
2488		       "================\n"
2489		       "commits = %d\n"
2490		       "writes submitted = %d\n"
2491		       "writes completed = %d\n"
2492		       "full pages submitted = %d\n"
2493		       "partial pages submitted = %d\n",
2494		       lmStat.commit,
2495		       lmStat.submitted,
2496		       lmStat.pagedone,
2497		       lmStat.full_page,
2498		       lmStat.partial_page);
2499	return 0;
2500}
2501#endif /* CONFIG_JFS_STATISTICS */