recovery.c - fs/jbd/recovery.c - Linux diff v3.5.6 - Bootlin Elixir Cross Referencer

  1/*
  2 * linux/fs/jbd/recovery.c
  3 *
  4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
  5 *
  6 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
  7 *
  8 * This file is part of the Linux kernel and is made available under
  9 * the terms of the GNU General Public License, version 2, or at your
 10 * option, any later version, incorporated herein by reference.
 11 *
 12 * Journal recovery routines for the generic filesystem journaling code;
 13 * part of the ext2fs journaling system.
 14 */
 15
 16#ifndef __KERNEL__
 17#include "jfs_user.h"
 18#else
 19#include <linux/time.h>
 20#include <linux/fs.h>
 21#include <linux/jbd.h>
 22#include <linux/errno.h>
 23#include <linux/blkdev.h>
 24#endif
 25
 26/*
 27 * Maintain information about the progress of the recovery job, so that
 28 * the different passes can carry information between them.
 29 */
 30struct recovery_info
 31{
 32	tid_t		start_transaction;
 33	tid_t		end_transaction;
 34
 35	int		nr_replays;
 36	int		nr_revokes;
 37	int		nr_revoke_hits;
 38};
 39
 40enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
 41static int do_one_pass(journal_t *journal,
 42				struct recovery_info *info, enum passtype pass);
 43static int scan_revoke_records(journal_t *, struct buffer_head *,
 44				tid_t, struct recovery_info *);
 45
 46#ifdef __KERNEL__
 47
 /*
  * Drop one reference on each of the first @n buffers in @b, walking the
  * array from the last entry down to the first.
  */
 static void journal_brelse_array(struct buffer_head *b[], int n)
 {
 	int idx;

 	for (idx = n - 1; idx >= 0; idx--)
 		brelse(b[idx]);
 }
 54
 55
 56/*
 57 * When reading from the journal, we are going through the block device
 58 * layer directly and so there is no readahead being done for us.  We
 59 * need to implement any readahead ourselves if we want it to happen at
 60 * all.  Recovery is basically one long sequential read, so make sure we
 61 * do the IO in reasonably large chunks.
 62 *
 63 * This is not so critical that we need to be enormously clever about
 64 * the readahead size, though.  128K is a purely arbitrary, good-enough
 65 * fixed value.
 66 */
 67
 68#define MAXBUF 8
 69static int do_readahead(journal_t *journal, unsigned int start)
 70{
 71	int err;
 72	unsigned int max, nbufs, next;
 73	unsigned int blocknr;
 74	struct buffer_head *bh;
 75
 76	struct buffer_head * bufs[MAXBUF];
 77
 78	/* Do up to 128K of readahead */
 79	max = start + (128 * 1024 / journal->j_blocksize);
 80	if (max > journal->j_maxlen)
 81		max = journal->j_maxlen;
 82
 83	/* Do the readahead itself.  We'll submit MAXBUF buffer_heads at
 84	 * a time to the block device IO layer. */
 85
 86	nbufs = 0;
 87
 88	for (next = start; next < max; next++) {
 89		err = journal_bmap(journal, next, &blocknr);
 90
 91		if (err) {
 92			printk (KERN_ERR "JBD: bad block at offset %u\n",
 93				next);
 94			goto failed;
 95		}
 96
 97		bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
 98		if (!bh) {
 99			err = -ENOMEM;
100			goto failed;
101		}
102
103		if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
104			bufs[nbufs++] = bh;
105			if (nbufs == MAXBUF) {
106				ll_rw_block(READ, nbufs, bufs);
107				journal_brelse_array(bufs, nbufs);
108				nbufs = 0;
109			}
110		} else
111			brelse(bh);
112	}
113
114	if (nbufs)
115		ll_rw_block(READ, nbufs, bufs);
116	err = 0;
117
118failed:
119	if (nbufs)
120		journal_brelse_array(bufs, nbufs);
121	return err;
122}
123
124#endif /* __KERNEL__ */
125
126
127/*
128 * Read a block from the journal
129 */
130
131static int jread(struct buffer_head **bhp, journal_t *journal,
132		 unsigned int offset)
133{
134	int err;
135	unsigned int blocknr;
136	struct buffer_head *bh;
137
138	*bhp = NULL;
139
140	if (offset >= journal->j_maxlen) {
141		printk(KERN_ERR "JBD: corrupted journal superblock\n");
142		return -EIO;
143	}
144
145	err = journal_bmap(journal, offset, &blocknr);
146
147	if (err) {
148		printk (KERN_ERR "JBD: bad block at offset %u\n",
149			offset);
150		return err;
151	}
152
153	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
154	if (!bh)
155		return -ENOMEM;
156
157	if (!buffer_uptodate(bh)) {
158		/* If this is a brand new buffer, start readahead.
159                   Otherwise, we assume we are already reading it.  */
160		if (!buffer_req(bh))
161			do_readahead(journal, offset);
162		wait_on_buffer(bh);
163	}
164
165	if (!buffer_uptodate(bh)) {
166		printk (KERN_ERR "JBD: Failed to read block at offset %u\n",
167			offset);
168		brelse(bh);
169		return -EIO;
170	}
171
172	*bhp = bh;
173	return 0;
174}
175
176
177/*
178 * Count the number of in-use tags in a journal descriptor block.
179 */
180
181static int count_tags(struct buffer_head *bh, int size)
182{
183	char *			tagp;
184	journal_block_tag_t *	tag;
185	int			nr = 0;
186
187	tagp = &bh->b_data[sizeof(journal_header_t)];
188
189	while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) {
190		tag = (journal_block_tag_t *) tagp;
191
192		nr++;
193		tagp += sizeof(journal_block_tag_t);
194		if (!(tag->t_flags & cpu_to_be32(JFS_FLAG_SAME_UUID)))
195			tagp += 16;
196
197		if (tag->t_flags & cpu_to_be32(JFS_FLAG_LAST_TAG))
198			break;
199	}
200
201	return nr;
202}
203
204
/*
 * Wrap a log block offset back into the journal's circular area: once
 * @var reaches j_last it is pulled back by the size of the area
 * (j_last - j_first).  @var must be a modifiable lvalue.  Both arguments
 * are evaluated more than once, so neither may have side effects.
 */
#define wrap(journal, var)						\
do {									\
	if ((var) >= (journal)->j_last)					\
		(var) -= ((journal)->j_last - (journal)->j_first);	\
} while (0)
211
212/**
213 * journal_recover - recovers a on-disk journal
214 * @journal: the journal to recover
215 *
216 * The primary function for recovering the log contents when mounting a
217 * journaled device.
218 *
219 * Recovery is done in three passes.  In the first pass, we look for the
220 * end of the log.  In the second, we assemble the list of revoke
221 * blocks.  In the third and final pass, we replay any un-revoked blocks
222 * in the log.
223 */
224int journal_recover(journal_t *journal)
225{
226	int			err, err2;
227	journal_superblock_t *	sb;
228
229	struct recovery_info	info;
230
231	memset(&info, 0, sizeof(info));
232	sb = journal->j_superblock;
233
234	/*
235	 * The journal superblock's s_start field (the current log head)
236	 * is always zero if, and only if, the journal was cleanly
237	 * unmounted.
238	 */
239
240	if (!sb->s_start) {
241		jbd_debug(1, "No recovery required, last transaction %d\n",
242			  be32_to_cpu(sb->s_sequence));
243		journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
244		return 0;
245	}
246
247	err = do_one_pass(journal, &info, PASS_SCAN);
248	if (!err)
249		err = do_one_pass(journal, &info, PASS_REVOKE);
250	if (!err)
251		err = do_one_pass(journal, &info, PASS_REPLAY);
252
253	jbd_debug(1, "JBD: recovery, exit status %d, "
254		  "recovered transactions %u to %u\n",
255		  err, info.start_transaction, info.end_transaction);
256	jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n",
257		  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
258
259	/* Restart the log at the next transaction ID, thus invalidating
260	 * any existing commit records in the log. */
261	journal->j_transaction_sequence = ++info.end_transaction;
262
263	journal_clear_revoke(journal);
264	err2 = sync_blockdev(journal->j_fs_dev);
265	if (!err)
266		err = err2;
267	/* Flush disk caches to get replayed data on the permanent storage */
268	if (journal->j_flags & JFS_BARRIER)
269		blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
270
271	return err;
272}
273
274/**
275 * journal_skip_recovery - Start journal and wipe exiting records
276 * @journal: journal to startup
277 *
278 * Locate any valid recovery information from the journal and set up the
279 * journal structures in memory to ignore it (presumably because the
280 * caller has evidence that it is out of date).
281 * This function does'nt appear to be exorted..
282 *
283 * We perform one pass over the journal to allow us to tell the user how
284 * much recovery information is being erased, and to let us initialise
285 * the journal transaction sequence numbers to the next unused ID.
286 */
287int journal_skip_recovery(journal_t *journal)
288{
289	int			err;
290	struct recovery_info	info;
291
292	memset (&info, 0, sizeof(info));
293
294	err = do_one_pass(journal, &info, PASS_SCAN);
295
296	if (err) {
297		printk(KERN_ERR "JBD: error %d scanning journal\n", err);
298		++journal->j_transaction_sequence;
299	} else {
300#ifdef CONFIG_JBD_DEBUG
301		int dropped = info.end_transaction -
302			      be32_to_cpu(journal->j_superblock->s_sequence);
303		jbd_debug(1,
304			  "JBD: ignoring %d transaction%s from the journal.\n",
305			  dropped, (dropped == 1) ? "" : "s");
306#endif
307		journal->j_transaction_sequence = ++info.end_transaction;
308	}
309
310	journal->j_tail = 0;
311	return err;
312}
313
314static int do_one_pass(journal_t *journal,
315			struct recovery_info *info, enum passtype pass)
316{
317	unsigned int		first_commit_ID, next_commit_ID;
318	unsigned int		next_log_block;
319	int			err, success = 0;
320	journal_superblock_t *	sb;
321	journal_header_t *	tmp;
322	struct buffer_head *	bh;
323	unsigned int		sequence;
324	int			blocktype;
325
326	/*
327	 * First thing is to establish what we expect to find in the log
328	 * (in terms of transaction IDs), and where (in terms of log
329	 * block offsets): query the superblock.
330	 */
331
332	sb = journal->j_superblock;
333	next_commit_ID = be32_to_cpu(sb->s_sequence);
334	next_log_block = be32_to_cpu(sb->s_start);
335
336	first_commit_ID = next_commit_ID;
337	if (pass == PASS_SCAN)
338		info->start_transaction = first_commit_ID;
339
340	jbd_debug(1, "Starting recovery pass %d\n", pass);
341
342	/*
343	 * Now we walk through the log, transaction by transaction,
344	 * making sure that each transaction has a commit block in the
345	 * expected place.  Each complete transaction gets replayed back
346	 * into the main filesystem.
347	 */
348
349	while (1) {
350		int			flags;
351		char *			tagp;
352		journal_block_tag_t *	tag;
353		struct buffer_head *	obh;
354		struct buffer_head *	nbh;
355
356		cond_resched();
357
358		/* If we already know where to stop the log traversal,
359		 * check right now that we haven't gone past the end of
360		 * the log. */
361
362		if (pass != PASS_SCAN)
363			if (tid_geq(next_commit_ID, info->end_transaction))
364				break;
365
366		jbd_debug(2, "Scanning for sequence ID %u at %u/%u\n",
367			  next_commit_ID, next_log_block, journal->j_last);
368
369		/* Skip over each chunk of the transaction looking
370		 * either the next descriptor block or the final commit
371		 * record. */
372
373		jbd_debug(3, "JBD: checking block %u\n", next_log_block);
374		err = jread(&bh, journal, next_log_block);
375		if (err)
376			goto failed;
377
378		next_log_block++;
379		wrap(journal, next_log_block);
380
381		/* What kind of buffer is it?
382		 *
383		 * If it is a descriptor block, check that it has the
384		 * expected sequence number.  Otherwise, we're all done
385		 * here. */
386
387		tmp = (journal_header_t *)bh->b_data;
388
389		if (tmp->h_magic != cpu_to_be32(JFS_MAGIC_NUMBER)) {
390			brelse(bh);
391			break;
392		}
393
394		blocktype = be32_to_cpu(tmp->h_blocktype);
395		sequence = be32_to_cpu(tmp->h_sequence);
396		jbd_debug(3, "Found magic %d, sequence %d\n",
397			  blocktype, sequence);
398
399		if (sequence != next_commit_ID) {
400			brelse(bh);
401			break;
402		}
403
404		/* OK, we have a valid descriptor block which matches
405		 * all of the sequence number checks.  What are we going
406		 * to do with it?  That depends on the pass... */
407
408		switch(blocktype) {
409		case JFS_DESCRIPTOR_BLOCK:
410			/* If it is a valid descriptor block, replay it
411			 * in pass REPLAY; otherwise, just skip over the
412			 * blocks it describes. */
413			if (pass != PASS_REPLAY) {
414				next_log_block +=
415					count_tags(bh, journal->j_blocksize);
416				wrap(journal, next_log_block);
417				brelse(bh);
418				continue;
419			}
420
421			/* A descriptor block: we can now write all of
422			 * the data blocks.  Yay, useful work is finally
423			 * getting done here! */
424
425			tagp = &bh->b_data[sizeof(journal_header_t)];
426			while ((tagp - bh->b_data +sizeof(journal_block_tag_t))
427			       <= journal->j_blocksize) {
428				unsigned int io_block;
429
430				tag = (journal_block_tag_t *) tagp;
431				flags = be32_to_cpu(tag->t_flags);
432
433				io_block = next_log_block++;
434				wrap(journal, next_log_block);
435				err = jread(&obh, journal, io_block);
436				if (err) {
437					/* Recover what we can, but
438					 * report failure at the end. */
439					success = err;
440					printk (KERN_ERR
441						"JBD: IO error %d recovering "
442						"block %u in log\n",
443						err, io_block);
444				} else {
445					unsigned int blocknr;
446
447					J_ASSERT(obh != NULL);
448					blocknr = be32_to_cpu(tag->t_blocknr);
449
450					/* If the block has been
451					 * revoked, then we're all done
452					 * here. */
453					if (journal_test_revoke
454					    (journal, blocknr,
455					     next_commit_ID)) {
456						brelse(obh);
457						++info->nr_revoke_hits;
458						goto skip_write;
459					}
460
461					/* Find a buffer for the new
462					 * data being restored */
463					nbh = __getblk(journal->j_fs_dev,
464							blocknr,
465							journal->j_blocksize);
466					if (nbh == NULL) {
467						printk(KERN_ERR
468						       "JBD: Out of memory "
469						       "during recovery.\n");
470						err = -ENOMEM;
471						brelse(bh);
472						brelse(obh);
473						goto failed;
474					}
475
476					lock_buffer(nbh);
477					memcpy(nbh->b_data, obh->b_data,
478							journal->j_blocksize);
479					if (flags & JFS_FLAG_ESCAPE) {
480						*((__be32 *)nbh->b_data) =
481						cpu_to_be32(JFS_MAGIC_NUMBER);
482					}
483
484					BUFFER_TRACE(nbh, "marking dirty");
485					set_buffer_uptodate(nbh);
486					mark_buffer_dirty(nbh);
487					BUFFER_TRACE(nbh, "marking uptodate");
488					++info->nr_replays;
489					/* ll_rw_block(WRITE, 1, &nbh); */
490					unlock_buffer(nbh);
491					brelse(obh);
492					brelse(nbh);
493				}
494
495			skip_write:
496				tagp += sizeof(journal_block_tag_t);
497				if (!(flags & JFS_FLAG_SAME_UUID))
498					tagp += 16;
499
500				if (flags & JFS_FLAG_LAST_TAG)
501					break;
502			}
503
504			brelse(bh);
505			continue;
506
507		case JFS_COMMIT_BLOCK:
508			/* Found an expected commit block: not much to
509			 * do other than move on to the next sequence
510			 * number. */
511			brelse(bh);
512			next_commit_ID++;
513			continue;
514
515		case JFS_REVOKE_BLOCK:
516			/* If we aren't in the REVOKE pass, then we can
517			 * just skip over this block. */
518			if (pass != PASS_REVOKE) {
519				brelse(bh);
520				continue;
521			}
522
523			err = scan_revoke_records(journal, bh,
524						  next_commit_ID, info);
525			brelse(bh);
526			if (err)
527				goto failed;
528			continue;
529
530		default:
531			jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
532				  blocktype);
533			brelse(bh);
534			goto done;
535		}
536	}
537
538 done:
539	/*
540	 * We broke out of the log scan loop: either we came to the
541	 * known end of the log or we found an unexpected block in the
542	 * log.  If the latter happened, then we know that the "current"
543	 * transaction marks the end of the valid log.
544	 */
545
546	if (pass == PASS_SCAN)
547		info->end_transaction = next_commit_ID;
548	else {
549		/* It's really bad news if different passes end up at
550		 * different places (but possible due to IO errors). */
551		if (info->end_transaction != next_commit_ID) {
552			printk (KERN_ERR "JBD: recovery pass %d ended at "
553				"transaction %u, expected %u\n",
554				pass, next_commit_ID, info->end_transaction);
555			if (!success)
556				success = -EIO;
557		}
558	}
559
560	return success;
561
562 failed:
563	return err;
564}
565
566
567/* Scan a revoke record, marking all blocks mentioned as revoked. */
568
569static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
570			       tid_t sequence, struct recovery_info *info)
571{
572	journal_revoke_header_t *header;
573	int offset, max;
574
575	header = (journal_revoke_header_t *) bh->b_data;
576	offset = sizeof(journal_revoke_header_t);
577	max = be32_to_cpu(header->r_count);
578
579	while (offset < max) {
580		unsigned int blocknr;
581		int err;
582
583		blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
584		offset += 4;
585		err = journal_set_revoke(journal, blocknr, sequence);
586		if (err)
587			return err;
588		++info->nr_revokes;
589	}
590	return 0;
591}

  1/*
  2 * linux/fs/jbd/recovery.c
  3 *
  4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
  5 *
  6 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
  7 *
  8 * This file is part of the Linux kernel and is made available under
  9 * the terms of the GNU General Public License, version 2, or at your
 10 * option, any later version, incorporated herein by reference.
 11 *
 12 * Journal recovery routines for the generic filesystem journaling code;
 13 * part of the ext2fs journaling system.
 14 */
 15
 16#ifndef __KERNEL__
 17#include "jfs_user.h"
 18#else
 19#include <linux/time.h>
 20#include <linux/fs.h>
 21#include <linux/jbd.h>
 22#include <linux/errno.h>
 
 23#endif
 24
 25/*
 26 * Maintain information about the progress of the recovery job, so that
 27 * the different passes can carry information between them.
 28 */
 29struct recovery_info
 30{
 31	tid_t		start_transaction;
 32	tid_t		end_transaction;
 33
 34	int		nr_replays;
 35	int		nr_revokes;
 36	int		nr_revoke_hits;
 37};
 38
 39enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
 40static int do_one_pass(journal_t *journal,
 41				struct recovery_info *info, enum passtype pass);
 42static int scan_revoke_records(journal_t *, struct buffer_head *,
 43				tid_t, struct recovery_info *);
 44
 45#ifdef __KERNEL__
 46
 /*
  * Drop one reference on each of the first @n buffers in @b, walking the
  * array from the last entry down to the first.
  */
 static void journal_brelse_array(struct buffer_head *b[], int n)
 {
 	int idx;

 	for (idx = n - 1; idx >= 0; idx--)
 		brelse(b[idx]);
 }
 53
 54
 55/*
 56 * When reading from the journal, we are going through the block device
 57 * layer directly and so there is no readahead being done for us.  We
 58 * need to implement any readahead ourselves if we want it to happen at
 59 * all.  Recovery is basically one long sequential read, so make sure we
 60 * do the IO in reasonably large chunks.
 61 *
 62 * This is not so critical that we need to be enormously clever about
 63 * the readahead size, though.  128K is a purely arbitrary, good-enough
 64 * fixed value.
 65 */
 66
 67#define MAXBUF 8
 68static int do_readahead(journal_t *journal, unsigned int start)
 69{
 70	int err;
 71	unsigned int max, nbufs, next;
 72	unsigned int blocknr;
 73	struct buffer_head *bh;
 74
 75	struct buffer_head * bufs[MAXBUF];
 76
 77	/* Do up to 128K of readahead */
 78	max = start + (128 * 1024 / journal->j_blocksize);
 79	if (max > journal->j_maxlen)
 80		max = journal->j_maxlen;
 81
 82	/* Do the readahead itself.  We'll submit MAXBUF buffer_heads at
 83	 * a time to the block device IO layer. */
 84
 85	nbufs = 0;
 86
 87	for (next = start; next < max; next++) {
 88		err = journal_bmap(journal, next, &blocknr);
 89
 90		if (err) {
 91			printk (KERN_ERR "JBD: bad block at offset %u\n",
 92				next);
 93			goto failed;
 94		}
 95
 96		bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
 97		if (!bh) {
 98			err = -ENOMEM;
 99			goto failed;
100		}
101
102		if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
103			bufs[nbufs++] = bh;
104			if (nbufs == MAXBUF) {
105				ll_rw_block(READ, nbufs, bufs);
106				journal_brelse_array(bufs, nbufs);
107				nbufs = 0;
108			}
109		} else
110			brelse(bh);
111	}
112
113	if (nbufs)
114		ll_rw_block(READ, nbufs, bufs);
115	err = 0;
116
117failed:
118	if (nbufs)
119		journal_brelse_array(bufs, nbufs);
120	return err;
121}
122
123#endif /* __KERNEL__ */
124
125
126/*
127 * Read a block from the journal
128 */
129
130static int jread(struct buffer_head **bhp, journal_t *journal,
131		 unsigned int offset)
132{
133	int err;
134	unsigned int blocknr;
135	struct buffer_head *bh;
136
137	*bhp = NULL;
138
139	if (offset >= journal->j_maxlen) {
140		printk(KERN_ERR "JBD: corrupted journal superblock\n");
141		return -EIO;
142	}
143
144	err = journal_bmap(journal, offset, &blocknr);
145
146	if (err) {
147		printk (KERN_ERR "JBD: bad block at offset %u\n",
148			offset);
149		return err;
150	}
151
152	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
153	if (!bh)
154		return -ENOMEM;
155
156	if (!buffer_uptodate(bh)) {
157		/* If this is a brand new buffer, start readahead.
158                   Otherwise, we assume we are already reading it.  */
159		if (!buffer_req(bh))
160			do_readahead(journal, offset);
161		wait_on_buffer(bh);
162	}
163
164	if (!buffer_uptodate(bh)) {
165		printk (KERN_ERR "JBD: Failed to read block at offset %u\n",
166			offset);
167		brelse(bh);
168		return -EIO;
169	}
170
171	*bhp = bh;
172	return 0;
173}
174
175
176/*
177 * Count the number of in-use tags in a journal descriptor block.
178 */
179
180static int count_tags(struct buffer_head *bh, int size)
181{
182	char *			tagp;
183	journal_block_tag_t *	tag;
184	int			nr = 0;
185
186	tagp = &bh->b_data[sizeof(journal_header_t)];
187
188	while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) {
189		tag = (journal_block_tag_t *) tagp;
190
191		nr++;
192		tagp += sizeof(journal_block_tag_t);
193		if (!(tag->t_flags & cpu_to_be32(JFS_FLAG_SAME_UUID)))
194			tagp += 16;
195
196		if (tag->t_flags & cpu_to_be32(JFS_FLAG_LAST_TAG))
197			break;
198	}
199
200	return nr;
201}
202
203
/*
 * Wrap a log block offset back into the journal's circular area: once
 * @var reaches j_last it is pulled back by the size of the area
 * (j_last - j_first).  @var must be a modifiable lvalue.  Both arguments
 * are evaluated more than once, so neither may have side effects.
 */
#define wrap(journal, var)						\
do {									\
	if ((var) >= (journal)->j_last)					\
		(var) -= ((journal)->j_last - (journal)->j_first);	\
} while (0)
210
211/**
212 * journal_recover - recovers a on-disk journal
213 * @journal: the journal to recover
214 *
215 * The primary function for recovering the log contents when mounting a
216 * journaled device.
217 *
218 * Recovery is done in three passes.  In the first pass, we look for the
219 * end of the log.  In the second, we assemble the list of revoke
220 * blocks.  In the third and final pass, we replay any un-revoked blocks
221 * in the log.
222 */
223int journal_recover(journal_t *journal)
224{
225	int			err, err2;
226	journal_superblock_t *	sb;
227
228	struct recovery_info	info;
229
230	memset(&info, 0, sizeof(info));
231	sb = journal->j_superblock;
232
233	/*
234	 * The journal superblock's s_start field (the current log head)
235	 * is always zero if, and only if, the journal was cleanly
236	 * unmounted.
237	 */
238
239	if (!sb->s_start) {
240		jbd_debug(1, "No recovery required, last transaction %d\n",
241			  be32_to_cpu(sb->s_sequence));
242		journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
243		return 0;
244	}
245
246	err = do_one_pass(journal, &info, PASS_SCAN);
247	if (!err)
248		err = do_one_pass(journal, &info, PASS_REVOKE);
249	if (!err)
250		err = do_one_pass(journal, &info, PASS_REPLAY);
251
252	jbd_debug(1, "JBD: recovery, exit status %d, "
253		  "recovered transactions %u to %u\n",
254		  err, info.start_transaction, info.end_transaction);
255	jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n",
256		  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
257
258	/* Restart the log at the next transaction ID, thus invalidating
259	 * any existing commit records in the log. */
260	journal->j_transaction_sequence = ++info.end_transaction;
261
262	journal_clear_revoke(journal);
263	err2 = sync_blockdev(journal->j_fs_dev);
264	if (!err)
265		err = err2;
 
 
 
266
267	return err;
268}
269
270/**
271 * journal_skip_recovery - Start journal and wipe exiting records
272 * @journal: journal to startup
273 *
274 * Locate any valid recovery information from the journal and set up the
275 * journal structures in memory to ignore it (presumably because the
276 * caller has evidence that it is out of date).
277 * This function does'nt appear to be exorted..
278 *
279 * We perform one pass over the journal to allow us to tell the user how
280 * much recovery information is being erased, and to let us initialise
281 * the journal transaction sequence numbers to the next unused ID.
282 */
283int journal_skip_recovery(journal_t *journal)
284{
285	int			err;
286	struct recovery_info	info;
287
288	memset (&info, 0, sizeof(info));
289
290	err = do_one_pass(journal, &info, PASS_SCAN);
291
292	if (err) {
293		printk(KERN_ERR "JBD: error %d scanning journal\n", err);
294		++journal->j_transaction_sequence;
295	} else {
296#ifdef CONFIG_JBD_DEBUG
297		int dropped = info.end_transaction -
298			      be32_to_cpu(journal->j_superblock->s_sequence);
299		jbd_debug(1,
300			  "JBD: ignoring %d transaction%s from the journal.\n",
301			  dropped, (dropped == 1) ? "" : "s");
302#endif
303		journal->j_transaction_sequence = ++info.end_transaction;
304	}
305
306	journal->j_tail = 0;
307	return err;
308}
309
310static int do_one_pass(journal_t *journal,
311			struct recovery_info *info, enum passtype pass)
312{
313	unsigned int		first_commit_ID, next_commit_ID;
314	unsigned int		next_log_block;
315	int			err, success = 0;
316	journal_superblock_t *	sb;
317	journal_header_t *	tmp;
318	struct buffer_head *	bh;
319	unsigned int		sequence;
320	int			blocktype;
321
322	/*
323	 * First thing is to establish what we expect to find in the log
324	 * (in terms of transaction IDs), and where (in terms of log
325	 * block offsets): query the superblock.
326	 */
327
328	sb = journal->j_superblock;
329	next_commit_ID = be32_to_cpu(sb->s_sequence);
330	next_log_block = be32_to_cpu(sb->s_start);
331
332	first_commit_ID = next_commit_ID;
333	if (pass == PASS_SCAN)
334		info->start_transaction = first_commit_ID;
335
336	jbd_debug(1, "Starting recovery pass %d\n", pass);
337
338	/*
339	 * Now we walk through the log, transaction by transaction,
340	 * making sure that each transaction has a commit block in the
341	 * expected place.  Each complete transaction gets replayed back
342	 * into the main filesystem.
343	 */
344
345	while (1) {
346		int			flags;
347		char *			tagp;
348		journal_block_tag_t *	tag;
349		struct buffer_head *	obh;
350		struct buffer_head *	nbh;
351
352		cond_resched();
353
354		/* If we already know where to stop the log traversal,
355		 * check right now that we haven't gone past the end of
356		 * the log. */
357
358		if (pass != PASS_SCAN)
359			if (tid_geq(next_commit_ID, info->end_transaction))
360				break;
361
362		jbd_debug(2, "Scanning for sequence ID %u at %u/%u\n",
363			  next_commit_ID, next_log_block, journal->j_last);
364
365		/* Skip over each chunk of the transaction looking
366		 * either the next descriptor block or the final commit
367		 * record. */
368
369		jbd_debug(3, "JBD: checking block %u\n", next_log_block);
370		err = jread(&bh, journal, next_log_block);
371		if (err)
372			goto failed;
373
374		next_log_block++;
375		wrap(journal, next_log_block);
376
377		/* What kind of buffer is it?
378		 *
379		 * If it is a descriptor block, check that it has the
380		 * expected sequence number.  Otherwise, we're all done
381		 * here. */
382
383		tmp = (journal_header_t *)bh->b_data;
384
385		if (tmp->h_magic != cpu_to_be32(JFS_MAGIC_NUMBER)) {
386			brelse(bh);
387			break;
388		}
389
390		blocktype = be32_to_cpu(tmp->h_blocktype);
391		sequence = be32_to_cpu(tmp->h_sequence);
392		jbd_debug(3, "Found magic %d, sequence %d\n",
393			  blocktype, sequence);
394
395		if (sequence != next_commit_ID) {
396			brelse(bh);
397			break;
398		}
399
400		/* OK, we have a valid descriptor block which matches
401		 * all of the sequence number checks.  What are we going
402		 * to do with it?  That depends on the pass... */
403
404		switch(blocktype) {
405		case JFS_DESCRIPTOR_BLOCK:
406			/* If it is a valid descriptor block, replay it
407			 * in pass REPLAY; otherwise, just skip over the
408			 * blocks it describes. */
409			if (pass != PASS_REPLAY) {
410				next_log_block +=
411					count_tags(bh, journal->j_blocksize);
412				wrap(journal, next_log_block);
413				brelse(bh);
414				continue;
415			}
416
417			/* A descriptor block: we can now write all of
418			 * the data blocks.  Yay, useful work is finally
419			 * getting done here! */
420
421			tagp = &bh->b_data[sizeof(journal_header_t)];
422			while ((tagp - bh->b_data +sizeof(journal_block_tag_t))
423			       <= journal->j_blocksize) {
424				unsigned int io_block;
425
426				tag = (journal_block_tag_t *) tagp;
427				flags = be32_to_cpu(tag->t_flags);
428
429				io_block = next_log_block++;
430				wrap(journal, next_log_block);
431				err = jread(&obh, journal, io_block);
432				if (err) {
433					/* Recover what we can, but
434					 * report failure at the end. */
435					success = err;
436					printk (KERN_ERR
437						"JBD: IO error %d recovering "
438						"block %u in log\n",
439						err, io_block);
440				} else {
441					unsigned int blocknr;
442
443					J_ASSERT(obh != NULL);
444					blocknr = be32_to_cpu(tag->t_blocknr);
445
446					/* If the block has been
447					 * revoked, then we're all done
448					 * here. */
449					if (journal_test_revoke
450					    (journal, blocknr,
451					     next_commit_ID)) {
452						brelse(obh);
453						++info->nr_revoke_hits;
454						goto skip_write;
455					}
456
457					/* Find a buffer for the new
458					 * data being restored */
459					nbh = __getblk(journal->j_fs_dev,
460							blocknr,
461							journal->j_blocksize);
462					if (nbh == NULL) {
463						printk(KERN_ERR
464						       "JBD: Out of memory "
465						       "during recovery.\n");
466						err = -ENOMEM;
467						brelse(bh);
468						brelse(obh);
469						goto failed;
470					}
471
472					lock_buffer(nbh);
473					memcpy(nbh->b_data, obh->b_data,
474							journal->j_blocksize);
475					if (flags & JFS_FLAG_ESCAPE) {
476						*((__be32 *)nbh->b_data) =
477						cpu_to_be32(JFS_MAGIC_NUMBER);
478					}
479
480					BUFFER_TRACE(nbh, "marking dirty");
481					set_buffer_uptodate(nbh);
482					mark_buffer_dirty(nbh);
483					BUFFER_TRACE(nbh, "marking uptodate");
484					++info->nr_replays;
485					/* ll_rw_block(WRITE, 1, &nbh); */
486					unlock_buffer(nbh);
487					brelse(obh);
488					brelse(nbh);
489				}
490
491			skip_write:
492				tagp += sizeof(journal_block_tag_t);
493				if (!(flags & JFS_FLAG_SAME_UUID))
494					tagp += 16;
495
496				if (flags & JFS_FLAG_LAST_TAG)
497					break;
498			}
499
500			brelse(bh);
501			continue;
502
503		case JFS_COMMIT_BLOCK:
504			/* Found an expected commit block: not much to
505			 * do other than move on to the next sequence
506			 * number. */
507			brelse(bh);
508			next_commit_ID++;
509			continue;
510
511		case JFS_REVOKE_BLOCK:
512			/* If we aren't in the REVOKE pass, then we can
513			 * just skip over this block. */
514			if (pass != PASS_REVOKE) {
515				brelse(bh);
516				continue;
517			}
518
519			err = scan_revoke_records(journal, bh,
520						  next_commit_ID, info);
521			brelse(bh);
522			if (err)
523				goto failed;
524			continue;
525
526		default:
527			jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
528				  blocktype);
529			brelse(bh);
530			goto done;
531		}
532	}
533
534 done:
535	/*
536	 * We broke out of the log scan loop: either we came to the
537	 * known end of the log or we found an unexpected block in the
538	 * log.  If the latter happened, then we know that the "current"
539	 * transaction marks the end of the valid log.
540	 */
541
542	if (pass == PASS_SCAN)
543		info->end_transaction = next_commit_ID;
544	else {
545		/* It's really bad news if different passes end up at
546		 * different places (but possible due to IO errors). */
547		if (info->end_transaction != next_commit_ID) {
548			printk (KERN_ERR "JBD: recovery pass %d ended at "
549				"transaction %u, expected %u\n",
550				pass, next_commit_ID, info->end_transaction);
551			if (!success)
552				success = -EIO;
553		}
554	}
555
556	return success;
557
558 failed:
559	return err;
560}
561
562
563/* Scan a revoke record, marking all blocks mentioned as revoked. */
564
565static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
566			       tid_t sequence, struct recovery_info *info)
567{
568	journal_revoke_header_t *header;
569	int offset, max;
570
571	header = (journal_revoke_header_t *) bh->b_data;
572	offset = sizeof(journal_revoke_header_t);
573	max = be32_to_cpu(header->r_count);
574
575	while (offset < max) {
576		unsigned int blocknr;
577		int err;
578
579		blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
580		offset += 4;
581		err = journal_set_revoke(journal, blocknr, sequence);
582		if (err)
583			return err;
584		++info->nr_revokes;
585	}
586	return 0;
587}