Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  4 * All Rights Reserved.
  5 */
  6#include "xfs.h"
  7#include "xfs_fs.h"
  8#include "xfs_shared.h"
  9#include "xfs_format.h"
 10#include "xfs_log_format.h"
 11#include "xfs_trans_resv.h"
 12#include "xfs_mount.h"
 13#include "xfs_inode.h"
 14#include "xfs_trans.h"
 15#include "xfs_inode_item.h"
 16#include "xfs_trace.h"
 17#include "xfs_trans_priv.h"
 18#include "xfs_buf_item.h"
 19#include "xfs_log.h"
 20#include "xfs_error.h"
 21#include "xfs_log_priv.h"
 22#include "xfs_log_recover.h"
 23#include "xfs_icache.h"
 24#include "xfs_bmap_btree.h"
 25
 26STATIC void
 27xlog_recover_inode_ra_pass2(
 28	struct xlog                     *log,
 29	struct xlog_recover_item        *item)
 30{
 31	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
 32		struct xfs_inode_log_format	*ilfp = item->ri_buf[0].i_addr;
 33
 34		xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
 35				   &xfs_inode_buf_ra_ops);
 36	} else {
 37		struct xfs_inode_log_format_32	*ilfp = item->ri_buf[0].i_addr;
 38
 39		xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len,
 40				   &xfs_inode_buf_ra_ops);
 41	}
 42}
 43
 44/*
 45 * Inode fork owner changes
 46 *
 47 * If we have been told that we have to reparent the inode fork, it's because an
 48 * extent swap operation on a CRC enabled filesystem has been done and we are
 49 * replaying it. We need to walk the BMBT of the appropriate fork and change the
 50 * owners of it.
 51 *
 52 * The complexity here is that we don't have an inode context to work with, so
 53 * after we've replayed the inode we need to instantiate one.  This is where the
 54 * fun begins.
 55 *
 56 * We are in the middle of log recovery, so we can't run transactions. That
 57 * means we cannot use cache coherent inode instantiation via xfs_iget(), as
 58 * that will result in the corresponding iput() running the inode through
 59 * xfs_inactive(). If we've just replayed an inode core that changes the link
 60 * count to zero (i.e. it's been unlinked), then xfs_inactive() will run
 61 * transactions (bad!).
 62 *
 63 * So, to avoid this, we instantiate an inode directly from the inode core we've
 64 * just recovered. We have the buffer still locked, and all we really need to
 65 * instantiate is the inode core and the forks being modified. We can do this
 66 * manually, then run the inode btree owner change, and then tear down the
 67 * xfs_inode without having to run any transactions at all.
 68 *
 * Also, because we don't have a transaction context available here but still
 * need to gather all the buffers we modify for writeback, we pass the
 * buffer_list to the operation to use instead.
 72 */
 73
 74STATIC int
 75xfs_recover_inode_owner_change(
 76	struct xfs_mount	*mp,
 77	struct xfs_dinode	*dip,
 78	struct xfs_inode_log_format *in_f,
 79	struct list_head	*buffer_list)
 80{
 81	struct xfs_inode	*ip;
 82	int			error;
 83
 84	ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER));
 85
 86	ip = xfs_inode_alloc(mp, in_f->ilf_ino);
 87	if (!ip)
 88		return -ENOMEM;
 89
 90	/* instantiate the inode */
 91	ASSERT(dip->di_version >= 3);
 92
 93	error = xfs_inode_from_disk(ip, dip);
 94	if (error)
 95		goto out_free_ip;
 96
 97	if (in_f->ilf_fields & XFS_ILOG_DOWNER) {
 98		ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT);
 99		error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK,
100					      ip->i_ino, buffer_list);
101		if (error)
102			goto out_free_ip;
103	}
104
105	if (in_f->ilf_fields & XFS_ILOG_AOWNER) {
106		ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT);
107		error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK,
108					      ip->i_ino, buffer_list);
109		if (error)
110			goto out_free_ip;
111	}
112
113out_free_ip:
114	xfs_inode_free(ip);
115	return error;
116}
117
118static inline bool xfs_log_dinode_has_bigtime(const struct xfs_log_dinode *ld)
119{
120	return ld->di_version >= 3 &&
121	       (ld->di_flags2 & XFS_DIFLAG2_BIGTIME);
122}
123
124/* Convert a log timestamp to an ondisk timestamp. */
125static inline xfs_timestamp_t
126xfs_log_dinode_to_disk_ts(
127	struct xfs_log_dinode		*from,
128	const xfs_log_timestamp_t	its)
129{
130	struct xfs_legacy_timestamp	*lts;
131	struct xfs_log_legacy_timestamp	*lits;
132	xfs_timestamp_t			ts;
133
134	if (xfs_log_dinode_has_bigtime(from))
135		return cpu_to_be64(its);
136
137	lts = (struct xfs_legacy_timestamp *)&ts;
138	lits = (struct xfs_log_legacy_timestamp *)&its;
139	lts->t_sec = cpu_to_be32(lits->t_sec);
140	lts->t_nsec = cpu_to_be32(lits->t_nsec);
141
142	return ts;
143}
144
145STATIC void
146xfs_log_dinode_to_disk(
147	struct xfs_log_dinode	*from,
148	struct xfs_dinode	*to,
149	xfs_lsn_t		lsn)
150{
151	to->di_magic = cpu_to_be16(from->di_magic);
152	to->di_mode = cpu_to_be16(from->di_mode);
153	to->di_version = from->di_version;
154	to->di_format = from->di_format;
155	to->di_onlink = 0;
156	to->di_uid = cpu_to_be32(from->di_uid);
157	to->di_gid = cpu_to_be32(from->di_gid);
158	to->di_nlink = cpu_to_be32(from->di_nlink);
159	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
160	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
161	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
162
163	to->di_atime = xfs_log_dinode_to_disk_ts(from, from->di_atime);
164	to->di_mtime = xfs_log_dinode_to_disk_ts(from, from->di_mtime);
165	to->di_ctime = xfs_log_dinode_to_disk_ts(from, from->di_ctime);
166
167	to->di_size = cpu_to_be64(from->di_size);
168	to->di_nblocks = cpu_to_be64(from->di_nblocks);
169	to->di_extsize = cpu_to_be32(from->di_extsize);
170	to->di_nextents = cpu_to_be32(from->di_nextents);
171	to->di_anextents = cpu_to_be16(from->di_anextents);
172	to->di_forkoff = from->di_forkoff;
173	to->di_aformat = from->di_aformat;
174	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
175	to->di_dmstate = cpu_to_be16(from->di_dmstate);
176	to->di_flags = cpu_to_be16(from->di_flags);
177	to->di_gen = cpu_to_be32(from->di_gen);
178
179	if (from->di_version == 3) {
180		to->di_changecount = cpu_to_be64(from->di_changecount);
181		to->di_crtime = xfs_log_dinode_to_disk_ts(from,
182							  from->di_crtime);
183		to->di_flags2 = cpu_to_be64(from->di_flags2);
184		to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
185		to->di_ino = cpu_to_be64(from->di_ino);
186		to->di_lsn = cpu_to_be64(lsn);
187		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
188		uuid_copy(&to->di_uuid, &from->di_uuid);
189		to->di_flushiter = 0;
190	} else {
191		to->di_flushiter = cpu_to_be16(from->di_flushiter);
192	}
193}
194
/*
 * Replay one logged inode item into its on-disk inode buffer.
 *
 * The regions of @item are consumed as follows:
 *   ri_buf[0] - inode log format header (converted into a temporary
 *               struct xfs_inode_log_format if it was not logged at that
 *               size),
 *   ri_buf[1] - the log dinode (inode core),
 *   ri_buf[2] - data fork contents if any were logged, otherwise the attr
 *               fork contents,
 *   ri_buf[3] - attr fork contents when both forks were logged.
 *
 * Replay is skipped (returning 0) when the inode buffer has been cancelled,
 * or when the on-disk inode is judged newer than the logged change (by
 * di_lsn for v3 inodes, by di_flushiter on filesystems without v3 inodes).
 * In the LSN case a logged owner change is still replayed.
 *
 * On success the buffer is checksummed and queued on @buffer_list for
 * delayed write.
 *
 * Returns 0 on success or skip, -EFSCORRUPTED if the on-disk inode or the
 * log record fails validation, or the error returned by the header
 * conversion, the buffer read, or the owner change.
 */
STATIC int
xlog_recover_inode_commit_pass2(
	struct xlog			*log,
	struct list_head		*buffer_list,
	struct xlog_recover_item	*item,
	xfs_lsn_t			current_lsn)
{
	struct xfs_inode_log_format	*in_f;
	struct xfs_mount		*mp = log->l_mp;
	struct xfs_buf			*bp;
	struct xfs_dinode		*dip;
	int				len;
	char				*src;
	char				*dest;
	int				error;
	int				attr_index;
	uint				fields;
	struct xfs_log_dinode		*ldip;
	uint				isize;
	int				need_free = 0;

	/*
	 * Use the logged header in place if it has the native size, otherwise
	 * convert it into a temporary copy that is freed on exit.
	 */
	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
		in_f = item->ri_buf[0].i_addr;
	} else {
		in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), 0);
		need_free = 1;
		error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
		if (error)
			goto error;
	}

	/*
	 * Inode buffers can be freed, look out for it,
	 * and do not replay the inode.
	 */
	if (xlog_is_buffer_cancelled(log, in_f->ilf_blkno, in_f->ilf_len)) {
		error = 0;
		trace_xfs_log_recover_inode_cancel(log, in_f);
		goto error;
	}
	trace_xfs_log_recover_inode_recover(log, in_f);

	/* Read the inode buffer and locate this inode within it. */
	error = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
			0, &bp, &xfs_inode_buf_ops);
	if (error)
		goto error;
	ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
	dip = xfs_buf_offset(bp, in_f->ilf_boffset);

	/*
	 * Make sure the place we're flushing out to really looks
	 * like an inode!
	 */
	if (XFS_IS_CORRUPT(mp, !xfs_verify_magic16(bp, dip->di_magic))) {
		xfs_alert(mp,
	"%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld",
			__func__, dip, bp, in_f->ilf_ino);
		error = -EFSCORRUPTED;
		goto out_release;
	}
	/* The logged inode core must carry the inode magic number as well. */
	ldip = item->ri_buf[1].i_addr;
	if (XFS_IS_CORRUPT(mp, ldip->di_magic != XFS_DINODE_MAGIC)) {
		xfs_alert(mp,
			"%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld",
			__func__, item, in_f->ilf_ino);
		error = -EFSCORRUPTED;
		goto out_release;
	}

	/*
	 * If the inode has an LSN in it, recover the inode only if the on-disk
	 * inode's LSN is older than the lsn of the transaction we are
	 * replaying. We can have multiple checkpoints with the same start LSN,
	 * so the current LSN being equal to the on-disk LSN doesn't necessarily
	 * mean that the on-disk inode is more recent than the change being
	 * replayed.
	 *
	 * We must check the current_lsn against the on-disk inode
	 * here because we can't trust the log dinode to contain a valid LSN
	 * (see comment below before replaying the log dinode for details).
	 *
	 * Note: we still need to replay an owner change even though the inode
	 * is more recent than the transaction as there is no guarantee that all
	 * the btree blocks are more recent than this transaction, too.
	 */
	if (dip->di_version >= 3) {
		xfs_lsn_t	lsn = be64_to_cpu(dip->di_lsn);

		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) {
			trace_xfs_log_recover_inode_skip(log, in_f);
			error = 0;
			goto out_owner_change;
		}
	}

	/*
	 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
	 * are transactional and if ordering is necessary we can determine that
	 * more accurately by the LSN field in the V3 inode core. Don't trust
	 * the inode versions, as we might be changing them here - use the
	 * superblock flag to determine whether we need to look at di_flushiter
	 * to skip replay when the on-disk inode is newer than the log one.
	 */
	if (!xfs_sb_version_has_v3inode(&mp->m_sb) &&
	    ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
		/*
		 * Deal with the wrap case, DI_MAX_FLUSH is less
		 * than smaller numbers
		 */
		if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
		    ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
			/* do nothing */
		} else {
			trace_xfs_log_recover_inode_skip(log, in_f);
			error = 0;
			goto out_release;
		}
	}

	/* Take the opportunity to reset the flush iteration count */
	ldip->di_flushiter = 0;

	/*
	 * Sanity check the logged inode core before it is copied out:
	 * regular files may only be in extents or btree format, directories
	 * may additionally be in local format.
	 */
	if (unlikely(S_ISREG(ldip->di_mode))) {
		if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
		    (ldip->di_format != XFS_DINODE_FMT_BTREE)) {
			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
					 XFS_ERRLEVEL_LOW, mp, ldip,
					 sizeof(*ldip));
			xfs_alert(mp,
		"%s: Bad regular inode log record, rec ptr "PTR_FMT", "
		"ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
				__func__, item, dip, bp, in_f->ilf_ino);
			error = -EFSCORRUPTED;
			goto out_release;
		}
	} else if (unlikely(S_ISDIR(ldip->di_mode))) {
		if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
		    (ldip->di_format != XFS_DINODE_FMT_BTREE) &&
		    (ldip->di_format != XFS_DINODE_FMT_LOCAL)) {
			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
					     XFS_ERRLEVEL_LOW, mp, ldip,
					     sizeof(*ldip));
			xfs_alert(mp,
		"%s: Bad dir inode log record, rec ptr "PTR_FMT", "
		"ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
				__func__, item, dip, bp, in_f->ilf_ino);
			error = -EFSCORRUPTED;
			goto out_release;
		}
	}
	/* The combined extent count may not exceed the block count. */
	if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){
		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
				     XFS_ERRLEVEL_LOW, mp, ldip,
				     sizeof(*ldip));
		xfs_alert(mp,
	"%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
	"dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld",
			__func__, item, dip, bp, in_f->ilf_ino,
			ldip->di_nextents + ldip->di_anextents,
			ldip->di_nblocks);
		error = -EFSCORRUPTED;
		goto out_release;
	}
	/* The fork offset is bounded by the superblock inode size. */
	if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
				     XFS_ERRLEVEL_LOW, mp, ldip,
				     sizeof(*ldip));
		xfs_alert(mp,
	"%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
	"dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__,
			item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
		error = -EFSCORRUPTED;
		goto out_release;
	}
	/* The logged core region may not be larger than a log dinode. */
	isize = xfs_log_dinode_size(mp);
	if (unlikely(item->ri_buf[1].i_len > isize)) {
		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
				     XFS_ERRLEVEL_LOW, mp, ldip,
				     sizeof(*ldip));
		xfs_alert(mp,
			"%s: Bad inode log record length %d, rec ptr "PTR_FMT,
			__func__, item->ri_buf[1].i_len, item);
		error = -EFSCORRUPTED;
		goto out_release;
	}

	/*
	 * Recover the log dinode inode into the on disk inode.
	 *
	 * The LSN in the log dinode is garbage - it can be zero or reflect
	 * stale in-memory runtime state that isn't coherent with the changes
	 * logged in this transaction or the changes written to the on-disk
	 * inode.  Hence we write the current LSN into the inode because that
	 * matches what xfs_iflush() would write into the inode when flushing
	 * the changes in this transaction.
	 */
	xfs_log_dinode_to_disk(ldip, dip, current_lsn);

	fields = in_f->ilf_fields;
	if (fields & XFS_ILOG_DEV)
		xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);

	/* Only the header and the core were logged: no fork data to replay. */
	if (in_f->ilf_size == 2)
		goto out_owner_change;
	len = item->ri_buf[2].i_len;
	src = item->ri_buf[2].i_addr;
	ASSERT(in_f->ilf_size <= 4);
	ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
	ASSERT(!(fields & XFS_ILOG_DFORK) ||
	       (len == in_f->ilf_dsize));

	/* Replay the data fork according to the form it was logged in. */
	switch (fields & XFS_ILOG_DFORK) {
	case XFS_ILOG_DDATA:
	case XFS_ILOG_DEXT:
		memcpy(XFS_DFORK_DPTR(dip), src, len);
		break;

	case XFS_ILOG_DBROOT:
		xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
				 (struct xfs_bmdr_block *)XFS_DFORK_DPTR(dip),
				 XFS_DFORK_DSIZE(dip, mp));
		break;

	default:
		/*
		 * There are no data fork flags set.
		 */
		ASSERT((fields & XFS_ILOG_DFORK) == 0);
		break;
	}

	/*
	 * If we logged any attribute data, recover it.  There may or
	 * may not have been any other non-core data logged in this
	 * transaction.
	 */
	if (in_f->ilf_fields & XFS_ILOG_AFORK) {
		/* The attr region follows the data region when both exist. */
		if (in_f->ilf_fields & XFS_ILOG_DFORK) {
			attr_index = 3;
		} else {
			attr_index = 2;
		}
		len = item->ri_buf[attr_index].i_len;
		src = item->ri_buf[attr_index].i_addr;
		ASSERT(len == in_f->ilf_asize);

		switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
		case XFS_ILOG_ADATA:
		case XFS_ILOG_AEXT:
			dest = XFS_DFORK_APTR(dip);
			ASSERT(len <= XFS_DFORK_ASIZE(dip, mp));
			memcpy(dest, src, len);
			break;

		case XFS_ILOG_ABROOT:
			dest = XFS_DFORK_APTR(dip);
			xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
					 len, (struct xfs_bmdr_block *)dest,
					 XFS_DFORK_ASIZE(dip, mp));
			break;

		default:
			xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
			ASSERT(0);
			error = -EFSCORRUPTED;
			goto out_release;
		}
	}

out_owner_change:
	/* Recover the swapext owner change unless inode has been deleted */
	if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) &&
	    (dip->di_mode != 0))
		error = xfs_recover_inode_owner_change(mp, dip, in_f,
						       buffer_list);
	/* re-generate the checksum. */
	xfs_dinode_calc_crc(log->l_mp, dip);

	/* Mark the buffer as written by recovery and queue it for writeback. */
	ASSERT(bp->b_mount == mp);
	bp->b_flags |= _XBF_LOGRECOVERY;
	xfs_buf_delwri_queue(bp, buffer_list);

out_release:
	xfs_buf_relse(bp);
error:
	/* Free the converted header copy, if one was allocated above. */
	if (need_free)
		kmem_free(in_f);
	return error;
}
484
/*
 * Recovery operations for XFS_LI_INODE log items: buffer readahead and
 * inode replay, both hooked into pass 2.
 */
const struct xlog_recover_item_ops xlog_inode_item_ops = {
	.item_type		= XFS_LI_INODE,
	.ra_pass2		= xlog_recover_inode_ra_pass2,
	.commit_pass2		= xlog_recover_inode_commit_pass2,
};