Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.15.
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Copyright (c) 2022 Fujitsu.  All Rights Reserved.
  4 */
  5
  6#include "xfs.h"
  7#include "xfs_shared.h"
  8#include "xfs_format.h"
  9#include "xfs_log_format.h"
 10#include "xfs_trans_resv.h"
 11#include "xfs_mount.h"
 12#include "xfs_alloc.h"
 13#include "xfs_bit.h"
 14#include "xfs_btree.h"
 15#include "xfs_inode.h"
 16#include "xfs_icache.h"
 17#include "xfs_rmap.h"
 18#include "xfs_rmap_btree.h"
 19#include "xfs_rtalloc.h"
 20#include "xfs_trans.h"
 21#include "xfs_ag.h"
 22
 23#include <linux/mm.h>
 24#include <linux/dax.h>
 25#include <linux/fs.h>
 26
 27struct xfs_failure_info {
 28	xfs_agblock_t		startblock;
 29	xfs_extlen_t		blockcount;
 30	int			mf_flags;
 31	bool			want_shutdown;
 32};
 33
 34static pgoff_t
 35xfs_failure_pgoff(
 36	struct xfs_mount		*mp,
 37	const struct xfs_rmap_irec	*rec,
 38	const struct xfs_failure_info	*notify)
 39{
 40	loff_t				pos = XFS_FSB_TO_B(mp, rec->rm_offset);
 41
 42	if (notify->startblock > rec->rm_startblock)
 43		pos += XFS_FSB_TO_B(mp,
 44				notify->startblock - rec->rm_startblock);
 45	return pos >> PAGE_SHIFT;
 46}
 47
 48static unsigned long
 49xfs_failure_pgcnt(
 50	struct xfs_mount		*mp,
 51	const struct xfs_rmap_irec	*rec,
 52	const struct xfs_failure_info	*notify)
 53{
 54	xfs_agblock_t			end_rec;
 55	xfs_agblock_t			end_notify;
 56	xfs_agblock_t			start_cross;
 57	xfs_agblock_t			end_cross;
 58
 59	start_cross = max(rec->rm_startblock, notify->startblock);
 60
 61	end_rec = rec->rm_startblock + rec->rm_blockcount;
 62	end_notify = notify->startblock + notify->blockcount;
 63	end_cross = min(end_rec, end_notify);
 64
 65	return XFS_FSB_TO_B(mp, end_cross - start_cross) >> PAGE_SHIFT;
 66}
 67
 68static int
 69xfs_dax_failure_fn(
 70	struct xfs_btree_cur		*cur,
 71	const struct xfs_rmap_irec	*rec,
 72	void				*data)
 73{
 74	struct xfs_mount		*mp = cur->bc_mp;
 75	struct xfs_inode		*ip;
 76	struct xfs_failure_info		*notify = data;
 77	struct address_space		*mapping;
 78	pgoff_t				pgoff;
 79	unsigned long			pgcnt;
 80	int				error = 0;
 81
 82	if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) ||
 83	    (rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))) {
 84		/* Continue the query because this isn't a failure. */
 85		if (notify->mf_flags & MF_MEM_PRE_REMOVE)
 86			return 0;
 87		notify->want_shutdown = true;
 88		return 0;
 89	}
 90
 91	/* Get files that incore, filter out others that are not in use. */
 92	error = xfs_iget(mp, cur->bc_tp, rec->rm_owner, XFS_IGET_INCORE,
 93			 0, &ip);
 94	/* Continue the rmap query if the inode isn't incore */
 95	if (error == -ENODATA)
 96		return 0;
 97	if (error) {
 98		notify->want_shutdown = true;
 99		return 0;
100	}
101
102	mapping = VFS_I(ip)->i_mapping;
103	pgoff = xfs_failure_pgoff(mp, rec, notify);
104	pgcnt = xfs_failure_pgcnt(mp, rec, notify);
105
106	/* Continue the rmap query if the inode isn't a dax file. */
107	if (dax_mapping(mapping))
108		error = mf_dax_kill_procs(mapping, pgoff, pgcnt,
109					  notify->mf_flags);
110
111	/* Invalidate the cache in dax pages. */
112	if (notify->mf_flags & MF_MEM_PRE_REMOVE)
113		invalidate_inode_pages2_range(mapping, pgoff,
114					      pgoff + pgcnt - 1);
115
116	xfs_irele(ip);
117	return error;
118}
119
120static int
121xfs_dax_notify_failure_freeze(
122	struct xfs_mount	*mp)
123{
124	struct super_block	*sb = mp->m_super;
125	int			error;
126
127	error = freeze_super(sb, FREEZE_HOLDER_KERNEL);
128	if (error)
129		xfs_emerg(mp, "already frozen by kernel, err=%d", error);
130
131	return error;
132}
133
134static void
135xfs_dax_notify_failure_thaw(
136	struct xfs_mount	*mp,
137	bool			kernel_frozen)
138{
139	struct super_block	*sb = mp->m_super;
140	int			error;
141
142	if (kernel_frozen) {
143		error = thaw_super(sb, FREEZE_HOLDER_KERNEL);
144		if (error)
145			xfs_emerg(mp, "still frozen after notify failure, err=%d",
146				error);
147	}
148
149	/*
150	 * Also thaw userspace call anyway because the device is about to be
151	 * removed immediately.
152	 */
153	thaw_super(sb, FREEZE_HOLDER_USERSPACE);
154}
155
156static int
157xfs_dax_translate_range(
158	struct xfs_buftarg	*btp,
159	u64			offset,
160	u64			len,
161	xfs_daddr_t		*daddr,
162	uint64_t		*bblen)
163{
164	u64			dev_start = btp->bt_dax_part_off;
165	u64			dev_len = bdev_nr_bytes(btp->bt_bdev);
166	u64			dev_end = dev_start + dev_len - 1;
167
168	/* Notify failure on the whole device. */
169	if (offset == 0 && len == U64_MAX) {
170		offset = dev_start;
171		len = dev_len;
172	}
173
174	/* Ignore the range out of filesystem area */
175	if (offset + len - 1 < dev_start)
176		return -ENXIO;
177	if (offset > dev_end)
178		return -ENXIO;
179
180	/* Calculate the real range when it touches the boundary */
181	if (offset > dev_start)
182		offset -= dev_start;
183	else {
184		len -= dev_start - offset;
185		offset = 0;
186	}
187	if (offset + len - 1 > dev_end)
188		len = dev_end - offset + 1;
189
190	*daddr = BTOBB(offset);
191	*bblen = BTOBB(len);
192	return 0;
193}
194
195static int
196xfs_dax_notify_logdev_failure(
197	struct xfs_mount	*mp,
198	u64			offset,
199	u64			len,
200	int			mf_flags)
201{
202	xfs_daddr_t		daddr;
203	uint64_t		bblen;
204	int			error;
205
206	/*
207	 * Return ENXIO instead of shutting down the filesystem if the failed
208	 * region is beyond the end of the log.
209	 */
210	error = xfs_dax_translate_range(mp->m_logdev_targp,
211			offset, len, &daddr, &bblen);
212	if (error)
213		return error;
214
215	/*
216	 * In the pre-remove case the failure notification is attempting to
217	 * trigger a force unmount.  The expectation is that the device is
218	 * still present, but its removal is in progress and can not be
219	 * cancelled, proceed with accessing the log device.
220	 */
221	if (mf_flags & MF_MEM_PRE_REMOVE)
222		return 0;
223
224	xfs_err(mp, "ondisk log corrupt, shutting down fs!");
225	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK);
226	return -EFSCORRUPTED;
227}
228
229static int
230xfs_dax_notify_ddev_failure(
231	struct xfs_mount	*mp,
232	xfs_daddr_t		daddr,
233	xfs_daddr_t		bblen,
234	int			mf_flags)
235{
236	struct xfs_failure_info	notify = { .mf_flags = mf_flags };
237	struct xfs_trans	*tp = NULL;
238	struct xfs_btree_cur	*cur = NULL;
239	struct xfs_buf		*agf_bp = NULL;
240	int			error = 0;
241	bool			kernel_frozen = false;
242	xfs_fsblock_t		fsbno = XFS_DADDR_TO_FSB(mp, daddr);
243	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(mp, fsbno);
244	xfs_fsblock_t		end_fsbno = XFS_DADDR_TO_FSB(mp,
245							     daddr + bblen - 1);
246	xfs_agnumber_t		end_agno = XFS_FSB_TO_AGNO(mp, end_fsbno);
247
248	if (mf_flags & MF_MEM_PRE_REMOVE) {
249		xfs_info(mp, "Device is about to be removed!");
250		/*
251		 * Freeze fs to prevent new mappings from being created.
252		 * - Keep going on if others already hold the kernel forzen.
253		 * - Keep going on if other errors too because this device is
254		 *   starting to fail.
255		 * - If kernel frozen state is hold successfully here, thaw it
256		 *   here as well at the end.
257		 */
258		kernel_frozen = xfs_dax_notify_failure_freeze(mp) == 0;
259	}
260
261	error = xfs_trans_alloc_empty(mp, &tp);
262	if (error)
263		goto out;
264
265	for (; agno <= end_agno; agno++) {
266		struct xfs_rmap_irec	ri_low = { };
267		struct xfs_rmap_irec	ri_high;
268		struct xfs_agf		*agf;
269		struct xfs_perag	*pag;
270		xfs_agblock_t		range_agend;
271
272		pag = xfs_perag_get(mp, agno);
273		error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
274		if (error) {
275			xfs_perag_put(pag);
276			break;
277		}
278
279		cur = xfs_rmapbt_init_cursor(mp, tp, agf_bp, pag);
280
281		/*
282		 * Set the rmap range from ri_low to ri_high, which represents
283		 * a [start, end] where we looking for the files or metadata.
284		 */
285		memset(&ri_high, 0xFF, sizeof(ri_high));
286		ri_low.rm_startblock = XFS_FSB_TO_AGBNO(mp, fsbno);
287		if (agno == end_agno)
288			ri_high.rm_startblock = XFS_FSB_TO_AGBNO(mp, end_fsbno);
289
290		agf = agf_bp->b_addr;
291		range_agend = min(be32_to_cpu(agf->agf_length) - 1,
292				ri_high.rm_startblock);
293		notify.startblock = ri_low.rm_startblock;
294		notify.blockcount = range_agend + 1 - ri_low.rm_startblock;
295
296		error = xfs_rmap_query_range(cur, &ri_low, &ri_high,
297				xfs_dax_failure_fn, &notify);
298		xfs_btree_del_cursor(cur, error);
299		xfs_trans_brelse(tp, agf_bp);
300		xfs_perag_put(pag);
301		if (error)
302			break;
303
304		fsbno = XFS_AGB_TO_FSB(mp, agno + 1, 0);
305	}
306
307	xfs_trans_cancel(tp);
308
309	/*
310	 * Shutdown fs from a force umount in pre-remove case which won't fail,
311	 * so errors can be ignored.  Otherwise, shutdown the filesystem with
312	 * CORRUPT flag if error occured or notify.want_shutdown was set during
313	 * RMAP querying.
314	 */
315	if (mf_flags & MF_MEM_PRE_REMOVE)
316		xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
317	else if (error || notify.want_shutdown) {
318		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK);
319		if (!error)
320			error = -EFSCORRUPTED;
321	}
322
323out:
324	/* Thaw the fs if it has been frozen before. */
325	if (mf_flags & MF_MEM_PRE_REMOVE)
326		xfs_dax_notify_failure_thaw(mp, kernel_frozen);
327
328	return error;
329}
330
331static int
332xfs_dax_notify_failure(
333	struct dax_device	*dax_dev,
334	u64			offset,
335	u64			len,
336	int			mf_flags)
337{
338	struct xfs_mount	*mp = dax_holder(dax_dev);
339	xfs_daddr_t		daddr;
340	uint64_t		bblen;
341	int			error;
342
343	if (!(mp->m_super->s_flags & SB_BORN)) {
344		xfs_warn(mp, "filesystem is not ready for notify_failure()!");
345		return -EIO;
346	}
347
348	if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_daxdev == dax_dev) {
349		xfs_debug(mp,
350			 "notify_failure() not supported on realtime device!");
351		return -EOPNOTSUPP;
352	}
353
354	if (mp->m_logdev_targp && mp->m_logdev_targp->bt_daxdev == dax_dev &&
355	    mp->m_logdev_targp != mp->m_ddev_targp) {
356		return xfs_dax_notify_logdev_failure(mp, offset, len, mf_flags);
357	}
358
359	if (!xfs_has_rmapbt(mp)) {
360		xfs_debug(mp, "notify_failure() needs rmapbt enabled!");
361		return -EOPNOTSUPP;
362	}
363
364	error = xfs_dax_translate_range(mp->m_ddev_targp, offset, len, &daddr,
365			&bblen);
366	if (error)
367		return error;
368
369	return xfs_dax_notify_ddev_failure(mp, daddr, bblen, mf_flags);
370}
371
372const struct dax_holder_operations xfs_dax_holder_operations = {
373	.notify_failure		= xfs_dax_notify_failure,
374};