Linux Audio

Check our new training course

Loading...
Note: File does not exist in v6.8.
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 * multipath.c : Multiple Devices driver for Linux
  4 *
  5 * Copyright (C) 1999, 2000, 2001 Ingo Molnar, Red Hat
  6 *
  7 * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman
  8 *
  9 * MULTIPATH management functions.
 10 *
 11 * derived from raid1.c.
 12 */
 13
 14#include <linux/blkdev.h>
 15#include <linux/module.h>
 16#include <linux/raid/md_u.h>
 17#include <linux/seq_file.h>
 18#include <linux/slab.h>
 19#include "md.h"
 20#include "md-multipath.h"
 21
 22#define MAX_WORK_PER_DISK 128
 23
 24#define	NR_RESERVED_BUFS	32
 25
 26static int multipath_map (struct mpconf *conf)
 27{
 28	int i, disks = conf->raid_disks;
 29
 30	/*
 31	 * Later we do read balancing on the read side
 32	 * now we use the first available disk.
 33	 */
 34
 35	rcu_read_lock();
 36	for (i = 0; i < disks; i++) {
 37		struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
 38		if (rdev && test_bit(In_sync, &rdev->flags) &&
 39		    !test_bit(Faulty, &rdev->flags)) {
 40			atomic_inc(&rdev->nr_pending);
 41			rcu_read_unlock();
 42			return i;
 43		}
 44	}
 45	rcu_read_unlock();
 46
 47	pr_crit_ratelimited("multipath_map(): no more operational IO paths?\n");
 48	return (-1);
 49}
 50
 51static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
 52{
 53	unsigned long flags;
 54	struct mddev *mddev = mp_bh->mddev;
 55	struct mpconf *conf = mddev->private;
 56
 57	spin_lock_irqsave(&conf->device_lock, flags);
 58	list_add(&mp_bh->retry_list, &conf->retry_list);
 59	spin_unlock_irqrestore(&conf->device_lock, flags);
 60	md_wakeup_thread(mddev->thread);
 61}
 62
 63/*
 64 * multipath_end_bh_io() is called when we have finished servicing a multipathed
 65 * operation and are ready to return a success/failure code to the buffer
 66 * cache layer.
 67 */
 68static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
 69{
 70	struct bio *bio = mp_bh->master_bio;
 71	struct mpconf *conf = mp_bh->mddev->private;
 72
 73	bio->bi_status = status;
 74	bio_endio(bio);
 75	mempool_free(mp_bh, &conf->pool);
 76}
 77
 78static void multipath_end_request(struct bio *bio)
 79{
 80	struct multipath_bh *mp_bh = bio->bi_private;
 81	struct mpconf *conf = mp_bh->mddev->private;
 82	struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev;
 83
 84	if (!bio->bi_status)
 85		multipath_end_bh_io(mp_bh, 0);
 86	else if (!(bio->bi_opf & REQ_RAHEAD)) {
 87		/*
 88		 * oops, IO error:
 89		 */
 90		char b[BDEVNAME_SIZE];
 91		md_error (mp_bh->mddev, rdev);
 92		pr_info("multipath: %s: rescheduling sector %llu\n",
 93			bdevname(rdev->bdev,b),
 94			(unsigned long long)bio->bi_iter.bi_sector);
 95		multipath_reschedule_retry(mp_bh);
 96	} else
 97		multipath_end_bh_io(mp_bh, bio->bi_status);
 98	rdev_dec_pending(rdev, conf->mddev);
 99}
100
101static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
102{
103	struct mpconf *conf = mddev->private;
104	struct multipath_bh * mp_bh;
105	struct multipath_info *multipath;
106
107	if (unlikely(bio->bi_opf & REQ_PREFLUSH)
108	    && md_flush_request(mddev, bio))
109		return true;
110
111	mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);
112
113	mp_bh->master_bio = bio;
114	mp_bh->mddev = mddev;
115
116	mp_bh->path = multipath_map(conf);
117	if (mp_bh->path < 0) {
118		bio_io_error(bio);
119		mempool_free(mp_bh, &conf->pool);
120		return true;
121	}
122	multipath = conf->multipaths + mp_bh->path;
123
124	bio_init(&mp_bh->bio, NULL, 0);
125	__bio_clone_fast(&mp_bh->bio, bio);
126
127	mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
128	bio_set_dev(&mp_bh->bio, multipath->rdev->bdev);
129	mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT;
130	mp_bh->bio.bi_end_io = multipath_end_request;
131	mp_bh->bio.bi_private = mp_bh;
132	mddev_check_writesame(mddev, &mp_bh->bio);
133	mddev_check_write_zeroes(mddev, &mp_bh->bio);
134	submit_bio_noacct(&mp_bh->bio);
135	return true;
136}
137
138static void multipath_status(struct seq_file *seq, struct mddev *mddev)
139{
140	struct mpconf *conf = mddev->private;
141	int i;
142
143	seq_printf (seq, " [%d/%d] [", conf->raid_disks,
144		    conf->raid_disks - mddev->degraded);
145	rcu_read_lock();
146	for (i = 0; i < conf->raid_disks; i++) {
147		struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
148		seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
149	}
150	rcu_read_unlock();
151	seq_putc(seq, ']');
152}
153
154/*
155 * Careful, this can execute in IRQ contexts as well!
156 */
157static void multipath_error (struct mddev *mddev, struct md_rdev *rdev)
158{
159	struct mpconf *conf = mddev->private;
160	char b[BDEVNAME_SIZE];
161
162	if (conf->raid_disks - mddev->degraded <= 1) {
163		/*
164		 * Uh oh, we can do nothing if this is our last path, but
165		 * first check if this is a queued request for a device
166		 * which has just failed.
167		 */
168		pr_warn("multipath: only one IO path left and IO error.\n");
169		/* leave it active... it's all we have */
170		return;
171	}
172	/*
173	 * Mark disk as unusable
174	 */
175	if (test_and_clear_bit(In_sync, &rdev->flags)) {
176		unsigned long flags;
177		spin_lock_irqsave(&conf->device_lock, flags);
178		mddev->degraded++;
179		spin_unlock_irqrestore(&conf->device_lock, flags);
180	}
181	set_bit(Faulty, &rdev->flags);
182	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
183	pr_err("multipath: IO failure on %s, disabling IO path.\n"
184	       "multipath: Operation continuing on %d IO paths.\n",
185	       bdevname(rdev->bdev, b),
186	       conf->raid_disks - mddev->degraded);
187}
188
189static void print_multipath_conf (struct mpconf *conf)
190{
191	int i;
192	struct multipath_info *tmp;
193
194	pr_debug("MULTIPATH conf printout:\n");
195	if (!conf) {
196		pr_debug("(conf==NULL)\n");
197		return;
198	}
199	pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
200		 conf->raid_disks);
201
202	for (i = 0; i < conf->raid_disks; i++) {
203		char b[BDEVNAME_SIZE];
204		tmp = conf->multipaths + i;
205		if (tmp->rdev)
206			pr_debug(" disk%d, o:%d, dev:%s\n",
207				 i,!test_bit(Faulty, &tmp->rdev->flags),
208				 bdevname(tmp->rdev->bdev,b));
209	}
210}
211
212static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev)
213{
214	struct mpconf *conf = mddev->private;
215	int err = -EEXIST;
216	int path;
217	struct multipath_info *p;
218	int first = 0;
219	int last = mddev->raid_disks - 1;
220
221	if (rdev->raid_disk >= 0)
222		first = last = rdev->raid_disk;
223
224	print_multipath_conf(conf);
225
226	for (path = first; path <= last; path++)
227		if ((p=conf->multipaths+path)->rdev == NULL) {
228			disk_stack_limits(mddev->gendisk, rdev->bdev,
229					  rdev->data_offset << 9);
230
231			err = md_integrity_add_rdev(rdev, mddev);
232			if (err)
233				break;
234			spin_lock_irq(&conf->device_lock);
235			mddev->degraded--;
236			rdev->raid_disk = path;
237			set_bit(In_sync, &rdev->flags);
238			spin_unlock_irq(&conf->device_lock);
239			rcu_assign_pointer(p->rdev, rdev);
240			err = 0;
241			break;
242		}
243
244	print_multipath_conf(conf);
245
246	return err;
247}
248
249static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
250{
251	struct mpconf *conf = mddev->private;
252	int err = 0;
253	int number = rdev->raid_disk;
254	struct multipath_info *p = conf->multipaths + number;
255
256	print_multipath_conf(conf);
257
258	if (rdev == p->rdev) {
259		if (test_bit(In_sync, &rdev->flags) ||
260		    atomic_read(&rdev->nr_pending)) {
261			pr_warn("hot-remove-disk, slot %d is identified but is still operational!\n", number);
262			err = -EBUSY;
263			goto abort;
264		}
265		p->rdev = NULL;
266		if (!test_bit(RemoveSynchronized, &rdev->flags)) {
267			synchronize_rcu();
268			if (atomic_read(&rdev->nr_pending)) {
269				/* lost the race, try later */
270				err = -EBUSY;
271				p->rdev = rdev;
272				goto abort;
273			}
274		}
275		err = md_integrity_register(mddev);
276	}
277abort:
278
279	print_multipath_conf(conf);
280	return err;
281}
282
283/*
284 * This is a kernel thread which:
285 *
286 *	1.	Retries failed read operations on working multipaths.
287 *	2.	Updates the raid superblock when problems encounter.
288 *	3.	Performs writes following reads for array syncronising.
289 */
290
291static void multipathd(struct md_thread *thread)
292{
293	struct mddev *mddev = thread->mddev;
294	struct multipath_bh *mp_bh;
295	struct bio *bio;
296	unsigned long flags;
297	struct mpconf *conf = mddev->private;
298	struct list_head *head = &conf->retry_list;
299
300	md_check_recovery(mddev);
301	for (;;) {
302		char b[BDEVNAME_SIZE];
303		spin_lock_irqsave(&conf->device_lock, flags);
304		if (list_empty(head))
305			break;
306		mp_bh = list_entry(head->prev, struct multipath_bh, retry_list);
307		list_del(head->prev);
308		spin_unlock_irqrestore(&conf->device_lock, flags);
309
310		bio = &mp_bh->bio;
311		bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector;
312
313		if ((mp_bh->path = multipath_map (conf))<0) {
314			pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
315			       bio_devname(bio, b),
316			       (unsigned long long)bio->bi_iter.bi_sector);
317			multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
318		} else {
319			pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
320			       bio_devname(bio, b),
321			       (unsigned long long)bio->bi_iter.bi_sector);
322			*bio = *(mp_bh->master_bio);
323			bio->bi_iter.bi_sector +=
324				conf->multipaths[mp_bh->path].rdev->data_offset;
325			bio_set_dev(bio, conf->multipaths[mp_bh->path].rdev->bdev);
326			bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
327			bio->bi_end_io = multipath_end_request;
328			bio->bi_private = mp_bh;
329			submit_bio_noacct(bio);
330		}
331	}
332	spin_unlock_irqrestore(&conf->device_lock, flags);
333}
334
335static sector_t multipath_size(struct mddev *mddev, sector_t sectors, int raid_disks)
336{
337	WARN_ONCE(sectors || raid_disks,
338		  "%s does not support generic reshape\n", __func__);
339
340	return mddev->dev_sectors;
341}
342
343static int multipath_run (struct mddev *mddev)
344{
345	struct mpconf *conf;
346	int disk_idx;
347	struct multipath_info *disk;
348	struct md_rdev *rdev;
349	int working_disks;
350	int ret;
351
352	if (md_check_no_bitmap(mddev))
353		return -EINVAL;
354
355	if (mddev->level != LEVEL_MULTIPATH) {
356		pr_warn("multipath: %s: raid level not set to multipath IO (%d)\n",
357			mdname(mddev), mddev->level);
358		goto out;
359	}
360	/*
361	 * copy the already verified devices into our private MULTIPATH
362	 * bookkeeping area. [whatever we allocate in multipath_run(),
363	 * should be freed in multipath_free()]
364	 */
365
366	conf = kzalloc(sizeof(struct mpconf), GFP_KERNEL);
367	mddev->private = conf;
368	if (!conf)
369		goto out;
370
371	conf->multipaths = kcalloc(mddev->raid_disks,
372				   sizeof(struct multipath_info),
373				   GFP_KERNEL);
374	if (!conf->multipaths)
375		goto out_free_conf;
376
377	working_disks = 0;
378	rdev_for_each(rdev, mddev) {
379		disk_idx = rdev->raid_disk;
380		if (disk_idx < 0 ||
381		    disk_idx >= mddev->raid_disks)
382			continue;
383
384		disk = conf->multipaths + disk_idx;
385		disk->rdev = rdev;
386		disk_stack_limits(mddev->gendisk, rdev->bdev,
387				  rdev->data_offset << 9);
388
389		if (!test_bit(Faulty, &rdev->flags))
390			working_disks++;
391	}
392
393	conf->raid_disks = mddev->raid_disks;
394	conf->mddev = mddev;
395	spin_lock_init(&conf->device_lock);
396	INIT_LIST_HEAD(&conf->retry_list);
397
398	if (!working_disks) {
399		pr_warn("multipath: no operational IO paths for %s\n",
400			mdname(mddev));
401		goto out_free_conf;
402	}
403	mddev->degraded = conf->raid_disks - working_disks;
404
405	ret = mempool_init_kmalloc_pool(&conf->pool, NR_RESERVED_BUFS,
406					sizeof(struct multipath_bh));
407	if (ret)
408		goto out_free_conf;
409
410	mddev->thread = md_register_thread(multipathd, mddev,
411					   "multipath");
412	if (!mddev->thread)
413		goto out_free_conf;
414
415	pr_info("multipath: array %s active with %d out of %d IO paths\n",
416		mdname(mddev), conf->raid_disks - mddev->degraded,
417		mddev->raid_disks);
418	/*
419	 * Ok, everything is just fine now
420	 */
421	md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));
422
423	if (md_integrity_register(mddev))
424		goto out_free_conf;
425
426	return 0;
427
428out_free_conf:
429	mempool_exit(&conf->pool);
430	kfree(conf->multipaths);
431	kfree(conf);
432	mddev->private = NULL;
433out:
434	return -EIO;
435}
436
437static void multipath_free(struct mddev *mddev, void *priv)
438{
439	struct mpconf *conf = priv;
440
441	mempool_exit(&conf->pool);
442	kfree(conf->multipaths);
443	kfree(conf);
444}
445
446static struct md_personality multipath_personality =
447{
448	.name		= "multipath",
449	.level		= LEVEL_MULTIPATH,
450	.owner		= THIS_MODULE,
451	.make_request	= multipath_make_request,
452	.run		= multipath_run,
453	.free		= multipath_free,
454	.status		= multipath_status,
455	.error_handler	= multipath_error,
456	.hot_add_disk	= multipath_add_disk,
457	.hot_remove_disk= multipath_remove_disk,
458	.size		= multipath_size,
459};
460
461static int __init multipath_init (void)
462{
463	return register_md_personality (&multipath_personality);
464}
465
466static void __exit multipath_exit (void)
467{
468	unregister_md_personality (&multipath_personality);
469}
470
471module_init(multipath_init);
472module_exit(multipath_exit);
473MODULE_LICENSE("GPL");
474MODULE_DESCRIPTION("simple multi-path personality for MD (deprecated)");
475MODULE_ALIAS("md-personality-7"); /* MULTIPATH */
476MODULE_ALIAS("md-multipath");
477MODULE_ALIAS("md-level--4");