Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.5.6.
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 * multipath.c : Multiple Devices driver for Linux
  4 *
  5 * Copyright (C) 1999, 2000, 2001 Ingo Molnar, Red Hat
  6 *
  7 * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman
  8 *
  9 * MULTIPATH management functions.
 10 *
 11 * derived from raid1.c.
 12 */
 13
 14#include <linux/blkdev.h>
 15#include <linux/module.h>
 16#include <linux/raid/md_u.h>
 17#include <linux/seq_file.h>
 18#include <linux/slab.h>
 19#include "md.h"
 20#include "md-multipath.h"
 21
 22#define MAX_WORK_PER_DISK 128
 23
 24#define	NR_RESERVED_BUFS	32
 25
 26static int multipath_map (struct mpconf *conf)
 27{
 28	int i, disks = conf->raid_disks;
 29
 30	/*
 31	 * Later we do read balancing on the read side
 32	 * now we use the first available disk.
 33	 */
 34
 35	rcu_read_lock();
 36	for (i = 0; i < disks; i++) {
 37		struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
 38		if (rdev && test_bit(In_sync, &rdev->flags) &&
 39		    !test_bit(Faulty, &rdev->flags)) {
 40			atomic_inc(&rdev->nr_pending);
 41			rcu_read_unlock();
 42			return i;
 43		}
 44	}
 45	rcu_read_unlock();
 46
 47	pr_crit_ratelimited("multipath_map(): no more operational IO paths?\n");
 48	return (-1);
 49}
 50
 51static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
 52{
 53	unsigned long flags;
 54	struct mddev *mddev = mp_bh->mddev;
 55	struct mpconf *conf = mddev->private;
 56
 57	spin_lock_irqsave(&conf->device_lock, flags);
 58	list_add(&mp_bh->retry_list, &conf->retry_list);
 59	spin_unlock_irqrestore(&conf->device_lock, flags);
 60	md_wakeup_thread(mddev->thread);
 61}
 62
 63/*
 64 * multipath_end_bh_io() is called when we have finished servicing a multipathed
 65 * operation and are ready to return a success/failure code to the buffer
 66 * cache layer.
 67 */
 68static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
 69{
 70	struct bio *bio = mp_bh->master_bio;
 71	struct mpconf *conf = mp_bh->mddev->private;
 72
 73	bio->bi_status = status;
 74	bio_endio(bio);
 75	mempool_free(mp_bh, &conf->pool);
 76}
 77
 78static void multipath_end_request(struct bio *bio)
 79{
 80	struct multipath_bh *mp_bh = bio->bi_private;
 81	struct mpconf *conf = mp_bh->mddev->private;
 82	struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev;
 83
 84	if (!bio->bi_status)
 85		multipath_end_bh_io(mp_bh, 0);
 86	else if (!(bio->bi_opf & REQ_RAHEAD)) {
 87		/*
 88		 * oops, IO error:
 89		 */
 90		md_error (mp_bh->mddev, rdev);
 91		pr_info("multipath: %pg: rescheduling sector %llu\n",
 92			rdev->bdev,
 93			(unsigned long long)bio->bi_iter.bi_sector);
 94		multipath_reschedule_retry(mp_bh);
 95	} else
 96		multipath_end_bh_io(mp_bh, bio->bi_status);
 97	rdev_dec_pending(rdev, conf->mddev);
 98}
 99
100static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
101{
102	struct mpconf *conf = mddev->private;
103	struct multipath_bh * mp_bh;
104	struct multipath_info *multipath;
105
106	if (unlikely(bio->bi_opf & REQ_PREFLUSH)
107	    && md_flush_request(mddev, bio))
108		return true;
109
110	mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);
111
112	mp_bh->master_bio = bio;
113	mp_bh->mddev = mddev;
114
115	mp_bh->path = multipath_map(conf);
116	if (mp_bh->path < 0) {
117		bio_io_error(bio);
118		mempool_free(mp_bh, &conf->pool);
119		return true;
120	}
121	multipath = conf->multipaths + mp_bh->path;
122
123	bio_init_clone(multipath->rdev->bdev, &mp_bh->bio, bio, GFP_NOIO);
124
125	mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
126	mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT;
127	mp_bh->bio.bi_end_io = multipath_end_request;
128	mp_bh->bio.bi_private = mp_bh;
129	mddev_check_write_zeroes(mddev, &mp_bh->bio);
130	submit_bio_noacct(&mp_bh->bio);
131	return true;
132}
133
134static void multipath_status(struct seq_file *seq, struct mddev *mddev)
135{
136	struct mpconf *conf = mddev->private;
137	int i;
138
139	seq_printf (seq, " [%d/%d] [", conf->raid_disks,
140		    conf->raid_disks - mddev->degraded);
141	rcu_read_lock();
142	for (i = 0; i < conf->raid_disks; i++) {
143		struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
144		seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
145	}
146	rcu_read_unlock();
147	seq_putc(seq, ']');
148}
149
150/*
151 * Careful, this can execute in IRQ contexts as well!
152 */
153static void multipath_error (struct mddev *mddev, struct md_rdev *rdev)
154{
155	struct mpconf *conf = mddev->private;
156
157	if (conf->raid_disks - mddev->degraded <= 1) {
158		/*
159		 * Uh oh, we can do nothing if this is our last path, but
160		 * first check if this is a queued request for a device
161		 * which has just failed.
162		 */
163		pr_warn("multipath: only one IO path left and IO error.\n");
164		/* leave it active... it's all we have */
165		return;
166	}
167	/*
168	 * Mark disk as unusable
169	 */
170	if (test_and_clear_bit(In_sync, &rdev->flags)) {
171		unsigned long flags;
172		spin_lock_irqsave(&conf->device_lock, flags);
173		mddev->degraded++;
174		spin_unlock_irqrestore(&conf->device_lock, flags);
175	}
176	set_bit(Faulty, &rdev->flags);
177	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
178	pr_err("multipath: IO failure on %pg, disabling IO path.\n"
179	       "multipath: Operation continuing on %d IO paths.\n",
180	       rdev->bdev,
181	       conf->raid_disks - mddev->degraded);
182}
183
184static void print_multipath_conf (struct mpconf *conf)
185{
186	int i;
187	struct multipath_info *tmp;
188
189	pr_debug("MULTIPATH conf printout:\n");
190	if (!conf) {
191		pr_debug("(conf==NULL)\n");
192		return;
193	}
194	pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
195		 conf->raid_disks);
196
197	for (i = 0; i < conf->raid_disks; i++) {
198		tmp = conf->multipaths + i;
199		if (tmp->rdev)
200			pr_debug(" disk%d, o:%d, dev:%pg\n",
201				 i,!test_bit(Faulty, &tmp->rdev->flags),
202				 tmp->rdev->bdev);
203	}
204}
205
206static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev)
207{
208	struct mpconf *conf = mddev->private;
209	int err = -EEXIST;
210	int path;
211	struct multipath_info *p;
212	int first = 0;
213	int last = mddev->raid_disks - 1;
214
215	if (rdev->raid_disk >= 0)
216		first = last = rdev->raid_disk;
217
218	print_multipath_conf(conf);
219
220	for (path = first; path <= last; path++)
221		if ((p=conf->multipaths+path)->rdev == NULL) {
222			disk_stack_limits(mddev->gendisk, rdev->bdev,
223					  rdev->data_offset << 9);
224
225			err = md_integrity_add_rdev(rdev, mddev);
226			if (err)
227				break;
228			spin_lock_irq(&conf->device_lock);
229			mddev->degraded--;
230			rdev->raid_disk = path;
231			set_bit(In_sync, &rdev->flags);
232			spin_unlock_irq(&conf->device_lock);
233			rcu_assign_pointer(p->rdev, rdev);
234			err = 0;
235			break;
236		}
237
238	print_multipath_conf(conf);
239
240	return err;
241}
242
243static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
244{
245	struct mpconf *conf = mddev->private;
246	int err = 0;
247	int number = rdev->raid_disk;
248	struct multipath_info *p = conf->multipaths + number;
249
250	print_multipath_conf(conf);
251
252	if (rdev == p->rdev) {
253		if (test_bit(In_sync, &rdev->flags) ||
254		    atomic_read(&rdev->nr_pending)) {
255			pr_warn("hot-remove-disk, slot %d is identified but is still operational!\n", number);
256			err = -EBUSY;
257			goto abort;
258		}
259		p->rdev = NULL;
260		if (!test_bit(RemoveSynchronized, &rdev->flags)) {
261			synchronize_rcu();
262			if (atomic_read(&rdev->nr_pending)) {
263				/* lost the race, try later */
264				err = -EBUSY;
265				p->rdev = rdev;
266				goto abort;
267			}
268		}
269		err = md_integrity_register(mddev);
270	}
271abort:
272
273	print_multipath_conf(conf);
274	return err;
275}
276
277/*
278 * This is a kernel thread which:
279 *
280 *	1.	Retries failed read operations on working multipaths.
281 *	2.	Updates the raid superblock when problems encounter.
282 *	3.	Performs writes following reads for array syncronising.
283 */
284
285static void multipathd(struct md_thread *thread)
286{
287	struct mddev *mddev = thread->mddev;
288	struct multipath_bh *mp_bh;
289	struct bio *bio;
290	unsigned long flags;
291	struct mpconf *conf = mddev->private;
292	struct list_head *head = &conf->retry_list;
293
294	md_check_recovery(mddev);
295	for (;;) {
296		spin_lock_irqsave(&conf->device_lock, flags);
297		if (list_empty(head))
298			break;
299		mp_bh = list_entry(head->prev, struct multipath_bh, retry_list);
300		list_del(head->prev);
301		spin_unlock_irqrestore(&conf->device_lock, flags);
302
303		bio = &mp_bh->bio;
304		bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector;
305
306		if ((mp_bh->path = multipath_map (conf))<0) {
307			pr_err("multipath: %pg: unrecoverable IO read error for block %llu\n",
308			       bio->bi_bdev,
309			       (unsigned long long)bio->bi_iter.bi_sector);
310			multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
311		} else {
312			pr_err("multipath: %pg: redirecting sector %llu to another IO path\n",
313			       bio->bi_bdev,
314			       (unsigned long long)bio->bi_iter.bi_sector);
315			*bio = *(mp_bh->master_bio);
316			bio->bi_iter.bi_sector +=
317				conf->multipaths[mp_bh->path].rdev->data_offset;
318			bio_set_dev(bio, conf->multipaths[mp_bh->path].rdev->bdev);
319			bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
320			bio->bi_end_io = multipath_end_request;
321			bio->bi_private = mp_bh;
322			submit_bio_noacct(bio);
323		}
324	}
325	spin_unlock_irqrestore(&conf->device_lock, flags);
326}
327
328static sector_t multipath_size(struct mddev *mddev, sector_t sectors, int raid_disks)
329{
330	WARN_ONCE(sectors || raid_disks,
331		  "%s does not support generic reshape\n", __func__);
332
333	return mddev->dev_sectors;
334}
335
336static int multipath_run (struct mddev *mddev)
337{
338	struct mpconf *conf;
339	int disk_idx;
340	struct multipath_info *disk;
341	struct md_rdev *rdev;
342	int working_disks;
343	int ret;
344
345	if (md_check_no_bitmap(mddev))
346		return -EINVAL;
347
348	if (mddev->level != LEVEL_MULTIPATH) {
349		pr_warn("multipath: %s: raid level not set to multipath IO (%d)\n",
350			mdname(mddev), mddev->level);
351		goto out;
352	}
353	/*
354	 * copy the already verified devices into our private MULTIPATH
355	 * bookkeeping area. [whatever we allocate in multipath_run(),
356	 * should be freed in multipath_free()]
357	 */
358
359	conf = kzalloc(sizeof(struct mpconf), GFP_KERNEL);
360	mddev->private = conf;
361	if (!conf)
362		goto out;
363
364	conf->multipaths = kcalloc(mddev->raid_disks,
365				   sizeof(struct multipath_info),
366				   GFP_KERNEL);
367	if (!conf->multipaths)
368		goto out_free_conf;
369
370	working_disks = 0;
371	rdev_for_each(rdev, mddev) {
372		disk_idx = rdev->raid_disk;
373		if (disk_idx < 0 ||
374		    disk_idx >= mddev->raid_disks)
375			continue;
376
377		disk = conf->multipaths + disk_idx;
378		disk->rdev = rdev;
379		disk_stack_limits(mddev->gendisk, rdev->bdev,
380				  rdev->data_offset << 9);
381
382		if (!test_bit(Faulty, &rdev->flags))
383			working_disks++;
384	}
385
386	conf->raid_disks = mddev->raid_disks;
387	conf->mddev = mddev;
388	spin_lock_init(&conf->device_lock);
389	INIT_LIST_HEAD(&conf->retry_list);
390
391	if (!working_disks) {
392		pr_warn("multipath: no operational IO paths for %s\n",
393			mdname(mddev));
394		goto out_free_conf;
395	}
396	mddev->degraded = conf->raid_disks - working_disks;
397
398	ret = mempool_init_kmalloc_pool(&conf->pool, NR_RESERVED_BUFS,
399					sizeof(struct multipath_bh));
400	if (ret)
401		goto out_free_conf;
402
403	mddev->thread = md_register_thread(multipathd, mddev,
404					   "multipath");
405	if (!mddev->thread)
406		goto out_free_conf;
407
408	pr_info("multipath: array %s active with %d out of %d IO paths\n",
409		mdname(mddev), conf->raid_disks - mddev->degraded,
410		mddev->raid_disks);
411	/*
412	 * Ok, everything is just fine now
413	 */
414	md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));
415
416	if (md_integrity_register(mddev))
417		goto out_free_conf;
418
419	return 0;
420
421out_free_conf:
422	mempool_exit(&conf->pool);
423	kfree(conf->multipaths);
424	kfree(conf);
425	mddev->private = NULL;
426out:
427	return -EIO;
428}
429
430static void multipath_free(struct mddev *mddev, void *priv)
431{
432	struct mpconf *conf = priv;
433
434	mempool_exit(&conf->pool);
435	kfree(conf->multipaths);
436	kfree(conf);
437}
438
439static struct md_personality multipath_personality =
440{
441	.name		= "multipath",
442	.level		= LEVEL_MULTIPATH,
443	.owner		= THIS_MODULE,
444	.make_request	= multipath_make_request,
445	.run		= multipath_run,
446	.free		= multipath_free,
447	.status		= multipath_status,
448	.error_handler	= multipath_error,
449	.hot_add_disk	= multipath_add_disk,
450	.hot_remove_disk= multipath_remove_disk,
451	.size		= multipath_size,
452};
453
454static int __init multipath_init (void)
455{
456	return register_md_personality (&multipath_personality);
457}
458
459static void __exit multipath_exit (void)
460{
461	unregister_md_personality (&multipath_personality);
462}
463
464module_init(multipath_init);
465module_exit(multipath_exit);
466MODULE_LICENSE("GPL");
467MODULE_DESCRIPTION("simple multi-path personality for MD (deprecated)");
468MODULE_ALIAS("md-personality-7"); /* MULTIPATH */
469MODULE_ALIAS("md-multipath");
470MODULE_ALIAS("md-level--4");