drivers/md/dm-switch.c (Linux v6.2)
/*
 * Copyright (C) 2010-2012 by Dell Inc.  All rights reserved.
 * Copyright (C) 2011-2013 Red Hat, Inc.
 *
 * This file is released under the GPL.
 *
 * dm-switch is a device-mapper target that maps IO to underlying block
 * devices efficiently when there are a large number of fixed-sized
 * address regions but there is no simple pattern to allow for a compact
 * mapping representation such as dm-stripe.
 */

#include <linux/device-mapper.h>

#include <linux/module.h>
#include <linux/init.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "switch"

/*
 * One region_table_slot_t holds <region_entries_per_slot> region table
 * entries each of which is <region_table_entry_bits> in size.
 */
typedef unsigned long region_table_slot_t;
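
/*
 * Worked example (added for illustration; not part of the original
 * source): with two paths, region_table_entry_bits is 1, so a 64-bit
 * region_table_slot_t packs region_entries_per_slot = 64 entries, and
 * the entry for region N lives in slot N / 64 at bit offset N % 64.
 */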

/*
 * A device with the offset to its start sector.
 */
struct switch_path {
	struct dm_dev *dmdev;
	sector_t start;
};

/*
 * Context block for a dm switch device.
 */
struct switch_ctx {
	struct dm_target *ti;

	unsigned nr_paths;		/* Number of paths in path_list. */

	unsigned region_size;		/* Region size in 512-byte sectors */
	unsigned long nr_regions;	/* Number of regions making up the device */
	signed char region_size_bits;	/* log2 of region_size or -1 */

	unsigned char region_table_entry_bits;	/* Number of bits in one region table entry */
	unsigned char region_entries_per_slot;	/* Number of entries in one region table slot */
	signed char region_entries_per_slot_bits;	/* log2 of region_entries_per_slot or -1 */

	region_table_slot_t *region_table;	/* Region table */

	/*
	 * Array of dm devices to switch between.
	 */
	struct switch_path path_list[];
};

static struct switch_ctx *alloc_switch_ctx(struct dm_target *ti, unsigned nr_paths,
					   unsigned region_size)
{
	struct switch_ctx *sctx;

	sctx = kzalloc(struct_size(sctx, path_list, nr_paths), GFP_KERNEL);
	if (!sctx)
		return NULL;

	sctx->ti = ti;
	sctx->region_size = region_size;

	ti->private = sctx;

	return sctx;
}

static int alloc_region_table(struct dm_target *ti, unsigned nr_paths)
{
	struct switch_ctx *sctx = ti->private;
	sector_t nr_regions = ti->len;
	sector_t nr_slots;

	if (!(sctx->region_size & (sctx->region_size - 1)))
		sctx->region_size_bits = __ffs(sctx->region_size);
	else
		sctx->region_size_bits = -1;

	sctx->region_table_entry_bits = 1;
	while (sctx->region_table_entry_bits < sizeof(region_table_slot_t) * 8 &&
	       (region_table_slot_t)1 << sctx->region_table_entry_bits < nr_paths)
		sctx->region_table_entry_bits++;

	sctx->region_entries_per_slot = (sizeof(region_table_slot_t) * 8) / sctx->region_table_entry_bits;
	if (!(sctx->region_entries_per_slot & (sctx->region_entries_per_slot - 1)))
		sctx->region_entries_per_slot_bits = __ffs(sctx->region_entries_per_slot);
	else
		sctx->region_entries_per_slot_bits = -1;

	if (sector_div(nr_regions, sctx->region_size))
		nr_regions++;

	if (nr_regions >= ULONG_MAX) {
		ti->error = "Region table too large";
		return -EINVAL;
	}
	sctx->nr_regions = nr_regions;

	nr_slots = nr_regions;
	if (sector_div(nr_slots, sctx->region_entries_per_slot))
		nr_slots++;

	if (nr_slots > ULONG_MAX / sizeof(region_table_slot_t)) {
		ti->error = "Region table too large";
		return -EINVAL;
	}

	sctx->region_table = vmalloc(array_size(nr_slots,
						sizeof(region_table_slot_t)));
	if (!sctx->region_table) {
		ti->error = "Cannot allocate region table";
		return -ENOMEM;
	}

	return 0;
}

static void switch_get_position(struct switch_ctx *sctx, unsigned long region_nr,
				unsigned long *region_index, unsigned *bit)
{
	if (sctx->region_entries_per_slot_bits >= 0) {
		*region_index = region_nr >> sctx->region_entries_per_slot_bits;
		*bit = region_nr & (sctx->region_entries_per_slot - 1);
	} else {
		*region_index = region_nr / sctx->region_entries_per_slot;
		*bit = region_nr % sctx->region_entries_per_slot;
	}

	*bit *= sctx->region_table_entry_bits;
}
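
/*
 * Worked example (added for illustration; not part of the original
 * source): with three paths, region_table_entry_bits is 2 and
 * region_entries_per_slot is 32 on a 64-bit machine, so region 100
 * yields *region_index = 100 / 32 = 3 and *bit = (100 % 32) * 2 = 8.
 */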

static unsigned switch_region_table_read(struct switch_ctx *sctx, unsigned long region_nr)
{
	unsigned long region_index;
	unsigned bit;

	switch_get_position(sctx, region_nr, &region_index, &bit);

	return (READ_ONCE(sctx->region_table[region_index]) >> bit) &
		((1 << sctx->region_table_entry_bits) - 1);
}

/*
 * Find which path to use at given offset.
 */
static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset)
{
	unsigned path_nr;
	sector_t p;

	p = offset;
	if (sctx->region_size_bits >= 0)
		p >>= sctx->region_size_bits;
	else
		sector_div(p, sctx->region_size);

	path_nr = switch_region_table_read(sctx, p);

	/* This can only happen if the processor uses non-atomic stores. */
	if (unlikely(path_nr >= sctx->nr_paths))
		path_nr = 0;

	return path_nr;
}

static void switch_region_table_write(struct switch_ctx *sctx, unsigned long region_nr,
				      unsigned value)
{
	unsigned long region_index;
	unsigned bit;
	region_table_slot_t pte;

	switch_get_position(sctx, region_nr, &region_index, &bit);

	pte = sctx->region_table[region_index];
	pte &= ~((((region_table_slot_t)1 << sctx->region_table_entry_bits) - 1) << bit);
	pte |= (region_table_slot_t)value << bit;
	sctx->region_table[region_index] = pte;
}

/*
 * Fill the region table with an initial round robin pattern.
 */
static void initialise_region_table(struct switch_ctx *sctx)
{
	unsigned path_nr = 0;
	unsigned long region_nr;

	for (region_nr = 0; region_nr < sctx->nr_regions; region_nr++) {
		switch_region_table_write(sctx, region_nr, path_nr);
		if (++path_nr >= sctx->nr_paths)
			path_nr = 0;
	}
}

static int parse_path(struct dm_arg_set *as, struct dm_target *ti)
{
	struct switch_ctx *sctx = ti->private;
	unsigned long long start;
	int r;

	r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
			  &sctx->path_list[sctx->nr_paths].dmdev);
	if (r) {
		ti->error = "Device lookup failed";
		return r;
	}

	if (kstrtoull(dm_shift_arg(as), 10, &start) || start != (sector_t)start) {
		ti->error = "Invalid device starting offset";
		dm_put_device(ti, sctx->path_list[sctx->nr_paths].dmdev);
		return -EINVAL;
	}

	sctx->path_list[sctx->nr_paths].start = start;

	sctx->nr_paths++;

	return 0;
}

/*
 * Destructor: Don't free the dm_target, just the ti->private data (if any).
 */
static void switch_dtr(struct dm_target *ti)
{
	struct switch_ctx *sctx = ti->private;

	while (sctx->nr_paths--)
		dm_put_device(ti, sctx->path_list[sctx->nr_paths].dmdev);

	vfree(sctx->region_table);
	kfree(sctx);
}

/*
 * Constructor arguments:
 *   <num_paths> <region_size> <num_optional_args> [<optional_args>...]
 *   [<dev_path> <offset>]+
 *
 * Optional args are to allow for future extension: currently this
 * parameter must be 0.
 */
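/*
 * Example (added for illustration; not part of the original source):
 * a two-path target covering 65536 sectors with 512-sector regions
 * could be created with something like:
 *
 *   dmsetup create sw --table "0 65536 switch 2 512 0 /dev/sdb 0 /dev/sdc 0"
 */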
static int switch_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	static const struct dm_arg _args[] = {
		{1, (KMALLOC_MAX_SIZE - sizeof(struct switch_ctx)) / sizeof(struct switch_path), "Invalid number of paths"},
		{1, UINT_MAX, "Invalid region size"},
		{0, 0, "Invalid number of optional args"},
	};

	struct switch_ctx *sctx;
	struct dm_arg_set as;
	unsigned nr_paths, region_size, nr_optional_args;
	int r;

	as.argc = argc;
	as.argv = argv;

	r = dm_read_arg(_args, &as, &nr_paths, &ti->error);
	if (r)
		return -EINVAL;

	r = dm_read_arg(_args + 1, &as, &region_size, &ti->error);
	if (r)
		return r;

	r = dm_read_arg_group(_args + 2, &as, &nr_optional_args, &ti->error);
	if (r)
		return r;
	/* parse optional arguments here, if we add any */

	if (as.argc != nr_paths * 2) {
		ti->error = "Incorrect number of path arguments";
		return -EINVAL;
	}

	sctx = alloc_switch_ctx(ti, nr_paths, region_size);
	if (!sctx) {
		ti->error = "Cannot allocate redirection context";
		return -ENOMEM;
	}

	r = dm_set_target_max_io_len(ti, region_size);
	if (r)
		goto error;

	while (as.argc) {
		r = parse_path(&as, ti);
		if (r)
			goto error;
	}

	r = alloc_region_table(ti, nr_paths);
	if (r)
		goto error;

	initialise_region_table(sctx);

	/* For UNMAP, sending the request down any path is sufficient */
	ti->num_discard_bios = 1;

	return 0;

error:
	switch_dtr(ti);

	return r;
}

static int switch_map(struct dm_target *ti, struct bio *bio)
{
	struct switch_ctx *sctx = ti->private;
	sector_t offset = dm_target_offset(ti, bio->bi_iter.bi_sector);
	unsigned path_nr = switch_get_path_nr(sctx, offset);

	bio_set_dev(bio, sctx->path_list[path_nr].dmdev->bdev);
	bio->bi_iter.bi_sector = sctx->path_list[path_nr].start + offset;

	return DM_MAPIO_REMAPPED;
}

/*
 * We need to parse hex numbers in the message as quickly as possible.
 *
 * This table-based hex parser improves performance: it reduces the time
 * needed to load 1000000 entries compared to the condition-based parser.
 *		table-based parser	condition-based parser
 * PA-RISC	0.29s			0.31s
 * Opteron	0.0495s			0.0498s
 */
static const unsigned char hex_table[256] = {
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255,
255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
};

static __always_inline unsigned long parse_hex(const char **string)
{
	unsigned char d;
	unsigned long r = 0;

	while ((d = hex_table[(unsigned char)**string]) < 16) {
		r = (r << 4) | d;
		(*string)++;
	}

	return r;
}
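
/*
 * Example (added for illustration; not part of the original source):
 * given the string "a3:", parse_hex() returns 0xa3 and leaves *string
 * pointing at the ':' terminator.
 */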
372
373static int process_set_region_mappings(struct switch_ctx *sctx,
374				       unsigned argc, char **argv)
375{
376	unsigned i;
377	unsigned long region_index = 0;
378
379	for (i = 1; i < argc; i++) {
380		unsigned long path_nr;
381		const char *string = argv[i];
382
383		if ((*string & 0xdf) == 'R') {
384			unsigned long cycle_length, num_write;
385
386			string++;
387			if (unlikely(*string == ',')) {
388				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
389				return -EINVAL;
390			}
391			cycle_length = parse_hex(&string);
392			if (unlikely(*string != ',')) {
393				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
394				return -EINVAL;
395			}
396			string++;
397			if (unlikely(!*string)) {
398				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
399				return -EINVAL;
400			}
401			num_write = parse_hex(&string);
402			if (unlikely(*string)) {
403				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
404				return -EINVAL;
405			}
406
407			if (unlikely(!cycle_length) || unlikely(cycle_length - 1 > region_index)) {
408				DMWARN("invalid set_region_mappings cycle length: %lu > %lu",
409				       cycle_length - 1, region_index);
410				return -EINVAL;
411			}
412			if (unlikely(region_index + num_write < region_index) ||
413			    unlikely(region_index + num_write >= sctx->nr_regions)) {
414				DMWARN("invalid set_region_mappings region number: %lu + %lu >= %lu",
415				       region_index, num_write, sctx->nr_regions);
416				return -EINVAL;
417			}
418
419			while (num_write--) {
420				region_index++;
421				path_nr = switch_region_table_read(sctx, region_index - cycle_length);
422				switch_region_table_write(sctx, region_index, path_nr);
423			}
424
425			continue;
426		}
427
428		if (*string == ':')
429			region_index++;
430		else {
431			region_index = parse_hex(&string);
432			if (unlikely(*string != ':')) {
433				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
434				return -EINVAL;
435			}
436		}
437
438		string++;
439		if (unlikely(!*string)) {
440			DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
441			return -EINVAL;
442		}
443
444		path_nr = parse_hex(&string);
445		if (unlikely(*string)) {
446			DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
447			return -EINVAL;
448		}
449		if (unlikely(region_index >= sctx->nr_regions)) {
450			DMWARN("invalid set_region_mappings region number: %lu >= %lu", region_index, sctx->nr_regions);
451			return -EINVAL;
452		}
453		if (unlikely(path_nr >= sctx->nr_paths)) {
454			DMWARN("invalid set_region_mappings device: %lu >= %u", path_nr, sctx->nr_paths);
455			return -EINVAL;
456		}
457
458		switch_region_table_write(sctx, region_index, path_nr);
459	}
460
461	return 0;
462}
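
/*
 * Example (added for illustration; not part of the original source):
 * the message below maps region 0 to path 0 and region 1 to path 1,
 * then "R2,8" copies that preceding 2-entry cycle into the next 8
 * regions (regions 2-9):
 *
 *   dmsetup message sw 0 set_region_mappings 0:0 :1 R2,8
 */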

/*
 * Messages are processed one-at-a-time.
 *
 * Only set_region_mappings is supported.
 */
static int switch_message(struct dm_target *ti, unsigned argc, char **argv,
			  char *result, unsigned maxlen)
{
	static DEFINE_MUTEX(message_mutex);

	struct switch_ctx *sctx = ti->private;
	int r = -EINVAL;

	mutex_lock(&message_mutex);

	if (!strcasecmp(argv[0], "set_region_mappings"))
		r = process_set_region_mappings(sctx, argc, argv);
	else
		DMWARN("Unrecognised message received.");

	mutex_unlock(&message_mutex);

	return r;
}

static void switch_status(struct dm_target *ti, status_type_t type,
			  unsigned status_flags, char *result, unsigned maxlen)
{
	struct switch_ctx *sctx = ti->private;
	unsigned sz = 0;
	int path_nr;

	switch (type) {
	case STATUSTYPE_INFO:
		result[0] = '\0';
		break;

	case STATUSTYPE_TABLE:
		DMEMIT("%u %u 0", sctx->nr_paths, sctx->region_size);
		for (path_nr = 0; path_nr < sctx->nr_paths; path_nr++)
			DMEMIT(" %s %llu", sctx->path_list[path_nr].dmdev->name,
			       (unsigned long long)sctx->path_list[path_nr].start);
		break;

	case STATUSTYPE_IMA:
		result[0] = '\0';
		break;
	}
}
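
/*
 * Example (added for illustration; not part of the original source):
 * for the two-path table above, "dmsetup table sw" would report the
 * underlying devices by major:minor, e.g.:
 *
 *   0 65536 switch 2 512 0 8:16 0 8:32 0
 */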

/*
 * Switch ioctl:
 *
 * Pass through all ioctls to the path for sector 0.
 */
static int switch_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
	struct switch_ctx *sctx = ti->private;
	unsigned path_nr;

	path_nr = switch_get_path_nr(sctx, 0);

	*bdev = sctx->path_list[path_nr].dmdev->bdev;

	/*
	 * Only pass ioctls through if the device sizes match exactly.
	 */
	if (ti->len + sctx->path_list[path_nr].start !=
	    bdev_nr_sectors((*bdev)))
		return 1;
	return 0;
}

static int switch_iterate_devices(struct dm_target *ti,
				  iterate_devices_callout_fn fn, void *data)
{
	struct switch_ctx *sctx = ti->private;
	int path_nr;
	int r;

	for (path_nr = 0; path_nr < sctx->nr_paths; path_nr++) {
		r = fn(ti, sctx->path_list[path_nr].dmdev,
			 sctx->path_list[path_nr].start, ti->len, data);
		if (r)
			return r;
	}

	return 0;
}

static struct target_type switch_target = {
	.name = "switch",
	.version = {1, 1, 0},
	.features = DM_TARGET_NOWAIT,
	.module = THIS_MODULE,
	.ctr = switch_ctr,
	.dtr = switch_dtr,
	.map = switch_map,
	.message = switch_message,
	.status = switch_status,
	.prepare_ioctl = switch_prepare_ioctl,
	.iterate_devices = switch_iterate_devices,
};

static int __init dm_switch_init(void)
{
	int r;

	r = dm_register_target(&switch_target);
	if (r < 0)
		DMERR("dm_register_target() failed %d", r);

	return r;
}

static void __exit dm_switch_exit(void)
{
	dm_unregister_target(&switch_target);
}

module_init(dm_switch_init);
module_exit(dm_switch_exit);

MODULE_DESCRIPTION(DM_NAME " dynamic path switching target");
MODULE_AUTHOR("Kevin D. O'Kelley <Kevin_OKelley@dell.com>");
MODULE_AUTHOR("Narendran Ganapathy <Narendran_Ganapathy@dell.com>");
MODULE_AUTHOR("Jim Ramsay <Jim_Ramsay@dell.com>");
MODULE_AUTHOR("Mikulas Patocka <mpatocka@redhat.com>");
MODULE_LICENSE("GPL");