v4.17
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2014-2016 Christoph Hellwig.
 */
#include <linux/sunrpc/svc.h>
#include <linux/blkdev.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_xdr.h>
#include <linux/pr.h>

#include "blocklayout.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS_LD

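/*
 * Release a device tree node: recurse into any child volumes; for a leaf
 * device, drop the persistent reservation (by registering a zero key) and
 * put the underlying block device.
 */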
static void
bl_free_device(struct pnfs_block_dev *dev)
{
	if (dev->nr_children) {
		int i;

		for (i = 0; i < dev->nr_children; i++)
			bl_free_device(&dev->children[i]);
		kfree(dev->children);
	} else {
		if (dev->pr_registered) {
			const struct pr_ops *ops =
				dev->bdev->bd_disk->fops->pr_ops;
			int error;

			error = ops->pr_register(dev->bdev, dev->pr_key, 0,
				false);
			if (error)
				pr_err("failed to unregister PR key.\n");
		}

		if (dev->bdev)
			blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
	}
}

void
bl_free_deviceid_node(struct nfs4_deviceid_node *d)
{
	struct pnfs_block_dev *dev =
		container_of(d, struct pnfs_block_dev, node);

	bl_free_device(dev);
	kfree_rcu(dev, node.rcu);
}

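/*
 * Decode one volume entry from the GETDEVICEINFO XDR stream.  A volume is
 * either a leaf (simple or SCSI, identified by on-disk signatures or a SCSI
 * designator plus persistent reservation key) or a composite (slice, concat
 * or stripe) that refers to previously decoded volumes by index.
 */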
static int
nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
{
	__be32 *p;
	int i;

	p = xdr_inline_decode(xdr, 4);
	if (!p)
		return -EIO;
	b->type = be32_to_cpup(p++);

	switch (b->type) {
	case PNFS_BLOCK_VOLUME_SIMPLE:
		p = xdr_inline_decode(xdr, 4);
		if (!p)
			return -EIO;
		b->simple.nr_sigs = be32_to_cpup(p++);
		if (!b->simple.nr_sigs || b->simple.nr_sigs > PNFS_BLOCK_MAX_UUIDS) {
			dprintk("Bad signature count: %d\n", b->simple.nr_sigs);
			return -EIO;
		}

		b->simple.len = 4 + 4;
		for (i = 0; i < b->simple.nr_sigs; i++) {
			p = xdr_inline_decode(xdr, 8 + 4);
			if (!p)
				return -EIO;
			p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
			b->simple.sigs[i].sig_len = be32_to_cpup(p++);
			if (b->simple.sigs[i].sig_len > PNFS_BLOCK_UUID_LEN) {
				pr_info("signature too long: %d\n",
					b->simple.sigs[i].sig_len);
				return -EIO;
			}

			p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
			if (!p)
				return -EIO;
			memcpy(&b->simple.sigs[i].sig, p,
				b->simple.sigs[i].sig_len);

			b->simple.len += 8 + 4 + \
				(XDR_QUADLEN(b->simple.sigs[i].sig_len) << 2);
		}
		break;
	case PNFS_BLOCK_VOLUME_SLICE:
		p = xdr_inline_decode(xdr, 8 + 8 + 4);
		if (!p)
			return -EIO;
		p = xdr_decode_hyper(p, &b->slice.start);
		p = xdr_decode_hyper(p, &b->slice.len);
		b->slice.volume = be32_to_cpup(p++);
		break;
	case PNFS_BLOCK_VOLUME_CONCAT:
		p = xdr_inline_decode(xdr, 4);
		if (!p)
			return -EIO;

		b->concat.volumes_count = be32_to_cpup(p++);
		if (b->concat.volumes_count > PNFS_BLOCK_MAX_DEVICES) {
			dprintk("Too many volumes: %d\n", b->concat.volumes_count);
			return -EIO;
		}

		p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
		if (!p)
			return -EIO;
		for (i = 0; i < b->concat.volumes_count; i++)
			b->concat.volumes[i] = be32_to_cpup(p++);
		break;
	case PNFS_BLOCK_VOLUME_STRIPE:
		p = xdr_inline_decode(xdr, 8 + 4);
		if (!p)
			return -EIO;

		p = xdr_decode_hyper(p, &b->stripe.chunk_size);
		b->stripe.volumes_count = be32_to_cpup(p++);
		if (b->stripe.volumes_count > PNFS_BLOCK_MAX_DEVICES) {
			dprintk("Too many volumes: %d\n", b->stripe.volumes_count);
			return -EIO;
		}

		p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
		if (!p)
			return -EIO;
		for (i = 0; i < b->stripe.volumes_count; i++)
			b->stripe.volumes[i] = be32_to_cpup(p++);
		break;
	case PNFS_BLOCK_VOLUME_SCSI:
		p = xdr_inline_decode(xdr, 4 + 4 + 4);
		if (!p)
			return -EIO;
		b->scsi.code_set = be32_to_cpup(p++);
		b->scsi.designator_type = be32_to_cpup(p++);
		b->scsi.designator_len = be32_to_cpup(p++);
		p = xdr_inline_decode(xdr, b->scsi.designator_len);
		if (!p)
			return -EIO;
		if (b->scsi.designator_len > 256)
			return -EIO;
		memcpy(&b->scsi.designator, p, b->scsi.designator_len);
		p = xdr_inline_decode(xdr, 8);
		if (!p)
			return -EIO;
		p = xdr_decode_hyper(p, &b->scsi.pr_key);
		break;
	default:
		dprintk("unknown volume type!\n");
		return -EIO;
	}

	return 0;
}

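/*
 * The ->map callbacks translate an offset within the logical volume into the
 * underlying block device plus the start, disk offset and length of the
 * extent that contains it.
 */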
static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
		struct pnfs_block_dev_map *map)
{
	map->start = dev->start;
	map->len = dev->len;
	map->disk_offset = dev->disk_offset;
	map->bdev = dev->bdev;
	return true;
}

static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset,
		struct pnfs_block_dev_map *map)
{
	int i;

	for (i = 0; i < dev->nr_children; i++) {
		struct pnfs_block_dev *child = &dev->children[i];

		if (child->start > offset ||
		    child->start + child->len <= offset)
			continue;

		child->map(child, offset - child->start, map);
		return true;
	}

	dprintk("%s: ran off loop!\n", __func__);
	return false;
}

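/*
 * For a striped volume the chunk index is (offset / chunk_size) modulo the
 * number of children, and that chunk's data lives at offset / nr_children on
 * the child device.  The returned map covers a single chunk.
 */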
static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
		struct pnfs_block_dev_map *map)
{
	struct pnfs_block_dev *child;
	u64 chunk;
	u32 chunk_idx;
	u64 disk_offset;

	chunk = div_u64(offset, dev->chunk_size);
	div_u64_rem(chunk, dev->nr_children, &chunk_idx);

	if (chunk_idx > dev->nr_children) {
		dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
			__func__, chunk_idx, offset, dev->chunk_size);
		/* error, should not happen */
		return false;
	}

	/* truncate offset to the beginning of the stripe */
	offset = chunk * dev->chunk_size;

	/* disk offset of the stripe */
	disk_offset = div_u64(offset, dev->nr_children);

	child = &dev->children[chunk_idx];
	child->map(child, disk_offset, map);

	map->start += offset;
	map->disk_offset += disk_offset;
	map->len = dev->chunk_size;
	return true;
}

static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);


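/*
 * A SIMPLE volume is identified by on-disk signatures; bl_resolve_deviceid()
 * asks the userspace block mapping daemon (blkmapd) to match them to a local
 * dev_t, which is then opened for read/write.
 */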
static int
bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	struct block_device *bdev;
	dev_t dev;

	dev = bl_resolve_deviceid(server, v, gfp_mask);
	if (!dev)
		return -EIO;

	bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL);
	if (IS_ERR(bdev)) {
		printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
			MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
		return PTR_ERR(bdev);
	}
	d->bdev = bdev;


	d->len = i_size_read(d->bdev->bd_inode);
	d->map = bl_map_simple;

	printk(KERN_INFO "pNFS: using block device %s\n",
		d->bdev->bd_disk->disk_name);
	return 0;
}

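/*
 * Only binary EUI-64 and NAA designators of the standard lengths are
 * accepted; T10 and SCSI name string designators are rejected.
 */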
static bool
bl_validate_designator(struct pnfs_block_volume *v)
{
	switch (v->scsi.designator_type) {
	case PS_DESIGNATOR_EUI64:
		if (v->scsi.code_set != PS_CODE_SET_BINARY)
			return false;

		if (v->scsi.designator_len != 8 &&
		    v->scsi.designator_len != 10 &&
		    v->scsi.designator_len != 16)
			return false;

		return true;
	case PS_DESIGNATOR_NAA:
		if (v->scsi.code_set != PS_CODE_SET_BINARY)
			return false;

		if (v->scsi.designator_len != 8 &&
		    v->scsi.designator_len != 16)
			return false;

		return true;
	case PS_DESIGNATOR_T10:
	case PS_DESIGNATOR_NAME:
		pr_err("pNFS: unsupported designator "
			"(code set %d, type %d, len %d.\n",
			v->scsi.code_set,
			v->scsi.designator_type,
			v->scsi.designator_len);
		return false;
	default:
		pr_err("pNFS: invalid designator "
			"(code set %d, type %d, len %d.\n",
			v->scsi.code_set,
			v->scsi.designator_type,
			v->scsi.designator_len);
		return false;
	}
}

/*
 * Try to open the udev path for the WWN.  At least on Debian the udev
 * by-id path will always point to the dm-multipath device if one exists.
 */
static struct block_device *
bl_open_udev_path(struct pnfs_block_volume *v)
{
	struct block_device *bdev;
	const char *devname;

	devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%*phN",
				v->scsi.designator_len, v->scsi.designator);
	if (!devname)
		return ERR_PTR(-ENOMEM);

	bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL);
	if (IS_ERR(bdev)) {
		pr_warn("pNFS: failed to open device %s (%ld)\n",
			devname, PTR_ERR(bdev));
	}

	kfree(devname);
	return bdev;
}

/*
 * Try to open the RH/Fedora specific dm-mpath udev path for this WWN, as the
 * wwn- links will only point to the first discovered SCSI device there.
 */
static struct block_device *
bl_open_dm_mpath_udev_path(struct pnfs_block_volume *v)
{
	struct block_device *bdev;
	const char *devname;

	devname = kasprintf(GFP_KERNEL,
			"/dev/disk/by-id/dm-uuid-mpath-%d%*phN",
			v->scsi.designator_type,
			v->scsi.designator_len, v->scsi.designator);
	if (!devname)
		return ERR_PTR(-ENOMEM);

	bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL);
	kfree(devname);
	return bdev;
}

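/*
 * A SCSI volume is located by its designator via the udev by-id symlinks
 * rather than by on-disk signatures.  On success the layout's persistent
 * reservation key is also registered so the server can fence this client.
 */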
static int
bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	struct block_device *bdev;
	const struct pr_ops *ops;
	int error;

	if (!bl_validate_designator(v))
		return -EINVAL;

	bdev = bl_open_dm_mpath_udev_path(v);
	if (IS_ERR(bdev))
		bdev = bl_open_udev_path(v);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);
	d->bdev = bdev;

	d->len = i_size_read(d->bdev->bd_inode);
	d->map = bl_map_simple;
	d->pr_key = v->scsi.pr_key;

	pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
		d->bdev->bd_disk->disk_name, d->pr_key);

	ops = d->bdev->bd_disk->fops->pr_ops;
	if (!ops) {
		pr_err("pNFS: block device %s does not support reservations.",
				d->bdev->bd_disk->disk_name);
		error = -EINVAL;
		goto out_blkdev_put;
	}

	error = ops->pr_register(d->bdev, 0, d->pr_key, true);
	if (error) {
		pr_err("pNFS: failed to register key for block device %s.",
				d->bdev->bd_disk->disk_name);
		goto out_blkdev_put;
	}

	d->pr_registered = true;
	return 0;

out_blkdev_put:
	blkdev_put(d->bdev, FMODE_READ | FMODE_WRITE);
	return error;
}

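/*
 * A SLICE is a contiguous range of another volume: parse that volume first,
 * then restrict the mapping to the given start and length.
 */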
static int
bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	int ret;

	ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
	if (ret)
		return ret;

	d->disk_offset = v->slice.start;
	d->len = v->slice.len;
	return 0;
}

static int
bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	u64 len = 0;
	int ret, i;

	d->children = kcalloc(v->concat.volumes_count,
			sizeof(struct pnfs_block_dev), GFP_KERNEL);
	if (!d->children)
		return -ENOMEM;

	for (i = 0; i < v->concat.volumes_count; i++) {
		ret = bl_parse_deviceid(server, &d->children[i],
				volumes, v->concat.volumes[i], gfp_mask);
		if (ret)
			return ret;

		d->nr_children++;
		d->children[i].start += len;
		len += d->children[i].len;
	}

	d->len = len;
	d->map = bl_map_concat;
	return 0;
}

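/*
 * A STRIPE interleaves fixed-size chunks across its child volumes; the total
 * length is the sum of the children and mapping is done by bl_map_stripe().
 */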
static int
bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	u64 len = 0;
	int ret, i;

	d->children = kcalloc(v->stripe.volumes_count,
			sizeof(struct pnfs_block_dev), GFP_KERNEL);
	if (!d->children)
		return -ENOMEM;

	for (i = 0; i < v->stripe.volumes_count; i++) {
		ret = bl_parse_deviceid(server, &d->children[i],
				volumes, v->stripe.volumes[i], gfp_mask);
		if (ret)
			return ret;

		d->nr_children++;
		len += d->children[i].len;
	}

	d->len = len;
	d->chunk_size = v->stripe.chunk_size;
	d->map = bl_map_stripe;
	return 0;
}

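/*
 * Build the device node for volumes[idx].  Composite volume types recurse
 * into the volumes they reference, so the array effectively describes a
 * tree rooted at its last entry.
 */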
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	switch (volumes[idx].type) {
	case PNFS_BLOCK_VOLUME_SIMPLE:
		return bl_parse_simple(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_SLICE:
		return bl_parse_slice(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_CONCAT:
		return bl_parse_concat(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_STRIPE:
		return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_SCSI:
		return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
	default:
		dprintk("unsupported volume type: %d\n", volumes[idx].type);
		return -EIO;
	}
}

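/*
 * Entry point for GETDEVICEINFO processing: decode the volume array from the
 * reply pages and parse it into a pnfs_block_dev tree rooted at the last
 * volume.  A parse failure still returns the node, but marks the device id
 * unavailable.
 */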
struct nfs4_deviceid_node *
bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
		gfp_t gfp_mask)
{
	struct nfs4_deviceid_node *node = NULL;
	struct pnfs_block_volume *volumes;
	struct pnfs_block_dev *top;
	struct xdr_stream xdr;
	struct xdr_buf buf;
	struct page *scratch;
	int nr_volumes, ret, i;
	__be32 *p;

	scratch = alloc_page(gfp_mask);
	if (!scratch)
		goto out;

	xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
	xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);

	p = xdr_inline_decode(&xdr, sizeof(__be32));
	if (!p)
		goto out_free_scratch;
	nr_volumes = be32_to_cpup(p++);

	volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
			  gfp_mask);
	if (!volumes)
		goto out_free_scratch;

	for (i = 0; i < nr_volumes; i++) {
		ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
		if (ret < 0)
			goto out_free_volumes;
	}

	top = kzalloc(sizeof(*top), gfp_mask);
	if (!top)
		goto out_free_volumes;

	ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);

	node = &top->node;
	nfs4_init_deviceid_node(node, server, &pdev->dev_id);
	if (ret)
		nfs4_mark_deviceid_unavailable(node);

out_free_volumes:
	kfree(volumes);
out_free_scratch:
	__free_page(scratch);
out:
	return node;
}
v5.14.15
The v5.14.15 listing is identical to the v4.17 code above except for two hunks.

In bl_map_stripe(), the bounds check on the computed chunk index is inclusive:

	if (chunk_idx >= dev->nr_children) {

In bl_alloc_deviceid_node(), the scratch page is installed with a different helper:

	xdr_set_scratch_page(&xdr, scratch);

instead of

	xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
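
Both changes track upstream development. Because chunk_idx comes from div_u64_rem() modulo dev->nr_children, it is always below nr_children, so the check is purely defensive; the ">=" form simply makes it able to catch an index equal to nr_children as well. xdr_set_scratch_page() is the later SUNRPC convenience helper that sets a whole page as the XDR scratch buffer, equivalent to the xdr_set_scratch_buffer(xdr, page_address(page), PAGE_SIZE) call used in v4.17.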