blocklayout.c - fs/nfs/blocklayout/blocklayout.c - Linux diff v4.6

  1/*
  2 *  linux/fs/nfs/blocklayout/blocklayout.c
  3 *
  4 *  Module for the NFSv4.1 pNFS block layout driver.
  5 *
  6 *  Copyright (c) 2006 The Regents of the University of Michigan.
  7 *  All rights reserved.
  8 *
  9 *  Andy Adamson <andros@citi.umich.edu>
 10 *  Fred Isaman <iisaman@umich.edu>
 11 *
 12 * permission is granted to use, copy, create derivative works and
 13 * redistribute this software and such derivative works for any purpose,
 14 * so long as the name of the university of michigan is not used in
 15 * any advertising or publicity pertaining to the use or distribution
 16 * of this software without specific, written prior authorization.  if
 17 * the above copyright notice or any other identification of the
 18 * university of michigan is included in any copy of any portion of
 19 * this software, then the disclaimer below must also be included.
 20 *
 21 * this software is provided as is, without representation from the
 22 * university of michigan as to its fitness for any purpose, and without
 23 * warranty by the university of michigan of any kind, either express
 24 * or implied, including without limitation the implied warranties of
 25 * merchantability and fitness for a particular purpose.  the regents
 26 * of the university of michigan shall not be liable for any damages,
 27 * including special, indirect, incidental, or consequential damages,
 28 * with respect to any claim arising out or in connection with the use
 29 * of the software, even if it has been or is hereafter advised of the
 30 * possibility of such damages.
 31 */
 32
 33#include <linux/module.h>
 34#include <linux/init.h>
 35#include <linux/mount.h>
 36#include <linux/namei.h>
 37#include <linux/bio.h>		/* struct bio */
 
 38#include <linux/prefetch.h>
 39#include <linux/pagevec.h>
 40
 41#include "../pnfs.h"
 42#include "../nfs4session.h"
 43#include "../internal.h"
 44#include "blocklayout.h"
 45
 46#define NFSDBG_FACILITY	NFSDBG_PNFS_LD
 47
 48MODULE_LICENSE("GPL");
 49MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
 50MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
 51
 52static bool is_hole(struct pnfs_block_extent *be)
 
 
 
 53{
 54	switch (be->be_state) {
 55	case PNFS_BLOCK_NONE_DATA:
 56		return true;
 57	case PNFS_BLOCK_INVALID_DATA:
 58		return be->be_tag ? false : true;
 59	default:
 60		return false;
 61	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 62}
 63
  64/* The data we are handed might be spread across several bios.  We need
  65 * to track when the last one is finished.
  66 */
  67struct parallel_io {
  68	struct kref refcnt;	/* initial ref from alloc_parallel() plus one per bio taken in bl_submit_bio() */
  69	void (*pnfs_callback) (void *data);	/* run by destroy_parallel() when the last ref is dropped */
  70	void *data;	/* argument passed to pnfs_callback; callers store the nfs_pgio_header here */
  71};
 72
 73static inline struct parallel_io *alloc_parallel(void *data)
 74{
 75	struct parallel_io *rv;
 76
 77	rv  = kmalloc(sizeof(*rv), GFP_NOFS);
 78	if (rv) {
 79		rv->data = data;
 80		kref_init(&rv->refcnt);
 81	}
 82	return rv;
 83}
 84
 85static inline void get_parallel(struct parallel_io *p)
 86{
 87	kref_get(&p->refcnt);
 88}
 89
 90static void destroy_parallel(struct kref *kref)
 91{
 92	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
 93
 94	dprintk("%s enter\n", __func__);
 95	p->pnfs_callback(p->data);
 96	kfree(p);
 97}
 98
 99static inline void put_parallel(struct parallel_io *p)
100{
101	kref_put(&p->refcnt, destroy_parallel);
102}
103
104static struct bio *
105bl_submit_bio(int rw, struct bio *bio)
106{
107	if (bio) {
108		get_parallel(bio->bi_private);
109		dprintk("%s submitting %s bio %u@%llu\n", __func__,
110			rw == READ ? "read" : "write", bio->bi_iter.bi_size,
111			(unsigned long long)bio->bi_iter.bi_sector);
112		submit_bio(rw, bio);
113	}
114	return NULL;
115}
116
117static struct bio *
118bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector,
119		bio_end_io_t end_io, struct parallel_io *par)
 
120{
121	struct bio *bio;
122
123	npg = min(npg, BIO_MAX_PAGES);
124	bio = bio_alloc(GFP_NOIO, npg);
125	if (!bio && (current->flags & PF_MEMALLOC)) {
126		while (!bio && (npg /= 2))
127			bio = bio_alloc(GFP_NOIO, npg);
128	}
129
130	if (bio) {
131		bio->bi_iter.bi_sector = disk_sector;
132		bio->bi_bdev = bdev;
133		bio->bi_end_io = end_io;
134		bio->bi_private = par;
135	}
136	return bio;
137}
138
139static struct bio *
140do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
141		struct page *page, struct pnfs_block_dev_map *map,
142		struct pnfs_block_extent *be, bio_end_io_t end_io,
143		struct parallel_io *par, unsigned int offset, int *len)
144{
145	struct pnfs_block_dev *dev =
146		container_of(be->be_device, struct pnfs_block_dev, node);
147	u64 disk_addr, end;
148
149	dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
150		npg, rw, (unsigned long long)isect, offset, *len);
151
152	/* translate to device offset */
153	isect += be->be_v_offset;
154	isect -= be->be_f_offset;
155
156	/* translate to physical disk offset */
157	disk_addr = (u64)isect << SECTOR_SHIFT;
158	if (disk_addr < map->start || disk_addr >= map->start + map->len) {
159		if (!dev->map(dev, disk_addr, map))
160			return ERR_PTR(-EIO);
161		bio = bl_submit_bio(rw, bio);
162	}
163	disk_addr += map->disk_offset;
164	disk_addr -= map->start;
165
166	/* limit length to what the device mapping allows */
167	end = disk_addr + *len;
168	if (end >= map->start + map->len)
169		*len = map->start + map->len - disk_addr;
170
171retry:
172	if (!bio) {
173		bio = bl_alloc_init_bio(npg, map->bdev,
174				disk_addr >> SECTOR_SHIFT, end_io, par);
175		if (!bio)
176			return ERR_PTR(-ENOMEM);
177	}
178	if (bio_add_page(bio, page, *len, offset) < *len) {
179		bio = bl_submit_bio(rw, bio);
180		goto retry;
181	}
182	return bio;
183}
184
185static void bl_end_io_read(struct bio *bio)
186{
187	struct parallel_io *par = bio->bi_private;
188
189	if (bio->bi_error) {
190		struct nfs_pgio_header *header = par->data;
191
192		if (!header->pnfs_error)
193			header->pnfs_error = -EIO;
194		pnfs_set_lo_fail(header->lseg);
195	}
 
196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197	bio_put(bio);
198	put_parallel(par);
199}
200
201static void bl_read_cleanup(struct work_struct *work)
202{
203	struct rpc_task *task;
204	struct nfs_pgio_header *hdr;
205	dprintk("%s enter\n", __func__);
206	task = container_of(work, struct rpc_task, u.tk_work);
207	hdr = container_of(task, struct nfs_pgio_header, task);
208	pnfs_ld_read_done(hdr);
209}
210
211static void
212bl_end_par_io_read(void *data)
213{
214	struct nfs_pgio_header *hdr = data;
215
216	hdr->task.tk_status = hdr->pnfs_error;
217	INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup);
218	schedule_work(&hdr->task.u.tk_work);
 
 
 
 
 
 
 
219}
220
 /*
  * Read the pages described by @header directly from the block device(s)
  * behind the layout segment, zero-filling pages that fall into holes.
  *
  * Returns PNFS_NOT_ATTEMPTED only when the parallel_io tracker cannot be
  * allocated.  Otherwise PNFS_ATTEMPTED is returned, any error is left in
  * header->pnfs_error, and bl_end_par_io_read() runs once the last
  * reference on the tracker has been dropped.
  */
 221static enum pnfs_try_status
 222bl_read_pagelist(struct nfs_pgio_header *header)
 223{
 224	struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
 	/* start == NFS4_MAX_UINT64 makes every address fall outside the cached
 	 * mapping, so do_add_page_to_bio() asks the device for a mapping first */
 225	struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 };
 226	struct bio *bio = NULL;
 227	struct pnfs_block_extent be;
 228	sector_t isect, extent_length = 0;
 229	struct parallel_io *par;
 230	loff_t f_offset = header->args.offset;
 231	size_t bytes_left = header->args.count;
 232	unsigned int pg_offset = header->args.pgbase, pg_len;
 233	struct page **pages = header->args.pages;
 234	int pg_index = header->args.pgbase >> PAGE_SHIFT;
 235	const bool is_dio = (header->dreq != NULL);
 236	struct blk_plug plug;
 237	int i;
 238
 239	dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
 240		header->page_array.npages, f_offset,
 241		(unsigned int)header->args.count);
 242
 243	par = alloc_parallel(header);
 244	if (!par)
 245		return PNFS_NOT_ATTEMPTED;
 246	par->pnfs_callback = bl_end_par_io_read;
 247
 248	blk_start_plug(&plug);
 249
 250	isect = (sector_t) (f_offset >> SECTOR_SHIFT);
 251	/* Code assumes extents are page-aligned */
 252	for (i = pg_index; i < header->page_array.npages; i++) {
 		/* NOTE(review): if sector_t is unsigned this test is just "== 0" - confirm */
 253		if (extent_length <= 0) {
 254			/* We've used up the previous extent */
 255			bio = bl_submit_bio(READ, bio);
 256
 257			/* Get the next one */
 258			if (!ext_tree_lookup(bl, isect, &be, false)) {
 259				header->pnfs_error = -EIO;
 260				goto out;
 261			}
 			/* sectors left in this extent counting from isect */
 262			extent_length = be.be_length - (isect - be.be_f_offset);
 263		}
 264
 		/* direct I/O may cover only part of a page; every other read
 		 * must start at offset 0 and takes the whole page */
 265		if (is_dio) {
 266			if (pg_offset + bytes_left > PAGE_SIZE)
 267				pg_len = PAGE_SIZE - pg_offset;
 268			else
 269				pg_len = bytes_left;
 270		} else {
 271			BUG_ON(pg_offset != 0);
 272			pg_len = PAGE_SIZE;
 273		}
 274
 275		if (is_hole(&be)) {
 276			bio = bl_submit_bio(READ, bio);
 277			/* Fill hole w/ zeroes w/o accessing device */
 278			dprintk("%s Zeroing page for hole\n", __func__);
 279			zero_user_segment(pages[i], pg_offset, pg_len);
 280
 281			/* invalidate map */
 282			map.start = NFS4_MAX_UINT64;
 283		} else {
 			/* pg_len may come back shortened by the helper */
 284			bio = do_add_page_to_bio(bio,
 285						 header->page_array.npages - i,
 286						 READ,
 287						 isect, pages[i], &map, &be,
 288						 bl_end_io_read, par,
 289						 pg_offset, &pg_len);
 290			if (IS_ERR(bio)) {
 291				header->pnfs_error = PTR_ERR(bio);
 				/* keep the ERR_PTR away from bl_submit_bio() at "out" */
 292				bio = NULL;
 293				goto out;
 294			}
 295		}
 296		isect += (pg_len >> SECTOR_SHIFT);
 297		extent_length -= (pg_len >> SECTOR_SHIFT);
 298		f_offset += pg_len;
 299		bytes_left -= pg_len;
 		/* only the first page can start at a non-zero offset */
 300		pg_offset = 0;
 301	}
 	/* report the bytes covered, clipped to i_size; flag EOF when the read
 	 * reached or went past it */
 302	if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
 303		header->res.eof = 1;
 304		header->res.count = header->inode->i_size - header->args.offset;
 305	} else {
 306		header->res.count = (isect << SECTOR_SHIFT) - header->args.offset;
 307	}
 308out:
 	/* flush the bio still being built, then drop the reference taken by
 	 * alloc_parallel(); the callback fires once the submitted bios, which
 	 * hold references of their own, have completed as well */
 309	bl_submit_bio(READ, bio);
 310	blk_finish_plug(&plug);
 311	put_parallel(par);
 312	return PNFS_ATTEMPTED;
 313}
314
315static void bl_end_io_write(struct bio *bio)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316{
317	struct parallel_io *par = bio->bi_private;
318	struct nfs_pgio_header *header = par->data;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
320	if (bio->bi_error) {
321		if (!header->pnfs_error)
322			header->pnfs_error = -EIO;
323		pnfs_set_lo_fail(header->lseg);
 
 
 
 
 
 
 
324	}
325	bio_put(bio);
326	put_parallel(par);
327}
328
329/* Function scheduled for call during bl_end_par_io_write,
330 * it marks sectors as written and extends the commitlist.
331 */
332static void bl_write_cleanup(struct work_struct *work)
333{
334	struct rpc_task *task = container_of(work, struct rpc_task, u.tk_work);
335	struct nfs_pgio_header *hdr =
336			container_of(task, struct nfs_pgio_header, task);
337
338	dprintk("%s enter\n", __func__);
339
340	if (likely(!hdr->pnfs_error)) {
341		struct pnfs_block_layout *bl = BLK_LSEG2EXT(hdr->lseg);
342		u64 start = hdr->args.offset & (loff_t)PAGE_MASK;
343		u64 end = (hdr->args.offset + hdr->args.count +
344			PAGE_SIZE - 1) & (loff_t)PAGE_MASK;
345
346		ext_tree_mark_written(bl, start >> SECTOR_SHIFT,
347					(end - start) >> SECTOR_SHIFT);
348	}
349
350	pnfs_ld_write_done(hdr);
351}
352
353/* Called when last of bios associated with a bl_write_pagelist call finishes */
354static void bl_end_par_io_write(void *data)
355{
356	struct nfs_pgio_header *hdr = data;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
358	hdr->task.tk_status = hdr->pnfs_error;
359	hdr->verf.committed = NFS_FILE_SYNC;
360	INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup);
361	schedule_work(&hdr->task.u.tk_work);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362}
363
 /*
  * Write the pages described by @header directly to the block device(s)
  * behind the layout segment.  Whole pages are always written.
  *
  * @sync is not used by this function.
  *
  * Returns PNFS_NOT_ATTEMPTED only when the parallel_io tracker cannot be
  * allocated.  Otherwise PNFS_ATTEMPTED is returned, any error is left in
  * header->pnfs_error, and bl_end_par_io_write() runs once the last
  * reference on the tracker has been dropped.
  */
 364static enum pnfs_try_status
 365bl_write_pagelist(struct nfs_pgio_header *header, int sync)
 366{
 367	struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
 	/* start == NFS4_MAX_UINT64 makes every address fall outside the cached
 	 * mapping, so do_add_page_to_bio() asks the device for a mapping first */
 368	struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 };
 369	struct bio *bio = NULL;
 370	struct pnfs_block_extent be;
 371	sector_t isect, extent_length = 0;
 372	struct parallel_io *par = NULL;
 373	loff_t offset = header->args.offset;
 374	size_t count = header->args.count;
 375	struct page **pages = header->args.pages;
 376	int pg_index = header->args.pgbase >> PAGE_SHIFT;
 377	unsigned int pg_len;
 378	struct blk_plug plug;
 379	int i;
 380
 381	dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
 382
 383	/* At this point, header->page_array is a (sequential) list of nfs_pages.
 384	 * We want to write each, and if there is an error set pnfs_error
 385	 * to have it redone using nfs.
 386	 */
 387	par = alloc_parallel(header);
 388	if (!par)
 389		return PNFS_NOT_ATTEMPTED;
 390	par->pnfs_callback = bl_end_par_io_write;
 391
 392	blk_start_plug(&plug);
 393
 394	/* we always write out the whole page */
 395	offset = offset & (loff_t)PAGE_MASK;
 396	isect = offset >> SECTOR_SHIFT;
 397
 398	for (i = pg_index; i < header->page_array.npages; i++) {
 		/* NOTE(review): if sector_t is unsigned this test is just "== 0" - confirm */
 399		if (extent_length <= 0) {
 400			/* We've used up the previous extent */
 401			bio = bl_submit_bio(WRITE, bio);
 402			/* Get the next one */
 403			if (!ext_tree_lookup(bl, isect, &be, true)) {
 404				header->pnfs_error = -EINVAL;
 405				goto out;
 406			}
 407
 			/* sectors left in this extent counting from isect */
 408			extent_length = be.be_length - (isect - be.be_f_offset);
 409		}
 410
 		/* pg_len may come back shortened by the helper */
 411		pg_len = PAGE_SIZE;
 412		bio = do_add_page_to_bio(bio, header->page_array.npages - i,
 413					 WRITE, isect, pages[i], &map, &be,
 414					 bl_end_io_write, par,
 415					 0, &pg_len);
 416		if (IS_ERR(bio)) {
 417			header->pnfs_error = PTR_ERR(bio);
 			/* keep the ERR_PTR away from bl_submit_bio() at "out" */
 418			bio = NULL;
 419			goto out;
 420		}
 421
 		/* offset and count are kept up to date here but are not read
 		 * again after the loop */
 422		offset += pg_len;
 423		count -= pg_len;
 424		isect += (pg_len >> SECTOR_SHIFT);
 425		extent_length -= (pg_len >> SECTOR_SHIFT);
 426	}
 427
 	/* every page was queued: report the full request as written */
 428	header->res.count = header->args.count;
 429out:
 	/* flush the bio still being built, then drop the reference taken by
 	 * alloc_parallel(); the callback fires once the submitted bios, which
 	 * hold references of their own, have completed as well */
 430	bl_submit_bio(WRITE, bio);
 431	blk_finish_plug(&plug);
 432	put_parallel(par);
 433	return PNFS_ATTEMPTED;
 434}
435
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo)
437{
438	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
439	int err;
440
441	dprintk("%s enter\n", __func__);
442
443	err = ext_tree_remove(bl, true, 0, LLONG_MAX);
444	WARN_ON(err);
445
446	kfree(bl);
447}
448
449static struct pnfs_layout_hdr *__bl_alloc_layout_hdr(struct inode *inode,
450		gfp_t gfp_flags, bool is_scsi_layout)
451{
452	struct pnfs_block_layout *bl;
453
454	dprintk("%s enter\n", __func__);
455	bl = kzalloc(sizeof(*bl), gfp_flags);
456	if (!bl)
457		return NULL;
458
459	bl->bl_ext_rw = RB_ROOT;
460	bl->bl_ext_ro = RB_ROOT;
461	spin_lock_init(&bl->bl_ext_lock);
462
463	bl->bl_scsi_layout = is_scsi_layout;
 
 
 
 
 
464	return &bl->bl_layout;
465}
466
467static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
468						   gfp_t gfp_flags)
469{
470	return __bl_alloc_layout_hdr(inode, gfp_flags, false);
471}
472
473static struct pnfs_layout_hdr *sl_alloc_layout_hdr(struct inode *inode,
474						   gfp_t gfp_flags)
475{
476	return __bl_alloc_layout_hdr(inode, gfp_flags, true);
477}
478
479static void bl_free_lseg(struct pnfs_layout_segment *lseg)
480{
481	dprintk("%s enter\n", __func__);
482	kfree(lseg);
483}
484
 485/* Tracks info needed to ensure extents in layout obey constraints of spec */
 /* bl_alloc_lseg() seeds start, inval and cowread with
  * lgr->range.offset >> SECTOR_SHIFT, i.e. all three are kept in sectors;
  * verify_extent() advances them as extents are accepted.
  */
 486struct layout_verification {
 487	u32 mode;	/* R or RW */
 488	u64 start;	/* Expected start of next non-COW extent */
 489	u64 inval;	/* Start of INVAL coverage */
 490	u64 cowread;	/* End of COW read coverage */
 491};
492
493/* Verify the extent meets the layout requirements of the pnfs-block draft,
494 * section 2.3.1.
495 */
496static int verify_extent(struct pnfs_block_extent *be,
497			 struct layout_verification *lv)
498{
499	if (lv->mode == IOMODE_READ) {
500		if (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
501		    be->be_state == PNFS_BLOCK_INVALID_DATA)
502			return -EIO;
503		if (be->be_f_offset != lv->start)
504			return -EIO;
505		lv->start += be->be_length;
506		return 0;
507	}
508	/* lv->mode == IOMODE_RW */
509	if (be->be_state == PNFS_BLOCK_READWRITE_DATA) {
510		if (be->be_f_offset != lv->start)
511			return -EIO;
512		if (lv->cowread > lv->start)
513			return -EIO;
514		lv->start += be->be_length;
515		lv->inval = lv->start;
516		return 0;
517	} else if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
518		if (be->be_f_offset != lv->start)
519			return -EIO;
520		lv->start += be->be_length;
521		return 0;
522	} else if (be->be_state == PNFS_BLOCK_READ_DATA) {
523		if (be->be_f_offset > lv->start)
524			return -EIO;
525		if (be->be_f_offset < lv->inval)
526			return -EIO;
527		if (be->be_f_offset < lv->cowread)
528			return -EIO;
529		/* It looks like you might want to min this with lv->start,
530		 * but you really don't.
531		 */
532		lv->inval = lv->inval + be->be_length;
533		lv->cowread = be->be_f_offset + be->be_length;
534		return 0;
535	} else
536		return -EIO;
537}
538
539static int decode_sector_number(__be32 **rp, sector_t *sp)
540{
541	uint64_t s;
542
543	*rp = xdr_decode_hyper(*rp, &s);
544	if (s & 0x1ff) {
545		printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__);
546		return -1;
547	}
548	*sp = s >> SECTOR_SHIFT;
549	return 0;
550}
551
552static int
553bl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo,
554		struct layout_verification *lv, struct list_head *extents,
555		gfp_t gfp_mask)
556{
557	struct pnfs_block_extent *be;
558	struct nfs4_deviceid id;
559	int error;
560	__be32 *p;
561
562	p = xdr_inline_decode(xdr, 28 + NFS4_DEVICEID4_SIZE);
563	if (!p)
564		return -EIO;
565
566	be = kzalloc(sizeof(*be), GFP_NOFS);
567	if (!be)
568		return -ENOMEM;
569
570	memcpy(&id, p, NFS4_DEVICEID4_SIZE);
571	p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
572
573	error = -EIO;
574	be->be_device = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id,
575						lo->plh_lc_cred, gfp_mask);
576	if (!be->be_device)
577		goto out_free_be;
578
579	/*
580	 * The next three values are read in as bytes, but stored in the
581	 * extent structure in 512-byte granularity.
582	 */
583	if (decode_sector_number(&p, &be->be_f_offset) < 0)
584		goto out_put_deviceid;
585	if (decode_sector_number(&p, &be->be_length) < 0)
586		goto out_put_deviceid;
587	if (decode_sector_number(&p, &be->be_v_offset) < 0)
588		goto out_put_deviceid;
589	be->be_state = be32_to_cpup(p++);
590
591	error = verify_extent(be, lv);
592	if (error) {
593		dprintk("%s: extent verification failed\n", __func__);
594		goto out_put_deviceid;
595	}
596
597	list_add_tail(&be->be_list, extents);
598	return 0;
599
600out_put_deviceid:
601	nfs4_put_deviceid_node(be->be_device);
602out_free_be:
603	kfree(be);
604	return error;
605}
606
 /*
  * alloc_lseg hook: decode the extent list carried in @lgr, verify it
  * against the granted range and insert the extents into the extent tree
  * of @lo.
  *
  * Returns the newly allocated layout segment, or an ERR_PTR() carrying
  * -ENOMEM, -EIO or the error returned by ext_tree_insert().
  */
 607static struct pnfs_layout_segment *
 608bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr,
 609		gfp_t gfp_mask)
 610{
 	/* verification state starts at the beginning of the granted range,
 	 * expressed in sectors */
 611	struct layout_verification lv = {
 612		.mode = lgr->range.iomode,
 613		.start = lgr->range.offset >> SECTOR_SHIFT,
 614		.inval = lgr->range.offset >> SECTOR_SHIFT,
 615		.cowread = lgr->range.offset >> SECTOR_SHIFT,
 616	};
 617	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
 618	struct pnfs_layout_segment *lseg;
 619	struct xdr_buf buf;
 620	struct xdr_stream xdr;
 621	struct page *scratch;
 622	int status, i;
 623	uint32_t count;
 624	__be32 *p;
 625	LIST_HEAD(extents);
 626
 627	dprintk("---> %s\n", __func__);
 628
 629	lseg = kzalloc(sizeof(*lseg), gfp_mask);
 630	if (!lseg)
 631		return ERR_PTR(-ENOMEM);
 632
 633	status = -ENOMEM;
 634	scratch = alloc_page(gfp_mask);
 635	if (!scratch)
 636		goto out;
 637
 638	xdr_init_decode_pages(&xdr, &buf,
 639			lgr->layoutp->pages, lgr->layoutp->len);
 640	xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
 641
 	/* status stays -EIO until an extent has been decoded successfully, so
 	 * a layout announcing zero extents is rejected as well */
 642	status = -EIO;
 643	p = xdr_inline_decode(&xdr, 4);
 644	if (unlikely(!p))
 645		goto out_free_scratch;
 646
 647	count = be32_to_cpup(p++);
 648	dprintk("%s: number of extents %d\n", __func__, count);
 649
 650	/*
 651	 * Decode individual extents, putting them in temporary staging area
 652	 * until whole layout is decoded to make error recovery easier.
 653	 */
 654	for (i = 0; i < count; i++) {
 655		status = bl_alloc_extent(&xdr, lo, &lv, &extents, gfp_mask);
 656		if (status)
 657			goto process_extents;
 658	}
 659
 	/* the extents have to end exactly at the end of the granted range */
 660	if (lgr->range.offset + lgr->range.length !=
 661			lv.start << SECTOR_SHIFT) {
 662		dprintk("%s Final length mismatch\n", __func__);
 663		status = -EIO;
 664		goto process_extents;
 665	}
 666
 667	if (lv.start < lv.cowread) {
 668		dprintk("%s Final uncovered COW extent\n", __func__);
 669		status = -EIO;
 670	}
 671
 672process_extents:
 	/* Drain the staging list.  While status is 0 each extent goes into the
 	 * tree; from the first failure on, the remaining extents are released.
 	 * Extents inserted before that failure are not taken out again here.
 	 */
 673	while (!list_empty(&extents)) {
 674		struct pnfs_block_extent *be =
 675			list_first_entry(&extents, struct pnfs_block_extent,
 676					 be_list);
 677		list_del(&be->be_list);
 678
 679		if (!status)
 680			status = ext_tree_insert(bl, be);
 681
 682		if (status) {
 683			nfs4_put_deviceid_node(be->be_device);
 684			kfree(be);
 685		}
 686	}
 687
 688out_free_scratch:
 689	__free_page(scratch);
 690out:
 691	dprintk("%s returns %d\n", __func__, status);
 692	if (status) {
 693		kfree(lseg);
 694		return ERR_PTR(status);
 695	}
 696	return lseg;
 697}
698
699static void
700bl_return_range(struct pnfs_layout_hdr *lo,
701		struct pnfs_layout_range *range)
702{
703	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
704	sector_t offset = range->offset >> SECTOR_SHIFT, end;
705
706	if (range->offset % 8) {
707		dprintk("%s: offset %lld not block size aligned\n",
708			__func__, range->offset);
709		return;
710	}
711
712	if (range->length != NFS4_MAX_UINT64) {
713		if (range->length % 8) {
714			dprintk("%s: length %lld not block size aligned\n",
715				__func__, range->length);
716			return;
717		}
718
719		end = offset + (range->length >> SECTOR_SHIFT);
720	} else {
721		end = round_down(NFS4_MAX_UINT64, PAGE_SIZE);
722	}
723
724	ext_tree_remove(bl, range->iomode & IOMODE_RW, offset, end);
725}
726
727static int
728bl_prepare_layoutcommit(struct nfs4_layoutcommit_args *arg)
729{
730	return ext_tree_prepare_commit(arg);
731}
732
733static void
734bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata)
735{
736	ext_tree_mark_committed(&lcdata->args, lcdata->res.status);
737}
738
739static int
740bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
741{
742	dprintk("%s enter\n", __func__);
 
 
743
744	if (server->pnfs_blksize == 0) {
745		dprintk("%s Server did not return blksize\n", __func__);
746		return -EINVAL;
747	}
748	if (server->pnfs_blksize > PAGE_SIZE) {
749		printk(KERN_ERR "%s: pNFS blksize %d not supported.\n",
750			__func__, server->pnfs_blksize);
751		return -EINVAL;
 
 
 
 
 
 
752	}
753
754	return 0;
755}
756
757static bool
758is_aligned_req(struct nfs_pageio_descriptor *pgio,
759		struct nfs_page *req, unsigned int alignment, bool is_write)
760{
 
 
 
 
 
 
 
 
 
 
761	/*
762	 * Always accept buffered writes, higher layers take care of the
763	 * right alignment.
764	 */
765	if (pgio->pg_dreq == NULL)
766		return true;
767
768	if (!IS_ALIGNED(req->wb_offset, alignment))
769		return false;
770
771	if (IS_ALIGNED(req->wb_bytes, alignment))
772		return true;
773
774	if (is_write &&
775	    (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode))) {
776		/*
777		 * If the write goes up to the inode size, just write
778		 * the full page.  Data past the inode size is
779		 * guaranteed to be zeroed by the higher level client
780		 * code, and this behaviour is mandated by RFC 5663
781		 * section 2.3.2.
782		 */
783		return true;
784	}
785
786	return false;
787}
788
789static void
790bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
791{
792	if (!is_aligned_req(pgio, req, SECTOR_SIZE, false)) {
793		nfs_pageio_reset_read_mds(pgio);
794		return;
795	}
796
797	pnfs_generic_pg_init_read(pgio, req);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
798}
799
800/*
801 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
802 * of bytes (maximum @req->wb_bytes) that can be coalesced.
803 */
804static size_t
805bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
806		struct nfs_page *req)
807{
808	if (!is_aligned_req(pgio, req, SECTOR_SIZE, false))
809		return 0;
810	return pnfs_generic_pg_test(pgio, prev, req);
811}
 
812
813/*
814 * Return the number of contiguous bytes for a given inode
815 * starting at page frame idx.
816 */
817static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx)
818{
819	struct address_space *mapping = inode->i_mapping;
820	pgoff_t end;
821
822	/* Optimize common case that writes from 0 to end of file */
823	end = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
824	if (end != inode->i_mapping->nrpages) {
825		rcu_read_lock();
826		end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX);
827		rcu_read_unlock();
828	}
829
830	if (!end)
831		return i_size_read(inode) - (idx << PAGE_SHIFT);
832	else
833		return (end - idx) << PAGE_SHIFT;
834}
835
836static void
837bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
838{
839	u64 wb_size;
840
841	if (!is_aligned_req(pgio, req, PAGE_SIZE, true)) {
842		nfs_pageio_reset_write_mds(pgio);
843		return;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
844	}
 
 
845
846	if (pgio->pg_dreq == NULL)
847		wb_size = pnfs_num_cont_bytes(pgio->pg_inode,
848					      req->wb_index);
849	else
850		wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
851
852	pnfs_generic_pg_init_write(pgio, req, wb_size);
853}
854
855/*
856 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
857 * of bytes (maximum @req->wb_bytes) that can be coalesced.
858 */
859static size_t
860bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
861		 struct nfs_page *req)
862{
863	if (!is_aligned_req(pgio, req, PAGE_SIZE, true))
864		return 0;
865	return pnfs_generic_pg_test(pgio, prev, req);
 
 
 
866}
867
 /*
  * Page I/O operations for reads: the driver's own alignment checks in
  * pg_init/pg_test, generic pNFS helpers for doio and cleanup.
  */
 868static const struct nfs_pageio_ops bl_pg_read_ops = {
 869	.pg_init = bl_pg_init_read,
 870	.pg_test = bl_pg_test_read,
 871	.pg_doio = pnfs_generic_pg_readpages,
 872	.pg_cleanup = pnfs_generic_pg_cleanup,
 873};
874
 /*
  * Page I/O operations for writes: the driver's own alignment checks in
  * pg_init/pg_test, generic pNFS helpers for doio and cleanup.
  */
 875static const struct nfs_pageio_ops bl_pg_write_ops = {
 876	.pg_init = bl_pg_init_write,
 877	.pg_test = bl_pg_test_write,
 878	.pg_doio = pnfs_generic_pg_writepages,
 879	.pg_cleanup = pnfs_generic_pg_cleanup,
 880};
881
 /*
  * Layout driver for LAYOUT_BLOCK_VOLUME, registered by
  * nfs4blocklayout_init().  It differs from scsilayout_type only in .id,
  * .name and .alloc_layout_hdr.
  */
 882static struct pnfs_layoutdriver_type blocklayout_type = {
 883	.id				= LAYOUT_BLOCK_VOLUME,
 884	.name				= "LAYOUT_BLOCK_VOLUME",
 885	.owner				= THIS_MODULE,
 886	.flags				= PNFS_LAYOUTRET_ON_SETATTR |
 887					  PNFS_READ_WHOLE_PAGE,
 888	.read_pagelist			= bl_read_pagelist,
 889	.write_pagelist			= bl_write_pagelist,
 890	.alloc_layout_hdr		= bl_alloc_layout_hdr,
 891	.free_layout_hdr		= bl_free_layout_hdr,
 892	.alloc_lseg			= bl_alloc_lseg,
 893	.free_lseg			= bl_free_lseg,
 894	.return_range			= bl_return_range,
 895	.prepare_layoutcommit		= bl_prepare_layoutcommit,
 896	.cleanup_layoutcommit		= bl_cleanup_layoutcommit,
 897	.set_layoutdriver		= bl_set_layoutdriver,
 898	.alloc_deviceid_node		= bl_alloc_deviceid_node,
 899	.free_deviceid_node		= bl_free_deviceid_node,
 900	.pg_read_ops			= &bl_pg_read_ops,
 901	.pg_write_ops			= &bl_pg_write_ops,
 902	.sync				= pnfs_generic_sync,
 903};
904
 /*
  * Layout driver for LAYOUT_SCSI, registered by nfs4blocklayout_init().
  * It shares every hook with blocklayout_type except .alloc_layout_hdr,
  * which flags the layout as a SCSI layout.
  */
 905static struct pnfs_layoutdriver_type scsilayout_type = {
 906	.id				= LAYOUT_SCSI,
 907	.name				= "LAYOUT_SCSI",
 908	.owner				= THIS_MODULE,
 909	.flags				= PNFS_LAYOUTRET_ON_SETATTR |
 910					  PNFS_READ_WHOLE_PAGE,
 911	.read_pagelist			= bl_read_pagelist,
 912	.write_pagelist			= bl_write_pagelist,
 913	.alloc_layout_hdr		= sl_alloc_layout_hdr,
 914	.free_layout_hdr		= bl_free_layout_hdr,
 915	.alloc_lseg			= bl_alloc_lseg,
 916	.free_lseg			= bl_free_lseg,
 917	.return_range			= bl_return_range,
 918	.prepare_layoutcommit		= bl_prepare_layoutcommit,
 919	.cleanup_layoutcommit		= bl_cleanup_layoutcommit,
 920	.set_layoutdriver		= bl_set_layoutdriver,
 921	.alloc_deviceid_node		= bl_alloc_deviceid_node,
 922	.free_deviceid_node		= bl_free_deviceid_node,
 923	.pg_read_ops			= &bl_pg_read_ops,
 924	.pg_write_ops			= &bl_pg_write_ops,
 925	.sync				= pnfs_generic_sync,
 926};
927
928
929static int __init nfs4blocklayout_init(void)
930{
 
 
931	int ret;
932
933	dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
934
935	ret = bl_init_pipefs();
936	if (ret)
937		goto out;
938
939	ret = pnfs_register_layoutdriver(&blocklayout_type);
940	if (ret)
941		goto out_cleanup_pipe;
 
 
 
 
942
943	ret = pnfs_register_layoutdriver(&scsilayout_type);
 
 
944	if (ret)
945		goto out_unregister_block;
946	return 0;
947
948out_unregister_block:
949	pnfs_unregister_layoutdriver(&blocklayout_type);
950out_cleanup_pipe:
951	bl_cleanup_pipefs();
 
 
952out:
953	return ret;
 
 
 
 
954}
955
956static void __exit nfs4blocklayout_exit(void)
957{
958	dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
959	       __func__);
960
961	pnfs_unregister_layoutdriver(&scsilayout_type);
962	pnfs_unregister_layoutdriver(&blocklayout_type);
963	bl_cleanup_pipefs();
964}
965
966MODULE_ALIAS("nfs-layouttype4-3");
967
968module_init(nfs4blocklayout_init);
969module_exit(nfs4blocklayout_exit);

   1/*
   2 *  linux/fs/nfs/blocklayout/blocklayout.c
   3 *
   4 *  Module for the NFSv4.1 pNFS block layout driver.
   5 *
   6 *  Copyright (c) 2006 The Regents of the University of Michigan.
   7 *  All rights reserved.
   8 *
   9 *  Andy Adamson <andros@citi.umich.edu>
  10 *  Fred Isaman <iisaman@umich.edu>
  11 *
  12 * permission is granted to use, copy, create derivative works and
  13 * redistribute this software and such derivative works for any purpose,
  14 * so long as the name of the university of michigan is not used in
  15 * any advertising or publicity pertaining to the use or distribution
  16 * of this software without specific, written prior authorization.  if
  17 * the above copyright notice or any other identification of the
  18 * university of michigan is included in any copy of any portion of
  19 * this software, then the disclaimer below must also be included.
  20 *
  21 * this software is provided as is, without representation from the
  22 * university of michigan as to its fitness for any purpose, and without
  23 * warranty by the university of michigan of any kind, either express
  24 * or implied, including without limitation the implied warranties of
  25 * merchantability and fitness for a particular purpose.  the regents
  26 * of the university of michigan shall not be liable for any damages,
  27 * including special, indirect, incidental, or consequential damages,
  28 * with respect to any claim arising out or in connection with the use
  29 * of the software, even if it has been or is hereafter advised of the
  30 * possibility of such damages.
  31 */
  32
  33#include <linux/module.h>
  34#include <linux/init.h>
  35#include <linux/mount.h>
  36#include <linux/namei.h>
  37#include <linux/bio.h>		/* struct bio */
  38#include <linux/buffer_head.h>	/* various write calls */
  39#include <linux/prefetch.h>
 
  40
 
 
 
  41#include "blocklayout.h"
  42
  43#define NFSDBG_FACILITY	NFSDBG_PNFS_LD
  44
  45MODULE_LICENSE("GPL");
  46MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
  47MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
  48
   /* NOTE(review): neither object is used in the visible part of this older
    * revision; the remarks below are guesses from the names only.
    */
   49struct dentry *bl_device_pipe;	/* presumably the pipe used to reach a userspace helper - confirm */
   50wait_queue_head_t bl_wq;	/* presumably where callers wait for that helper - confirm */
  51
   /*
    * Debug helper: print the address of @page and a selection of its page
    * flags, one per line, through dprintk().
    */
   52static void print_page(struct page *page)
   53{
   54	dprintk("PRINTPAGE page %p\n", page);
   55	dprintk("	PagePrivate %d\n", PagePrivate(page));
   56	dprintk("	PageUptodate %d\n", PageUptodate(page));
   57	dprintk("	PageError %d\n", PageError(page));
   58	dprintk("	PageDirty %d\n", PageDirty(page));
   59	dprintk("	PageReferenced %d\n", PageReferenced(page));
   60	dprintk("	PageLocked %d\n", PageLocked(page));
   61	dprintk("	PageWriteback %d\n", PageWriteback(page));
   62	dprintk("	PageMappedToDisk %d\n", PageMappedToDisk(page));
   63	dprintk("\n");
   64}
  65
  66/* Given the be associated with isect, determine if page data needs to be
  67 * initialized.
  68 */
  69static int is_hole(struct pnfs_block_extent *be, sector_t isect)
  70{
  71	if (be->be_state == PNFS_BLOCK_NONE_DATA)
  72		return 1;
  73	else if (be->be_state != PNFS_BLOCK_INVALID_DATA)
  74		return 0;
  75	else
  76		return !bl_is_sector_init(be->be_inval, isect);
  77}
  78
  79/* Given the be associated with isect, determine if page data can be
  80 * written to disk.
  81 */
  82static int is_writable(struct pnfs_block_extent *be, sector_t isect)
  83{
  84	return (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
  85		be->be_state == PNFS_BLOCK_INVALID_DATA);
  86}
  87
  88/* The data we are handed might be spread across several bios.  We need
  89 * to track when the last one is finished.
  90 */
  91struct parallel_io {
  92	struct kref refcnt;
  93	struct rpc_call_ops call_ops;
  94	void (*pnfs_callback) (void *data);
  95	void *data;
  96};
  97
  98static inline struct parallel_io *alloc_parallel(void *data)
  99{
 100	struct parallel_io *rv;
 101
 102	rv  = kmalloc(sizeof(*rv), GFP_NOFS);
 103	if (rv) {
 104		rv->data = data;
 105		kref_init(&rv->refcnt);
 106	}
 107	return rv;
 108}
 109
 110static inline void get_parallel(struct parallel_io *p)
 111{
 112	kref_get(&p->refcnt);
 113}
 114
 115static void destroy_parallel(struct kref *kref)
 116{
 117	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
 118
 119	dprintk("%s enter\n", __func__);
 120	p->pnfs_callback(p->data);
 121	kfree(p);
 122}
 123
 124static inline void put_parallel(struct parallel_io *p)
 125{
 126	kref_put(&p->refcnt, destroy_parallel);
 127}
 128
 129static struct bio *
 130bl_submit_bio(int rw, struct bio *bio)
 131{
 132	if (bio) {
 133		get_parallel(bio->bi_private);
 134		dprintk("%s submitting %s bio %u@%llu\n", __func__,
 135			rw == READ ? "read" : "write",
 136			bio->bi_size, (unsigned long long)bio->bi_sector);
 137		submit_bio(rw, bio);
 138	}
 139	return NULL;
 140}
 141
 142static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
 143				     struct pnfs_block_extent *be,
 144				     void (*end_io)(struct bio *, int err),
 145				     struct parallel_io *par)
 146{
 147	struct bio *bio;
 148
 
 149	bio = bio_alloc(GFP_NOIO, npg);
 150	if (!bio)
 151		return NULL;
 
 
 152
 153	bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
 154	bio->bi_bdev = be->be_mdev;
 155	bio->bi_end_io = end_io;
 156	bio->bi_private = par;
 
 
 157	return bio;
 158}
 159
 160static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
 161				      sector_t isect, struct page *page,
 162				      struct pnfs_block_extent *be,
 163				      void (*end_io)(struct bio *, int err),
 164				      struct parallel_io *par)
 165{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 166retry:
 167	if (!bio) {
 168		bio = bl_alloc_init_bio(npg, isect, be, end_io, par);
 
 169		if (!bio)
 170			return ERR_PTR(-ENOMEM);
 171	}
 172	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
 173		bio = bl_submit_bio(rw, bio);
 174		goto retry;
 175	}
 176	return bio;
 177}
 178
 179static void bl_set_lo_fail(struct pnfs_layout_segment *lseg)
 180{
 181	if (lseg->pls_range.iomode == IOMODE_RW) {
 182		dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
 183		set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
 184	} else {
 185		dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
 186		set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
 
 
 187	}
 188}
 189
 190/* This is basically copied from mpage_end_io_read */
 191static void bl_end_io_read(struct bio *bio, int err)
 192{
 193	struct parallel_io *par = bio->bi_private;
 194	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 195	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 196	struct nfs_read_data *rdata = (struct nfs_read_data *)par->data;
 197
 198	do {
 199		struct page *page = bvec->bv_page;
 200
 201		if (--bvec >= bio->bi_io_vec)
 202			prefetchw(&bvec->bv_page->flags);
 203		if (uptodate)
 204			SetPageUptodate(page);
 205	} while (bvec >= bio->bi_io_vec);
 206	if (!uptodate) {
 207		if (!rdata->pnfs_error)
 208			rdata->pnfs_error = -EIO;
 209		bl_set_lo_fail(rdata->lseg);
 210	}
 211	bio_put(bio);
 212	put_parallel(par);
 213}
 214
 215static void bl_read_cleanup(struct work_struct *work)
 216{
 217	struct rpc_task *task;
 218	struct nfs_read_data *rdata;
 219	dprintk("%s enter\n", __func__);
 220	task = container_of(work, struct rpc_task, u.tk_work);
 221	rdata = container_of(task, struct nfs_read_data, task);
 222	pnfs_ld_read_done(rdata);
 223}
 224
 225static void
 226bl_end_par_io_read(void *data)
 227{
 228	struct nfs_read_data *rdata = data;
 229
 230	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
 231	schedule_work(&rdata->task.u.tk_work);
 232}
 233
 234/* We don't want normal .rpc_call_done callback used, so we replace it
 235 * with this stub.
 236 */
 237static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata)
 238{
 239	return;
 240}
 241
 242static enum pnfs_try_status
 243bl_read_pagelist(struct nfs_read_data *rdata)
 244{
 245	int i, hole;
 
 246	struct bio *bio = NULL;
 247	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
 248	sector_t isect, extent_length = 0;
 249	struct parallel_io *par;
 250	loff_t f_offset = rdata->args.offset;
 251	size_t count = rdata->args.count;
 252	struct page **pages = rdata->args.pages;
 253	int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
 
 
 
 
 254
 255	dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__,
 256	       rdata->npages, f_offset, count);
 
 257
 258	par = alloc_parallel(rdata);
 259	if (!par)
 260		goto use_mds;
 261	par->call_ops = *rdata->mds_ops;
 262	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
 263	par->pnfs_callback = bl_end_par_io_read;
 264	/* At this point, we can no longer jump to use_mds */
 
 265
 266	isect = (sector_t) (f_offset >> SECTOR_SHIFT);
 267	/* Code assumes extents are page-aligned */
 268	for (i = pg_index; i < rdata->npages; i++) {
 269		if (!extent_length) {
 270			/* We've used up the previous extent */
 271			bl_put_extent(be);
 272			bl_put_extent(cow_read);
 273			bio = bl_submit_bio(READ, bio);
 
 274			/* Get the next one */
 275			be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg),
 276					     isect, &cow_read);
 277			if (!be) {
 278				rdata->pnfs_error = -EIO;
 279				goto out;
 280			}
 281			extent_length = be->be_length -
 282				(isect - be->be_f_offset);
 283			if (cow_read) {
 284				sector_t cow_length = cow_read->be_length -
 285					(isect - cow_read->be_f_offset);
 286				extent_length = min(extent_length, cow_length);
 287			}
 
 
 
 
 288		}
 289		hole = is_hole(be, isect);
 290		if (hole && !cow_read) {
 291			bio = bl_submit_bio(READ, bio);
 292			/* Fill hole w/ zeroes w/o accessing device */
 293			dprintk("%s Zeroing page for hole\n", __func__);
 294			zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
 295			print_page(pages[i]);
 296			SetPageUptodate(pages[i]);
 
 297		} else {
 298			struct pnfs_block_extent *be_read;
 299
 300			be_read = (hole && cow_read) ? cow_read : be;
 301			bio = bl_add_page_to_bio(bio, rdata->npages - i, READ,
 302						 isect, pages[i], be_read,
 303						 bl_end_io_read, par);
 304			if (IS_ERR(bio)) {
 305				rdata->pnfs_error = PTR_ERR(bio);
 
 306				goto out;
 307			}
 308		}
 309		isect += PAGE_CACHE_SECTORS;
 310		extent_length -= PAGE_CACHE_SECTORS;
 311	}
 312	if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) {
 313		rdata->res.eof = 1;
 314		rdata->res.count = rdata->inode->i_size - f_offset;
 
 
 
 315	} else {
 316		rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
 317	}
 318out:
 319	bl_put_extent(be);
 320	bl_put_extent(cow_read);
 321	bl_submit_bio(READ, bio);
 
 322	put_parallel(par);
 323	return PNFS_ATTEMPTED;
 324
 325 use_mds:
 326	dprintk("Giving up and using normal NFS\n");
 327	return PNFS_NOT_ATTEMPTED;
 328}
 329
 330static void mark_extents_written(struct pnfs_block_layout *bl,
 331				 __u64 offset, __u32 count)
 332{
 333	sector_t isect, end;
 334	struct pnfs_block_extent *be;
 335
 336	dprintk("%s(%llu, %u)\n", __func__, offset, count);
 337	if (count == 0)
 338		return;
 339	isect = (offset & (long)(PAGE_CACHE_MASK)) >> SECTOR_SHIFT;
 340	end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK);
 341	end >>= SECTOR_SHIFT;
 342	while (isect < end) {
 343		sector_t len;
 344		be = bl_find_get_extent(bl, isect, NULL);
 345		BUG_ON(!be); /* FIXME */
 346		len = min(end, be->be_f_offset + be->be_length) - isect;
 347		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
 348			bl_mark_for_commit(be, isect, len); /* What if fails? */
 349		isect += len;
 350		bl_put_extent(be);
 351	}
 352}
 353
 354static void bl_end_io_write_zero(struct bio *bio, int err)
 355{
 356	struct parallel_io *par = bio->bi_private;
 357	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 358	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 359	struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
 360
 361	do {
 362		struct page *page = bvec->bv_page;
 363
 364		if (--bvec >= bio->bi_io_vec)
 365			prefetchw(&bvec->bv_page->flags);
 366		/* This is the zeroing page we added */
 367		end_page_writeback(page);
 368		page_cache_release(page);
 369	} while (bvec >= bio->bi_io_vec);
 370	if (!uptodate) {
 371		if (!wdata->pnfs_error)
 372			wdata->pnfs_error = -EIO;
 373		bl_set_lo_fail(wdata->lseg);
 374	}
 375	bio_put(bio);
 376	put_parallel(par);
 377}
 378
 379/* This is basically copied from mpage_end_io_read */
 380static void bl_end_io_write(struct bio *bio, int err)
 381{
 382	struct parallel_io *par = bio->bi_private;
 383	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 384	struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
 385
 386	if (!uptodate) {
 387		if (!wdata->pnfs_error)
 388			wdata->pnfs_error = -EIO;
 389		bl_set_lo_fail(wdata->lseg);
 390	}
 391	bio_put(bio);
 392	put_parallel(par);
 393}
 394
 395/* Function scheduled for call during bl_end_par_io_write,
 396 * it marks sectors as written and extends the commitlist.
 397 */
 398static void bl_write_cleanup(struct work_struct *work)
 399{
 400	struct rpc_task *task;
 401	struct nfs_write_data *wdata;
 
 
 402	dprintk("%s enter\n", __func__);
 403	task = container_of(work, struct rpc_task, u.tk_work);
 404	wdata = container_of(task, struct nfs_write_data, task);
 405	if (!wdata->pnfs_error) {
 406		/* Marks for LAYOUTCOMMIT */
 407		mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
 408				     wdata->args.offset, wdata->args.count);
 
 
 
 409	}
 410	pnfs_ld_write_done(wdata);
 
 411}
 412
 413/* Called when last of bios associated with a bl_write_pagelist call finishes */
 414static void bl_end_par_io_write(void *data)
 415{
 416	struct nfs_write_data *wdata = data;
 417
 418	wdata->task.tk_status = 0;
 419	wdata->verf.committed = NFS_FILE_SYNC;
 420	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
 421	schedule_work(&wdata->task.u.tk_work);
 422}
 423
 424/* FIXME STUB - mark intersection of layout and page as bad, so is not
 425 * used again.
 426 */
 427static void mark_bad_read(void)
 428{
 429	return;
 430}
 431
 432/*
 433 * map_block:  map a requested I/0 block (isect) into an offset in the LVM
 434 * block_device
 435 */
 436static void
 437map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be)
 438{
 439	dprintk("%s enter be=%p\n", __func__, be);
 440
 441	set_buffer_mapped(bh);
 442	bh->b_bdev = be->be_mdev;
 443	bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >>
 444	    (be->be_mdev->bd_inode->i_blkbits - SECTOR_SHIFT);
 445
 446	dprintk("%s isect %llu, bh->b_blocknr %ld, using bsize %Zd\n",
 447		__func__, (unsigned long long)isect, (long)bh->b_blocknr,
 448		bh->b_size);
 449	return;
 450}
 451
 452/* Given an unmapped page, zero it or read in page for COW, page is locked
 453 * by caller.
 454 */
 455static int
 456init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read)
 457{
 458	struct buffer_head *bh = NULL;
 459	int ret = 0;
 460	sector_t isect;
 461
 462	dprintk("%s enter, %p\n", __func__, page);
 463	BUG_ON(PageUptodate(page));
 464	if (!cow_read) {
 465		zero_user_segment(page, 0, PAGE_SIZE);
 466		SetPageUptodate(page);
 467		goto cleanup;
 468	}
 469
 470	bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0);
 471	if (!bh) {
 472		ret = -ENOMEM;
 473		goto cleanup;
 474	}
 475
 476	isect = (sector_t) page->index << PAGE_CACHE_SECTOR_SHIFT;
 477	map_block(bh, isect, cow_read);
 478	if (!bh_uptodate_or_lock(bh))
 479		ret = bh_submit_read(bh);
 480	if (ret)
 481		goto cleanup;
 482	SetPageUptodate(page);
 483
 484cleanup:
 485	bl_put_extent(cow_read);
 486	if (bh)
 487		free_buffer_head(bh);
 488	if (ret) {
 489		/* Need to mark layout with bad read...should now
 490		 * just use nfs4 for reads and writes.
 491		 */
 492		mark_bad_read();
 493	}
 494	return ret;
 495}
 496
 497static enum pnfs_try_status
 498bl_write_pagelist(struct nfs_write_data *wdata, int sync)
 499{
 500	int i, ret, npg_zero, pg_index, last = 0;
 
 501	struct bio *bio = NULL;
 502	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
 503	sector_t isect, last_isect = 0, extent_length = 0;
 504	struct parallel_io *par;
 505	loff_t offset = wdata->args.offset;
 506	size_t count = wdata->args.count;
 507	struct page **pages = wdata->args.pages;
 508	struct page *page;
 509	pgoff_t index;
 510	u64 temp;
 511	int npg_per_block =
 512	    NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
 513
 514	dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
 515	/* At this point, wdata->pages is a (sequential) list of nfs_pages.
 
 516	 * We want to write each, and if there is an error set pnfs_error
 517	 * to have it redone using nfs.
 518	 */
 519	par = alloc_parallel(wdata);
 520	if (!par)
 521		return PNFS_NOT_ATTEMPTED;
 522	par->call_ops = *wdata->mds_ops;
 523	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
 524	par->pnfs_callback = bl_end_par_io_write;
 525	/* At this point, have to be more careful with error handling */
 526
 527	isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
 528	be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
 529	if (!be || !is_writable(be, isect)) {
 530		dprintk("%s no matching extents!\n", __func__);
 531		wdata->pnfs_error = -EINVAL;
 532		goto out;
 533	}
 534
 535	/* First page inside INVALID extent */
 536	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
 537		temp = offset >> PAGE_CACHE_SHIFT;
 538		npg_zero = do_div(temp, npg_per_block);
 539		isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
 540				     (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
 541		extent_length = be->be_length - (isect - be->be_f_offset);
 542
 543fill_invalid_ext:
 544		dprintk("%s need to zero %d pages\n", __func__, npg_zero);
 545		for (;npg_zero > 0; npg_zero--) {
 546			/* page ref released in bl_end_io_write_zero */
 547			index = isect >> PAGE_CACHE_SECTOR_SHIFT;
 548			dprintk("%s zero %dth page: index %lu isect %llu\n",
 549				__func__, npg_zero, index,
 550				(unsigned long long)isect);
 551			page =
 552			    find_or_create_page(wdata->inode->i_mapping, index,
 553						GFP_NOFS);
 554			if (!page) {
 555				dprintk("%s oom\n", __func__);
 556				wdata->pnfs_error = -ENOMEM;
 557				goto out;
 558			}
 559
 560			/* PageDirty: Other will write this out
 561			 * PageWriteback: Other is writing this out
 562			 * PageUptodate: It was read before
 563			 * sector_initialized: already written out
 564			 */
 565			if (PageDirty(page) || PageWriteback(page) ||
 566			    bl_is_sector_init(be->be_inval, isect)) {
 567				print_page(page);
 568				unlock_page(page);
 569				page_cache_release(page);
 570				goto next_page;
 571			}
 572			if (!PageUptodate(page)) {
 573				/* New page, readin or zero it */
 574				init_page_for_write(page, cow_read);
 575			}
 576			set_page_writeback(page);
 577			unlock_page(page);
 578
 579			ret = bl_mark_sectors_init(be->be_inval, isect,
 580						       PAGE_CACHE_SECTORS,
 581						       NULL);
 582			if (unlikely(ret)) {
 583				dprintk("%s bl_mark_sectors_init fail %d\n",
 584					__func__, ret);
 585				end_page_writeback(page);
 586				page_cache_release(page);
 587				wdata->pnfs_error = ret;
 588				goto out;
 589			}
 590			bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
 591						 isect, page, be,
 592						 bl_end_io_write_zero, par);
 593			if (IS_ERR(bio)) {
 594				wdata->pnfs_error = PTR_ERR(bio);
 595				goto out;
 596			}
 597			/* FIXME: This should be done in bi_end_io */
 598			mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
 599					     page->index << PAGE_CACHE_SHIFT,
 600					     PAGE_CACHE_SIZE);
 601next_page:
 602			isect += PAGE_CACHE_SECTORS;
 603			extent_length -= PAGE_CACHE_SECTORS;
 604		}
 605		if (last)
 606			goto write_done;
 607	}
 608	bio = bl_submit_bio(WRITE, bio);
 609
 610	/* Middle pages */
 611	pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
 612	for (i = pg_index; i < wdata->npages; i++) {
 613		if (!extent_length) {
 614			/* We've used up the previous extent */
 615			bl_put_extent(be);
 616			bio = bl_submit_bio(WRITE, bio);
 617			/* Get the next one */
 618			be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg),
 619					     isect, NULL);
 620			if (!be || !is_writable(be, isect)) {
 621				wdata->pnfs_error = -EINVAL;
 622				goto out;
 623			}
 624			extent_length = be->be_length -
 625			    (isect - be->be_f_offset);
 626		}
 627		if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
 628			ret = bl_mark_sectors_init(be->be_inval, isect,
 629						       PAGE_CACHE_SECTORS,
 630						       NULL);
 631			if (unlikely(ret)) {
 632				dprintk("%s bl_mark_sectors_init fail %d\n",
 633					__func__, ret);
 634				wdata->pnfs_error = ret;
 635				goto out;
 636			}
 637		}
 638		bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE,
 639					 isect, pages[i], be,
 640					 bl_end_io_write, par);
 641		if (IS_ERR(bio)) {
 642			wdata->pnfs_error = PTR_ERR(bio);
 
 643			goto out;
 644		}
 645		isect += PAGE_CACHE_SECTORS;
 646		last_isect = isect;
 647		extent_length -= PAGE_CACHE_SECTORS;
 648	}
 649
 650	/* Last page inside INVALID extent */
 651	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
 652		bio = bl_submit_bio(WRITE, bio);
 653		temp = last_isect >> PAGE_CACHE_SECTOR_SHIFT;
 654		npg_zero = npg_per_block - do_div(temp, npg_per_block);
 655		if (npg_zero < npg_per_block) {
 656			last = 1;
 657			goto fill_invalid_ext;
 658		}
 659	}
 660
 661write_done:
 662	wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset);
 663	if (count < wdata->res.count) {
 664		wdata->res.count = count;
 665	}
 666out:
 667	bl_put_extent(be);
 668	bl_submit_bio(WRITE, bio);
 
 669	put_parallel(par);
 670	return PNFS_ATTEMPTED;
 671}
 672
 673/* FIXME - range ignored */
 674static void
 675release_extents(struct pnfs_block_layout *bl, struct pnfs_layout_range *range)
 676{
 677	int i;
 678	struct pnfs_block_extent *be;
 679
 680	spin_lock(&bl->bl_ext_lock);
 681	for (i = 0; i < EXTENT_LISTS; i++) {
 682		while (!list_empty(&bl->bl_extents[i])) {
 683			be = list_first_entry(&bl->bl_extents[i],
 684					      struct pnfs_block_extent,
 685					      be_node);
 686			list_del(&be->be_node);
 687			bl_put_extent(be);
 688		}
 689	}
 690	spin_unlock(&bl->bl_ext_lock);
 691}
 692
 693static void
 694release_inval_marks(struct pnfs_inval_markings *marks)
 695{
 696	struct pnfs_inval_tracking *pos, *temp;
 697
 698	list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
 699		list_del(&pos->it_link);
 700		kfree(pos);
 701	}
 702	return;
 703}
 704
 705static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo)
 706{
 707	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
 
 708
 709	dprintk("%s enter\n", __func__);
 710	release_extents(bl, NULL);
 711	release_inval_marks(&bl->bl_inval);
 
 
 712	kfree(bl);
 713}
 714
 715static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
 716						   gfp_t gfp_flags)
 717{
 718	struct pnfs_block_layout *bl;
 719
 720	dprintk("%s enter\n", __func__);
 721	bl = kzalloc(sizeof(*bl), gfp_flags);
 722	if (!bl)
 723		return NULL;
 
 
 
 724	spin_lock_init(&bl->bl_ext_lock);
 725	INIT_LIST_HEAD(&bl->bl_extents[0]);
 726	INIT_LIST_HEAD(&bl->bl_extents[1]);
 727	INIT_LIST_HEAD(&bl->bl_commit);
 728	INIT_LIST_HEAD(&bl->bl_committing);
 729	bl->bl_count = 0;
 730	bl->bl_blocksize = NFS_SERVER(inode)->pnfs_blksize >> SECTOR_SHIFT;
 731	BL_INIT_INVAL_MARKS(&bl->bl_inval, bl->bl_blocksize);
 732	return &bl->bl_layout;
 733}
 734
 
 
 
 
 
 
 
 
 
 
 
 
 735static void bl_free_lseg(struct pnfs_layout_segment *lseg)
 736{
 737	dprintk("%s enter\n", __func__);
 738	kfree(lseg);
 739}
 740
 741/* We pretty much ignore lseg, and store all data layout wide, so we
 742 * can correctly merge.
 
 
 
 
 
 
 
 
 743 */
 744static struct pnfs_layout_segment *bl_alloc_lseg(struct pnfs_layout_hdr *lo,
 745						 struct nfs4_layoutget_res *lgr,
 746						 gfp_t gfp_flags)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 747{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 748	struct pnfs_layout_segment *lseg;
 749	int status;
 
 
 
 
 
 
 750
 751	dprintk("%s enter\n", __func__);
 752	lseg = kzalloc(sizeof(*lseg), gfp_flags);
 
 753	if (!lseg)
 754		return ERR_PTR(-ENOMEM);
 755	status = nfs4_blk_process_layoutget(lo, lgr, gfp_flags);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 756	if (status) {
 757		/* We don't want to call the full-blown bl_free_lseg,
 758		 * since on error extents were not touched.
 759		 */
 760		kfree(lseg);
 761		return ERR_PTR(status);
 762	}
 763	return lseg;
 764}
 765
 766static void
 767bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr,
 768		       const struct nfs4_layoutcommit_args *arg)
 769{
 770	dprintk("%s enter\n", __func__);
 771	encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 772}
 773
 774static void
 775bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata)
 776{
 777	struct pnfs_layout_hdr *lo = NFS_I(lcdata->args.inode)->layout;
 
 778
 
 
 
 779	dprintk("%s enter\n", __func__);
 780	clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), &lcdata->args, lcdata->res.status);
 781}
 782
 783static void free_blk_mountid(struct block_mount_id *mid)
 784{
 785	if (mid) {
 786		struct pnfs_block_dev *dev;
 787		spin_lock(&mid->bm_lock);
 788		while (!list_empty(&mid->bm_devlist)) {
 789			dev = list_first_entry(&mid->bm_devlist,
 790					       struct pnfs_block_dev,
 791					       bm_node);
 792			list_del(&dev->bm_node);
 793			bl_free_block_dev(dev);
 794		}
 795		spin_unlock(&mid->bm_lock);
 796		kfree(mid);
 797	}
 
 
 798}
 799
 800/* This is mostly copied from the filelayout's get_device_info function.
 801 * It seems much of this should be at the generic pnfs level.
 802 */
 803static struct pnfs_block_dev *
 804nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
 805			struct nfs4_deviceid *d_id)
 806{
 807	struct pnfs_device *dev;
 808	struct pnfs_block_dev *rv = NULL;
 809	u32 max_resp_sz;
 810	int max_pages;
 811	struct page **pages = NULL;
 812	int i, rc;
 813
 814	/*
 815	 * Use the session max response size as the basis for setting
 816	 * GETDEVICEINFO's maxcount
 817	 */
 818	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
 819	max_pages = max_resp_sz >> PAGE_SHIFT;
 820	dprintk("%s max_resp_sz %u max_pages %d\n",
 821		__func__, max_resp_sz, max_pages);
 822
 823	dev = kmalloc(sizeof(*dev), GFP_NOFS);
 824	if (!dev) {
 825		dprintk("%s kmalloc failed\n", __func__);
 826		return NULL;
 
 
 
 
 
 
 
 
 
 
 827	}
 828
 829	pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS);
 830	if (pages == NULL) {
 831		kfree(dev);
 832		return NULL;
 
 
 
 
 
 833	}
 834	for (i = 0; i < max_pages; i++) {
 835		pages[i] = alloc_page(GFP_NOFS);
 836		if (!pages[i])
 837			goto out_free;
 838	}
 839
 840	memcpy(&dev->dev_id, d_id, sizeof(*d_id));
 841	dev->layout_type = LAYOUT_BLOCK_VOLUME;
 842	dev->pages = pages;
 843	dev->pgbase = 0;
 844	dev->pglen = PAGE_SIZE * max_pages;
 845	dev->mincount = 0;
 846
 847	dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
 848	rc = nfs4_proc_getdeviceinfo(server, dev);
 849	dprintk("%s getdevice info returns %d\n", __func__, rc);
 850	if (rc)
 851		goto out_free;
 852
 853	rv = nfs4_blk_decode_device(server, dev);
 854 out_free:
 855	for (i = 0; i < max_pages; i++)
 856		__free_page(pages[i]);
 857	kfree(pages);
 858	kfree(dev);
 859	return rv;
 860}
 861
 862static int
 863bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
 
 
 
 
 
 864{
 865	struct block_mount_id *b_mt_id = NULL;
 866	struct pnfs_devicelist *dlist = NULL;
 867	struct pnfs_block_dev *bdev;
 868	LIST_HEAD(block_disklist);
 869	int status = 0, i;
 870
 871	dprintk("%s enter\n", __func__);
 
 
 
 
 
 
 
 872
 873	if (server->pnfs_blksize == 0) {
 874		dprintk("%s Server did not return blksize\n", __func__);
 875		return -EINVAL;
 
 
 
 876	}
 877	b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_NOFS);
 878	if (!b_mt_id) {
 879		status = -ENOMEM;
 880		goto out_error;
 881	}
 882	/* Initialize nfs4 block layout mount id */
 883	spin_lock_init(&b_mt_id->bm_lock);
 884	INIT_LIST_HEAD(&b_mt_id->bm_devlist);
 885
 886	dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_NOFS);
 887	if (!dlist) {
 888		status = -ENOMEM;
 889		goto out_error;
 890	}
 891	dlist->eof = 0;
 892	while (!dlist->eof) {
 893		status = nfs4_proc_getdevicelist(server, fh, dlist);
 894		if (status)
 895			goto out_error;
 896		dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n",
 897			__func__, dlist->num_devs, dlist->eof);
 898		for (i = 0; i < dlist->num_devs; i++) {
 899			bdev = nfs4_blk_get_deviceinfo(server, fh,
 900						       &dlist->dev_id[i]);
 901			if (!bdev) {
 902				status = -ENODEV;
 903				goto out_error;
 904			}
 905			spin_lock(&b_mt_id->bm_lock);
 906			list_add(&bdev->bm_node, &b_mt_id->bm_devlist);
 907			spin_unlock(&b_mt_id->bm_lock);
 908		}
 909	}
 910	dprintk("%s SUCCESS\n", __func__);
 911	server->pnfs_ld_data = b_mt_id;
 912
 913 out_return:
 914	kfree(dlist);
 915	return status;
 916
 917 out_error:
 918	free_blk_mountid(b_mt_id);
 919	goto out_return;
 920}
 921
 922static int
 923bl_clear_layoutdriver(struct nfs_server *server)
 
 
 
 
 
 924{
 925	struct block_mount_id *b_mt_id = server->pnfs_ld_data;
 926
 927	dprintk("%s enter\n", __func__);
 928	free_blk_mountid(b_mt_id);
 929	dprintk("%s RETURNS\n", __func__);
 930	return 0;
 931}
 932
 933static const struct nfs_pageio_ops bl_pg_read_ops = {
 934	.pg_init = pnfs_generic_pg_init_read,
 935	.pg_test = pnfs_generic_pg_test,
 936	.pg_doio = pnfs_generic_pg_readpages,
 
 937};
 938
 939static const struct nfs_pageio_ops bl_pg_write_ops = {
 940	.pg_init = pnfs_generic_pg_init_write,
 941	.pg_test = pnfs_generic_pg_test,
 942	.pg_doio = pnfs_generic_pg_writepages,
 
 943};
 944
 945static struct pnfs_layoutdriver_type blocklayout_type = {
 946	.id				= LAYOUT_BLOCK_VOLUME,
 947	.name				= "LAYOUT_BLOCK_VOLUME",
 
 
 
 948	.read_pagelist			= bl_read_pagelist,
 949	.write_pagelist			= bl_write_pagelist,
 950	.alloc_layout_hdr		= bl_alloc_layout_hdr,
 951	.free_layout_hdr		= bl_free_layout_hdr,
 952	.alloc_lseg			= bl_alloc_lseg,
 953	.free_lseg			= bl_free_lseg,
 954	.encode_layoutcommit		= bl_encode_layoutcommit,
 
 955	.cleanup_layoutcommit		= bl_cleanup_layoutcommit,
 956	.set_layoutdriver		= bl_set_layoutdriver,
 957	.clear_layoutdriver		= bl_clear_layoutdriver,
 
 958	.pg_read_ops			= &bl_pg_read_ops,
 959	.pg_write_ops			= &bl_pg_write_ops,
 
 960};
 961
 962static const struct rpc_pipe_ops bl_upcall_ops = {
 963	.upcall		= bl_pipe_upcall,
 964	.downcall	= bl_pipe_downcall,
 965	.destroy_msg	= bl_pipe_destroy_msg,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 966};
 967
 
 968static int __init nfs4blocklayout_init(void)
 969{
 970	struct vfsmount *mnt;
 971	struct path path;
 972	int ret;
 973
 974	dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
 975
 976	ret = pnfs_register_layoutdriver(&blocklayout_type);
 977	if (ret)
 978		goto out;
 979
 980	init_waitqueue_head(&bl_wq);
 981
 982	mnt = rpc_get_mount();
 983	if (IS_ERR(mnt)) {
 984		ret = PTR_ERR(mnt);
 985		goto out_remove;
 986	}
 987
 988	ret = vfs_path_lookup(mnt->mnt_root,
 989			      mnt,
 990			      NFS_PIPE_DIRNAME, 0, &path);
 991	if (ret)
 992		goto out_remove;
 
 993
 994	bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL,
 995				    &bl_upcall_ops, 0);
 996	if (IS_ERR(bl_device_pipe)) {
 997		ret = PTR_ERR(bl_device_pipe);
 998		goto out_remove;
 999	}
1000out:
1001	return ret;
1002
1003out_remove:
1004	pnfs_unregister_layoutdriver(&blocklayout_type);
1005	return ret;
1006}
1007
1008static void __exit nfs4blocklayout_exit(void)
1009{
1010	dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
1011	       __func__);
1012
 
1013	pnfs_unregister_layoutdriver(&blocklayout_type);
1014	rpc_unlink(bl_device_pipe);
1015}
1016
1017MODULE_ALIAS("nfs-layouttype4-3");
1018
1019module_init(nfs4blocklayout_init);
1020module_exit(nfs4blocklayout_exit);