Linux Audio

Check our new training course

Loading...
v3.15
  1//#define DEBUG
  2#include <linux/spinlock.h>
  3#include <linux/slab.h>
  4#include <linux/blkdev.h>
  5#include <linux/hdreg.h>
  6#include <linux/module.h>
  7#include <linux/mutex.h>
  8#include <linux/virtio.h>
  9#include <linux/virtio_blk.h>
 10#include <linux/scatterlist.h>
 11#include <linux/string_helpers.h>
 12#include <scsi/scsi_cmnd.h>
 13#include <linux/idr.h>
 14#include <linux/blk-mq.h>
 15#include <linux/numa.h>
 16
 17#define PART_BITS 4
 
 18
 19static int major;
 20static DEFINE_IDA(vd_index_ida);
 21
 22static struct workqueue_struct *virtblk_wq;
 23
 24struct virtio_blk
 25{
 26	struct virtio_device *vdev;
 27	struct virtqueue *vq;
 28	spinlock_t vq_lock;
 
 
 
 
 
 29
 30	/* The disk structure for the kernel. */
 31	struct gendisk *disk;
 32
 
 
 
 33	/* Process context for config space updates */
 34	struct work_struct config_work;
 35
 36	/* Lock for config space updates */
 37	struct mutex config_lock;
 38
 39	/* enable config space updates */
 40	bool config_enable;
 41
 42	/* What host tells us, plus 2 for header & tailer. */
 43	unsigned int sg_elems;
 44
 45	/* Ida index - used to track minor number allocations. */
 46	int index;
 
 
 
 
 47};
 48
 49struct virtblk_req
 50{
 51	struct request *req;
 52	struct virtio_blk_outhdr out_hdr;
 53	struct virtio_scsi_inhdr in_hdr;
 54	u8 status;
 55	struct scatterlist sg[];
 56};
 57
 58static inline int virtblk_result(struct virtblk_req *vbr)
 59{
 60	switch (vbr->status) {
 61	case VIRTIO_BLK_S_OK:
 62		return 0;
 63	case VIRTIO_BLK_S_UNSUPP:
 64		return -ENOTTY;
 65	default:
 66		return -EIO;
 67	}
 68}
 69
 70static int __virtblk_add_req(struct virtqueue *vq,
 71			     struct virtblk_req *vbr,
 72			     struct scatterlist *data_sg,
 73			     bool have_data)
 74{
 75	struct scatterlist hdr, status, cmd, sense, inhdr, *sgs[6];
 76	unsigned int num_out = 0, num_in = 0;
 77	int type = vbr->out_hdr.type & ~VIRTIO_BLK_T_OUT;
 78
 79	sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
 80	sgs[num_out++] = &hdr;
 81
 82	/*
 83	 * If this is a packet command we need a couple of additional headers.
 84	 * Behind the normal outhdr we put a segment with the scsi command
 85	 * block, and before the normal inhdr we put the sense data and the
 86	 * inhdr with additional status information.
 87	 */
 88	if (type == VIRTIO_BLK_T_SCSI_CMD) {
 89		sg_init_one(&cmd, vbr->req->cmd, vbr->req->cmd_len);
 90		sgs[num_out++] = &cmd;
 91	}
 92
 93	if (have_data) {
 94		if (vbr->out_hdr.type & VIRTIO_BLK_T_OUT)
 95			sgs[num_out++] = data_sg;
 96		else
 97			sgs[num_out + num_in++] = data_sg;
 98	}
 99
100	if (type == VIRTIO_BLK_T_SCSI_CMD) {
101		sg_init_one(&sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
102		sgs[num_out + num_in++] = &sense;
103		sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr));
104		sgs[num_out + num_in++] = &inhdr;
105	}
106
107	sg_init_one(&status, &vbr->status, sizeof(vbr->status));
108	sgs[num_out + num_in++] = &status;
109
110	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
111}
112
113static inline void virtblk_request_done(struct request *req)
114{
115	struct virtblk_req *vbr = req->special;
 
116	int error = virtblk_result(vbr);
117
118	if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
119		req->resid_len = vbr->in_hdr.residual;
120		req->sense_len = vbr->in_hdr.sense_len;
121		req->errors = vbr->in_hdr.errors;
122	} else if (req->cmd_type == REQ_TYPE_SPECIAL) {
123		req->errors = (error != 0);
124	}
125
126	blk_mq_end_io(req, error);
127}
128
129static void virtblk_done(struct virtqueue *vq)
130{
131	struct virtio_blk *vblk = vq->vdev->priv;
132	bool req_done = false;
 
133	struct virtblk_req *vbr;
134	unsigned long flags;
135	unsigned int len;
136
137	spin_lock_irqsave(&vblk->vq_lock, flags);
138	do {
139		virtqueue_disable_cb(vq);
140		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
141			blk_mq_complete_request(vbr->req);
142			req_done = true;
143		}
144		if (unlikely(virtqueue_is_broken(vq)))
145			break;
146	} while (!virtqueue_enable_cb(vq));
147
148	/* In case queue is stopped waiting for more buffers. */
149	if (req_done)
150		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
151	spin_unlock_irqrestore(&vblk->vq_lock, flags);
152}
153
154static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
 
155{
156	struct virtio_blk *vblk = hctx->queue->queuedata;
157	struct virtblk_req *vbr = req->special;
 
158	unsigned long flags;
159	unsigned int num;
160	const bool last = (req->cmd_flags & REQ_END) != 0;
161	int err;
 
162
163	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
164
165	vbr->req = req;
166	if (req->cmd_flags & REQ_FLUSH) {
167		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
168		vbr->out_hdr.sector = 0;
169		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
170	} else {
171		switch (req->cmd_type) {
172		case REQ_TYPE_FS:
173			vbr->out_hdr.type = 0;
174			vbr->out_hdr.sector = blk_rq_pos(vbr->req);
175			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
176			break;
177		case REQ_TYPE_BLOCK_PC:
178			vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
179			vbr->out_hdr.sector = 0;
180			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
181			break;
182		case REQ_TYPE_SPECIAL:
183			vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
184			vbr->out_hdr.sector = 0;
185			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
186			break;
187		default:
188			/* We don't put anything else in the queue. */
189			BUG();
190		}
191	}
192
 
 
193	num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
194	if (num) {
195		if (rq_data_dir(vbr->req) == WRITE)
196			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
197		else
198			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
199	}
200
201	spin_lock_irqsave(&vblk->vq_lock, flags);
202	err = __virtblk_add_req(vblk->vq, vbr, vbr->sg, num);
203	if (err) {
204		virtqueue_kick(vblk->vq);
205		blk_mq_stop_hw_queue(hctx);
206		spin_unlock_irqrestore(&vblk->vq_lock, flags);
207		/* Out of mem doesn't actually happen, since we fall back
208		 * to direct descriptors */
209		if (err == -ENOMEM || err == -ENOSPC)
210			return BLK_MQ_RQ_QUEUE_BUSY;
211		return BLK_MQ_RQ_QUEUE_ERROR;
212	}
213
214	if (last)
215		virtqueue_kick(vblk->vq);
 
216
217	spin_unlock_irqrestore(&vblk->vq_lock, flags);
 
218	return BLK_MQ_RQ_QUEUE_OK;
219}
220
221/* return id (s/n) string for *disk to *id_str
222 */
223static int virtblk_get_id(struct gendisk *disk, char *id_str)
224{
225	struct virtio_blk *vblk = disk->private_data;
226	struct request *req;
227	struct bio *bio;
228	int err;
229
230	bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
231			   GFP_KERNEL);
232	if (IS_ERR(bio))
233		return PTR_ERR(bio);
234
235	req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
236	if (IS_ERR(req)) {
237		bio_put(bio);
238		return PTR_ERR(req);
239	}
240
241	req->cmd_type = REQ_TYPE_SPECIAL;
242	err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
243	blk_put_request(req);
244
245	return err;
246}
247
248static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
249			     unsigned int cmd, unsigned long data)
250{
251	struct gendisk *disk = bdev->bd_disk;
252	struct virtio_blk *vblk = disk->private_data;
253
254	/*
255	 * Only allow the generic SCSI ioctls if the host can support it.
256	 */
257	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
258		return -ENOTTY;
259
260	return scsi_cmd_blk_ioctl(bdev, mode, cmd,
261				  (void __user *)data);
262}
263
264/* We provide getgeo only to please some old bootloader/partitioning tools */
265static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
266{
267	struct virtio_blk *vblk = bd->bd_disk->private_data;
268
269	/* see if the host passed in geometry config */
270	if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) {
271		virtio_cread(vblk->vdev, struct virtio_blk_config,
272			     geometry.cylinders, &geo->cylinders);
273		virtio_cread(vblk->vdev, struct virtio_blk_config,
274			     geometry.heads, &geo->heads);
275		virtio_cread(vblk->vdev, struct virtio_blk_config,
276			     geometry.sectors, &geo->sectors);
277	} else {
278		/* some standard values, similar to sd */
279		geo->heads = 1 << 6;
280		geo->sectors = 1 << 5;
281		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
282	}
283	return 0;
284}
285
286static const struct block_device_operations virtblk_fops = {
287	.ioctl  = virtblk_ioctl,
288	.owner  = THIS_MODULE,
289	.getgeo = virtblk_getgeo,
290};
291
292static int index_to_minor(int index)
293{
294	return index << PART_BITS;
295}
296
297static int minor_to_index(int minor)
298{
299	return minor >> PART_BITS;
300}
301
302static ssize_t virtblk_serial_show(struct device *dev,
303				struct device_attribute *attr, char *buf)
304{
305	struct gendisk *disk = dev_to_disk(dev);
306	int err;
307
308	/* sysfs gives us a PAGE_SIZE buffer */
309	BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);
310
311	buf[VIRTIO_BLK_ID_BYTES] = '\0';
312	err = virtblk_get_id(disk, buf);
313	if (!err)
314		return strlen(buf);
315
316	if (err == -EIO) /* Unsupported? Make it empty. */
317		return 0;
318
319	return err;
320}
/*
 * Read-only "serial" attribute.  Only referenced from this file
 * (virtblk_probe()), so keep the symbol out of the global namespace.
 */
static DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
 
322
323static void virtblk_config_changed_work(struct work_struct *work)
324{
325	struct virtio_blk *vblk =
326		container_of(work, struct virtio_blk, config_work);
327	struct virtio_device *vdev = vblk->vdev;
328	struct request_queue *q = vblk->disk->queue;
329	char cap_str_2[10], cap_str_10[10];
330	char *envp[] = { "RESIZE=1", NULL };
331	u64 capacity, size;
332
333	mutex_lock(&vblk->config_lock);
334	if (!vblk->config_enable)
335		goto done;
336
337	/* Host must always specify the capacity. */
338	virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity);
339
340	/* If capacity is too big, truncate with warning. */
341	if ((sector_t)capacity != capacity) {
342		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
343			 (unsigned long long)capacity);
344		capacity = (sector_t)-1;
345	}
346
347	size = capacity * queue_logical_block_size(q);
348	string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
349	string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
 
350
351	dev_notice(&vdev->dev,
352		  "new size: %llu %d-byte logical blocks (%s/%s)\n",
353		  (unsigned long long)capacity,
354		  queue_logical_block_size(q),
355		  cap_str_10, cap_str_2);
356
357	set_capacity(vblk->disk, capacity);
358	revalidate_disk(vblk->disk);
359	kobject_uevent_env(&disk_to_dev(vblk->disk)->kobj, KOBJ_CHANGE, envp);
360done:
361	mutex_unlock(&vblk->config_lock);
362}
363
364static void virtblk_config_changed(struct virtio_device *vdev)
365{
366	struct virtio_blk *vblk = vdev->priv;
367
368	queue_work(virtblk_wq, &vblk->config_work);
369}
370
371static int init_vq(struct virtio_blk *vblk)
372{
373	int err = 0;
 
 
 
 
 
 
374
375	/* We expect one virtqueue, for output. */
376	vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
377	if (IS_ERR(vblk->vq))
378		err = PTR_ERR(vblk->vq);
 
379
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380	return err;
381}
382
/*
 * Legacy naming scheme used for virtio devices.  We are stuck with it for
 * virtio blk but don't ever use it for any new driver.
 *
 * Writes @prefix followed by a base-26 letter suffix for @index into @buf:
 * 0 -> "a", 25 -> "z", 26 -> "aa", 701 -> "zz", 702 -> "aaa", ...
 *
 * @prefix: NUL-terminated name prefix (e.g. "vd")
 * @index:  device index, must be >= 0
 * @buf:    destination buffer
 * @buflen: size of @buf in bytes, including room for the terminating NUL
 *
 * Returns 0 on success, or -EINVAL when @index is negative or @buf cannot
 * hold the prefix, the suffix and the terminator.
 */
static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
{
	const int base = 'z' - 'a' + 1;
	size_t prefix_len = strlen(prefix);
	char *begin, *end, *p;

	/*
	 * Reject inputs for which the pointer arithmetic below would step
	 * outside @buf: the "p == begin" check in the loop only works when
	 * begin lies inside the buffer.
	 */
	if (index < 0 || buflen <= 0 || prefix_len >= (size_t)buflen)
		return -EINVAL;

	begin = buf + prefix_len;
	end = buf + buflen;

	/* Build the suffix backwards from the end of the buffer. */
	p = end - 1;
	*p = '\0';
	do {
		if (p == begin)
			return -EINVAL;
		*--p = 'a' + (index % base);
		index = (index / base) - 1;
	} while (index >= 0);

	/* Slide the suffix (with its NUL) up against the prefix. */
	memmove(begin, p, end - p);
	memcpy(buf, prefix, prefix_len);

	return 0;
}
410
411static int virtblk_get_cache_mode(struct virtio_device *vdev)
412{
413	u8 writeback;
414	int err;
415
416	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE,
417				   struct virtio_blk_config, wce,
418				   &writeback);
 
 
 
 
 
419	if (err)
420		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);
421
422	return writeback;
423}
424
425static void virtblk_update_cache_mode(struct virtio_device *vdev)
426{
427	u8 writeback = virtblk_get_cache_mode(vdev);
428	struct virtio_blk *vblk = vdev->priv;
429
430	if (writeback)
431		blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
432	else
433		blk_queue_flush(vblk->disk->queue, 0);
434
435	revalidate_disk(vblk->disk);
436}
437
/* Names for the cache_type attribute, indexed by the cache mode value. */
static const char *const virtblk_cache_types[] = {
	"write through",
	"write back",
};
441
442static ssize_t
443virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
444			 const char *buf, size_t count)
445{
446	struct gendisk *disk = dev_to_disk(dev);
447	struct virtio_blk *vblk = disk->private_data;
448	struct virtio_device *vdev = vblk->vdev;
449	int i;
450
451	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
452	for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
453		if (sysfs_streq(buf, virtblk_cache_types[i]))
454			break;
455
456	if (i < 0)
457		return -EINVAL;
458
459	virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i);
460	virtblk_update_cache_mode(vdev);
461	return count;
462}
463
464static ssize_t
465virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
466			 char *buf)
467{
468	struct gendisk *disk = dev_to_disk(dev);
469	struct virtio_blk *vblk = disk->private_data;
470	u8 writeback = virtblk_get_cache_mode(vblk->vdev);
471
472	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
473	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
474}
475
476static const struct device_attribute dev_attr_cache_type_ro =
477	__ATTR(cache_type, S_IRUGO,
478	       virtblk_cache_type_show, NULL);
479static const struct device_attribute dev_attr_cache_type_rw =
480	__ATTR(cache_type, S_IRUGO|S_IWUSR,
481	       virtblk_cache_type_show, virtblk_cache_type_store);
482
483static struct blk_mq_ops virtio_mq_ops = {
484	.queue_rq	= virtio_queue_rq,
485	.map_queue	= blk_mq_map_queue,
486	.alloc_hctx	= blk_mq_alloc_single_hw_queue,
487	.free_hctx	= blk_mq_free_single_hw_queue,
488	.complete	= virtblk_request_done,
489};
490
491static struct blk_mq_reg virtio_mq_reg = {
492	.ops		= &virtio_mq_ops,
493	.nr_hw_queues	= 1,
494	.queue_depth	= 0, /* Set in virtblk_probe */
495	.numa_node	= NUMA_NO_NODE,
496	.flags		= BLK_MQ_F_SHOULD_MERGE,
497};
498module_param_named(queue_depth, virtio_mq_reg.queue_depth, uint, 0444);
499
500static int virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
501			     struct request *rq, unsigned int nr)
502{
503	struct virtio_blk *vblk = data;
504	struct virtblk_req *vbr = rq->special;
505
506	sg_init_table(vbr->sg, vblk->sg_elems);
507	return 0;
508}
509
 
 
 
 
 
 
 
 
 
 
510static int virtblk_probe(struct virtio_device *vdev)
511{
512	struct virtio_blk *vblk;
513	struct request_queue *q;
514	int err, index;
515
516	u64 cap;
517	u32 v, blk_size, sg_elems, opt_io_size;
518	u16 min_io_size;
519	u8 physical_block_exp, alignment_offset;
520
 
 
 
 
 
 
521	err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
522			     GFP_KERNEL);
523	if (err < 0)
524		goto out;
525	index = err;
526
527	/* We need to know how many segments before we allocate. */
528	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX,
529				   struct virtio_blk_config, seg_max,
530				   &sg_elems);
531
532	/* We need at least one SG element, whatever they say. */
533	if (err || !sg_elems)
534		sg_elems = 1;
535
536	/* We need an extra sg elements at head and tail. */
537	sg_elems += 2;
538	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
539	if (!vblk) {
540		err = -ENOMEM;
541		goto out_free_index;
542	}
543
544	vblk->vdev = vdev;
545	vblk->sg_elems = sg_elems;
546	mutex_init(&vblk->config_lock);
547
548	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
549	vblk->config_enable = true;
550
551	err = init_vq(vblk);
552	if (err)
553		goto out_free_vblk;
554	spin_lock_init(&vblk->vq_lock);
555
556	/* FIXME: How many partitions?  How long is a piece of string? */
557	vblk->disk = alloc_disk(1 << PART_BITS);
558	if (!vblk->disk) {
559		err = -ENOMEM;
560		goto out_free_vq;
561	}
562
563	/* Default queue sizing is to fill the ring. */
564	if (!virtio_mq_reg.queue_depth) {
565		virtio_mq_reg.queue_depth = vblk->vq->num_free;
566		/* ... but without indirect descs, we use 2 descs per req */
567		if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
568			virtio_mq_reg.queue_depth /= 2;
569	}
570	virtio_mq_reg.cmd_size =
 
 
 
 
 
 
571		sizeof(struct virtblk_req) +
572		sizeof(struct scatterlist) * sg_elems;
 
 
573
574	q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk);
575	if (!q) {
576		err = -ENOMEM;
577		goto out_put_disk;
578	}
579
580	blk_mq_init_commands(q, virtblk_init_vbr, vblk);
 
 
 
 
581
582	q->queuedata = vblk;
583
584	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
585
586	vblk->disk->major = major;
587	vblk->disk->first_minor = index_to_minor(index);
588	vblk->disk->private_data = vblk;
589	vblk->disk->fops = &virtblk_fops;
590	vblk->disk->driverfs_dev = &vdev->dev;
 
591	vblk->index = index;
592
593	/* configure queue flush support */
594	virtblk_update_cache_mode(vdev);
595
596	/* If disk is read-only in the host, the guest should obey */
597	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
598		set_disk_ro(vblk->disk, 1);
599
600	/* Host must always specify the capacity. */
601	virtio_cread(vdev, struct virtio_blk_config, capacity, &cap);
602
603	/* If capacity is too big, truncate with warning. */
604	if ((sector_t)cap != cap) {
605		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
606			 (unsigned long long)cap);
607		cap = (sector_t)-1;
608	}
609	set_capacity(vblk->disk, cap);
610
611	/* We can handle whatever the host told us to handle. */
612	blk_queue_max_segments(q, vblk->sg_elems-2);
613
614	/* No need to bounce any requests */
615	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
616
617	/* No real sector limit. */
618	blk_queue_max_hw_sectors(q, -1U);
619
620	/* Host can optionally specify maximum segment size and number of
621	 * segments. */
622	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX,
623				   struct virtio_blk_config, size_max, &v);
624	if (!err)
625		blk_queue_max_segment_size(q, v);
626	else
627		blk_queue_max_segment_size(q, -1U);
628
629	/* Host can optionally specify the block size of the device */
630	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
631				   struct virtio_blk_config, blk_size,
632				   &blk_size);
633	if (!err)
634		blk_queue_logical_block_size(q, blk_size);
635	else
636		blk_size = queue_logical_block_size(q);
637
638	/* Use topology information if available */
639	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
640				   struct virtio_blk_config, physical_block_exp,
641				   &physical_block_exp);
642	if (!err && physical_block_exp)
643		blk_queue_physical_block_size(q,
644				blk_size * (1 << physical_block_exp));
645
646	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
647				   struct virtio_blk_config, alignment_offset,
648				   &alignment_offset);
649	if (!err && alignment_offset)
650		blk_queue_alignment_offset(q, blk_size * alignment_offset);
651
652	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
653				   struct virtio_blk_config, min_io_size,
654				   &min_io_size);
655	if (!err && min_io_size)
656		blk_queue_io_min(q, blk_size * min_io_size);
657
658	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
659				   struct virtio_blk_config, opt_io_size,
660				   &opt_io_size);
661	if (!err && opt_io_size)
662		blk_queue_io_opt(q, blk_size * opt_io_size);
663
 
 
664	add_disk(vblk->disk);
665	err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
666	if (err)
667		goto out_del_disk;
668
669	if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
670		err = device_create_file(disk_to_dev(vblk->disk),
671					 &dev_attr_cache_type_rw);
672	else
673		err = device_create_file(disk_to_dev(vblk->disk),
674					 &dev_attr_cache_type_ro);
675	if (err)
676		goto out_del_disk;
677	return 0;
678
679out_del_disk:
680	del_gendisk(vblk->disk);
681	blk_cleanup_queue(vblk->disk->queue);
 
 
682out_put_disk:
683	put_disk(vblk->disk);
684out_free_vq:
685	vdev->config->del_vqs(vdev);
686out_free_vblk:
687	kfree(vblk);
688out_free_index:
689	ida_simple_remove(&vd_index_ida, index);
690out:
691	return err;
692}
693
694static void virtblk_remove(struct virtio_device *vdev)
695{
696	struct virtio_blk *vblk = vdev->priv;
697	int index = vblk->index;
698	int refc;
699
700	/* Prevent config work handler from accessing the device. */
701	mutex_lock(&vblk->config_lock);
702	vblk->config_enable = false;
703	mutex_unlock(&vblk->config_lock);
704
705	del_gendisk(vblk->disk);
706	blk_cleanup_queue(vblk->disk->queue);
707
 
 
708	/* Stop all the virtqueues. */
709	vdev->config->reset(vdev);
710
711	flush_work(&vblk->config_work);
712
713	refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
714	put_disk(vblk->disk);
715	vdev->config->del_vqs(vdev);
 
716	kfree(vblk);
717
718	/* Only free device id if we don't have any users */
719	if (refc == 1)
720		ida_simple_remove(&vd_index_ida, index);
721}
722
723#ifdef CONFIG_PM_SLEEP
724static int virtblk_freeze(struct virtio_device *vdev)
725{
726	struct virtio_blk *vblk = vdev->priv;
727
728	/* Ensure we don't receive any more interrupts */
729	vdev->config->reset(vdev);
730
731	/* Prevent config work handler from accessing the device. */
732	mutex_lock(&vblk->config_lock);
733	vblk->config_enable = false;
734	mutex_unlock(&vblk->config_lock);
735
736	flush_work(&vblk->config_work);
737
738	blk_mq_stop_hw_queues(vblk->disk->queue);
739
740	vdev->config->del_vqs(vdev);
741	return 0;
742}
743
744static int virtblk_restore(struct virtio_device *vdev)
745{
746	struct virtio_blk *vblk = vdev->priv;
747	int ret;
748
749	vblk->config_enable = true;
750	ret = init_vq(vdev->priv);
751	if (!ret)
752		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
753
754	return ret;
 
 
 
755}
756#endif
757
758static const struct virtio_device_id id_table[] = {
759	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
760	{ 0 },
761};
762
763static unsigned int features[] = {
764	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
765	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
766	VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
 
 
 
 
 
 
 
 
767};
768
769static struct virtio_driver virtio_blk = {
770	.feature_table		= features,
771	.feature_table_size	= ARRAY_SIZE(features),
772	.driver.name		= KBUILD_MODNAME,
773	.driver.owner		= THIS_MODULE,
774	.id_table		= id_table,
775	.probe			= virtblk_probe,
776	.remove			= virtblk_remove,
777	.config_changed		= virtblk_config_changed,
 
 
778#ifdef CONFIG_PM_SLEEP
779	.freeze			= virtblk_freeze,
780	.restore		= virtblk_restore,
781#endif
782};
783
784static int __init init(void)
785{
786	int error;
787
788	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
789	if (!virtblk_wq)
790		return -ENOMEM;
791
792	major = register_blkdev(0, "virtblk");
793	if (major < 0) {
794		error = major;
795		goto out_destroy_workqueue;
796	}
797
798	error = register_virtio_driver(&virtio_blk);
799	if (error)
800		goto out_unregister_blkdev;
801	return 0;
802
803out_unregister_blkdev:
804	unregister_blkdev(major, "virtblk");
805out_destroy_workqueue:
806	destroy_workqueue(virtblk_wq);
807	return error;
808}
809
810static void __exit fini(void)
811{
812	unregister_blkdev(major, "virtblk");
813	unregister_virtio_driver(&virtio_blk);
 
814	destroy_workqueue(virtblk_wq);
815}
816module_init(init);
817module_exit(fini);
818
819MODULE_DEVICE_TABLE(virtio, id_table);
820MODULE_DESCRIPTION("Virtio block driver");
821MODULE_LICENSE("GPL");
v4.6
  1//#define DEBUG
  2#include <linux/spinlock.h>
  3#include <linux/slab.h>
  4#include <linux/blkdev.h>
  5#include <linux/hdreg.h>
  6#include <linux/module.h>
  7#include <linux/mutex.h>
  8#include <linux/virtio.h>
  9#include <linux/virtio_blk.h>
 10#include <linux/scatterlist.h>
 11#include <linux/string_helpers.h>
 12#include <scsi/scsi_cmnd.h>
 13#include <linux/idr.h>
 14#include <linux/blk-mq.h>
 15#include <linux/numa.h>
 16
 17#define PART_BITS 4
 18#define VQ_NAME_LEN 16
 19
 20static int major;
 21static DEFINE_IDA(vd_index_ida);
 22
 23static struct workqueue_struct *virtblk_wq;
 24
 25struct virtio_blk_vq {
 
 
 26	struct virtqueue *vq;
 27	spinlock_t lock;
 28	char name[VQ_NAME_LEN];
 29} ____cacheline_aligned_in_smp;
 30
 31struct virtio_blk {
 32	struct virtio_device *vdev;
 33
 34	/* The disk structure for the kernel. */
 35	struct gendisk *disk;
 36
 37	/* Block layer tags. */
 38	struct blk_mq_tag_set tag_set;
 39
 40	/* Process context for config space updates */
 41	struct work_struct config_work;
 42
 
 
 
 
 
 
 43	/* What host tells us, plus 2 for header & tailer. */
 44	unsigned int sg_elems;
 45
 46	/* Ida index - used to track minor number allocations. */
 47	int index;
 48
 49	/* num of vqs */
 50	int num_vqs;
 51	struct virtio_blk_vq *vqs;
 52};
 53
 54struct virtblk_req {
 
 55	struct request *req;
 56	struct virtio_blk_outhdr out_hdr;
 57	struct virtio_scsi_inhdr in_hdr;
 58	u8 status;
 59	struct scatterlist sg[];
 60};
 61
 62static inline int virtblk_result(struct virtblk_req *vbr)
 63{
 64	switch (vbr->status) {
 65	case VIRTIO_BLK_S_OK:
 66		return 0;
 67	case VIRTIO_BLK_S_UNSUPP:
 68		return -ENOTTY;
 69	default:
 70		return -EIO;
 71	}
 72}
 73
 74static int __virtblk_add_req(struct virtqueue *vq,
 75			     struct virtblk_req *vbr,
 76			     struct scatterlist *data_sg,
 77			     bool have_data)
 78{
 79	struct scatterlist hdr, status, cmd, sense, inhdr, *sgs[6];
 80	unsigned int num_out = 0, num_in = 0;
 81	__virtio32 type = vbr->out_hdr.type & ~cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT);
 82
 83	sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
 84	sgs[num_out++] = &hdr;
 85
 86	/*
 87	 * If this is a packet command we need a couple of additional headers.
 88	 * Behind the normal outhdr we put a segment with the scsi command
 89	 * block, and before the normal inhdr we put the sense data and the
 90	 * inhdr with additional status information.
 91	 */
 92	if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) {
 93		sg_init_one(&cmd, vbr->req->cmd, vbr->req->cmd_len);
 94		sgs[num_out++] = &cmd;
 95	}
 96
 97	if (have_data) {
 98		if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT))
 99			sgs[num_out++] = data_sg;
100		else
101			sgs[num_out + num_in++] = data_sg;
102	}
103
104	if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) {
105		sg_init_one(&sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
106		sgs[num_out + num_in++] = &sense;
107		sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr));
108		sgs[num_out + num_in++] = &inhdr;
109	}
110
111	sg_init_one(&status, &vbr->status, sizeof(vbr->status));
112	sgs[num_out + num_in++] = &status;
113
114	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
115}
116
117static inline void virtblk_request_done(struct request *req)
118{
119	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
120	struct virtio_blk *vblk = req->q->queuedata;
121	int error = virtblk_result(vbr);
122
123	if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
124		req->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual);
125		req->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len);
126		req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors);
127	} else if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
128		req->errors = (error != 0);
129	}
130
131	blk_mq_end_request(req, error);
132}
133
134static void virtblk_done(struct virtqueue *vq)
135{
136	struct virtio_blk *vblk = vq->vdev->priv;
137	bool req_done = false;
138	int qid = vq->index;
139	struct virtblk_req *vbr;
140	unsigned long flags;
141	unsigned int len;
142
143	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
144	do {
145		virtqueue_disable_cb(vq);
146		while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
147			blk_mq_complete_request(vbr->req, vbr->req->errors);
148			req_done = true;
149		}
150		if (unlikely(virtqueue_is_broken(vq)))
151			break;
152	} while (!virtqueue_enable_cb(vq));
153
154	/* In case queue is stopped waiting for more buffers. */
155	if (req_done)
156		blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
157	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
158}
159
160static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
161			   const struct blk_mq_queue_data *bd)
162{
163	struct virtio_blk *vblk = hctx->queue->queuedata;
164	struct request *req = bd->rq;
165	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
166	unsigned long flags;
167	unsigned int num;
168	int qid = hctx->queue_num;
169	int err;
170	bool notify = false;
171
172	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
173
174	vbr->req = req;
175	if (req->cmd_flags & REQ_FLUSH) {
176		vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_FLUSH);
177		vbr->out_hdr.sector = 0;
178		vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
179	} else {
180		switch (req->cmd_type) {
181		case REQ_TYPE_FS:
182			vbr->out_hdr.type = 0;
183			vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, blk_rq_pos(vbr->req));
184			vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
185			break;
186		case REQ_TYPE_BLOCK_PC:
187			vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_SCSI_CMD);
188			vbr->out_hdr.sector = 0;
189			vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
190			break;
191		case REQ_TYPE_DRV_PRIV:
192			vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID);
193			vbr->out_hdr.sector = 0;
194			vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
195			break;
196		default:
197			/* We don't put anything else in the queue. */
198			BUG();
199		}
200	}
201
202	blk_mq_start_request(req);
203
204	num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
205	if (num) {
206		if (rq_data_dir(vbr->req) == WRITE)
207			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
208		else
209			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
210	}
211
212	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
213	err = __virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
214	if (err) {
215		virtqueue_kick(vblk->vqs[qid].vq);
216		blk_mq_stop_hw_queue(hctx);
217		spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
218		/* Out of mem doesn't actually happen, since we fall back
219		 * to direct descriptors */
220		if (err == -ENOMEM || err == -ENOSPC)
221			return BLK_MQ_RQ_QUEUE_BUSY;
222		return BLK_MQ_RQ_QUEUE_ERROR;
223	}
224
225	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
226		notify = true;
227	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
228
229	if (notify)
230		virtqueue_notify(vblk->vqs[qid].vq);
231	return BLK_MQ_RQ_QUEUE_OK;
232}
233
234/* return id (s/n) string for *disk to *id_str
235 */
236static int virtblk_get_id(struct gendisk *disk, char *id_str)
237{
238	struct virtio_blk *vblk = disk->private_data;
239	struct request *req;
240	struct bio *bio;
241	int err;
242
243	bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
244			   GFP_KERNEL);
245	if (IS_ERR(bio))
246		return PTR_ERR(bio);
247
248	req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
249	if (IS_ERR(req)) {
250		bio_put(bio);
251		return PTR_ERR(req);
252	}
253
254	req->cmd_type = REQ_TYPE_DRV_PRIV;
255	err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
256	blk_put_request(req);
257
258	return err;
259}
260
261static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
262			     unsigned int cmd, unsigned long data)
263{
264	struct gendisk *disk = bdev->bd_disk;
265	struct virtio_blk *vblk = disk->private_data;
266
267	/*
268	 * Only allow the generic SCSI ioctls if the host can support it.
269	 */
270	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
271		return -ENOTTY;
272
273	return scsi_cmd_blk_ioctl(bdev, mode, cmd,
274				  (void __user *)data);
275}
276
277/* We provide getgeo only to please some old bootloader/partitioning tools */
278static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
279{
280	struct virtio_blk *vblk = bd->bd_disk->private_data;
281
282	/* see if the host passed in geometry config */
283	if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) {
284		virtio_cread(vblk->vdev, struct virtio_blk_config,
285			     geometry.cylinders, &geo->cylinders);
286		virtio_cread(vblk->vdev, struct virtio_blk_config,
287			     geometry.heads, &geo->heads);
288		virtio_cread(vblk->vdev, struct virtio_blk_config,
289			     geometry.sectors, &geo->sectors);
290	} else {
291		/* some standard values, similar to sd */
292		geo->heads = 1 << 6;
293		geo->sectors = 1 << 5;
294		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
295	}
296	return 0;
297}
298
299static const struct block_device_operations virtblk_fops = {
300	.ioctl  = virtblk_ioctl,
301	.owner  = THIS_MODULE,
302	.getgeo = virtblk_getgeo,
303};
304
305static int index_to_minor(int index)
306{
307	return index << PART_BITS;
308}
309
310static int minor_to_index(int minor)
311{
312	return minor >> PART_BITS;
313}
314
315static ssize_t virtblk_serial_show(struct device *dev,
316				struct device_attribute *attr, char *buf)
317{
318	struct gendisk *disk = dev_to_disk(dev);
319	int err;
320
321	/* sysfs gives us a PAGE_SIZE buffer */
322	BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);
323
324	buf[VIRTIO_BLK_ID_BYTES] = '\0';
325	err = virtblk_get_id(disk, buf);
326	if (!err)
327		return strlen(buf);
328
329	if (err == -EIO) /* Unsupported? Make it empty. */
330		return 0;
331
332	return err;
333}
334
335static DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
336
337static void virtblk_config_changed_work(struct work_struct *work)
338{
339	struct virtio_blk *vblk =
340		container_of(work, struct virtio_blk, config_work);
341	struct virtio_device *vdev = vblk->vdev;
342	struct request_queue *q = vblk->disk->queue;
343	char cap_str_2[10], cap_str_10[10];
344	char *envp[] = { "RESIZE=1", NULL };
345	u64 capacity;
 
 
 
 
346
347	/* Host must always specify the capacity. */
348	virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity);
349
350	/* If capacity is too big, truncate with warning. */
351	if ((sector_t)capacity != capacity) {
352		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
353			 (unsigned long long)capacity);
354		capacity = (sector_t)-1;
355	}
356
357	string_get_size(capacity, queue_logical_block_size(q),
358			STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
359	string_get_size(capacity, queue_logical_block_size(q),
360			STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
361
362	dev_notice(&vdev->dev,
363		  "new size: %llu %d-byte logical blocks (%s/%s)\n",
364		  (unsigned long long)capacity,
365		  queue_logical_block_size(q),
366		  cap_str_10, cap_str_2);
367
368	set_capacity(vblk->disk, capacity);
369	revalidate_disk(vblk->disk);
370	kobject_uevent_env(&disk_to_dev(vblk->disk)->kobj, KOBJ_CHANGE, envp);
 
 
371}
372
373static void virtblk_config_changed(struct virtio_device *vdev)
374{
375	struct virtio_blk *vblk = vdev->priv;
376
377	queue_work(virtblk_wq, &vblk->config_work);
378}
379
380static int init_vq(struct virtio_blk *vblk)
381{
382	int err = 0;
383	int i;
384	vq_callback_t **callbacks;
385	const char **names;
386	struct virtqueue **vqs;
387	unsigned short num_vqs;
388	struct virtio_device *vdev = vblk->vdev;
389
390	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ,
391				   struct virtio_blk_config, num_queues,
392				   &num_vqs);
393	if (err)
394		num_vqs = 1;
395
396	vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL);
397	if (!vblk->vqs) {
398		err = -ENOMEM;
399		goto out;
400	}
401
402	names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
403	if (!names)
404		goto err_names;
405
406	callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
407	if (!callbacks)
408		goto err_callbacks;
409
410	vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
411	if (!vqs)
412		goto err_vqs;
413
414	for (i = 0; i < num_vqs; i++) {
415		callbacks[i] = virtblk_done;
416		snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
417		names[i] = vblk->vqs[i].name;
418	}
419
420	/* Discover virtqueues and write information to configuration.  */
421	err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
422	if (err)
423		goto err_find_vqs;
424
425	for (i = 0; i < num_vqs; i++) {
426		spin_lock_init(&vblk->vqs[i].lock);
427		vblk->vqs[i].vq = vqs[i];
428	}
429	vblk->num_vqs = num_vqs;
430
431 err_find_vqs:
432	kfree(vqs);
433 err_vqs:
434	kfree(callbacks);
435 err_callbacks:
436	kfree(names);
437 err_names:
438	if (err)
439		kfree(vblk->vqs);
440 out:
441	return err;
442}
443
/*
 * Legacy naming scheme used for virtio devices.  We are stuck with it for
 * virtio blk but don't ever use it for any new driver.
 *
 * Writes @prefix followed by a letter suffix for @index into @buf
 * (0 -> "a", 25 -> "z", 26 -> "aa", ...).  The suffix is built backwards
 * from the end of the buffer and then moved in place behind the prefix.
 *
 * Returns 0 on success or -EINVAL if @buflen is too small for the name.
 */
static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
{
	const int radix = 'z' - 'a' + 1;
	const size_t prefix_len = strlen(prefix);
	char *const first = buf + prefix_len;
	char *const limit = buf + buflen;
	char *cur = limit - 1;

	*cur = '\0';
	for (;;) {
		/* No room left in front of the digits already written. */
		if (cur == first)
			return -EINVAL;
		cur--;
		*cur = 'a' + (index % radix);
		index = index / radix - 1;
		if (index < 0)
			break;
	}

	/* Includes the terminating NUL stored at limit - 1. */
	memmove(first, cur, limit - cur);
	memcpy(buf, prefix, prefix_len);

	return 0;
}
471
472static int virtblk_get_cache_mode(struct virtio_device *vdev)
473{
474	u8 writeback;
475	int err;
476
477	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE,
478				   struct virtio_blk_config, wce,
479				   &writeback);
480
481	/*
482	 * If WCE is not configurable and flush is not available,
483	 * assume no writeback cache is in use.
484	 */
485	if (err)
486		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH);
487
488	return writeback;
489}
490
491static void virtblk_update_cache_mode(struct virtio_device *vdev)
492{
493	u8 writeback = virtblk_get_cache_mode(vdev);
494	struct virtio_blk *vblk = vdev->priv;
495
496	if (writeback)
497		blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
498	else
499		blk_queue_flush(vblk->disk->queue, 0);
500
501	revalidate_disk(vblk->disk);
502}
503
/* sysfs names of the cache modes, indexed by the cache-mode value. */
static const char *const virtblk_cache_types[] = {
	[0] = "write through",
	[1] = "write back",
};
507
508static ssize_t
509virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
510			 const char *buf, size_t count)
511{
512	struct gendisk *disk = dev_to_disk(dev);
513	struct virtio_blk *vblk = disk->private_data;
514	struct virtio_device *vdev = vblk->vdev;
515	int i;
516
517	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
518	for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
519		if (sysfs_streq(buf, virtblk_cache_types[i]))
520			break;
521
522	if (i < 0)
523		return -EINVAL;
524
525	virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i);
526	virtblk_update_cache_mode(vdev);
527	return count;
528}
529
530static ssize_t
531virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
532			 char *buf)
533{
534	struct gendisk *disk = dev_to_disk(dev);
535	struct virtio_blk *vblk = disk->private_data;
536	u8 writeback = virtblk_get_cache_mode(vblk->vdev);
537
538	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
539	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
540}
541
542static const struct device_attribute dev_attr_cache_type_ro =
543	__ATTR(cache_type, S_IRUGO,
544	       virtblk_cache_type_show, NULL);
545static const struct device_attribute dev_attr_cache_type_rw =
546	__ATTR(cache_type, S_IRUGO|S_IWUSR,
547	       virtblk_cache_type_show, virtblk_cache_type_store);
548
549static int virtblk_init_request(void *data, struct request *rq,
550		unsigned int hctx_idx, unsigned int request_idx,
551		unsigned int numa_node)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
552{
553	struct virtio_blk *vblk = data;
554	struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
555
556	sg_init_table(vbr->sg, vblk->sg_elems);
557	return 0;
558}
559
560static struct blk_mq_ops virtio_mq_ops = {
561	.queue_rq	= virtio_queue_rq,
562	.map_queue	= blk_mq_map_queue,
563	.complete	= virtblk_request_done,
564	.init_request	= virtblk_init_request,
565};
566
567static unsigned int virtblk_queue_depth;
568module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);
569
570static int virtblk_probe(struct virtio_device *vdev)
571{
572	struct virtio_blk *vblk;
573	struct request_queue *q;
574	int err, index;
575
576	u64 cap;
577	u32 v, blk_size, sg_elems, opt_io_size;
578	u16 min_io_size;
579	u8 physical_block_exp, alignment_offset;
580
581	if (!vdev->config->get) {
582		dev_err(&vdev->dev, "%s failure: config access disabled\n",
583			__func__);
584		return -EINVAL;
585	}
586
587	err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
588			     GFP_KERNEL);
589	if (err < 0)
590		goto out;
591	index = err;
592
593	/* We need to know how many segments before we allocate. */
594	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX,
595				   struct virtio_blk_config, seg_max,
596				   &sg_elems);
597
598	/* We need at least one SG element, whatever they say. */
599	if (err || !sg_elems)
600		sg_elems = 1;
601
602	/* We need an extra sg elements at head and tail. */
603	sg_elems += 2;
604	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
605	if (!vblk) {
606		err = -ENOMEM;
607		goto out_free_index;
608	}
609
610	vblk->vdev = vdev;
611	vblk->sg_elems = sg_elems;
 
612
613	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
 
614
615	err = init_vq(vblk);
616	if (err)
617		goto out_free_vblk;
 
618
619	/* FIXME: How many partitions?  How long is a piece of string? */
620	vblk->disk = alloc_disk(1 << PART_BITS);
621	if (!vblk->disk) {
622		err = -ENOMEM;
623		goto out_free_vq;
624	}
625
626	/* Default queue sizing is to fill the ring. */
627	if (!virtblk_queue_depth) {
628		virtblk_queue_depth = vblk->vqs[0].vq->num_free;
629		/* ... but without indirect descs, we use 2 descs per req */
630		if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
631			virtblk_queue_depth /= 2;
632	}
633
634	memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
635	vblk->tag_set.ops = &virtio_mq_ops;
636	vblk->tag_set.queue_depth = virtblk_queue_depth;
637	vblk->tag_set.numa_node = NUMA_NO_NODE;
638	vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
639	vblk->tag_set.cmd_size =
640		sizeof(struct virtblk_req) +
641		sizeof(struct scatterlist) * sg_elems;
642	vblk->tag_set.driver_data = vblk;
643	vblk->tag_set.nr_hw_queues = vblk->num_vqs;
644
645	err = blk_mq_alloc_tag_set(&vblk->tag_set);
646	if (err)
 
647		goto out_put_disk;
 
648
649	q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set);
650	if (IS_ERR(q)) {
651		err = -ENOMEM;
652		goto out_free_tags;
653	}
654
655	q->queuedata = vblk;
656
657	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
658
659	vblk->disk->major = major;
660	vblk->disk->first_minor = index_to_minor(index);
661	vblk->disk->private_data = vblk;
662	vblk->disk->fops = &virtblk_fops;
663	vblk->disk->driverfs_dev = &vdev->dev;
664	vblk->disk->flags |= GENHD_FL_EXT_DEVT;
665	vblk->index = index;
666
667	/* configure queue flush support */
668	virtblk_update_cache_mode(vdev);
669
670	/* If disk is read-only in the host, the guest should obey */
671	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
672		set_disk_ro(vblk->disk, 1);
673
674	/* Host must always specify the capacity. */
675	virtio_cread(vdev, struct virtio_blk_config, capacity, &cap);
676
677	/* If capacity is too big, truncate with warning. */
678	if ((sector_t)cap != cap) {
679		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
680			 (unsigned long long)cap);
681		cap = (sector_t)-1;
682	}
683	set_capacity(vblk->disk, cap);
684
685	/* We can handle whatever the host told us to handle. */
686	blk_queue_max_segments(q, vblk->sg_elems-2);
687
688	/* No need to bounce any requests */
689	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
690
691	/* No real sector limit. */
692	blk_queue_max_hw_sectors(q, -1U);
693
694	/* Host can optionally specify maximum segment size and number of
695	 * segments. */
696	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX,
697				   struct virtio_blk_config, size_max, &v);
698	if (!err)
699		blk_queue_max_segment_size(q, v);
700	else
701		blk_queue_max_segment_size(q, -1U);
702
703	/* Host can optionally specify the block size of the device */
704	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
705				   struct virtio_blk_config, blk_size,
706				   &blk_size);
707	if (!err)
708		blk_queue_logical_block_size(q, blk_size);
709	else
710		blk_size = queue_logical_block_size(q);
711
712	/* Use topology information if available */
713	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
714				   struct virtio_blk_config, physical_block_exp,
715				   &physical_block_exp);
716	if (!err && physical_block_exp)
717		blk_queue_physical_block_size(q,
718				blk_size * (1 << physical_block_exp));
719
720	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
721				   struct virtio_blk_config, alignment_offset,
722				   &alignment_offset);
723	if (!err && alignment_offset)
724		blk_queue_alignment_offset(q, blk_size * alignment_offset);
725
726	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
727				   struct virtio_blk_config, min_io_size,
728				   &min_io_size);
729	if (!err && min_io_size)
730		blk_queue_io_min(q, blk_size * min_io_size);
731
732	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
733				   struct virtio_blk_config, opt_io_size,
734				   &opt_io_size);
735	if (!err && opt_io_size)
736		blk_queue_io_opt(q, blk_size * opt_io_size);
737
738	virtio_device_ready(vdev);
739
740	add_disk(vblk->disk);
741	err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
742	if (err)
743		goto out_del_disk;
744
745	if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
746		err = device_create_file(disk_to_dev(vblk->disk),
747					 &dev_attr_cache_type_rw);
748	else
749		err = device_create_file(disk_to_dev(vblk->disk),
750					 &dev_attr_cache_type_ro);
751	if (err)
752		goto out_del_disk;
753	return 0;
754
755out_del_disk:
756	del_gendisk(vblk->disk);
757	blk_cleanup_queue(vblk->disk->queue);
758out_free_tags:
759	blk_mq_free_tag_set(&vblk->tag_set);
760out_put_disk:
761	put_disk(vblk->disk);
762out_free_vq:
763	vdev->config->del_vqs(vdev);
764out_free_vblk:
765	kfree(vblk);
766out_free_index:
767	ida_simple_remove(&vd_index_ida, index);
768out:
769	return err;
770}
771
772static void virtblk_remove(struct virtio_device *vdev)
773{
774	struct virtio_blk *vblk = vdev->priv;
775	int index = vblk->index;
776	int refc;
777
778	/* Make sure no work handler is accessing the device. */
779	flush_work(&vblk->config_work);
 
 
780
781	del_gendisk(vblk->disk);
782	blk_cleanup_queue(vblk->disk->queue);
783
784	blk_mq_free_tag_set(&vblk->tag_set);
785
786	/* Stop all the virtqueues. */
787	vdev->config->reset(vdev);
788
 
 
789	refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
790	put_disk(vblk->disk);
791	vdev->config->del_vqs(vdev);
792	kfree(vblk->vqs);
793	kfree(vblk);
794
795	/* Only free device id if we don't have any users */
796	if (refc == 1)
797		ida_simple_remove(&vd_index_ida, index);
798}
799
800#ifdef CONFIG_PM_SLEEP
801static int virtblk_freeze(struct virtio_device *vdev)
802{
803	struct virtio_blk *vblk = vdev->priv;
804
805	/* Ensure we don't receive any more interrupts */
806	vdev->config->reset(vdev);
807
808	/* Make sure no work handler is accessing the device. */
 
 
 
 
809	flush_work(&vblk->config_work);
810
811	blk_mq_stop_hw_queues(vblk->disk->queue);
812
813	vdev->config->del_vqs(vdev);
814	return 0;
815}
816
817static int virtblk_restore(struct virtio_device *vdev)
818{
819	struct virtio_blk *vblk = vdev->priv;
820	int ret;
821
 
822	ret = init_vq(vdev->priv);
823	if (ret)
824		return ret;
825
826	virtio_device_ready(vdev);
827
828	blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
829	return 0;
830}
831#endif
832
833static const struct virtio_device_id id_table[] = {
834	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
835	{ 0 },
836};
837
838static unsigned int features_legacy[] = {
839	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
840	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
841	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
842	VIRTIO_BLK_F_MQ,
843}
844;
845static unsigned int features[] = {
846	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
847	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
848	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
849	VIRTIO_BLK_F_MQ,
850};
851
852static struct virtio_driver virtio_blk = {
853	.feature_table			= features,
854	.feature_table_size		= ARRAY_SIZE(features),
855	.feature_table_legacy		= features_legacy,
856	.feature_table_size_legacy	= ARRAY_SIZE(features_legacy),
857	.driver.name			= KBUILD_MODNAME,
858	.driver.owner			= THIS_MODULE,
859	.id_table			= id_table,
860	.probe				= virtblk_probe,
861	.remove				= virtblk_remove,
862	.config_changed			= virtblk_config_changed,
863#ifdef CONFIG_PM_SLEEP
864	.freeze				= virtblk_freeze,
865	.restore			= virtblk_restore,
866#endif
867};
868
869static int __init init(void)
870{
871	int error;
872
873	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
874	if (!virtblk_wq)
875		return -ENOMEM;
876
877	major = register_blkdev(0, "virtblk");
878	if (major < 0) {
879		error = major;
880		goto out_destroy_workqueue;
881	}
882
883	error = register_virtio_driver(&virtio_blk);
884	if (error)
885		goto out_unregister_blkdev;
886	return 0;
887
888out_unregister_blkdev:
889	unregister_blkdev(major, "virtblk");
890out_destroy_workqueue:
891	destroy_workqueue(virtblk_wq);
892	return error;
893}
894
895static void __exit fini(void)
896{
 
897	unregister_virtio_driver(&virtio_blk);
898	unregister_blkdev(major, "virtblk");
899	destroy_workqueue(virtblk_wq);
900}
901module_init(init);
902module_exit(fini);
903
904MODULE_DEVICE_TABLE(virtio, id_table);
905MODULE_DESCRIPTION("Virtio block driver");
906MODULE_LICENSE("GPL");