v3.1 (drivers/block/virtio_blk.c)
 
//#define DEBUG
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
#include <scsi/scsi_cmnd.h>

#define PART_BITS 4

static int major, index;
struct workqueue_struct *virtblk_wq;

struct virtio_blk
{
	spinlock_t lock;

	struct virtio_device *vdev;
	struct virtqueue *vq;

	/* The disk structure for the kernel. */
	struct gendisk *disk;

	/* Request tracking. */
	struct list_head reqs;

	mempool_t *pool;

	/* Process context for config space updates */
	struct work_struct config_work;

	/* What the host tells us, plus 2 for header & trailer. */
	unsigned int sg_elems;

	/* Scatterlist: can be too big for stack. */
	struct scatterlist sg[/*sg_elems*/];
};

struct virtblk_req
{
	struct list_head list;
	struct request *req;
	struct virtio_blk_outhdr out_hdr;
	struct virtio_scsi_inhdr in_hdr;
	u8 status;
};
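
/*
 * Each request occupies a chain of descriptors on the virtqueue.  As
 * built by do_req() below, the layout is:
 *
 *   [out_hdr] [SCSI cdb, packet commands only] [data sg entries...]
 *   [sense + in_hdr, packet commands only] [status byte]
 *
 * Everything up to and including the data of a write is host-readable
 * ("out"); the rest is host-writable ("in").
 */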

static void blk_done(struct virtqueue *vq)
{
	struct virtio_blk *vblk = vq->vdev->priv;
	struct virtblk_req *vbr;
	unsigned int len;
	unsigned long flags;

	spin_lock_irqsave(&vblk->lock, flags);
	while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
		int error;

		switch (vbr->status) {
		case VIRTIO_BLK_S_OK:
			error = 0;
			break;
		case VIRTIO_BLK_S_UNSUPP:
			error = -ENOTTY;
			break;
		default:
			error = -EIO;
			break;
		}

		switch (vbr->req->cmd_type) {
		case REQ_TYPE_BLOCK_PC:
			vbr->req->resid_len = vbr->in_hdr.residual;
			vbr->req->sense_len = vbr->in_hdr.sense_len;
			vbr->req->errors = vbr->in_hdr.errors;
			break;
		case REQ_TYPE_SPECIAL:
			vbr->req->errors = (error != 0);
			break;
		default:
			break;
		}

		__blk_end_request_all(vbr->req, error);
		list_del(&vbr->list);
		mempool_free(vbr, vblk->pool);
	}
	/* In case queue is stopped waiting for more buffers. */
	blk_start_queue(vblk->disk->queue);
	spin_unlock_irqrestore(&vblk->lock, flags);
}

static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
		   struct request *req)
{
	unsigned long num, out = 0, in = 0;
	struct virtblk_req *vbr;

	vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
	if (!vbr)
		/* When another request finishes we'll try again. */
		return false;

	vbr->req = req;

	if (req->cmd_flags & REQ_FLUSH) {
		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
		vbr->out_hdr.sector = 0;
		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
	} else {
		switch (req->cmd_type) {
		case REQ_TYPE_FS:
			vbr->out_hdr.type = 0;
			vbr->out_hdr.sector = blk_rq_pos(vbr->req);
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		case REQ_TYPE_BLOCK_PC:
			vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
			vbr->out_hdr.sector = 0;
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		case REQ_TYPE_SPECIAL:
			vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
			vbr->out_hdr.sector = 0;
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		default:
			/* We don't put anything else in the queue. */
			BUG();
		}
	}

	sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));

	/*
	 * If this is a packet command we need a couple of additional
	 * segments: after the out_hdr we put the SCSI command block, and
	 * just before the status byte we put the sense buffer and the
	 * SCSI-specific in_hdr with additional status information.
	 */
	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC)
		sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);

	num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);

	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
		sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense,
			   SCSI_SENSE_BUFFERSIZE);
		sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
			   sizeof(vbr->in_hdr));
	}

	sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
		   sizeof(vbr->status));

	if (num) {
		if (rq_data_dir(vbr->req) == WRITE) {
			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
			out += num;
		} else {
			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
			in += num;
		}
	}

	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) {
		mempool_free(vbr, vblk->pool);
		return false;
	}

	list_add_tail(&vbr->list, &vblk->reqs);
	return true;
}
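
/*
 * Pull requests off the block layer queue and post them to the
 * virtqueue.  If do_req() fails (no memory, or the ring is full) the
 * queue is stopped; blk_done() restarts it once a completed request
 * frees up resources.
 */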
static void do_virtblk_request(struct request_queue *q)
{
	struct virtio_blk *vblk = q->queuedata;
	struct request *req;
	unsigned int issued = 0;

	while ((req = blk_peek_request(q)) != NULL) {
		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);

		/* If this request fails, stop queue and wait for something to
		   finish to restart it. */
		if (!do_req(q, vblk, req)) {
			blk_stop_queue(q);
			break;
		}
		blk_start_request(req);
		issued++;
	}

	if (issued)
		virtqueue_kick(vblk->vq);
}

/* Return the serial number (ID) string for *disk in *id_str. */
static int virtblk_get_id(struct gendisk *disk, char *id_str)
{
	struct virtio_blk *vblk = disk->private_data;
	struct request *req;
	struct bio *bio;
	int err;

	bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
			   GFP_KERNEL);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
	if (IS_ERR(req)) {
		bio_put(bio);
		return PTR_ERR(req);
	}

	req->cmd_type = REQ_TYPE_SPECIAL;
	err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
	blk_put_request(req);

	return err;
}

static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
			     unsigned int cmd, unsigned long data)
{
	struct gendisk *disk = bdev->bd_disk;
	struct virtio_blk *vblk = disk->private_data;

	/*
	 * Only allow the generic SCSI ioctls if the host can support it.
	 */
	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
		return -ENOTTY;

	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd,
			      (void __user *)data);
}

/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
	struct virtio_blk *vblk = bd->bd_disk->private_data;
	struct virtio_blk_geometry vgeo;
	int err;

	/* see if the host passed in geometry config */
	err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY,
				offsetof(struct virtio_blk_config, geometry),
				&vgeo);

	if (!err) {
		geo->heads = vgeo.heads;
		geo->sectors = vgeo.sectors;
		geo->cylinders = vgeo.cylinders;
	} else {
		/* some standard values, similar to sd */
		geo->heads = 1 << 6;
		geo->sectors = 1 << 5;
		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
	}
	return 0;
}

static const struct block_device_operations virtblk_fops = {
	.ioctl  = virtblk_ioctl,
	.owner  = THIS_MODULE,
	.getgeo = virtblk_getgeo,
};

static int index_to_minor(int index)
{
	return index << PART_BITS;
}
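
/*
 * With PART_BITS == 4 each disk reserves 16 minor numbers (the disk
 * itself plus up to 15 partitions): index 0 maps to minor 0, index 1
 * to minor 16, and so on.
 */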

static ssize_t virtblk_serial_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int err;

	/* sysfs gives us a PAGE_SIZE buffer */
	BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);

	buf[VIRTIO_BLK_ID_BYTES] = '\0';
	err = virtblk_get_id(disk, buf);
	if (!err)
		return strlen(buf);

	if (err == -EIO) /* Unsupported? Make it empty. */
		return 0;

	return err;
}
DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);

static void virtblk_config_changed_work(struct work_struct *work)
{
	struct virtio_blk *vblk =
		container_of(work, struct virtio_blk, config_work);
	struct virtio_device *vdev = vblk->vdev;
	struct request_queue *q = vblk->disk->queue;
	char cap_str_2[10], cap_str_10[10];
	u64 capacity, size;

	/* Host must always specify the capacity. */
	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
			  &capacity, sizeof(capacity));

	/* If capacity is too big, truncate with warning. */
	if ((sector_t)capacity != capacity) {
		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
			 (unsigned long long)capacity);
		capacity = (sector_t)-1;
	}
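
	/*
	 * The virtio capacity field is in 512-byte sectors; the check
	 * above only fires where sector_t is 32 bits wide (i.e. 32-bit
	 * kernels built without CONFIG_LBDAF), in which case capacities
	 * of 2 TiB or more are clamped to the maximum.
	 */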

	size = capacity * queue_logical_block_size(q);
	string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
	string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));

	dev_notice(&vdev->dev,
		  "new size: %llu %d-byte logical blocks (%s/%s)\n",
		  (unsigned long long)capacity,
		  queue_logical_block_size(q),
		  cap_str_10, cap_str_2);

	set_capacity(vblk->disk, capacity);
}

static void virtblk_config_changed(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	queue_work(virtblk_wq, &vblk->config_work);
}

static int __devinit virtblk_probe(struct virtio_device *vdev)
{
	struct virtio_blk *vblk;
	struct request_queue *q;
	int err;
	u64 cap;
	u32 v, blk_size, sg_elems, opt_io_size;
	u16 min_io_size;
	u8 physical_block_exp, alignment_offset;

	if (index_to_minor(index) >= 1 << MINORBITS)
		return -ENOSPC;

	/* We need to know how many segments before we allocate. */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
				offsetof(struct virtio_blk_config, seg_max),
				&sg_elems);

	/* We need at least one SG element, whatever they say. */
	if (err || !sg_elems)
		sg_elems = 1;

	/* We need extra sg elements at head and tail. */
	sg_elems += 2;
	vdev->priv = vblk = kmalloc(sizeof(*vblk) +
				    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
	if (!vblk) {
		err = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&vblk->reqs);
	spin_lock_init(&vblk->lock);
	vblk->vdev = vdev;
	vblk->sg_elems = sg_elems;
	sg_init_table(vblk->sg, vblk->sg_elems);
	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);

	/* We expect one virtqueue, for output. */
	vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
	if (IS_ERR(vblk->vq)) {
		err = PTR_ERR(vblk->vq);
		goto out_free_vblk;
	}

	vblk->pool = mempool_create_kmalloc_pool(1, sizeof(struct virtblk_req));
	if (!vblk->pool) {
		err = -ENOMEM;
		goto out_free_vq;
	}

	/* FIXME: How many partitions?  How long is a piece of string? */
	vblk->disk = alloc_disk(1 << PART_BITS);
	if (!vblk->disk) {
		err = -ENOMEM;
		goto out_mempool;
	}

	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
	if (!q) {
		err = -ENOMEM;
		goto out_put_disk;
	}

	q->queuedata = vblk;

	if (index < 26) {
		sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
	} else if (index < (26 + 1) * 26) {
		sprintf(vblk->disk->disk_name, "vd%c%c",
			'a' + index / 26 - 1, 'a' + index % 26);
	} else {
		const unsigned int m1 = (index / 26 - 1) / 26 - 1;
		const unsigned int m2 = (index / 26 - 1) % 26;
		const unsigned int m3 =  index % 26;
		sprintf(vblk->disk->disk_name, "vd%c%c%c",
			'a' + m1, 'a' + m2, 'a' + m3);
	}
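
	/*
	 * The names follow the sd convention: vda..vdz, then vdaa..vdzz,
	 * then vdaaa onwards; e.g. index 0 -> vda, index 26 -> vdaa.
	 */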
 
 
 

	vblk->disk->major = major;
	vblk->disk->first_minor = index_to_minor(index);
	vblk->disk->private_data = vblk;
	vblk->disk->fops = &virtblk_fops;
	vblk->disk->driverfs_dev = &vdev->dev;
	index++;

	/* configure queue flush support */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
		blk_queue_flush(q, REQ_FLUSH);

	/* If disk is read-only in the host, the guest should obey */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
		set_disk_ro(vblk->disk, 1);

	/* Host must always specify the capacity. */
	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
			  &cap, sizeof(cap));

	/* If capacity is too big, truncate with warning. */
	if ((sector_t)cap != cap) {
		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
			 (unsigned long long)cap);
		cap = (sector_t)-1;
	}
	set_capacity(vblk->disk, cap);

	/* We can handle whatever the host told us to handle. */
	blk_queue_max_segments(q, vblk->sg_elems - 2);

	/* No need to bounce any requests */
	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);

	/* No real sector limit. */
	blk_queue_max_hw_sectors(q, -1U);

	/* Host can optionally specify maximum segment size and number of
	 * segments. */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
				offsetof(struct virtio_blk_config, size_max),
				&v);
	if (!err)
		blk_queue_max_segment_size(q, v);
	else
		blk_queue_max_segment_size(q, -1U);

	/* Host can optionally specify the block size of the device */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
				offsetof(struct virtio_blk_config, blk_size),
				&blk_size);
	if (!err)
		blk_queue_logical_block_size(q, blk_size);
	else
		blk_size = queue_logical_block_size(q);

	/* Use topology information if available */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, physical_block_exp),
			&physical_block_exp);
	if (!err && physical_block_exp)
		blk_queue_physical_block_size(q,
				blk_size * (1 << physical_block_exp));

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, alignment_offset),
			&alignment_offset);
	if (!err && alignment_offset)
		blk_queue_alignment_offset(q, blk_size * alignment_offset);

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, min_io_size),
			&min_io_size);
	if (!err && min_io_size)
		blk_queue_io_min(q, blk_size * min_io_size);

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, opt_io_size),
			&opt_io_size);
	if (!err && opt_io_size)
		blk_queue_io_opt(q, blk_size * opt_io_size);
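
	/*
	 * Example: with blk_size = 512 and physical_block_exp = 3 the
	 * physical block size is reported as 512 * (1 << 3) = 4096
	 * bytes, matching a host disk with 4 KiB sectors.
	 */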

	add_disk(vblk->disk);
	err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
	if (err)
		goto out_del_disk;

	return 0;

out_del_disk:
	del_gendisk(vblk->disk);
	blk_cleanup_queue(vblk->disk->queue);
out_put_disk:
	put_disk(vblk->disk);
out_mempool:
	mempool_destroy(vblk->pool);
out_free_vq:
	vdev->config->del_vqs(vdev);
out_free_vblk:
	kfree(vblk);
out:
	return err;
}

static void __devexit virtblk_remove(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	flush_work(&vblk->config_work);

	/* Nothing should be pending. */
	BUG_ON(!list_empty(&vblk->reqs));

	/* Stop all the virtqueues. */
	vdev->config->reset(vdev);

	del_gendisk(vblk->disk);
	blk_cleanup_queue(vblk->disk->queue);
	put_disk(vblk->disk);
	mempool_destroy(vblk->pool);
	vdev->config->del_vqs(vdev);
	kfree(vblk);
}

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static unsigned int features[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
};

/*
 * virtio_blk causes spurious section mismatch warning by
 * simultaneously referring to a __devinit and a __devexit function.
 * Use __refdata to avoid this warning.
 */
static struct virtio_driver __refdata virtio_blk = {
	.feature_table		= features,
	.feature_table_size	= ARRAY_SIZE(features),
	.driver.name		= KBUILD_MODNAME,
	.driver.owner		= THIS_MODULE,
	.id_table		= id_table,
	.probe			= virtblk_probe,
	.remove			= __devexit_p(virtblk_remove),
	.config_changed		= virtblk_config_changed,
};

static int __init init(void)
{
	int error;

	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
	if (!virtblk_wq)
		return -ENOMEM;

	major = register_blkdev(0, "virtblk");
	if (major < 0) {
		error = major;
		goto out_destroy_workqueue;
	}

	error = register_virtio_driver(&virtio_blk);
	if (error)
		goto out_unregister_blkdev;
	return 0;

out_unregister_blkdev:
	unregister_blkdev(major, "virtblk");
out_destroy_workqueue:
	destroy_workqueue(virtblk_wq);
	return error;
}

static void __exit fini(void)
{
	unregister_blkdev(major, "virtblk");
	unregister_virtio_driver(&virtio_blk);
	destroy_workqueue(virtblk_wq);
}
module_init(init);
module_exit(fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
MODULE_LICENSE("GPL");
v5.14.15 (drivers/block/virtio_blk.c)
// SPDX-License-Identifier: GPL-2.0-only
//#define DEBUG
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/interrupt.h>
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
#include <linux/idr.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-virtio.h>
#include <linux/numa.h>
#include <uapi/linux/virtio_ring.h>

#define PART_BITS 4
#define VQ_NAME_LEN 16
#define MAX_DISCARD_SEGMENTS 256u

/* The maximum number of sg elements that fit into a virtqueue */
#define VIRTIO_BLK_MAX_SG_ELEMS 32768

static int major;
static DEFINE_IDA(vd_index_ida);

static struct workqueue_struct *virtblk_wq;

struct virtio_blk_vq {
	struct virtqueue *vq;
	spinlock_t lock;
	char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;

struct virtio_blk {
	/*
	 * This mutex must be held by anything that may run after
	 * virtblk_remove() sets vblk->vdev to NULL.
	 *
	 * blk-mq, virtqueue processing, and sysfs attribute code paths are
	 * shut down before vblk->vdev is set to NULL and therefore do not need
	 * to hold this mutex.
	 */
	struct mutex vdev_mutex;
	struct virtio_device *vdev;

	/* The disk structure for the kernel. */
	struct gendisk *disk;

	/* Block layer tags. */
	struct blk_mq_tag_set tag_set;

	/* Process context for config space updates */
	struct work_struct config_work;

	/*
	 * Tracks references from block_device_operations open/release and
	 * virtio_driver probe/remove so this object can be freed once no
	 * longer in use.
	 */
	refcount_t refs;

	/* What the host tells us, plus 2 for header & trailer. */
	unsigned int sg_elems;

	/* Ida index - used to track minor number allocations. */
	int index;

	/* num of vqs */
	int num_vqs;
	struct virtio_blk_vq *vqs;
};

struct virtblk_req {
	struct virtio_blk_outhdr out_hdr;
	u8 status;
	struct scatterlist sg[];
};

static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
{
	switch (vbr->status) {
	case VIRTIO_BLK_S_OK:
		return BLK_STS_OK;
	case VIRTIO_BLK_S_UNSUPP:
		return BLK_STS_NOTSUPP;
	default:
		return BLK_STS_IOERR;
	}
}

static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
		struct scatterlist *data_sg, bool have_data)
{
	struct scatterlist hdr, status, *sgs[3];
	unsigned int num_out = 0, num_in = 0;

	sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
	sgs[num_out++] = &hdr;

	if (have_data) {
		if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT))
			sgs[num_out++] = data_sg;
		else
			sgs[num_out + num_in++] = data_sg;
	}

	sg_init_one(&status, &vbr->status, sizeof(vbr->status));
	sgs[num_out + num_in++] = &status;

	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
}
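
/*
 * At most three scatterlist groups are posted per request: the
 * out_hdr, the data pages (host-readable for writes, host-writable
 * for reads), and the one-byte status the device fills in on
 * completion.
 */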

static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);
	unsigned short n = 0;
	struct virtio_blk_discard_write_zeroes *range;
	struct bio *bio;
	u32 flags = 0;

	if (unmap)
		flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;

	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
	if (!range)
		return -ENOMEM;

	/*
	 * A single max discard segment means multi-range discard isn't
	 * supported, and the block layer only runs contiguity merges as
	 * for normal RW requests.  So we can't rely on the bios for
	 * retrieving each range's info.
	 */
	if (queue_max_discard_segments(req->q) == 1) {
		range[0].flags = cpu_to_le32(flags);
		range[0].num_sectors = cpu_to_le32(blk_rq_sectors(req));
		range[0].sector = cpu_to_le64(blk_rq_pos(req));
		n = 1;
	} else {
		__rq_for_each_bio(bio, req) {
			u64 sector = bio->bi_iter.bi_sector;
			u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;

			range[n].flags = cpu_to_le32(flags);
			range[n].num_sectors = cpu_to_le32(num_sectors);
			range[n].sector = cpu_to_le64(sector);
			n++;
		}
	}

	WARN_ON_ONCE(n != segments);

	req->special_vec.bv_page = virt_to_page(range);
	req->special_vec.bv_offset = offset_in_page(range);
	req->special_vec.bv_len = sizeof(*range) * segments;
	req->rq_flags |= RQF_SPECIAL_PAYLOAD;

	return 0;
}
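
/*
 * The range array built above travels with the request as a special
 * payload (RQF_SPECIAL_PAYLOAD) and is kfree()d in
 * virtblk_request_done() once the device has consumed it.
 */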

static inline void virtblk_request_done(struct request *req)
{
	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);

	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
		kfree(page_address(req->special_vec.bv_page) +
		      req->special_vec.bv_offset);
	}

	blk_mq_end_request(req, virtblk_result(vbr));
}

static void virtblk_done(struct virtqueue *vq)
{
	struct virtio_blk *vblk = vq->vdev->priv;
	bool req_done = false;
	int qid = vq->index;
	struct virtblk_req *vbr;
	unsigned long flags;
	unsigned int len;

	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
	do {
		virtqueue_disable_cb(vq);
		while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
			struct request *req = blk_mq_rq_from_pdu(vbr);

			if (likely(!blk_should_fake_timeout(req->q)))
				blk_mq_complete_request(req);
			req_done = true;
		}
		if (unlikely(virtqueue_is_broken(vq)))
			break;
	} while (!virtqueue_enable_cb(vq));

	/* In case queue is stopped waiting for more buffers. */
	if (req_done)
		blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
}

static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
	bool kick;

	spin_lock_irq(&vq->lock);
	kick = virtqueue_kick_prepare(vq->vq);
	spin_unlock_irq(&vq->lock);

	if (kick)
		virtqueue_notify(vq->vq);
}
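
/*
 * Note the split above: virtqueue_kick_prepare() must run under the
 * vq lock, but virtqueue_notify(), often a costly exit to the host,
 * is issued after the lock is dropped.
 */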

static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
			   const struct blk_mq_queue_data *bd)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct request *req = bd->rq;
	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
	unsigned long flags;
	unsigned int num;
	int qid = hctx->queue_num;
	int err;
	bool notify = false;
	bool unmap = false;
	u32 type;

	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);

	switch (req_op(req)) {
	case REQ_OP_READ:
	case REQ_OP_WRITE:
		type = 0;
		break;
	case REQ_OP_FLUSH:
		type = VIRTIO_BLK_T_FLUSH;
		break;
	case REQ_OP_DISCARD:
		type = VIRTIO_BLK_T_DISCARD;
		break;
	case REQ_OP_WRITE_ZEROES:
		type = VIRTIO_BLK_T_WRITE_ZEROES;
		unmap = !(req->cmd_flags & REQ_NOUNMAP);
		break;
	case REQ_OP_DRV_IN:
		type = VIRTIO_BLK_T_GET_ID;
		break;
	default:
		WARN_ON_ONCE(1);
		return BLK_STS_IOERR;
	}

	vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
	vbr->out_hdr.sector = type ?
		0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req));
	vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req));

	blk_mq_start_request(req);

	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
		err = virtblk_setup_discard_write_zeroes(req, unmap);
		if (err)
			return BLK_STS_RESOURCE;
	}

	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
	if (num) {
		if (rq_data_dir(req) == WRITE)
			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
		else
			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
	}

	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
	err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
	if (err) {
		virtqueue_kick(vblk->vqs[qid].vq);
		/* Don't stop the queue if -ENOMEM: we may have failed to
		 * bounce the buffer due to global resource outage.
		 */
		if (err == -ENOSPC)
			blk_mq_stop_hw_queue(hctx);
		spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
		switch (err) {
		case -ENOSPC:
			return BLK_STS_DEV_RESOURCE;
		case -ENOMEM:
			return BLK_STS_RESOURCE;
		default:
			return BLK_STS_IOERR;
		}
	}

	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
		notify = true;
	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);

	if (notify)
		virtqueue_notify(vblk->vqs[qid].vq);
	return BLK_STS_OK;
}

/* Return the serial number (ID) string for *disk in *id_str. */
static int virtblk_get_id(struct gendisk *disk, char *id_str)
{
	struct virtio_blk *vblk = disk->private_data;
	struct request_queue *q = vblk->disk->queue;
	struct request *req;
	int err;

	req = blk_get_request(q, REQ_OP_DRV_IN, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

	err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL);
	if (err)
		goto out;

	blk_execute_rq(vblk->disk, req, false);
	err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
out:
	blk_put_request(req);
	return err;
}

static void virtblk_get(struct virtio_blk *vblk)
{
	refcount_inc(&vblk->refs);
}

static void virtblk_put(struct virtio_blk *vblk)
{
	if (refcount_dec_and_test(&vblk->refs)) {
		ida_simple_remove(&vd_index_ida, vblk->index);
		mutex_destroy(&vblk->vdev_mutex);
		kfree(vblk);
	}
}

static int virtblk_open(struct block_device *bd, fmode_t mode)
{
	struct virtio_blk *vblk = bd->bd_disk->private_data;
	int ret = 0;

	mutex_lock(&vblk->vdev_mutex);

	if (vblk->vdev)
		virtblk_get(vblk);
	else
		ret = -ENXIO;

	mutex_unlock(&vblk->vdev_mutex);
	return ret;
}

static void virtblk_release(struct gendisk *disk, fmode_t mode)
{
	struct virtio_blk *vblk = disk->private_data;

	virtblk_put(vblk);
}

/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
	struct virtio_blk *vblk = bd->bd_disk->private_data;
	int ret = 0;

	mutex_lock(&vblk->vdev_mutex);

	if (!vblk->vdev) {
		ret = -ENXIO;
		goto out;
	}

	/* see if the host passed in geometry config */
	if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) {
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.cylinders, &geo->cylinders);
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.heads, &geo->heads);
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.sectors, &geo->sectors);
	} else {
		/* some standard values, similar to sd */
		geo->heads = 1 << 6;
		geo->sectors = 1 << 5;
		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
	}
out:
	mutex_unlock(&vblk->vdev_mutex);
	return ret;
}

static const struct block_device_operations virtblk_fops = {
	.owner  = THIS_MODULE,
	.open = virtblk_open,
	.release = virtblk_release,
	.getgeo = virtblk_getgeo,
};

static int index_to_minor(int index)
{
	return index << PART_BITS;
}

static int minor_to_index(int minor)
{
	return minor >> PART_BITS;
}

static ssize_t serial_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int err;

	/* sysfs gives us a PAGE_SIZE buffer */
	BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);

	buf[VIRTIO_BLK_ID_BYTES] = '\0';
	err = virtblk_get_id(disk, buf);
	if (!err)
		return strlen(buf);

	if (err == -EIO) /* Unsupported? Make it empty. */
		return 0;

	return err;
}

static DEVICE_ATTR_RO(serial);

/* The queue's logical block size must be set before calling this */
static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)
{
	struct virtio_device *vdev = vblk->vdev;
	struct request_queue *q = vblk->disk->queue;
	char cap_str_2[10], cap_str_10[10];
	unsigned long long nblocks;
	u64 capacity;

	/* Host must always specify the capacity. */
	virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity);

	nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9);

	string_get_size(nblocks, queue_logical_block_size(q),
			STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
	string_get_size(nblocks, queue_logical_block_size(q),
			STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));

	dev_notice(&vdev->dev,
		   "[%s] %s%llu %d-byte logical blocks (%s/%s)\n",
		   vblk->disk->disk_name,
		   resize ? "new size: " : "",
		   nblocks,
		   queue_logical_block_size(q),
		   cap_str_10,
		   cap_str_2);

	set_capacity_and_notify(vblk->disk, capacity);
}

static void virtblk_config_changed_work(struct work_struct *work)
{
	struct virtio_blk *vblk =
		container_of(work, struct virtio_blk, config_work);

	virtblk_update_capacity(vblk, true);
}

static void virtblk_config_changed(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	queue_work(virtblk_wq, &vblk->config_work);
}

static int init_vq(struct virtio_blk *vblk)
{
	int err;
	int i;
	vq_callback_t **callbacks;
	const char **names;
	struct virtqueue **vqs;
	unsigned short num_vqs;
	struct virtio_device *vdev = vblk->vdev;
	struct irq_affinity desc = { 0, };

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ,
				   struct virtio_blk_config, num_queues,
				   &num_vqs);
	if (err)
		num_vqs = 1;

	num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs);

	vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL);
	if (!vblk->vqs)
		return -ENOMEM;

	names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL);
	callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL);
	vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL);
	if (!names || !callbacks || !vqs) {
		err = -ENOMEM;
		goto out;
	}

	for (i = 0; i < num_vqs; i++) {
		callbacks[i] = virtblk_done;
		snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
		names[i] = vblk->vqs[i].name;
	}

	/* Discover virtqueues and write information to configuration.  */
	err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc);
	if (err)
		goto out;

	for (i = 0; i < num_vqs; i++) {
		spin_lock_init(&vblk->vqs[i].lock);
		vblk->vqs[i].vq = vqs[i];
	}
	vblk->num_vqs = num_vqs;

out:
	kfree(vqs);
	kfree(callbacks);
	kfree(names);
	if (err)
		kfree(vblk->vqs);
	return err;
}
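
/*
 * Multi-queue negotiation: if the device offers VIRTIO_BLK_F_MQ the
 * num_queues config field is honoured, capped at nr_cpu_ids;
 * otherwise a single virtqueue is used, as with older devices.
 */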

/*
 * Legacy naming scheme used for virtio devices.  We are stuck with it for
 * virtio blk but don't ever use it for any new driver.
 */
static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
{
	const int base = 'z' - 'a' + 1;
	char *begin = buf + strlen(prefix);
	char *end = buf + buflen;
	char *p;
	int unit;

	p = end - 1;
	*p = '\0';
	unit = base;
	do {
		if (p == begin)
			return -EINVAL;
		*--p = 'a' + (index % unit);
		index = (index / unit) - 1;
	} while (index >= 0);

	memmove(begin, p, end - p);
	memcpy(buf, prefix, strlen(prefix));

	return 0;
}
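
/*
 * E.g. virtblk_name_format("vd", ...) yields vda for index 0, vdz for
 * 25, vdaa for 26 and vdzz for 701, mirroring the sd naming scheme.
 */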

static int virtblk_get_cache_mode(struct virtio_device *vdev)
{
	u8 writeback;
	int err;

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE,
				   struct virtio_blk_config, wce,
				   &writeback);

	/*
	 * If WCE is not configurable and flush is not available,
	 * assume no writeback cache is in use.
	 */
	if (err)
		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH);

	return writeback;
}

static void virtblk_update_cache_mode(struct virtio_device *vdev)
{
	u8 writeback = virtblk_get_cache_mode(vdev);
	struct virtio_blk *vblk = vdev->priv;

	blk_queue_write_cache(vblk->disk->queue, writeback, false);
}

static const char *const virtblk_cache_types[] = {
	"write through", "write back"
};

static ssize_t
cache_type_store(struct device *dev, struct device_attribute *attr,
		 const char *buf, size_t count)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	struct virtio_device *vdev = vblk->vdev;
	int i;

	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
	i = sysfs_match_string(virtblk_cache_types, buf);
	if (i < 0)
		return i;

	virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i);
	virtblk_update_cache_mode(vdev);
	return count;
}

static ssize_t
cache_type_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	u8 writeback = virtblk_get_cache_mode(vblk->vdev);

	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
}

static DEVICE_ATTR_RW(cache_type);

static struct attribute *virtblk_attrs[] = {
	&dev_attr_serial.attr,
	&dev_attr_cache_type.attr,
	NULL,
};

static umode_t virtblk_attrs_are_visible(struct kobject *kobj,
		struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	struct virtio_device *vdev = vblk->vdev;

	if (a == &dev_attr_cache_type.attr &&
	    !virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
		return S_IRUGO;

	return a->mode;
}

static const struct attribute_group virtblk_attr_group = {
	.attrs = virtblk_attrs,
	.is_visible = virtblk_attrs_are_visible,
};

static const struct attribute_group *virtblk_attr_groups[] = {
	&virtblk_attr_group,
	NULL,
};

static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq,
		unsigned int hctx_idx, unsigned int numa_node)
{
	struct virtio_blk *vblk = set->driver_data;
	struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);

	sg_init_table(vbr->sg, vblk->sg_elems);
	return 0;
}

static int virtblk_map_queues(struct blk_mq_tag_set *set)
{
	struct virtio_blk *vblk = set->driver_data;

	return blk_mq_virtio_map_queues(&set->map[HCTX_TYPE_DEFAULT],
					vblk->vdev, 0);
}

static const struct blk_mq_ops virtio_mq_ops = {
	.queue_rq	= virtio_queue_rq,
	.commit_rqs	= virtio_commit_rqs,
	.complete	= virtblk_request_done,
	.init_request	= virtblk_init_request,
	.map_queues	= virtblk_map_queues,
};

static unsigned int virtblk_queue_depth;
module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);
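
/*
 * A queue_depth of 0 (the default) lets virtblk_probe() size the
 * queue from the virtqueue ring: num_free entries, halved when
 * indirect descriptors are unavailable (see the "2 descs per req"
 * note in virtblk_probe()).
 */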

static int virtblk_probe(struct virtio_device *vdev)
{
	struct virtio_blk *vblk;
	struct request_queue *q;
	int err, index;

	u32 v, blk_size, max_size, sg_elems, opt_io_size;
	u16 min_io_size;
	u8 physical_block_exp, alignment_offset;
	unsigned int queue_depth;

	if (!vdev->config->get) {
		dev_err(&vdev->dev, "%s failure: config access disabled\n",
			__func__);
		return -EINVAL;
	}

	err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
			     GFP_KERNEL);
	if (err < 0)
		goto out;
	index = err;

	/* We need to know how many segments before we allocate. */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX,
				   struct virtio_blk_config, seg_max,
				   &sg_elems);

	/* We need at least one SG element, whatever they say. */
	if (err || !sg_elems)
		sg_elems = 1;

	/* Prevent integer overflows and honor max vq size */
	sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2);

	/* We need extra sg elements at head and tail. */
	sg_elems += 2;
	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
	if (!vblk) {
		err = -ENOMEM;
		goto out_free_index;
	}

	/* This reference is dropped in virtblk_remove(). */
	refcount_set(&vblk->refs, 1);
	mutex_init(&vblk->vdev_mutex);

	vblk->vdev = vdev;
	vblk->sg_elems = sg_elems;

	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);

	err = init_vq(vblk);
	if (err)
		goto out_free_vblk;

	/* Default queue sizing is to fill the ring. */
	if (!virtblk_queue_depth) {
		queue_depth = vblk->vqs[0].vq->num_free;
		/* ... but without indirect descs, we use 2 descs per req */
		if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
			queue_depth /= 2;
	} else {
		queue_depth = virtblk_queue_depth;
	}

	memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
	vblk->tag_set.ops = &virtio_mq_ops;
	vblk->tag_set.queue_depth = queue_depth;
	vblk->tag_set.numa_node = NUMA_NO_NODE;
	vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	vblk->tag_set.cmd_size =
		sizeof(struct virtblk_req) +
		sizeof(struct scatterlist) * sg_elems;
	vblk->tag_set.driver_data = vblk;
	vblk->tag_set.nr_hw_queues = vblk->num_vqs;

	err = blk_mq_alloc_tag_set(&vblk->tag_set);
	if (err)
		goto out_free_vq;

	vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, vblk);
	if (IS_ERR(vblk->disk)) {
		err = PTR_ERR(vblk->disk);
		goto out_free_tags;
	}
	q = vblk->disk->queue;

	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);

	vblk->disk->major = major;
	vblk->disk->first_minor = index_to_minor(index);
	vblk->disk->minors = 1 << PART_BITS;
	vblk->disk->private_data = vblk;
	vblk->disk->fops = &virtblk_fops;
	vblk->disk->flags |= GENHD_FL_EXT_DEVT;
	vblk->index = index;

	/* configure queue flush support */
	virtblk_update_cache_mode(vdev);

	/* If disk is read-only in the host, the guest should obey */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
		set_disk_ro(vblk->disk, 1);

	/* We can handle whatever the host told us to handle. */
	blk_queue_max_segments(q, vblk->sg_elems - 2);

	/* No real sector limit. */
	blk_queue_max_hw_sectors(q, -1U);

	max_size = virtio_max_dma_size(vdev);

	/* Host can optionally specify maximum segment size and number of
	 * segments. */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX,
				   struct virtio_blk_config, size_max, &v);
	if (!err)
		max_size = min(max_size, v);

	blk_queue_max_segment_size(q, max_size);

	/* Host can optionally specify the block size of the device */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
				   struct virtio_blk_config, blk_size,
				   &blk_size);
	if (!err)
		blk_queue_logical_block_size(q, blk_size);
	else
		blk_size = queue_logical_block_size(q);

	/* Use topology information if available */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, physical_block_exp,
				   &physical_block_exp);
	if (!err && physical_block_exp)
		blk_queue_physical_block_size(q,
				blk_size * (1 << physical_block_exp));

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, alignment_offset,
				   &alignment_offset);
	if (!err && alignment_offset)
		blk_queue_alignment_offset(q, blk_size * alignment_offset);

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, min_io_size,
				   &min_io_size);
	if (!err && min_io_size)
		blk_queue_io_min(q, blk_size * min_io_size);

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, opt_io_size,
				   &opt_io_size);
	if (!err && opt_io_size)
		blk_queue_io_opt(q, blk_size * opt_io_size);

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
		q->limits.discard_granularity = blk_size;

		virtio_cread(vdev, struct virtio_blk_config,
			     discard_sector_alignment, &v);
		q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;

		virtio_cread(vdev, struct virtio_blk_config,
			     max_discard_sectors, &v);
		blk_queue_max_discard_sectors(q, v ? v : UINT_MAX);

		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
			     &v);
		blk_queue_max_discard_segments(q,
					       min_not_zero(v,
							    MAX_DISCARD_SEGMENTS));

		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
	}

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
		virtio_cread(vdev, struct virtio_blk_config,
			     max_write_zeroes_sectors, &v);
		blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX);
	}

	virtblk_update_capacity(vblk, false);
	virtio_device_ready(vdev);

	device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
	return 0;

out_free_tags:
	blk_mq_free_tag_set(&vblk->tag_set);
out_free_vq:
	vdev->config->del_vqs(vdev);
	kfree(vblk->vqs);
out_free_vblk:
	kfree(vblk);
out_free_index:
	ida_simple_remove(&vd_index_ida, index);
out:
	return err;
}

static void virtblk_remove(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	/* Make sure no work handler is accessing the device. */
	flush_work(&vblk->config_work);

	del_gendisk(vblk->disk);
	blk_cleanup_disk(vblk->disk);
	blk_mq_free_tag_set(&vblk->tag_set);

	mutex_lock(&vblk->vdev_mutex);

	/* Stop all the virtqueues. */
	vdev->config->reset(vdev);

	/* Virtqueues are stopped, nothing can use vblk->vdev anymore. */
	vblk->vdev = NULL;

	vdev->config->del_vqs(vdev);
	kfree(vblk->vqs);

	mutex_unlock(&vblk->vdev_mutex);

	virtblk_put(vblk);
}

#ifdef CONFIG_PM_SLEEP
static int virtblk_freeze(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	/* Ensure we don't receive any more interrupts */
	vdev->config->reset(vdev);

	/* Make sure no work handler is accessing the device. */
	flush_work(&vblk->config_work);

	blk_mq_quiesce_queue(vblk->disk->queue);

	vdev->config->del_vqs(vdev);
	kfree(vblk->vqs);

	return 0;
}

static int virtblk_restore(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;
	int ret;

	ret = init_vq(vdev->priv);
	if (ret)
		return ret;

	virtio_device_ready(vdev);

	blk_mq_unquiesce_queue(vblk->disk->queue);
	return 0;
}
#endif

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static unsigned int features_legacy[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
};

static unsigned int features[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
};

static struct virtio_driver virtio_blk = {
	.feature_table			= features,
	.feature_table_size		= ARRAY_SIZE(features),
	.feature_table_legacy		= features_legacy,
	.feature_table_size_legacy	= ARRAY_SIZE(features_legacy),
	.driver.name			= KBUILD_MODNAME,
	.driver.owner			= THIS_MODULE,
	.id_table			= id_table,
	.probe				= virtblk_probe,
	.remove				= virtblk_remove,
	.config_changed			= virtblk_config_changed,
#ifdef CONFIG_PM_SLEEP
	.freeze				= virtblk_freeze,
	.restore			= virtblk_restore,
#endif
};

static int __init init(void)
{
	int error;

	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
	if (!virtblk_wq)
		return -ENOMEM;

	major = register_blkdev(0, "virtblk");
	if (major < 0) {
		error = major;
		goto out_destroy_workqueue;
	}

	error = register_virtio_driver(&virtio_blk);
	if (error)
		goto out_unregister_blkdev;
	return 0;

out_unregister_blkdev:
	unregister_blkdev(major, "virtblk");
out_destroy_workqueue:
	destroy_workqueue(virtblk_wq);
	return error;
}

static void __exit fini(void)
{
	unregister_virtio_driver(&virtio_blk);
	unregister_blkdev(major, "virtblk");
	destroy_workqueue(virtblk_wq);
}
module_init(init);
module_exit(fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
MODULE_LICENSE("GPL");