Linux Audio

Check our new training course

Loading...
v6.8
   1/*
   2 * Linux driver for VMware's para-virtualized SCSI HBA.
   3 *
   4 * Copyright (C) 2008-2014, VMware, Inc. All Rights Reserved.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License as published by the
   8 * Free Software Foundation; version 2 of the License and no later version.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  13 * NON INFRINGEMENT.  See the GNU General Public License for more
  14 * details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write to the Free Software
  18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 *
  20 */
  21
  22#include <linux/kernel.h>
  23#include <linux/module.h>
  24#include <linux/interrupt.h>
  25#include <linux/slab.h>
  26#include <linux/workqueue.h>
  27#include <linux/pci.h>
  28
  29#include <scsi/scsi.h>
  30#include <scsi/scsi_host.h>
  31#include <scsi/scsi_cmnd.h>
  32#include <scsi/scsi_device.h>
  33#include <scsi/scsi_tcq.h>
  34
  35#include "vmw_pvscsi.h"
  36
  37#define PVSCSI_LINUX_DRIVER_DESC "VMware PVSCSI driver"
  38
  39MODULE_DESCRIPTION(PVSCSI_LINUX_DRIVER_DESC);
  40MODULE_AUTHOR("VMware, Inc.");
  41MODULE_LICENSE("GPL");
  42MODULE_VERSION(PVSCSI_DRIVER_VERSION_STRING);
  43
  44#define PVSCSI_DEFAULT_NUM_PAGES_PER_RING	8
  45#define PVSCSI_DEFAULT_NUM_PAGES_MSG_RING	1
  46#define PVSCSI_DEFAULT_QUEUE_DEPTH		254
  47#define SGL_SIZE				PAGE_SIZE
  48
/*
 * Scatter/gather table owned by a command context. It is DMA-mapped towards
 * the device when a command needs more than one data segment, and holds at
 * most PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT elements.
 */
struct pvscsi_sg_list {
	struct PVSCSISGElement sge[PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT];
};
  52
  53struct pvscsi_ctx {
  54	/*
  55	 * The index of the context in cmd_map serves as the context ID for a
  56	 * 1-to-1 mapping completions back to requests.
  57	 */
  58	struct scsi_cmnd	*cmd;
  59	struct pvscsi_sg_list	*sgl;
  60	struct list_head	list;
  61	dma_addr_t		dataPA;
  62	dma_addr_t		sensePA;
  63	dma_addr_t		sglPA;
  64	struct completion	*abort_cmp;
  65};
  66
  67struct pvscsi_adapter {
  68	char				*mmioBase;
  69	u8				rev;
  70	bool				use_msg;
  71	bool				use_req_threshold;
  72
  73	spinlock_t			hw_lock;
  74
  75	struct workqueue_struct		*workqueue;
  76	struct work_struct		work;
  77
  78	struct PVSCSIRingReqDesc	*req_ring;
  79	unsigned			req_pages;
  80	unsigned			req_depth;
  81	dma_addr_t			reqRingPA;
  82
  83	struct PVSCSIRingCmpDesc	*cmp_ring;
  84	unsigned			cmp_pages;
  85	dma_addr_t			cmpRingPA;
  86
  87	struct PVSCSIRingMsgDesc	*msg_ring;
  88	unsigned			msg_pages;
  89	dma_addr_t			msgRingPA;
  90
  91	struct PVSCSIRingsState		*rings_state;
  92	dma_addr_t			ringStatePA;
  93
  94	struct pci_dev			*dev;
  95	struct Scsi_Host		*host;
  96
  97	struct list_head		cmd_pool;
  98	struct pvscsi_ctx		*cmd_map;
  99};
 100
 101
 102/* Command line parameters */
 103static int pvscsi_ring_pages;
 104static int pvscsi_msg_ring_pages = PVSCSI_DEFAULT_NUM_PAGES_MSG_RING;
 105static int pvscsi_cmd_per_lun    = PVSCSI_DEFAULT_QUEUE_DEPTH;
 106static bool pvscsi_disable_msi;
 107static bool pvscsi_disable_msix;
 108static bool pvscsi_use_msg       = true;
 109static bool pvscsi_use_req_threshold = true;
 110
 111#define PVSCSI_RW (S_IRUSR | S_IWUSR)
 112
 113module_param_named(ring_pages, pvscsi_ring_pages, int, PVSCSI_RW);
 114MODULE_PARM_DESC(ring_pages, "Number of pages per req/cmp ring - (default="
 115		 __stringify(PVSCSI_DEFAULT_NUM_PAGES_PER_RING)
 116		 "[up to 16 targets],"
 117		 __stringify(PVSCSI_SETUP_RINGS_MAX_NUM_PAGES)
 118		 "[for 16+ targets])");
 119
 120module_param_named(msg_ring_pages, pvscsi_msg_ring_pages, int, PVSCSI_RW);
 121MODULE_PARM_DESC(msg_ring_pages, "Number of pages for the msg ring - (default="
 122		 __stringify(PVSCSI_DEFAULT_NUM_PAGES_MSG_RING) ")");
 123
 124module_param_named(cmd_per_lun, pvscsi_cmd_per_lun, int, PVSCSI_RW);
 125MODULE_PARM_DESC(cmd_per_lun, "Maximum commands per lun - (default="
 126		 __stringify(PVSCSI_DEFAULT_QUEUE_DEPTH) ")");
 127
 128module_param_named(disable_msi, pvscsi_disable_msi, bool, PVSCSI_RW);
 129MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");
 130
 131module_param_named(disable_msix, pvscsi_disable_msix, bool, PVSCSI_RW);
 132MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");
 133
 134module_param_named(use_msg, pvscsi_use_msg, bool, PVSCSI_RW);
 135MODULE_PARM_DESC(use_msg, "Use msg ring when available - (default=1)");
 136
 137module_param_named(use_req_threshold, pvscsi_use_req_threshold,
 138		   bool, PVSCSI_RW);
 139MODULE_PARM_DESC(use_req_threshold, "Use driver-based request coalescing if configured - (default=1)");
 140
 141static const struct pci_device_id pvscsi_pci_tbl[] = {
 142	{ PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_PVSCSI) },
 143	{ 0 }
 144};
 145
 146MODULE_DEVICE_TABLE(pci, pvscsi_pci_tbl);
 147
 148static struct device *
 149pvscsi_dev(const struct pvscsi_adapter *adapter)
 150{
 151	return &(adapter->dev->dev);
 152}
 153
 154static struct pvscsi_ctx *
 155pvscsi_find_context(const struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd)
 156{
 157	struct pvscsi_ctx *ctx, *end;
 158
 159	end = &adapter->cmd_map[adapter->req_depth];
 160	for (ctx = adapter->cmd_map; ctx < end; ctx++)
 161		if (ctx->cmd == cmd)
 162			return ctx;
 163
 164	return NULL;
 165}
 166
 167static struct pvscsi_ctx *
 168pvscsi_acquire_context(struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd)
 169{
 170	struct pvscsi_ctx *ctx;
 171
 172	if (list_empty(&adapter->cmd_pool))
 173		return NULL;
 174
 175	ctx = list_first_entry(&adapter->cmd_pool, struct pvscsi_ctx, list);
 176	ctx->cmd = cmd;
 177	list_del(&ctx->list);
 178
 179	return ctx;
 180}
 181
 182static void pvscsi_release_context(struct pvscsi_adapter *adapter,
 183				   struct pvscsi_ctx *ctx)
 184{
 185	ctx->cmd = NULL;
 186	ctx->abort_cmp = NULL;
 187	list_add(&ctx->list, &adapter->cmd_pool);
 188}
 189
 190/*
 191 * Map a pvscsi_ctx struct to a context ID field value; we map to a simple
 192 * non-zero integer. ctx always points to an entry in cmd_map array, hence
 193 * the return value is always >=1.
 194 */
 195static u64 pvscsi_map_context(const struct pvscsi_adapter *adapter,
 196			      const struct pvscsi_ctx *ctx)
 197{
 198	return ctx - adapter->cmd_map + 1;
 199}
 200
 201static struct pvscsi_ctx *
 202pvscsi_get_context(const struct pvscsi_adapter *adapter, u64 context)
 203{
 204	return &adapter->cmd_map[context - 1];
 205}
 206
 207static void pvscsi_reg_write(const struct pvscsi_adapter *adapter,
 208			     u32 offset, u32 val)
 209{
 210	writel(val, adapter->mmioBase + offset);
 211}
 212
 213static u32 pvscsi_reg_read(const struct pvscsi_adapter *adapter, u32 offset)
 214{
 215	return readl(adapter->mmioBase + offset);
 216}
 217
 218static u32 pvscsi_read_intr_status(const struct pvscsi_adapter *adapter)
 219{
 220	return pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_INTR_STATUS);
 221}
 222
 223static void pvscsi_write_intr_status(const struct pvscsi_adapter *adapter,
 224				     u32 val)
 225{
 226	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_STATUS, val);
 227}
 228
 229static void pvscsi_unmask_intr(const struct pvscsi_adapter *adapter)
 230{
 231	u32 intr_bits;
 232
 233	intr_bits = PVSCSI_INTR_CMPL_MASK;
 234	if (adapter->use_msg)
 235		intr_bits |= PVSCSI_INTR_MSG_MASK;
 236
 237	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, intr_bits);
 238}
 239
 240static void pvscsi_mask_intr(const struct pvscsi_adapter *adapter)
 241{
 242	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, 0);
 243}
 244
 245static void pvscsi_write_cmd_desc(const struct pvscsi_adapter *adapter,
 246				  u32 cmd, const void *desc, size_t len)
 247{
 248	const u32 *ptr = desc;
 249	size_t i;
 250
 251	len /= sizeof(*ptr);
 252	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND, cmd);
 253	for (i = 0; i < len; i++)
 254		pvscsi_reg_write(adapter,
 255				 PVSCSI_REG_OFFSET_COMMAND_DATA, ptr[i]);
 256}
 257
 258static void pvscsi_abort_cmd(const struct pvscsi_adapter *adapter,
 259			     const struct pvscsi_ctx *ctx)
 260{
 261	struct PVSCSICmdDescAbortCmd cmd = { 0 };
 262
 263	cmd.target = ctx->cmd->device->id;
 264	cmd.context = pvscsi_map_context(adapter, ctx);
 265
 266	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ABORT_CMD, &cmd, sizeof(cmd));
 267}
 268
 269static void pvscsi_kick_rw_io(const struct pvscsi_adapter *adapter)
 270{
 271	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_RW_IO, 0);
 272}
 273
 274static void pvscsi_process_request_ring(const struct pvscsi_adapter *adapter)
 275{
 276	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0);
 277}
 278
 279static int scsi_is_rw(unsigned char op)
 280{
 281	return op == READ_6  || op == WRITE_6 ||
 282	       op == READ_10 || op == WRITE_10 ||
 283	       op == READ_12 || op == WRITE_12 ||
 284	       op == READ_16 || op == WRITE_16;
 285}
 286
 287static void pvscsi_kick_io(const struct pvscsi_adapter *adapter,
 288			   unsigned char op)
 289{
 290	if (scsi_is_rw(op)) {
 291		struct PVSCSIRingsState *s = adapter->rings_state;
 292
 293		if (!adapter->use_req_threshold ||
 294		    s->reqProdIdx - s->reqConsIdx >= s->reqCallThreshold)
 295			pvscsi_kick_rw_io(adapter);
 296	} else {
 297		pvscsi_process_request_ring(adapter);
 298	}
 299}
 300
 301static void ll_adapter_reset(const struct pvscsi_adapter *adapter)
 302{
 303	dev_dbg(pvscsi_dev(adapter), "Adapter Reset on %p\n", adapter);
 304
 305	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ADAPTER_RESET, NULL, 0);
 306}
 307
 308static void ll_bus_reset(const struct pvscsi_adapter *adapter)
 309{
 310	dev_dbg(pvscsi_dev(adapter), "Resetting bus on %p\n", adapter);
 311
 312	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_BUS, NULL, 0);
 313}
 314
 315static void ll_device_reset(const struct pvscsi_adapter *adapter, u32 target)
 316{
 317	struct PVSCSICmdDescResetDevice cmd = { 0 };
 318
 319	dev_dbg(pvscsi_dev(adapter), "Resetting device: target=%u\n", target);
 320
 321	cmd.target = target;
 322
 323	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_DEVICE,
 324			      &cmd, sizeof(cmd));
 325}
 326
 327static void pvscsi_create_sg(struct pvscsi_ctx *ctx,
 328			     struct scatterlist *sg, unsigned count)
 329{
 330	unsigned i;
 331	struct PVSCSISGElement *sge;
 332
 333	BUG_ON(count > PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT);
 334
 335	sge = &ctx->sgl->sge[0];
 336	for (i = 0; i < count; i++, sg = sg_next(sg)) {
 337		sge[i].addr   = sg_dma_address(sg);
 338		sge[i].length = sg_dma_len(sg);
 339		sge[i].flags  = 0;
 340	}
 341}
 342
 343/*
 344 * Map all data buffers for a command into PCI space and
 345 * setup the scatter/gather list if needed.
 346 */
 347static int pvscsi_map_buffers(struct pvscsi_adapter *adapter,
 348			      struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd,
 349			      struct PVSCSIRingReqDesc *e)
 350{
 351	unsigned count;
 352	unsigned bufflen = scsi_bufflen(cmd);
 353	struct scatterlist *sg;
 354
 355	e->dataLen = bufflen;
 356	e->dataAddr = 0;
 357	if (bufflen == 0)
 358		return 0;
 359
 360	sg = scsi_sglist(cmd);
 361	count = scsi_sg_count(cmd);
 362	if (count != 0) {
 363		int segs = scsi_dma_map(cmd);
 364
 365		if (segs == -ENOMEM) {
 366			scmd_printk(KERN_DEBUG, cmd,
 367				    "vmw_pvscsi: Failed to map cmd sglist for DMA.\n");
 368			return -ENOMEM;
 369		} else if (segs > 1) {
 370			pvscsi_create_sg(ctx, sg, segs);
 371
 372			e->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;
 373			ctx->sglPA = dma_map_single(&adapter->dev->dev,
 374					ctx->sgl, SGL_SIZE, DMA_TO_DEVICE);
 375			if (dma_mapping_error(&adapter->dev->dev, ctx->sglPA)) {
 376				scmd_printk(KERN_ERR, cmd,
 377					    "vmw_pvscsi: Failed to map ctx sglist for DMA.\n");
 378				scsi_dma_unmap(cmd);
 379				ctx->sglPA = 0;
 380				return -ENOMEM;
 381			}
 382			e->dataAddr = ctx->sglPA;
 383		} else
 384			e->dataAddr = sg_dma_address(sg);
 385	} else {
 386		/*
 387		 * In case there is no S/G list, scsi_sglist points
 388		 * directly to the buffer.
 389		 */
 390		ctx->dataPA = dma_map_single(&adapter->dev->dev, sg, bufflen,
 391					     cmd->sc_data_direction);
 392		if (dma_mapping_error(&adapter->dev->dev, ctx->dataPA)) {
 393			scmd_printk(KERN_DEBUG, cmd,
 394				    "vmw_pvscsi: Failed to map direct data buffer for DMA.\n");
 395			return -ENOMEM;
 396		}
 397		e->dataAddr = ctx->dataPA;
 398	}
 399
 400	return 0;
 401}
 402
 403/*
 404 * The device incorrectly doesn't clear the first byte of the sense
 405 * buffer in some cases. We have to do it ourselves.
 406 * Otherwise we run into trouble when SWIOTLB is forced.
 407 */
 408static void pvscsi_patch_sense(struct scsi_cmnd *cmd)
 409{
 410	if (cmd->sense_buffer)
 411		cmd->sense_buffer[0] = 0;
 412}
 413
 414static void pvscsi_unmap_buffers(const struct pvscsi_adapter *adapter,
 415				 struct pvscsi_ctx *ctx)
 416{
 417	struct scsi_cmnd *cmd;
 418	unsigned bufflen;
 419
 420	cmd = ctx->cmd;
 421	bufflen = scsi_bufflen(cmd);
 422
 423	if (bufflen != 0) {
 424		unsigned count = scsi_sg_count(cmd);
 425
 426		if (count != 0) {
 427			scsi_dma_unmap(cmd);
 428			if (ctx->sglPA) {
 429				dma_unmap_single(&adapter->dev->dev, ctx->sglPA,
 430						 SGL_SIZE, DMA_TO_DEVICE);
 431				ctx->sglPA = 0;
 432			}
 433		} else
 434			dma_unmap_single(&adapter->dev->dev, ctx->dataPA,
 435					 bufflen, cmd->sc_data_direction);
 436	}
 437	if (cmd->sense_buffer)
 438		dma_unmap_single(&adapter->dev->dev, ctx->sensePA,
 439				 SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE);
 440}
 441
 442static int pvscsi_allocate_rings(struct pvscsi_adapter *adapter)
 443{
 444	adapter->rings_state = dma_alloc_coherent(&adapter->dev->dev, PAGE_SIZE,
 445			&adapter->ringStatePA, GFP_KERNEL);
 446	if (!adapter->rings_state)
 447		return -ENOMEM;
 448
 449	adapter->req_pages = min(PVSCSI_MAX_NUM_PAGES_REQ_RING,
 450				 pvscsi_ring_pages);
 451	adapter->req_depth = adapter->req_pages
 452					* PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
 453	adapter->req_ring = dma_alloc_coherent(&adapter->dev->dev,
 454			adapter->req_pages * PAGE_SIZE, &adapter->reqRingPA,
 455			GFP_KERNEL);
 456	if (!adapter->req_ring)
 457		return -ENOMEM;
 458
 459	adapter->cmp_pages = min(PVSCSI_MAX_NUM_PAGES_CMP_RING,
 460				 pvscsi_ring_pages);
 461	adapter->cmp_ring = dma_alloc_coherent(&adapter->dev->dev,
 462			adapter->cmp_pages * PAGE_SIZE, &adapter->cmpRingPA,
 463			GFP_KERNEL);
 464	if (!adapter->cmp_ring)
 465		return -ENOMEM;
 466
 467	BUG_ON(!IS_ALIGNED(adapter->ringStatePA, PAGE_SIZE));
 468	BUG_ON(!IS_ALIGNED(adapter->reqRingPA, PAGE_SIZE));
 469	BUG_ON(!IS_ALIGNED(adapter->cmpRingPA, PAGE_SIZE));
 470
 471	if (!adapter->use_msg)
 472		return 0;
 473
 474	adapter->msg_pages = min(PVSCSI_MAX_NUM_PAGES_MSG_RING,
 475				 pvscsi_msg_ring_pages);
 476	adapter->msg_ring = dma_alloc_coherent(&adapter->dev->dev,
 477			adapter->msg_pages * PAGE_SIZE, &adapter->msgRingPA,
 478			GFP_KERNEL);
 479	if (!adapter->msg_ring)
 480		return -ENOMEM;
 481	BUG_ON(!IS_ALIGNED(adapter->msgRingPA, PAGE_SIZE));
 482
 483	return 0;
 484}
 485
 486static void pvscsi_setup_all_rings(const struct pvscsi_adapter *adapter)
 487{
 488	struct PVSCSICmdDescSetupRings cmd = { 0 };
 489	dma_addr_t base;
 490	unsigned i;
 491
 492	cmd.ringsStatePPN   = adapter->ringStatePA >> PAGE_SHIFT;
 493	cmd.reqRingNumPages = adapter->req_pages;
 494	cmd.cmpRingNumPages = adapter->cmp_pages;
 495
 496	base = adapter->reqRingPA;
 497	for (i = 0; i < adapter->req_pages; i++) {
 498		cmd.reqRingPPNs[i] = base >> PAGE_SHIFT;
 499		base += PAGE_SIZE;
 500	}
 501
 502	base = adapter->cmpRingPA;
 503	for (i = 0; i < adapter->cmp_pages; i++) {
 504		cmd.cmpRingPPNs[i] = base >> PAGE_SHIFT;
 505		base += PAGE_SIZE;
 506	}
 507
 508	memset(adapter->rings_state, 0, PAGE_SIZE);
 509	memset(adapter->req_ring, 0, adapter->req_pages * PAGE_SIZE);
 510	memset(adapter->cmp_ring, 0, adapter->cmp_pages * PAGE_SIZE);
 511
 512	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_SETUP_RINGS,
 513			      &cmd, sizeof(cmd));
 514
 515	if (adapter->use_msg) {
 516		struct PVSCSICmdDescSetupMsgRing cmd_msg = { 0 };
 517
 518		cmd_msg.numPages = adapter->msg_pages;
 519
 520		base = adapter->msgRingPA;
 521		for (i = 0; i < adapter->msg_pages; i++) {
 522			cmd_msg.ringPPNs[i] = base >> PAGE_SHIFT;
 523			base += PAGE_SIZE;
 524		}
 525		memset(adapter->msg_ring, 0, adapter->msg_pages * PAGE_SIZE);
 526
 527		pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_SETUP_MSG_RING,
 528				      &cmd_msg, sizeof(cmd_msg));
 529	}
 530}
 531
 532static int pvscsi_change_queue_depth(struct scsi_device *sdev, int qdepth)
 533{
 534	if (!sdev->tagged_supported)
 535		qdepth = 1;
 536	return scsi_change_queue_depth(sdev, qdepth);
 537}
 538
 539/*
 540 * Pull a completion descriptor off and pass the completion back
 541 * to the SCSI mid layer.
 542 */
 543static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
 544				    const struct PVSCSIRingCmpDesc *e)
 545{
 546	struct pvscsi_ctx *ctx;
 547	struct scsi_cmnd *cmd;
 548	struct completion *abort_cmp;
 549	u32 btstat = e->hostStatus;
 550	u32 sdstat = e->scsiStatus;
 551
 552	ctx = pvscsi_get_context(adapter, e->context);
 553	cmd = ctx->cmd;
 554	abort_cmp = ctx->abort_cmp;
 555	pvscsi_unmap_buffers(adapter, ctx);
 556	if (sdstat != SAM_STAT_CHECK_CONDITION)
 557		pvscsi_patch_sense(cmd);
 558	pvscsi_release_context(adapter, ctx);
 559	if (abort_cmp) {
 560		/*
 561		 * The command was requested to be aborted. Just signal that
 562		 * the request completed and swallow the actual cmd completion
 563		 * here. The abort handler will post a completion for this
 564		 * command indicating that it got successfully aborted.
 565		 */
 566		complete(abort_cmp);
 567		return;
 568	}
 569
 570	cmd->result = 0;
 571	if (sdstat != SAM_STAT_GOOD &&
 572	    (btstat == BTSTAT_SUCCESS ||
 573	     btstat == BTSTAT_LINKED_COMMAND_COMPLETED ||
 574	     btstat == BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG)) {
 575		if (sdstat == SAM_STAT_COMMAND_TERMINATED) {
 576			cmd->result = (DID_RESET << 16);
 577		} else {
 578			cmd->result = (DID_OK << 16) | sdstat;
 579		}
 580	} else
 581		switch (btstat) {
 582		case BTSTAT_SUCCESS:
 583		case BTSTAT_LINKED_COMMAND_COMPLETED:
 584		case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
 585			/*
 586			 * Commands like INQUIRY may transfer less data than
 587			 * requested by the initiator via bufflen. Set residual
 588			 * count to make upper layer aware of the actual amount
 589			 * of data returned. There are cases when controller
 590			 * returns zero dataLen with non zero data - do not set
 591			 * residual count in that case.
 592			 */
 593			if (e->dataLen && (e->dataLen < scsi_bufflen(cmd)))
 594				scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
 595			cmd->result = (DID_OK << 16);
 596			break;
 597
 598		case BTSTAT_DATARUN:
 599		case BTSTAT_DATA_UNDERRUN:
 600			/* Report residual data in underruns */
 601			scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
 602			cmd->result = (DID_ERROR << 16);
 603			break;
 604
 605		case BTSTAT_SELTIMEO:
 606			/* Our emulation returns this for non-connected devs */
 607			cmd->result = (DID_BAD_TARGET << 16);
 608			break;
 609
 610		case BTSTAT_LUNMISMATCH:
 611		case BTSTAT_TAGREJECT:
 612		case BTSTAT_BADMSG:
 613		case BTSTAT_HAHARDWARE:
 614		case BTSTAT_INVPHASE:
 615		case BTSTAT_HATIMEOUT:
 616		case BTSTAT_NORESPONSE:
 617		case BTSTAT_DISCONNECT:
 618		case BTSTAT_HASOFTWARE:
 619		case BTSTAT_BUSFREE:
 620		case BTSTAT_SENSFAILED:
 621			cmd->result |= (DID_ERROR << 16);
 622			break;
 623
 624		case BTSTAT_SENTRST:
 625		case BTSTAT_RECVRST:
 626		case BTSTAT_BUSRESET:
 627			cmd->result = (DID_RESET << 16);
 628			break;
 629
 630		case BTSTAT_ABORTQUEUE:
 631			cmd->result = (DID_BUS_BUSY << 16);
 632			break;
 633
 634		case BTSTAT_SCSIPARITY:
 635			cmd->result = (DID_PARITY << 16);
 636			break;
 637
 638		default:
 639			cmd->result = (DID_ERROR << 16);
 640			scmd_printk(KERN_DEBUG, cmd,
 641				    "Unknown completion status: 0x%x\n",
 642				    btstat);
 643	}
 644
 645	dev_dbg(&cmd->device->sdev_gendev,
 646		"cmd=%p %x ctx=%p result=0x%x status=0x%x,%x\n",
 647		cmd, cmd->cmnd[0], ctx, cmd->result, btstat, sdstat);
 648
 649	scsi_done(cmd);
 650}
 651
 652/*
 653 * barrier usage : Since the PVSCSI device is emulated, there could be cases
 654 * where we may want to serialize some accesses between the driver and the
 655 * emulation layer. We use compiler barriers instead of the more expensive
 656 * memory barriers because PVSCSI is only supported on X86 which has strong
 657 * memory access ordering.
 658 */
 659static void pvscsi_process_completion_ring(struct pvscsi_adapter *adapter)
 660{
 661	struct PVSCSIRingsState *s = adapter->rings_state;
 662	struct PVSCSIRingCmpDesc *ring = adapter->cmp_ring;
 663	u32 cmp_entries = s->cmpNumEntriesLog2;
 664
 665	while (s->cmpConsIdx != s->cmpProdIdx) {
 666		struct PVSCSIRingCmpDesc *e = ring + (s->cmpConsIdx &
 667						      MASK(cmp_entries));
 668		/*
 669		 * This barrier() ensures that *e is not dereferenced while
 670		 * the device emulation still writes data into the slot.
 671		 * Since the device emulation advances s->cmpProdIdx only after
 672		 * updating the slot we want to check it first.
 673		 */
 674		barrier();
 675		pvscsi_complete_request(adapter, e);
 676		/*
 677		 * This barrier() ensures that compiler doesn't reorder write
 678		 * to s->cmpConsIdx before the read of (*e) inside
 679		 * pvscsi_complete_request. Otherwise, device emulation may
 680		 * overwrite *e before we had a chance to read it.
 681		 */
 682		barrier();
 683		s->cmpConsIdx++;
 684	}
 685}
 686
 687/*
 688 * Translate a Linux SCSI request into a request ring entry.
 689 */
 690static int pvscsi_queue_ring(struct pvscsi_adapter *adapter,
 691			     struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd)
 692{
 693	struct PVSCSIRingsState *s;
 694	struct PVSCSIRingReqDesc *e;
 695	struct scsi_device *sdev;
 696	u32 req_entries;
 697
 698	s = adapter->rings_state;
 699	sdev = cmd->device;
 700	req_entries = s->reqNumEntriesLog2;
 701
 702	/*
 703	 * If this condition holds, we might have room on the request ring, but
 704	 * we might not have room on the completion ring for the response.
 705	 * However, we have already ruled out this possibility - we would not
 706	 * have successfully allocated a context if it were true, since we only
 707	 * have one context per request entry.  Check for it anyway, since it
 708	 * would be a serious bug.
 709	 */
 710	if (s->reqProdIdx - s->cmpConsIdx >= 1 << req_entries) {
 711		scmd_printk(KERN_ERR, cmd, "vmw_pvscsi: "
 712			    "ring full: reqProdIdx=%d cmpConsIdx=%d\n",
 713			    s->reqProdIdx, s->cmpConsIdx);
 714		return -1;
 715	}
 716
 717	e = adapter->req_ring + (s->reqProdIdx & MASK(req_entries));
 718
 719	e->bus    = sdev->channel;
 720	e->target = sdev->id;
 721	memset(e->lun, 0, sizeof(e->lun));
 722	e->lun[1] = sdev->lun;
 723
 724	if (cmd->sense_buffer) {
 725		ctx->sensePA = dma_map_single(&adapter->dev->dev,
 726				cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE,
 727				DMA_FROM_DEVICE);
 728		if (dma_mapping_error(&adapter->dev->dev, ctx->sensePA)) {
 729			scmd_printk(KERN_DEBUG, cmd,
 730				    "vmw_pvscsi: Failed to map sense buffer for DMA.\n");
 731			ctx->sensePA = 0;
 732			return -ENOMEM;
 733		}
 734		e->senseAddr = ctx->sensePA;
 735		e->senseLen = SCSI_SENSE_BUFFERSIZE;
 736	} else {
 737		e->senseLen  = 0;
 738		e->senseAddr = 0;
 739	}
 740	e->cdbLen   = cmd->cmd_len;
 741	e->vcpuHint = smp_processor_id();
 742	memcpy(e->cdb, cmd->cmnd, e->cdbLen);
 743
 744	e->tag = SIMPLE_QUEUE_TAG;
 745
 746	if (cmd->sc_data_direction == DMA_FROM_DEVICE)
 747		e->flags = PVSCSI_FLAG_CMD_DIR_TOHOST;
 748	else if (cmd->sc_data_direction == DMA_TO_DEVICE)
 749		e->flags = PVSCSI_FLAG_CMD_DIR_TODEVICE;
 750	else if (cmd->sc_data_direction == DMA_NONE)
 751		e->flags = PVSCSI_FLAG_CMD_DIR_NONE;
 752	else
 753		e->flags = 0;
 754
 755	if (pvscsi_map_buffers(adapter, ctx, cmd, e) != 0) {
 756		if (cmd->sense_buffer) {
 757			dma_unmap_single(&adapter->dev->dev, ctx->sensePA,
 758					 SCSI_SENSE_BUFFERSIZE,
 759					 DMA_FROM_DEVICE);
 760			ctx->sensePA = 0;
 761		}
 762		return -ENOMEM;
 763	}
 764
 765	e->context = pvscsi_map_context(adapter, ctx);
 766
 767	barrier();
 768
 769	s->reqProdIdx++;
 770
 771	return 0;
 772}
 773
 774static int pvscsi_queue_lck(struct scsi_cmnd *cmd)
 775{
 776	struct Scsi_Host *host = cmd->device->host;
 777	struct pvscsi_adapter *adapter = shost_priv(host);
 778	struct pvscsi_ctx *ctx;
 779	unsigned long flags;
 780	unsigned char op;
 781
 782	spin_lock_irqsave(&adapter->hw_lock, flags);
 783
 784	ctx = pvscsi_acquire_context(adapter, cmd);
 785	if (!ctx || pvscsi_queue_ring(adapter, ctx, cmd) != 0) {
 786		if (ctx)
 787			pvscsi_release_context(adapter, ctx);
 788		spin_unlock_irqrestore(&adapter->hw_lock, flags);
 789		return SCSI_MLQUEUE_HOST_BUSY;
 790	}
 791
 792	op = cmd->cmnd[0];
 793
 794	dev_dbg(&cmd->device->sdev_gendev,
 795		"queued cmd %p, ctx %p, op=%x\n", cmd, ctx, op);
 796
 797	spin_unlock_irqrestore(&adapter->hw_lock, flags);
 798
 799	pvscsi_kick_io(adapter, op);
 800
 801	return 0;
 802}
 803
 804static DEF_SCSI_QCMD(pvscsi_queue)
 805
 806static int pvscsi_abort(struct scsi_cmnd *cmd)
 807{
 808	struct pvscsi_adapter *adapter = shost_priv(cmd->device->host);
 809	struct pvscsi_ctx *ctx;
 810	unsigned long flags;
 811	int result = SUCCESS;
 812	DECLARE_COMPLETION_ONSTACK(abort_cmp);
 813	int done;
 814
 815	scmd_printk(KERN_DEBUG, cmd, "task abort on host %u, %p\n",
 816		    adapter->host->host_no, cmd);
 817
 818	spin_lock_irqsave(&adapter->hw_lock, flags);
 819
 820	/*
 821	 * Poll the completion ring first - we might be trying to abort
 822	 * a command that is waiting to be dispatched in the completion ring.
 823	 */
 824	pvscsi_process_completion_ring(adapter);
 825
 826	/*
 827	 * If there is no context for the command, it either already succeeded
 828	 * or else was never properly issued.  Not our problem.
 829	 */
 830	ctx = pvscsi_find_context(adapter, cmd);
 831	if (!ctx) {
 832		scmd_printk(KERN_DEBUG, cmd, "Failed to abort cmd %p\n", cmd);
 833		goto out;
 834	}
 835
 836	/*
 837	 * Mark that the command has been requested to be aborted and issue
 838	 * the abort.
 839	 */
 840	ctx->abort_cmp = &abort_cmp;
 841
 842	pvscsi_abort_cmd(adapter, ctx);
 843	spin_unlock_irqrestore(&adapter->hw_lock, flags);
 844	/* Wait for 2 secs for the completion. */
 845	done = wait_for_completion_timeout(&abort_cmp, msecs_to_jiffies(2000));
 846	spin_lock_irqsave(&adapter->hw_lock, flags);
 847
 848	if (!done) {
 849		/*
 850		 * Failed to abort the command, unmark the fact that it
 851		 * was requested to be aborted.
 852		 */
 853		ctx->abort_cmp = NULL;
 854		result = FAILED;
 855		scmd_printk(KERN_DEBUG, cmd,
 856			    "Failed to get completion for aborted cmd %p\n",
 857			    cmd);
 858		goto out;
 859	}
 860
 861	/*
 862	 * Successfully aborted the command.
 863	 */
 864	cmd->result = (DID_ABORT << 16);
 865	scsi_done(cmd);
 866
 867out:
 868	spin_unlock_irqrestore(&adapter->hw_lock, flags);
 869	return result;
 870}
 871
 872/*
 873 * Abort all outstanding requests.  This is only safe to use if the completion
 874 * ring will never be walked again or the device has been reset, because it
 875 * destroys the 1-1 mapping between context field passed to emulation and our
 876 * request structure.
 877 */
 878static void pvscsi_reset_all(struct pvscsi_adapter *adapter)
 879{
 880	unsigned i;
 881
 882	for (i = 0; i < adapter->req_depth; i++) {
 883		struct pvscsi_ctx *ctx = &adapter->cmd_map[i];
 884		struct scsi_cmnd *cmd = ctx->cmd;
 885		if (cmd) {
 886			scmd_printk(KERN_ERR, cmd,
 887				    "Forced reset on cmd %p\n", cmd);
 888			pvscsi_unmap_buffers(adapter, ctx);
 889			pvscsi_patch_sense(cmd);
 890			pvscsi_release_context(adapter, ctx);
 891			cmd->result = (DID_RESET << 16);
 892			scsi_done(cmd);
 893		}
 894	}
 895}
 896
 897static int pvscsi_host_reset(struct scsi_cmnd *cmd)
 898{
 899	struct Scsi_Host *host = cmd->device->host;
 900	struct pvscsi_adapter *adapter = shost_priv(host);
 901	unsigned long flags;
 902	bool use_msg;
 903
 904	scmd_printk(KERN_INFO, cmd, "SCSI Host reset\n");
 905
 906	spin_lock_irqsave(&adapter->hw_lock, flags);
 907
 908	use_msg = adapter->use_msg;
 909
 910	if (use_msg) {
 911		adapter->use_msg = false;
 912		spin_unlock_irqrestore(&adapter->hw_lock, flags);
 913
 914		/*
 915		 * Now that we know that the ISR won't add more work on the
 916		 * workqueue we can safely flush any outstanding work.
 917		 */
 918		flush_workqueue(adapter->workqueue);
 919		spin_lock_irqsave(&adapter->hw_lock, flags);
 920	}
 921
 922	/*
 923	 * We're going to tear down the entire ring structure and set it back
 924	 * up, so stalling new requests until all completions are flushed and
 925	 * the rings are back in place.
 926	 */
 927
 928	pvscsi_process_request_ring(adapter);
 929
 930	ll_adapter_reset(adapter);
 931
 932	/*
 933	 * Now process any completions.  Note we do this AFTER adapter reset,
 934	 * which is strange, but stops races where completions get posted
 935	 * between processing the ring and issuing the reset.  The backend will
 936	 * not touch the ring memory after reset, so the immediately pre-reset
 937	 * completion ring state is still valid.
 938	 */
 939	pvscsi_process_completion_ring(adapter);
 940
 941	pvscsi_reset_all(adapter);
 942	adapter->use_msg = use_msg;
 943	pvscsi_setup_all_rings(adapter);
 944	pvscsi_unmask_intr(adapter);
 945
 946	spin_unlock_irqrestore(&adapter->hw_lock, flags);
 947
 948	return SUCCESS;
 949}
 950
 951static int pvscsi_bus_reset(struct scsi_cmnd *cmd)
 952{
 953	struct Scsi_Host *host = cmd->device->host;
 954	struct pvscsi_adapter *adapter = shost_priv(host);
 955	unsigned long flags;
 956
 957	scmd_printk(KERN_INFO, cmd, "SCSI Bus reset\n");
 958
 959	/*
 960	 * We don't want to queue new requests for this bus after
 961	 * flushing all pending requests to emulation, since new
 962	 * requests could then sneak in during this bus reset phase,
 963	 * so take the lock now.
 964	 */
 965	spin_lock_irqsave(&adapter->hw_lock, flags);
 966
 967	pvscsi_process_request_ring(adapter);
 968	ll_bus_reset(adapter);
 969	pvscsi_process_completion_ring(adapter);
 970
 971	spin_unlock_irqrestore(&adapter->hw_lock, flags);
 972
 973	return SUCCESS;
 974}
 975
 976static int pvscsi_device_reset(struct scsi_cmnd *cmd)
 977{
 978	struct Scsi_Host *host = cmd->device->host;
 979	struct pvscsi_adapter *adapter = shost_priv(host);
 980	unsigned long flags;
 981
 982	scmd_printk(KERN_INFO, cmd, "SCSI device reset on scsi%u:%u\n",
 983		    host->host_no, cmd->device->id);
 984
 985	/*
 986	 * We don't want to queue new requests for this device after flushing
 987	 * all pending requests to emulation, since new requests could then
 988	 * sneak in during this device reset phase, so take the lock now.
 989	 */
 990	spin_lock_irqsave(&adapter->hw_lock, flags);
 991
 992	pvscsi_process_request_ring(adapter);
 993	ll_device_reset(adapter, cmd->device->id);
 994	pvscsi_process_completion_ring(adapter);
 995
 996	spin_unlock_irqrestore(&adapter->hw_lock, flags);
 997
 998	return SUCCESS;
 999}
1000
1001static struct scsi_host_template pvscsi_template;
1002
1003static const char *pvscsi_info(struct Scsi_Host *host)
1004{
1005	struct pvscsi_adapter *adapter = shost_priv(host);
1006	static char buf[256];
1007
1008	sprintf(buf, "VMware PVSCSI storage adapter rev %d, req/cmp/msg rings: "
1009		"%u/%u/%u pages, cmd_per_lun=%u", adapter->rev,
1010		adapter->req_pages, adapter->cmp_pages, adapter->msg_pages,
1011		pvscsi_template.cmd_per_lun);
1012
1013	return buf;
1014}
1015
1016static struct scsi_host_template pvscsi_template = {
1017	.module				= THIS_MODULE,
1018	.name				= "VMware PVSCSI Host Adapter",
1019	.proc_name			= "vmw_pvscsi",
1020	.info				= pvscsi_info,
1021	.queuecommand			= pvscsi_queue,
1022	.this_id			= -1,
1023	.sg_tablesize			= PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT,
1024	.dma_boundary			= UINT_MAX,
1025	.max_sectors			= 0xffff,
1026	.change_queue_depth		= pvscsi_change_queue_depth,
1027	.eh_abort_handler		= pvscsi_abort,
1028	.eh_device_reset_handler	= pvscsi_device_reset,
1029	.eh_bus_reset_handler		= pvscsi_bus_reset,
1030	.eh_host_reset_handler		= pvscsi_host_reset,
1031};
1032
1033static void pvscsi_process_msg(const struct pvscsi_adapter *adapter,
1034			       const struct PVSCSIRingMsgDesc *e)
1035{
1036	struct PVSCSIRingsState *s = adapter->rings_state;
1037	struct Scsi_Host *host = adapter->host;
1038	struct scsi_device *sdev;
1039
1040	printk(KERN_INFO "vmw_pvscsi: msg type: 0x%x - MSG RING: %u/%u (%u) \n",
1041	       e->type, s->msgProdIdx, s->msgConsIdx, s->msgNumEntriesLog2);
1042
1043	BUILD_BUG_ON(PVSCSI_MSG_LAST != 2);
1044
1045	if (e->type == PVSCSI_MSG_DEV_ADDED) {
1046		struct PVSCSIMsgDescDevStatusChanged *desc;
1047		desc = (struct PVSCSIMsgDescDevStatusChanged *)e;
1048
1049		printk(KERN_INFO
1050		       "vmw_pvscsi: msg: device added at scsi%u:%u:%u\n",
1051		       desc->bus, desc->target, desc->lun[1]);
1052
1053		if (!scsi_host_get(host))
1054			return;
1055
1056		sdev = scsi_device_lookup(host, desc->bus, desc->target,
1057					  desc->lun[1]);
1058		if (sdev) {
1059			printk(KERN_INFO "vmw_pvscsi: device already exists\n");
1060			scsi_device_put(sdev);
1061		} else
1062			scsi_add_device(adapter->host, desc->bus,
1063					desc->target, desc->lun[1]);
1064
1065		scsi_host_put(host);
1066	} else if (e->type == PVSCSI_MSG_DEV_REMOVED) {
1067		struct PVSCSIMsgDescDevStatusChanged *desc;
1068		desc = (struct PVSCSIMsgDescDevStatusChanged *)e;
1069
1070		printk(KERN_INFO
1071		       "vmw_pvscsi: msg: device removed at scsi%u:%u:%u\n",
1072		       desc->bus, desc->target, desc->lun[1]);
1073
1074		if (!scsi_host_get(host))
1075			return;
1076
1077		sdev = scsi_device_lookup(host, desc->bus, desc->target,
1078					  desc->lun[1]);
1079		if (sdev) {
1080			scsi_remove_device(sdev);
1081			scsi_device_put(sdev);
1082		} else
1083			printk(KERN_INFO
1084			       "vmw_pvscsi: failed to lookup scsi%u:%u:%u\n",
1085			       desc->bus, desc->target, desc->lun[1]);
1086
1087		scsi_host_put(host);
1088	}
1089}
1090
1091static int pvscsi_msg_pending(const struct pvscsi_adapter *adapter)
1092{
1093	struct PVSCSIRingsState *s = adapter->rings_state;
1094
1095	return s->msgProdIdx != s->msgConsIdx;
1096}
1097
1098static void pvscsi_process_msg_ring(const struct pvscsi_adapter *adapter)
1099{
1100	struct PVSCSIRingsState *s = adapter->rings_state;
1101	struct PVSCSIRingMsgDesc *ring = adapter->msg_ring;
1102	u32 msg_entries = s->msgNumEntriesLog2;
1103
1104	while (pvscsi_msg_pending(adapter)) {
1105		struct PVSCSIRingMsgDesc *e = ring + (s->msgConsIdx &
1106						      MASK(msg_entries));
1107
1108		barrier();
1109		pvscsi_process_msg(adapter, e);
1110		barrier();
1111		s->msgConsIdx++;
1112	}
1113}
1114
1115static void pvscsi_msg_workqueue_handler(struct work_struct *data)
1116{
1117	struct pvscsi_adapter *adapter;
1118
1119	adapter = container_of(data, struct pvscsi_adapter, work);
1120
1121	pvscsi_process_msg_ring(adapter);
1122}
1123
1124static int pvscsi_setup_msg_workqueue(struct pvscsi_adapter *adapter)
1125{
1126	char name[32];
1127
1128	if (!pvscsi_use_msg)
1129		return 0;
1130
1131	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND,
1132			 PVSCSI_CMD_SETUP_MSG_RING);
1133
1134	if (pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_COMMAND_STATUS) == -1)
1135		return 0;
1136
1137	snprintf(name, sizeof(name),
1138		 "vmw_pvscsi_wq_%u", adapter->host->host_no);
1139
1140	adapter->workqueue = create_singlethread_workqueue(name);
 
1141	if (!adapter->workqueue) {
1142		printk(KERN_ERR "vmw_pvscsi: failed to create work queue\n");
1143		return 0;
1144	}
1145	INIT_WORK(&adapter->work, pvscsi_msg_workqueue_handler);
1146
1147	return 1;
1148}
1149
1150static bool pvscsi_setup_req_threshold(struct pvscsi_adapter *adapter,
1151				      bool enable)
1152{
1153	u32 val;
1154
1155	if (!pvscsi_use_req_threshold)
1156		return false;
1157
1158	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND,
1159			 PVSCSI_CMD_SETUP_REQCALLTHRESHOLD);
1160	val = pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_COMMAND_STATUS);
1161	if (val == -1) {
1162		printk(KERN_INFO "vmw_pvscsi: device does not support req_threshold\n");
1163		return false;
1164	} else {
1165		struct PVSCSICmdDescSetupReqCall cmd_msg = { 0 };
1166		cmd_msg.enable = enable;
1167		printk(KERN_INFO
1168		       "vmw_pvscsi: %sabling reqCallThreshold\n",
1169			enable ? "en" : "dis");
1170		pvscsi_write_cmd_desc(adapter,
1171				      PVSCSI_CMD_SETUP_REQCALLTHRESHOLD,
1172				      &cmd_msg, sizeof(cmd_msg));
1173		return pvscsi_reg_read(adapter,
1174				       PVSCSI_REG_OFFSET_COMMAND_STATUS) != 0;
1175	}
1176}
1177
1178static irqreturn_t pvscsi_isr(int irq, void *devp)
1179{
1180	struct pvscsi_adapter *adapter = devp;
1181	unsigned long flags;
1182
1183	spin_lock_irqsave(&adapter->hw_lock, flags);
1184	pvscsi_process_completion_ring(adapter);
1185	if (adapter->use_msg && pvscsi_msg_pending(adapter))
1186		queue_work(adapter->workqueue, &adapter->work);
1187	spin_unlock_irqrestore(&adapter->hw_lock, flags);
1188
1189	return IRQ_HANDLED;
1190}
1191
1192static irqreturn_t pvscsi_shared_isr(int irq, void *devp)
1193{
1194	struct pvscsi_adapter *adapter = devp;
1195	u32 val = pvscsi_read_intr_status(adapter);
1196
1197	if (!(val & PVSCSI_INTR_ALL_SUPPORTED))
1198		return IRQ_NONE;
1199	pvscsi_write_intr_status(devp, val);
1200	return pvscsi_isr(irq, devp);
1201}
1202
1203static void pvscsi_free_sgls(const struct pvscsi_adapter *adapter)
1204{
1205	struct pvscsi_ctx *ctx = adapter->cmd_map;
1206	unsigned i;
1207
1208	for (i = 0; i < adapter->req_depth; ++i, ++ctx)
1209		free_pages((unsigned long)ctx->sgl, get_order(SGL_SIZE));
1210}
1211
1212static void pvscsi_shutdown_intr(struct pvscsi_adapter *adapter)
1213{
1214	free_irq(pci_irq_vector(adapter->dev, 0), adapter);
1215	pci_free_irq_vectors(adapter->dev);
1216}
1217
1218static void pvscsi_release_resources(struct pvscsi_adapter *adapter)
1219{
1220	if (adapter->workqueue)
1221		destroy_workqueue(adapter->workqueue);
1222
1223	if (adapter->mmioBase)
1224		pci_iounmap(adapter->dev, adapter->mmioBase);
1225
1226	pci_release_regions(adapter->dev);
1227
1228	if (adapter->cmd_map) {
1229		pvscsi_free_sgls(adapter);
1230		kfree(adapter->cmd_map);
1231	}
1232
1233	if (adapter->rings_state)
1234		dma_free_coherent(&adapter->dev->dev, PAGE_SIZE,
1235				    adapter->rings_state, adapter->ringStatePA);
1236
1237	if (adapter->req_ring)
1238		dma_free_coherent(&adapter->dev->dev,
1239				    adapter->req_pages * PAGE_SIZE,
1240				    adapter->req_ring, adapter->reqRingPA);
1241
1242	if (adapter->cmp_ring)
1243		dma_free_coherent(&adapter->dev->dev,
1244				    adapter->cmp_pages * PAGE_SIZE,
1245				    adapter->cmp_ring, adapter->cmpRingPA);
1246
1247	if (adapter->msg_ring)
1248		dma_free_coherent(&adapter->dev->dev,
1249				    adapter->msg_pages * PAGE_SIZE,
1250				    adapter->msg_ring, adapter->msgRingPA);
1251}
1252
1253/*
1254 * Allocate scatter gather lists.
1255 *
1256 * These are statically allocated.  Trying to be clever was not worth it.
1257 *
1258 * Dynamic allocation can fail, and we can't go deep into the memory
1259 * allocator, since we're a SCSI driver, and trying too hard to allocate
1260 * memory might generate disk I/O.  We also don't want to fail disk I/O
1261 * in that case because we can't get an allocation - the I/O could be
1262 * trying to swap out data to free memory.  Since that is pathological,
1263 * just use a statically allocated scatter list.
1264 *
1265 */
1266static int pvscsi_allocate_sg(struct pvscsi_adapter *adapter)
1267{
1268	struct pvscsi_ctx *ctx;
1269	int i;
1270
1271	ctx = adapter->cmd_map;
1272	BUILD_BUG_ON(sizeof(struct pvscsi_sg_list) > SGL_SIZE);
1273
1274	for (i = 0; i < adapter->req_depth; ++i, ++ctx) {
1275		ctx->sgl = (void *)__get_free_pages(GFP_KERNEL,
1276						    get_order(SGL_SIZE));
1277		ctx->sglPA = 0;
1278		BUG_ON(!IS_ALIGNED(((unsigned long)ctx->sgl), PAGE_SIZE));
1279		if (!ctx->sgl) {
1280			for (; i >= 0; --i, --ctx) {
1281				free_pages((unsigned long)ctx->sgl,
1282					   get_order(SGL_SIZE));
1283				ctx->sgl = NULL;
1284			}
1285			return -ENOMEM;
1286		}
1287	}
1288
1289	return 0;
1290}
1291
1292/*
1293 * Query the device, fetch the config info and return the
1294 * maximum number of targets on the adapter. In case of
1295 * failure due to any reason return default i.e. 16.
1296 */
1297static u32 pvscsi_get_max_targets(struct pvscsi_adapter *adapter)
1298{
1299	struct PVSCSICmdDescConfigCmd cmd;
1300	struct PVSCSIConfigPageHeader *header;
1301	struct device *dev;
1302	dma_addr_t configPagePA;
1303	void *config_page;
1304	u32 numPhys = 16;
1305
1306	dev = pvscsi_dev(adapter);
1307	config_page = dma_alloc_coherent(&adapter->dev->dev, PAGE_SIZE,
1308			&configPagePA, GFP_KERNEL);
1309	if (!config_page) {
1310		dev_warn(dev, "vmw_pvscsi: failed to allocate memory for config page\n");
1311		goto exit;
1312	}
1313	BUG_ON(configPagePA & ~PAGE_MASK);
1314
1315	/* Fetch config info from the device. */
1316	cmd.configPageAddress = ((u64)PVSCSI_CONFIG_CONTROLLER_ADDRESS) << 32;
1317	cmd.configPageNum = PVSCSI_CONFIG_PAGE_CONTROLLER;
1318	cmd.cmpAddr = configPagePA;
1319	cmd._pad = 0;
1320
1321	/*
1322	 * Mark the completion page header with error values. If the device
1323	 * completes the command successfully, it sets the status values to
1324	 * indicate success.
1325	 */
1326	header = config_page;
1327	header->hostStatus = BTSTAT_INVPARAM;
1328	header->scsiStatus = SDSTAT_CHECK;
1329
1330	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_CONFIG, &cmd, sizeof cmd);
1331
1332	if (header->hostStatus == BTSTAT_SUCCESS &&
1333	    header->scsiStatus == SDSTAT_GOOD) {
1334		struct PVSCSIConfigPageController *config;
1335
1336		config = config_page;
1337		numPhys = config->numPhys;
1338	} else
1339		dev_warn(dev, "vmw_pvscsi: PVSCSI_CMD_CONFIG failed. hostStatus = 0x%x, scsiStatus = 0x%x\n",
1340			 header->hostStatus, header->scsiStatus);
1341	dma_free_coherent(&adapter->dev->dev, PAGE_SIZE, config_page,
1342			  configPagePA);
1343exit:
1344	return numPhys;
1345}
1346
1347static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1348{
1349	unsigned int irq_flag = PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY;
1350	struct pvscsi_adapter *adapter;
1351	struct pvscsi_adapter adapter_temp;
1352	struct Scsi_Host *host = NULL;
1353	unsigned int i;
1354	int error;
1355	u32 max_id;
1356
1357	error = -ENODEV;
1358
1359	if (pci_enable_device(pdev))
1360		return error;
1361
1362	if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
1363		printk(KERN_INFO "vmw_pvscsi: using 64bit dma\n");
1364	} else if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) {
1365		printk(KERN_INFO "vmw_pvscsi: using 32bit dma\n");
1366	} else {
1367		printk(KERN_ERR "vmw_pvscsi: failed to set DMA mask\n");
1368		goto out_disable_device;
1369	}
1370
1371	/*
1372	 * Let's use a temp pvscsi_adapter struct until we find the number of
1373	 * targets on the adapter, after that we will switch to the real
1374	 * allocated struct.
1375	 */
1376	adapter = &adapter_temp;
1377	memset(adapter, 0, sizeof(*adapter));
1378	adapter->dev  = pdev;
1379	adapter->rev = pdev->revision;
1380
1381	if (pci_request_regions(pdev, "vmw_pvscsi")) {
1382		printk(KERN_ERR "vmw_pvscsi: pci memory selection failed\n");
1383		goto out_disable_device;
1384	}
1385
1386	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
1387		if ((pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE_IO))
1388			continue;
1389
1390		if (pci_resource_len(pdev, i) < PVSCSI_MEM_SPACE_SIZE)
1391			continue;
1392
1393		break;
1394	}
1395
1396	if (i == DEVICE_COUNT_RESOURCE) {
1397		printk(KERN_ERR
1398		       "vmw_pvscsi: adapter has no suitable MMIO region\n");
1399		goto out_release_resources_and_disable;
1400	}
1401
1402	adapter->mmioBase = pci_iomap(pdev, i, PVSCSI_MEM_SPACE_SIZE);
1403
1404	if (!adapter->mmioBase) {
1405		printk(KERN_ERR
1406		       "vmw_pvscsi: can't iomap for BAR %d memsize %lu\n",
1407		       i, PVSCSI_MEM_SPACE_SIZE);
1408		goto out_release_resources_and_disable;
1409	}
1410
1411	pci_set_master(pdev);
1412
1413	/*
1414	 * Ask the device for max number of targets before deciding the
1415	 * default pvscsi_ring_pages value.
1416	 */
1417	max_id = pvscsi_get_max_targets(adapter);
1418	printk(KERN_INFO "vmw_pvscsi: max_id: %u\n", max_id);
1419
1420	if (pvscsi_ring_pages == 0)
1421		/*
1422		 * Set the right default value. Up to 16 it is 8, above it is
1423		 * max.
1424		 */
1425		pvscsi_ring_pages = (max_id > 16) ?
1426			PVSCSI_SETUP_RINGS_MAX_NUM_PAGES :
1427			PVSCSI_DEFAULT_NUM_PAGES_PER_RING;
1428	printk(KERN_INFO
1429	       "vmw_pvscsi: setting ring_pages to %d\n",
1430	       pvscsi_ring_pages);
1431
1432	pvscsi_template.can_queue =
1433		min(PVSCSI_MAX_NUM_PAGES_REQ_RING, pvscsi_ring_pages) *
1434		PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
1435	pvscsi_template.cmd_per_lun =
1436		min(pvscsi_template.can_queue, pvscsi_cmd_per_lun);
1437	host = scsi_host_alloc(&pvscsi_template, sizeof(struct pvscsi_adapter));
1438	if (!host) {
1439		printk(KERN_ERR "vmw_pvscsi: failed to allocate host\n");
1440		goto out_release_resources_and_disable;
1441	}
1442
1443	/*
1444	 * Let's use the real pvscsi_adapter struct here onwards.
1445	 */
1446	adapter = shost_priv(host);
1447	memset(adapter, 0, sizeof(*adapter));
1448	adapter->dev  = pdev;
1449	adapter->host = host;
1450	/*
1451	 * Copy back what we already have to the allocated adapter struct.
1452	 */
1453	adapter->rev = adapter_temp.rev;
1454	adapter->mmioBase = adapter_temp.mmioBase;
1455
1456	spin_lock_init(&adapter->hw_lock);
1457	host->max_channel = 0;
1458	host->max_lun     = 1;
1459	host->max_cmd_len = 16;
1460	host->max_id      = max_id;
1461
1462	pci_set_drvdata(pdev, host);
1463
1464	ll_adapter_reset(adapter);
1465
1466	adapter->use_msg = pvscsi_setup_msg_workqueue(adapter);
1467
1468	error = pvscsi_allocate_rings(adapter);
1469	if (error) {
1470		printk(KERN_ERR "vmw_pvscsi: unable to allocate ring memory\n");
1471		goto out_release_resources;
1472	}
1473
1474	/*
1475	 * From this point on we should reset the adapter if anything goes
1476	 * wrong.
1477	 */
1478	pvscsi_setup_all_rings(adapter);
1479
1480	adapter->cmd_map = kcalloc(adapter->req_depth,
1481				   sizeof(struct pvscsi_ctx), GFP_KERNEL);
1482	if (!adapter->cmd_map) {
1483		printk(KERN_ERR "vmw_pvscsi: failed to allocate memory.\n");
1484		error = -ENOMEM;
1485		goto out_reset_adapter;
1486	}
1487
1488	INIT_LIST_HEAD(&adapter->cmd_pool);
1489	for (i = 0; i < adapter->req_depth; i++) {
1490		struct pvscsi_ctx *ctx = adapter->cmd_map + i;
1491		list_add(&ctx->list, &adapter->cmd_pool);
1492	}
1493
1494	error = pvscsi_allocate_sg(adapter);
1495	if (error) {
1496		printk(KERN_ERR "vmw_pvscsi: unable to allocate s/g table\n");
1497		goto out_reset_adapter;
1498	}
1499
1500	if (pvscsi_disable_msix)
1501		irq_flag &= ~PCI_IRQ_MSIX;
1502	if (pvscsi_disable_msi)
1503		irq_flag &= ~PCI_IRQ_MSI;
1504
1505	error = pci_alloc_irq_vectors(adapter->dev, 1, 1, irq_flag);
1506	if (error < 0)
1507		goto out_reset_adapter;
1508
1509	adapter->use_req_threshold = pvscsi_setup_req_threshold(adapter, true);
1510	printk(KERN_DEBUG "vmw_pvscsi: driver-based request coalescing %sabled\n",
1511	       adapter->use_req_threshold ? "en" : "dis");
1512
1513	if (adapter->dev->msix_enabled || adapter->dev->msi_enabled) {
1514		printk(KERN_INFO "vmw_pvscsi: using MSI%s\n",
1515			adapter->dev->msix_enabled ? "-X" : "");
1516		error = request_irq(pci_irq_vector(pdev, 0), pvscsi_isr,
1517				0, "vmw_pvscsi", adapter);
1518	} else {
1519		printk(KERN_INFO "vmw_pvscsi: using INTx\n");
1520		error = request_irq(pci_irq_vector(pdev, 0), pvscsi_shared_isr,
1521				IRQF_SHARED, "vmw_pvscsi", adapter);
1522	}
1523
1524	if (error) {
1525		printk(KERN_ERR
1526		       "vmw_pvscsi: unable to request IRQ: %d\n", error);
1527		goto out_reset_adapter;
1528	}
1529
1530	error = scsi_add_host(host, &pdev->dev);
1531	if (error) {
1532		printk(KERN_ERR
1533		       "vmw_pvscsi: scsi_add_host failed: %d\n", error);
1534		goto out_reset_adapter;
1535	}
1536
1537	dev_info(&pdev->dev, "VMware PVSCSI rev %d host #%u\n",
1538		 adapter->rev, host->host_no);
1539
1540	pvscsi_unmask_intr(adapter);
1541
1542	scsi_scan_host(host);
1543
1544	return 0;
1545
1546out_reset_adapter:
1547	ll_adapter_reset(adapter);
1548out_release_resources:
1549	pvscsi_shutdown_intr(adapter);
1550	pvscsi_release_resources(adapter);
1551	scsi_host_put(host);
1552out_disable_device:
1553	pci_disable_device(pdev);
1554
1555	return error;
1556
1557out_release_resources_and_disable:
1558	pvscsi_shutdown_intr(adapter);
1559	pvscsi_release_resources(adapter);
1560	goto out_disable_device;
1561}
1562
1563static void __pvscsi_shutdown(struct pvscsi_adapter *adapter)
1564{
1565	pvscsi_mask_intr(adapter);
1566
1567	if (adapter->workqueue)
1568		flush_workqueue(adapter->workqueue);
1569
1570	pvscsi_shutdown_intr(adapter);
1571
1572	pvscsi_process_request_ring(adapter);
1573	pvscsi_process_completion_ring(adapter);
1574	ll_adapter_reset(adapter);
1575}
1576
/*
 * PCI shutdown callback: quiesce the adapter attached to @dev.
 */
static void pvscsi_shutdown(struct pci_dev *dev)
{
	struct Scsi_Host *shost = pci_get_drvdata(dev);

	__pvscsi_shutdown(shost_priv(shost));
}
1584
/*
 * PCI remove callback: unregister the SCSI host, quiesce the adapter,
 * release its resources, drop the host reference and disable the PCI
 * device.
 */
static void pvscsi_remove(struct pci_dev *pdev)
{
	struct Scsi_Host *shost = pci_get_drvdata(pdev);
	struct pvscsi_adapter *adapter = shost_priv(shost);

	scsi_remove_host(shost);

	__pvscsi_shutdown(adapter);
	pvscsi_release_resources(adapter);

	/* The adapter lives in the host's private data; do not use it after this. */
	scsi_host_put(shost);

	pci_disable_device(pdev);
}
1599
1600static struct pci_driver pvscsi_pci_driver = {
1601	.name		= "vmw_pvscsi",
1602	.id_table	= pvscsi_pci_tbl,
1603	.probe		= pvscsi_probe,
1604	.remove		= pvscsi_remove,
1605	.shutdown       = pvscsi_shutdown,
1606};
1607
1608static int __init pvscsi_init(void)
1609{
1610	pr_info("%s - version %s\n",
1611		PVSCSI_LINUX_DRIVER_DESC, PVSCSI_DRIVER_VERSION_STRING);
1612	return pci_register_driver(&pvscsi_pci_driver);
1613}
1614
1615static void __exit pvscsi_exit(void)
1616{
1617	pci_unregister_driver(&pvscsi_pci_driver);
1618}
1619
1620module_init(pvscsi_init);
1621module_exit(pvscsi_exit);
v6.13.7
   1/*
   2 * Linux driver for VMware's para-virtualized SCSI HBA.
   3 *
   4 * Copyright (C) 2008-2014, VMware, Inc. All Rights Reserved.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License as published by the
   8 * Free Software Foundation; version 2 of the License and no later version.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  13 * NON INFRINGEMENT.  See the GNU General Public License for more
  14 * details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write to the Free Software
  18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 *
  20 */
  21
  22#include <linux/kernel.h>
  23#include <linux/module.h>
  24#include <linux/interrupt.h>
  25#include <linux/slab.h>
  26#include <linux/workqueue.h>
  27#include <linux/pci.h>
  28
  29#include <scsi/scsi.h>
  30#include <scsi/scsi_host.h>
  31#include <scsi/scsi_cmnd.h>
  32#include <scsi/scsi_device.h>
  33#include <scsi/scsi_tcq.h>
  34
  35#include "vmw_pvscsi.h"
  36
  37#define PVSCSI_LINUX_DRIVER_DESC "VMware PVSCSI driver"
  38
  39MODULE_DESCRIPTION(PVSCSI_LINUX_DRIVER_DESC);
  40MODULE_AUTHOR("VMware, Inc.");
  41MODULE_LICENSE("GPL");
  42MODULE_VERSION(PVSCSI_DRIVER_VERSION_STRING);
  43
  44#define PVSCSI_DEFAULT_NUM_PAGES_PER_RING	8
  45#define PVSCSI_DEFAULT_NUM_PAGES_MSG_RING	1
  46#define PVSCSI_DEFAULT_QUEUE_DEPTH		254
  47#define SGL_SIZE				PAGE_SIZE
  48
  49struct pvscsi_sg_list {
  50	struct PVSCSISGElement sge[PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT];
  51};
  52
  53struct pvscsi_ctx {
  54	/*
  55	 * The index of the context in cmd_map serves as the context ID for a
  56	 * 1-to-1 mapping completions back to requests.
  57	 */
  58	struct scsi_cmnd	*cmd;
  59	struct pvscsi_sg_list	*sgl;
  60	struct list_head	list;
  61	dma_addr_t		dataPA;
  62	dma_addr_t		sensePA;
  63	dma_addr_t		sglPA;
  64	struct completion	*abort_cmp;
  65};
  66
  67struct pvscsi_adapter {
  68	char				*mmioBase;
  69	u8				rev;
  70	bool				use_msg;
  71	bool				use_req_threshold;
  72
  73	spinlock_t			hw_lock;
  74
  75	struct workqueue_struct		*workqueue;
  76	struct work_struct		work;
  77
  78	struct PVSCSIRingReqDesc	*req_ring;
  79	unsigned			req_pages;
  80	unsigned			req_depth;
  81	dma_addr_t			reqRingPA;
  82
  83	struct PVSCSIRingCmpDesc	*cmp_ring;
  84	unsigned			cmp_pages;
  85	dma_addr_t			cmpRingPA;
  86
  87	struct PVSCSIRingMsgDesc	*msg_ring;
  88	unsigned			msg_pages;
  89	dma_addr_t			msgRingPA;
  90
  91	struct PVSCSIRingsState		*rings_state;
  92	dma_addr_t			ringStatePA;
  93
  94	struct pci_dev			*dev;
  95	struct Scsi_Host		*host;
  96
  97	struct list_head		cmd_pool;
  98	struct pvscsi_ctx		*cmd_map;
  99};
 100
 101
 102/* Command line parameters */
 103static int pvscsi_ring_pages;
 104static int pvscsi_msg_ring_pages = PVSCSI_DEFAULT_NUM_PAGES_MSG_RING;
 105static int pvscsi_cmd_per_lun    = PVSCSI_DEFAULT_QUEUE_DEPTH;
 106static bool pvscsi_disable_msi;
 107static bool pvscsi_disable_msix;
 108static bool pvscsi_use_msg       = true;
 109static bool pvscsi_use_req_threshold = true;
 110
 111#define PVSCSI_RW (S_IRUSR | S_IWUSR)
 112
 113module_param_named(ring_pages, pvscsi_ring_pages, int, PVSCSI_RW);
 114MODULE_PARM_DESC(ring_pages, "Number of pages per req/cmp ring - (default="
 115		 __stringify(PVSCSI_DEFAULT_NUM_PAGES_PER_RING)
 116		 "[up to 16 targets],"
 117		 __stringify(PVSCSI_SETUP_RINGS_MAX_NUM_PAGES)
 118		 "[for 16+ targets])");
 119
 120module_param_named(msg_ring_pages, pvscsi_msg_ring_pages, int, PVSCSI_RW);
 121MODULE_PARM_DESC(msg_ring_pages, "Number of pages for the msg ring - (default="
 122		 __stringify(PVSCSI_DEFAULT_NUM_PAGES_MSG_RING) ")");
 123
 124module_param_named(cmd_per_lun, pvscsi_cmd_per_lun, int, PVSCSI_RW);
 125MODULE_PARM_DESC(cmd_per_lun, "Maximum commands per lun - (default="
 126		 __stringify(PVSCSI_DEFAULT_QUEUE_DEPTH) ")");
 127
 128module_param_named(disable_msi, pvscsi_disable_msi, bool, PVSCSI_RW);
 129MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");
 130
 131module_param_named(disable_msix, pvscsi_disable_msix, bool, PVSCSI_RW);
 132MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");
 133
 134module_param_named(use_msg, pvscsi_use_msg, bool, PVSCSI_RW);
 135MODULE_PARM_DESC(use_msg, "Use msg ring when available - (default=1)");
 136
 137module_param_named(use_req_threshold, pvscsi_use_req_threshold,
 138		   bool, PVSCSI_RW);
 139MODULE_PARM_DESC(use_req_threshold, "Use driver-based request coalescing if configured - (default=1)");
 140
 141static const struct pci_device_id pvscsi_pci_tbl[] = {
 142	{ PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_PVSCSI) },
 143	{ 0 }
 144};
 145
 146MODULE_DEVICE_TABLE(pci, pvscsi_pci_tbl);
 147
 148static struct device *
 149pvscsi_dev(const struct pvscsi_adapter *adapter)
 150{
 151	return &(adapter->dev->dev);
 152}
 153
 154static struct pvscsi_ctx *
 155pvscsi_find_context(const struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd)
 156{
 157	struct pvscsi_ctx *ctx, *end;
 158
 159	end = &adapter->cmd_map[adapter->req_depth];
 160	for (ctx = adapter->cmd_map; ctx < end; ctx++)
 161		if (ctx->cmd == cmd)
 162			return ctx;
 163
 164	return NULL;
 165}
 166
 167static struct pvscsi_ctx *
 168pvscsi_acquire_context(struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd)
 169{
 170	struct pvscsi_ctx *ctx;
 171
 172	if (list_empty(&adapter->cmd_pool))
 173		return NULL;
 174
 175	ctx = list_first_entry(&adapter->cmd_pool, struct pvscsi_ctx, list);
 176	ctx->cmd = cmd;
 177	list_del(&ctx->list);
 178
 179	return ctx;
 180}
 181
 182static void pvscsi_release_context(struct pvscsi_adapter *adapter,
 183				   struct pvscsi_ctx *ctx)
 184{
 185	ctx->cmd = NULL;
 186	ctx->abort_cmp = NULL;
 187	list_add(&ctx->list, &adapter->cmd_pool);
 188}
 189
 190/*
 191 * Map a pvscsi_ctx struct to a context ID field value; we map to a simple
 192 * non-zero integer. ctx always points to an entry in cmd_map array, hence
 193 * the return value is always >=1.
 194 */
 195static u64 pvscsi_map_context(const struct pvscsi_adapter *adapter,
 196			      const struct pvscsi_ctx *ctx)
 197{
 198	return ctx - adapter->cmd_map + 1;
 199}
 200
 201static struct pvscsi_ctx *
 202pvscsi_get_context(const struct pvscsi_adapter *adapter, u64 context)
 203{
 204	return &adapter->cmd_map[context - 1];
 205}
 206
 207static void pvscsi_reg_write(const struct pvscsi_adapter *adapter,
 208			     u32 offset, u32 val)
 209{
 210	writel(val, adapter->mmioBase + offset);
 211}
 212
 213static u32 pvscsi_reg_read(const struct pvscsi_adapter *adapter, u32 offset)
 214{
 215	return readl(adapter->mmioBase + offset);
 216}
 217
 218static u32 pvscsi_read_intr_status(const struct pvscsi_adapter *adapter)
 219{
 220	return pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_INTR_STATUS);
 221}
 222
 223static void pvscsi_write_intr_status(const struct pvscsi_adapter *adapter,
 224				     u32 val)
 225{
 226	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_STATUS, val);
 227}
 228
 229static void pvscsi_unmask_intr(const struct pvscsi_adapter *adapter)
 230{
 231	u32 intr_bits;
 232
 233	intr_bits = PVSCSI_INTR_CMPL_MASK;
 234	if (adapter->use_msg)
 235		intr_bits |= PVSCSI_INTR_MSG_MASK;
 236
 237	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, intr_bits);
 238}
 239
 240static void pvscsi_mask_intr(const struct pvscsi_adapter *adapter)
 241{
 242	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, 0);
 243}
 244
 245static void pvscsi_write_cmd_desc(const struct pvscsi_adapter *adapter,
 246				  u32 cmd, const void *desc, size_t len)
 247{
 248	const u32 *ptr = desc;
 249	size_t i;
 250
 251	len /= sizeof(*ptr);
 252	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND, cmd);
 253	for (i = 0; i < len; i++)
 254		pvscsi_reg_write(adapter,
 255				 PVSCSI_REG_OFFSET_COMMAND_DATA, ptr[i]);
 256}
 257
 258static void pvscsi_abort_cmd(const struct pvscsi_adapter *adapter,
 259			     const struct pvscsi_ctx *ctx)
 260{
 261	struct PVSCSICmdDescAbortCmd cmd = { 0 };
 262
 263	cmd.target = ctx->cmd->device->id;
 264	cmd.context = pvscsi_map_context(adapter, ctx);
 265
 266	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ABORT_CMD, &cmd, sizeof(cmd));
 267}
 268
 269static void pvscsi_kick_rw_io(const struct pvscsi_adapter *adapter)
 270{
 271	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_RW_IO, 0);
 272}
 273
 274static void pvscsi_process_request_ring(const struct pvscsi_adapter *adapter)
 275{
 276	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0);
 277}
 278
 279static int scsi_is_rw(unsigned char op)
 280{
 281	return op == READ_6  || op == WRITE_6 ||
 282	       op == READ_10 || op == WRITE_10 ||
 283	       op == READ_12 || op == WRITE_12 ||
 284	       op == READ_16 || op == WRITE_16;
 285}
 286
 287static void pvscsi_kick_io(const struct pvscsi_adapter *adapter,
 288			   unsigned char op)
 289{
 290	if (scsi_is_rw(op)) {
 291		struct PVSCSIRingsState *s = adapter->rings_state;
 292
 293		if (!adapter->use_req_threshold ||
 294		    s->reqProdIdx - s->reqConsIdx >= s->reqCallThreshold)
 295			pvscsi_kick_rw_io(adapter);
 296	} else {
 297		pvscsi_process_request_ring(adapter);
 298	}
 299}
 300
 301static void ll_adapter_reset(const struct pvscsi_adapter *adapter)
 302{
 303	dev_dbg(pvscsi_dev(adapter), "Adapter Reset on %p\n", adapter);
 304
 305	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ADAPTER_RESET, NULL, 0);
 306}
 307
 308static void ll_bus_reset(const struct pvscsi_adapter *adapter)
 309{
 310	dev_dbg(pvscsi_dev(adapter), "Resetting bus on %p\n", adapter);
 311
 312	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_BUS, NULL, 0);
 313}
 314
 315static void ll_device_reset(const struct pvscsi_adapter *adapter, u32 target)
 316{
 317	struct PVSCSICmdDescResetDevice cmd = { 0 };
 318
 319	dev_dbg(pvscsi_dev(adapter), "Resetting device: target=%u\n", target);
 320
 321	cmd.target = target;
 322
 323	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_DEVICE,
 324			      &cmd, sizeof(cmd));
 325}
 326
 327static void pvscsi_create_sg(struct pvscsi_ctx *ctx,
 328			     struct scatterlist *sg, unsigned count)
 329{
 330	unsigned i;
 331	struct PVSCSISGElement *sge;
 332
 333	BUG_ON(count > PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT);
 334
 335	sge = &ctx->sgl->sge[0];
 336	for (i = 0; i < count; i++, sg = sg_next(sg)) {
 337		sge[i].addr   = sg_dma_address(sg);
 338		sge[i].length = sg_dma_len(sg);
 339		sge[i].flags  = 0;
 340	}
 341}
 342
 343/*
 344 * Map all data buffers for a command into PCI space and
 345 * setup the scatter/gather list if needed.
 346 */
 347static int pvscsi_map_buffers(struct pvscsi_adapter *adapter,
 348			      struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd,
 349			      struct PVSCSIRingReqDesc *e)
 350{
 351	unsigned count;
 352	unsigned bufflen = scsi_bufflen(cmd);
 353	struct scatterlist *sg;
 354
 355	e->dataLen = bufflen;
 356	e->dataAddr = 0;
 357	if (bufflen == 0)
 358		return 0;
 359
 360	sg = scsi_sglist(cmd);
 361	count = scsi_sg_count(cmd);
 362	if (count != 0) {
 363		int segs = scsi_dma_map(cmd);
 364
 365		if (segs == -ENOMEM) {
 366			scmd_printk(KERN_DEBUG, cmd,
 367				    "vmw_pvscsi: Failed to map cmd sglist for DMA.\n");
 368			return -ENOMEM;
 369		} else if (segs > 1) {
 370			pvscsi_create_sg(ctx, sg, segs);
 371
 372			e->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;
 373			ctx->sglPA = dma_map_single(&adapter->dev->dev,
 374					ctx->sgl, SGL_SIZE, DMA_TO_DEVICE);
 375			if (dma_mapping_error(&adapter->dev->dev, ctx->sglPA)) {
 376				scmd_printk(KERN_ERR, cmd,
 377					    "vmw_pvscsi: Failed to map ctx sglist for DMA.\n");
 378				scsi_dma_unmap(cmd);
 379				ctx->sglPA = 0;
 380				return -ENOMEM;
 381			}
 382			e->dataAddr = ctx->sglPA;
 383		} else
 384			e->dataAddr = sg_dma_address(sg);
 385	} else {
 386		/*
 387		 * In case there is no S/G list, scsi_sglist points
 388		 * directly to the buffer.
 389		 */
 390		ctx->dataPA = dma_map_single(&adapter->dev->dev, sg, bufflen,
 391					     cmd->sc_data_direction);
 392		if (dma_mapping_error(&adapter->dev->dev, ctx->dataPA)) {
 393			scmd_printk(KERN_DEBUG, cmd,
 394				    "vmw_pvscsi: Failed to map direct data buffer for DMA.\n");
 395			return -ENOMEM;
 396		}
 397		e->dataAddr = ctx->dataPA;
 398	}
 399
 400	return 0;
 401}
 402
 403/*
 404 * The device incorrectly doesn't clear the first byte of the sense
 405 * buffer in some cases. We have to do it ourselves.
 406 * Otherwise we run into trouble when SWIOTLB is forced.
 407 */
 408static void pvscsi_patch_sense(struct scsi_cmnd *cmd)
 409{
 410	if (cmd->sense_buffer)
 411		cmd->sense_buffer[0] = 0;
 412}
 413
 414static void pvscsi_unmap_buffers(const struct pvscsi_adapter *adapter,
 415				 struct pvscsi_ctx *ctx)
 416{
 417	struct scsi_cmnd *cmd;
 418	unsigned bufflen;
 419
 420	cmd = ctx->cmd;
 421	bufflen = scsi_bufflen(cmd);
 422
 423	if (bufflen != 0) {
 424		unsigned count = scsi_sg_count(cmd);
 425
 426		if (count != 0) {
 427			scsi_dma_unmap(cmd);
 428			if (ctx->sglPA) {
 429				dma_unmap_single(&adapter->dev->dev, ctx->sglPA,
 430						 SGL_SIZE, DMA_TO_DEVICE);
 431				ctx->sglPA = 0;
 432			}
 433		} else
 434			dma_unmap_single(&adapter->dev->dev, ctx->dataPA,
 435					 bufflen, cmd->sc_data_direction);
 436	}
 437	if (cmd->sense_buffer)
 438		dma_unmap_single(&adapter->dev->dev, ctx->sensePA,
 439				 SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE);
 440}
 441
 442static int pvscsi_allocate_rings(struct pvscsi_adapter *adapter)
 443{
 444	adapter->rings_state = dma_alloc_coherent(&adapter->dev->dev, PAGE_SIZE,
 445			&adapter->ringStatePA, GFP_KERNEL);
 446	if (!adapter->rings_state)
 447		return -ENOMEM;
 448
 449	adapter->req_pages = min(PVSCSI_MAX_NUM_PAGES_REQ_RING,
 450				 pvscsi_ring_pages);
 451	adapter->req_depth = adapter->req_pages
 452					* PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
 453	adapter->req_ring = dma_alloc_coherent(&adapter->dev->dev,
 454			adapter->req_pages * PAGE_SIZE, &adapter->reqRingPA,
 455			GFP_KERNEL);
 456	if (!adapter->req_ring)
 457		return -ENOMEM;
 458
 459	adapter->cmp_pages = min(PVSCSI_MAX_NUM_PAGES_CMP_RING,
 460				 pvscsi_ring_pages);
 461	adapter->cmp_ring = dma_alloc_coherent(&adapter->dev->dev,
 462			adapter->cmp_pages * PAGE_SIZE, &adapter->cmpRingPA,
 463			GFP_KERNEL);
 464	if (!adapter->cmp_ring)
 465		return -ENOMEM;
 466
 467	BUG_ON(!IS_ALIGNED(adapter->ringStatePA, PAGE_SIZE));
 468	BUG_ON(!IS_ALIGNED(adapter->reqRingPA, PAGE_SIZE));
 469	BUG_ON(!IS_ALIGNED(adapter->cmpRingPA, PAGE_SIZE));
 470
 471	if (!adapter->use_msg)
 472		return 0;
 473
 474	adapter->msg_pages = min(PVSCSI_MAX_NUM_PAGES_MSG_RING,
 475				 pvscsi_msg_ring_pages);
 476	adapter->msg_ring = dma_alloc_coherent(&adapter->dev->dev,
 477			adapter->msg_pages * PAGE_SIZE, &adapter->msgRingPA,
 478			GFP_KERNEL);
 479	if (!adapter->msg_ring)
 480		return -ENOMEM;
 481	BUG_ON(!IS_ALIGNED(adapter->msgRingPA, PAGE_SIZE));
 482
 483	return 0;
 484}
 485
 486static void pvscsi_setup_all_rings(const struct pvscsi_adapter *adapter)
 487{
 488	struct PVSCSICmdDescSetupRings cmd = { 0 };
 489	dma_addr_t base;
 490	unsigned i;
 491
 492	cmd.ringsStatePPN   = adapter->ringStatePA >> PAGE_SHIFT;
 493	cmd.reqRingNumPages = adapter->req_pages;
 494	cmd.cmpRingNumPages = adapter->cmp_pages;
 495
 496	base = adapter->reqRingPA;
 497	for (i = 0; i < adapter->req_pages; i++) {
 498		cmd.reqRingPPNs[i] = base >> PAGE_SHIFT;
 499		base += PAGE_SIZE;
 500	}
 501
 502	base = adapter->cmpRingPA;
 503	for (i = 0; i < adapter->cmp_pages; i++) {
 504		cmd.cmpRingPPNs[i] = base >> PAGE_SHIFT;
 505		base += PAGE_SIZE;
 506	}
 507
 508	memset(adapter->rings_state, 0, PAGE_SIZE);
 509	memset(adapter->req_ring, 0, adapter->req_pages * PAGE_SIZE);
 510	memset(adapter->cmp_ring, 0, adapter->cmp_pages * PAGE_SIZE);
 511
 512	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_SETUP_RINGS,
 513			      &cmd, sizeof(cmd));
 514
 515	if (adapter->use_msg) {
 516		struct PVSCSICmdDescSetupMsgRing cmd_msg = { 0 };
 517
 518		cmd_msg.numPages = adapter->msg_pages;
 519
 520		base = adapter->msgRingPA;
 521		for (i = 0; i < adapter->msg_pages; i++) {
 522			cmd_msg.ringPPNs[i] = base >> PAGE_SHIFT;
 523			base += PAGE_SIZE;
 524		}
 525		memset(adapter->msg_ring, 0, adapter->msg_pages * PAGE_SIZE);
 526
 527		pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_SETUP_MSG_RING,
 528				      &cmd_msg, sizeof(cmd_msg));
 529	}
 530}
 531
 532static int pvscsi_change_queue_depth(struct scsi_device *sdev, int qdepth)
 533{
 534	if (!sdev->tagged_supported)
 535		qdepth = 1;
 536	return scsi_change_queue_depth(sdev, qdepth);
 537}
 538
 539/*
 540 * Pull a completion descriptor off and pass the completion back
 541 * to the SCSI mid layer.
 542 */
 543static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
 544				    const struct PVSCSIRingCmpDesc *e)
 545{
 546	struct pvscsi_ctx *ctx;
 547	struct scsi_cmnd *cmd;
 548	struct completion *abort_cmp;
 549	u32 btstat = e->hostStatus;
 550	u32 sdstat = e->scsiStatus;
 551
 552	ctx = pvscsi_get_context(adapter, e->context);
 553	cmd = ctx->cmd;
 554	abort_cmp = ctx->abort_cmp;
 555	pvscsi_unmap_buffers(adapter, ctx);
 556	if (sdstat != SAM_STAT_CHECK_CONDITION)
 557		pvscsi_patch_sense(cmd);
 558	pvscsi_release_context(adapter, ctx);
 559	if (abort_cmp) {
 560		/*
 561		 * The command was requested to be aborted. Just signal that
 562		 * the request completed and swallow the actual cmd completion
 563		 * here. The abort handler will post a completion for this
 564		 * command indicating that it got successfully aborted.
 565		 */
 566		complete(abort_cmp);
 567		return;
 568	}
 569
 570	cmd->result = 0;
 571	if (sdstat != SAM_STAT_GOOD &&
 572	    (btstat == BTSTAT_SUCCESS ||
 573	     btstat == BTSTAT_LINKED_COMMAND_COMPLETED ||
 574	     btstat == BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG)) {
 575		if (sdstat == SAM_STAT_COMMAND_TERMINATED) {
 576			cmd->result = (DID_RESET << 16);
 577		} else {
 578			cmd->result = (DID_OK << 16) | sdstat;
 579		}
 580	} else
 581		switch (btstat) {
 582		case BTSTAT_SUCCESS:
 583		case BTSTAT_LINKED_COMMAND_COMPLETED:
 584		case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
 585			/*
 586			 * Commands like INQUIRY may transfer less data than
 587			 * requested by the initiator via bufflen. Set residual
 588			 * count to make upper layer aware of the actual amount
 589			 * of data returned. There are cases when controller
 590			 * returns zero dataLen with non zero data - do not set
 591			 * residual count in that case.
 592			 */
 593			if (e->dataLen && (e->dataLen < scsi_bufflen(cmd)))
 594				scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
 595			cmd->result = (DID_OK << 16);
 596			break;
 597
 598		case BTSTAT_DATARUN:
 599		case BTSTAT_DATA_UNDERRUN:
 600			/* Report residual data in underruns */
 601			scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
 602			cmd->result = (DID_ERROR << 16);
 603			break;
 604
 605		case BTSTAT_SELTIMEO:
 606			/* Our emulation returns this for non-connected devs */
 607			cmd->result = (DID_BAD_TARGET << 16);
 608			break;
 609
 610		case BTSTAT_LUNMISMATCH:
 611		case BTSTAT_TAGREJECT:
 612		case BTSTAT_BADMSG:
 613		case BTSTAT_HAHARDWARE:
 614		case BTSTAT_INVPHASE:
 615		case BTSTAT_HATIMEOUT:
 616		case BTSTAT_NORESPONSE:
 617		case BTSTAT_DISCONNECT:
 618		case BTSTAT_HASOFTWARE:
 619		case BTSTAT_BUSFREE:
 620		case BTSTAT_SENSFAILED:
 621			cmd->result |= (DID_ERROR << 16);
 622			break;
 623
 624		case BTSTAT_SENTRST:
 625		case BTSTAT_RECVRST:
 626		case BTSTAT_BUSRESET:
 627			cmd->result = (DID_RESET << 16);
 628			break;
 629
 630		case BTSTAT_ABORTQUEUE:
 631			cmd->result = (DID_BUS_BUSY << 16);
 632			break;
 633
 634		case BTSTAT_SCSIPARITY:
 635			cmd->result = (DID_PARITY << 16);
 636			break;
 637
 638		default:
 639			cmd->result = (DID_ERROR << 16);
 640			scmd_printk(KERN_DEBUG, cmd,
 641				    "Unknown completion status: 0x%x\n",
 642				    btstat);
 643	}
 644
 645	dev_dbg(&cmd->device->sdev_gendev,
 646		"cmd=%p %x ctx=%p result=0x%x status=0x%x,%x\n",
 647		cmd, cmd->cmnd[0], ctx, cmd->result, btstat, sdstat);
 648
 649	scsi_done(cmd);
 650}
 651
 652/*
 653 * barrier usage : Since the PVSCSI device is emulated, there could be cases
 654 * where we may want to serialize some accesses between the driver and the
 655 * emulation layer. We use compiler barriers instead of the more expensive
 656 * memory barriers because PVSCSI is only supported on X86 which has strong
 657 * memory access ordering.
 658 */
 659static void pvscsi_process_completion_ring(struct pvscsi_adapter *adapter)
 660{
 661	struct PVSCSIRingsState *s = adapter->rings_state;
 662	struct PVSCSIRingCmpDesc *ring = adapter->cmp_ring;
 663	u32 cmp_entries = s->cmpNumEntriesLog2;
 664
 665	while (s->cmpConsIdx != s->cmpProdIdx) {
 666		struct PVSCSIRingCmpDesc *e = ring + (s->cmpConsIdx &
 667						      MASK(cmp_entries));
 668		/*
 669		 * This barrier() ensures that *e is not dereferenced while
 670		 * the device emulation still writes data into the slot.
 671		 * Since the device emulation advances s->cmpProdIdx only after
 672		 * updating the slot we want to check it first.
 673		 */
 674		barrier();
 675		pvscsi_complete_request(adapter, e);
 676		/*
 677		 * This barrier() ensures that compiler doesn't reorder write
 678		 * to s->cmpConsIdx before the read of (*e) inside
 679		 * pvscsi_complete_request. Otherwise, device emulation may
 680		 * overwrite *e before we had a chance to read it.
 681		 */
 682		barrier();
 683		s->cmpConsIdx++;
 684	}
 685}
 686
 687/*
 688 * Translate a Linux SCSI request into a request ring entry.
 689 */
 690static int pvscsi_queue_ring(struct pvscsi_adapter *adapter,
 691			     struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd)
 692{
 693	struct PVSCSIRingsState *s;
 694	struct PVSCSIRingReqDesc *e;
 695	struct scsi_device *sdev;
 696	u32 req_entries;
 697
 698	s = adapter->rings_state;
 699	sdev = cmd->device;
 700	req_entries = s->reqNumEntriesLog2;
 701
 702	/*
 703	 * If this condition holds, we might have room on the request ring, but
 704	 * we might not have room on the completion ring for the response.
 705	 * However, we have already ruled out this possibility - we would not
 706	 * have successfully allocated a context if it were true, since we only
 707	 * have one context per request entry.  Check for it anyway, since it
 708	 * would be a serious bug.
 709	 */
 710	if (s->reqProdIdx - s->cmpConsIdx >= 1 << req_entries) {
 711		scmd_printk(KERN_ERR, cmd, "vmw_pvscsi: "
 712			    "ring full: reqProdIdx=%d cmpConsIdx=%d\n",
 713			    s->reqProdIdx, s->cmpConsIdx);
 714		return -1;
 715	}
 716
 717	e = adapter->req_ring + (s->reqProdIdx & MASK(req_entries));
 718
 719	e->bus    = sdev->channel;
 720	e->target = sdev->id;
 721	memset(e->lun, 0, sizeof(e->lun));
 722	e->lun[1] = sdev->lun;
 723
 724	if (cmd->sense_buffer) {
 725		ctx->sensePA = dma_map_single(&adapter->dev->dev,
 726				cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE,
 727				DMA_FROM_DEVICE);
 728		if (dma_mapping_error(&adapter->dev->dev, ctx->sensePA)) {
 729			scmd_printk(KERN_DEBUG, cmd,
 730				    "vmw_pvscsi: Failed to map sense buffer for DMA.\n");
 731			ctx->sensePA = 0;
 732			return -ENOMEM;
 733		}
 734		e->senseAddr = ctx->sensePA;
 735		e->senseLen = SCSI_SENSE_BUFFERSIZE;
 736	} else {
 737		e->senseLen  = 0;
 738		e->senseAddr = 0;
 739	}
 740	e->cdbLen   = cmd->cmd_len;
 741	e->vcpuHint = smp_processor_id();
 742	memcpy(e->cdb, cmd->cmnd, e->cdbLen);
 743
 744	e->tag = SIMPLE_QUEUE_TAG;
 745
 746	if (cmd->sc_data_direction == DMA_FROM_DEVICE)
 747		e->flags = PVSCSI_FLAG_CMD_DIR_TOHOST;
 748	else if (cmd->sc_data_direction == DMA_TO_DEVICE)
 749		e->flags = PVSCSI_FLAG_CMD_DIR_TODEVICE;
 750	else if (cmd->sc_data_direction == DMA_NONE)
 751		e->flags = PVSCSI_FLAG_CMD_DIR_NONE;
 752	else
 753		e->flags = 0;
 754
 755	if (pvscsi_map_buffers(adapter, ctx, cmd, e) != 0) {
 756		if (cmd->sense_buffer) {
 757			dma_unmap_single(&adapter->dev->dev, ctx->sensePA,
 758					 SCSI_SENSE_BUFFERSIZE,
 759					 DMA_FROM_DEVICE);
 760			ctx->sensePA = 0;
 761		}
 762		return -ENOMEM;
 763	}
 764
 765	e->context = pvscsi_map_context(adapter, ctx);
 766
 767	barrier();
 768
 769	s->reqProdIdx++;
 770
 771	return 0;
 772}
 773
 774static int pvscsi_queue_lck(struct scsi_cmnd *cmd)
 775{
 776	struct Scsi_Host *host = cmd->device->host;
 777	struct pvscsi_adapter *adapter = shost_priv(host);
 778	struct pvscsi_ctx *ctx;
 779	unsigned long flags;
 780	unsigned char op;
 781
 782	spin_lock_irqsave(&adapter->hw_lock, flags);
 783
 784	ctx = pvscsi_acquire_context(adapter, cmd);
 785	if (!ctx || pvscsi_queue_ring(adapter, ctx, cmd) != 0) {
 786		if (ctx)
 787			pvscsi_release_context(adapter, ctx);
 788		spin_unlock_irqrestore(&adapter->hw_lock, flags);
 789		return SCSI_MLQUEUE_HOST_BUSY;
 790	}
 791
 792	op = cmd->cmnd[0];
 793
 794	dev_dbg(&cmd->device->sdev_gendev,
 795		"queued cmd %p, ctx %p, op=%x\n", cmd, ctx, op);
 796
 797	spin_unlock_irqrestore(&adapter->hw_lock, flags);
 798
 799	pvscsi_kick_io(adapter, op);
 800
 801	return 0;
 802}
 803
 804static DEF_SCSI_QCMD(pvscsi_queue)
 805
 806static int pvscsi_abort(struct scsi_cmnd *cmd)
 807{
 808	struct pvscsi_adapter *adapter = shost_priv(cmd->device->host);
 809	struct pvscsi_ctx *ctx;
 810	unsigned long flags;
 811	int result = SUCCESS;
 812	DECLARE_COMPLETION_ONSTACK(abort_cmp);
 813	int done;
 814
 815	scmd_printk(KERN_DEBUG, cmd, "task abort on host %u, %p\n",
 816		    adapter->host->host_no, cmd);
 817
 818	spin_lock_irqsave(&adapter->hw_lock, flags);
 819
 820	/*
 821	 * Poll the completion ring first - we might be trying to abort
 822	 * a command that is waiting to be dispatched in the completion ring.
 823	 */
 824	pvscsi_process_completion_ring(adapter);
 825
 826	/*
 827	 * If there is no context for the command, it either already succeeded
 828	 * or else was never properly issued.  Not our problem.
 829	 */
 830	ctx = pvscsi_find_context(adapter, cmd);
 831	if (!ctx) {
 832		scmd_printk(KERN_DEBUG, cmd, "Failed to abort cmd %p\n", cmd);
 833		goto out;
 834	}
 835
 836	/*
 837	 * Mark that the command has been requested to be aborted and issue
 838	 * the abort.
 839	 */
 840	ctx->abort_cmp = &abort_cmp;
 841
 842	pvscsi_abort_cmd(adapter, ctx);
 843	spin_unlock_irqrestore(&adapter->hw_lock, flags);
 844	/* Wait for 2 secs for the completion. */
 845	done = wait_for_completion_timeout(&abort_cmp, msecs_to_jiffies(2000));
 846	spin_lock_irqsave(&adapter->hw_lock, flags);
 847
 848	if (!done) {
 849		/*
 850		 * Failed to abort the command, unmark the fact that it
 851		 * was requested to be aborted.
 852		 */
 853		ctx->abort_cmp = NULL;
 854		result = FAILED;
 855		scmd_printk(KERN_DEBUG, cmd,
 856			    "Failed to get completion for aborted cmd %p\n",
 857			    cmd);
 858		goto out;
 859	}
 860
 861	/*
 862	 * Successfully aborted the command.
 863	 */
 864	cmd->result = (DID_ABORT << 16);
 865	scsi_done(cmd);
 866
 867out:
 868	spin_unlock_irqrestore(&adapter->hw_lock, flags);
 869	return result;
 870}
 871
 872/*
 873 * Abort all outstanding requests.  This is only safe to use if the completion
 874 * ring will never be walked again or the device has been reset, because it
 875 * destroys the 1-1 mapping between context field passed to emulation and our
 876 * request structure.
 877 */
 878static void pvscsi_reset_all(struct pvscsi_adapter *adapter)
 879{
 880	unsigned i;
 881
 882	for (i = 0; i < adapter->req_depth; i++) {
 883		struct pvscsi_ctx *ctx = &adapter->cmd_map[i];
 884		struct scsi_cmnd *cmd = ctx->cmd;
 885		if (cmd) {
 886			scmd_printk(KERN_ERR, cmd,
 887				    "Forced reset on cmd %p\n", cmd);
 888			pvscsi_unmap_buffers(adapter, ctx);
 889			pvscsi_patch_sense(cmd);
 890			pvscsi_release_context(adapter, ctx);
 891			cmd->result = (DID_RESET << 16);
 892			scsi_done(cmd);
 893		}
 894	}
 895}
 896
 897static int pvscsi_host_reset(struct scsi_cmnd *cmd)
 898{
 899	struct Scsi_Host *host = cmd->device->host;
 900	struct pvscsi_adapter *adapter = shost_priv(host);
 901	unsigned long flags;
 902	bool use_msg;
 903
 904	scmd_printk(KERN_INFO, cmd, "SCSI Host reset\n");
 905
 906	spin_lock_irqsave(&adapter->hw_lock, flags);
 907
 908	use_msg = adapter->use_msg;
 909
 910	if (use_msg) {
 911		adapter->use_msg = false;
 912		spin_unlock_irqrestore(&adapter->hw_lock, flags);
 913
 914		/*
 915		 * Now that we know that the ISR won't add more work on the
 916		 * workqueue we can safely flush any outstanding work.
 917		 */
 918		flush_workqueue(adapter->workqueue);
 919		spin_lock_irqsave(&adapter->hw_lock, flags);
 920	}
 921
 922	/*
 923	 * We're going to tear down the entire ring structure and set it back
 924	 * up, so stalling new requests until all completions are flushed and
 925	 * the rings are back in place.
 926	 */
 927
 928	pvscsi_process_request_ring(adapter);
 929
 930	ll_adapter_reset(adapter);
 931
 932	/*
 933	 * Now process any completions.  Note we do this AFTER adapter reset,
 934	 * which is strange, but stops races where completions get posted
 935	 * between processing the ring and issuing the reset.  The backend will
 936	 * not touch the ring memory after reset, so the immediately pre-reset
 937	 * completion ring state is still valid.
 938	 */
 939	pvscsi_process_completion_ring(adapter);
 940
 941	pvscsi_reset_all(adapter);
 942	adapter->use_msg = use_msg;
 943	pvscsi_setup_all_rings(adapter);
 944	pvscsi_unmask_intr(adapter);
 945
 946	spin_unlock_irqrestore(&adapter->hw_lock, flags);
 947
 948	return SUCCESS;
 949}
 950
 951static int pvscsi_bus_reset(struct scsi_cmnd *cmd)
 952{
 953	struct Scsi_Host *host = cmd->device->host;
 954	struct pvscsi_adapter *adapter = shost_priv(host);
 955	unsigned long flags;
 956
 957	scmd_printk(KERN_INFO, cmd, "SCSI Bus reset\n");
 958
 959	/*
 960	 * We don't want to queue new requests for this bus after
 961	 * flushing all pending requests to emulation, since new
 962	 * requests could then sneak in during this bus reset phase,
 963	 * so take the lock now.
 964	 */
 965	spin_lock_irqsave(&adapter->hw_lock, flags);
 966
 967	pvscsi_process_request_ring(adapter);
 968	ll_bus_reset(adapter);
 969	pvscsi_process_completion_ring(adapter);
 970
 971	spin_unlock_irqrestore(&adapter->hw_lock, flags);
 972
 973	return SUCCESS;
 974}
 975
 976static int pvscsi_device_reset(struct scsi_cmnd *cmd)
 977{
 978	struct Scsi_Host *host = cmd->device->host;
 979	struct pvscsi_adapter *adapter = shost_priv(host);
 980	unsigned long flags;
 981
 982	scmd_printk(KERN_INFO, cmd, "SCSI device reset on scsi%u:%u\n",
 983		    host->host_no, cmd->device->id);
 984
 985	/*
 986	 * We don't want to queue new requests for this device after flushing
 987	 * all pending requests to emulation, since new requests could then
 988	 * sneak in during this device reset phase, so take the lock now.
 989	 */
 990	spin_lock_irqsave(&adapter->hw_lock, flags);
 991
 992	pvscsi_process_request_ring(adapter);
 993	ll_device_reset(adapter, cmd->device->id);
 994	pvscsi_process_completion_ring(adapter);
 995
 996	spin_unlock_irqrestore(&adapter->hw_lock, flags);
 997
 998	return SUCCESS;
 999}
1000
1001static struct scsi_host_template pvscsi_template;
1002
1003static const char *pvscsi_info(struct Scsi_Host *host)
1004{
1005	struct pvscsi_adapter *adapter = shost_priv(host);
1006	static char buf[256];
1007
1008	sprintf(buf, "VMware PVSCSI storage adapter rev %d, req/cmp/msg rings: "
1009		"%u/%u/%u pages, cmd_per_lun=%u", adapter->rev,
1010		adapter->req_pages, adapter->cmp_pages, adapter->msg_pages,
1011		pvscsi_template.cmd_per_lun);
1012
1013	return buf;
1014}
1015
1016static struct scsi_host_template pvscsi_template = {
1017	.module				= THIS_MODULE,
1018	.name				= "VMware PVSCSI Host Adapter",
1019	.proc_name			= "vmw_pvscsi",
1020	.info				= pvscsi_info,
1021	.queuecommand			= pvscsi_queue,
1022	.this_id			= -1,
1023	.sg_tablesize			= PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT,
1024	.dma_boundary			= UINT_MAX,
1025	.max_sectors			= 0xffff,
1026	.change_queue_depth		= pvscsi_change_queue_depth,
1027	.eh_abort_handler		= pvscsi_abort,
1028	.eh_device_reset_handler	= pvscsi_device_reset,
1029	.eh_bus_reset_handler		= pvscsi_bus_reset,
1030	.eh_host_reset_handler		= pvscsi_host_reset,
1031};
1032
1033static void pvscsi_process_msg(const struct pvscsi_adapter *adapter,
1034			       const struct PVSCSIRingMsgDesc *e)
1035{
1036	struct PVSCSIRingsState *s = adapter->rings_state;
1037	struct Scsi_Host *host = adapter->host;
1038	struct scsi_device *sdev;
1039
1040	printk(KERN_INFO "vmw_pvscsi: msg type: 0x%x - MSG RING: %u/%u (%u) \n",
1041	       e->type, s->msgProdIdx, s->msgConsIdx, s->msgNumEntriesLog2);
1042
1043	BUILD_BUG_ON(PVSCSI_MSG_LAST != 2);
1044
1045	if (e->type == PVSCSI_MSG_DEV_ADDED) {
1046		struct PVSCSIMsgDescDevStatusChanged *desc;
1047		desc = (struct PVSCSIMsgDescDevStatusChanged *)e;
1048
1049		printk(KERN_INFO
1050		       "vmw_pvscsi: msg: device added at scsi%u:%u:%u\n",
1051		       desc->bus, desc->target, desc->lun[1]);
1052
1053		if (!scsi_host_get(host))
1054			return;
1055
1056		sdev = scsi_device_lookup(host, desc->bus, desc->target,
1057					  desc->lun[1]);
1058		if (sdev) {
1059			printk(KERN_INFO "vmw_pvscsi: device already exists\n");
1060			scsi_device_put(sdev);
1061		} else
1062			scsi_add_device(adapter->host, desc->bus,
1063					desc->target, desc->lun[1]);
1064
1065		scsi_host_put(host);
1066	} else if (e->type == PVSCSI_MSG_DEV_REMOVED) {
1067		struct PVSCSIMsgDescDevStatusChanged *desc;
1068		desc = (struct PVSCSIMsgDescDevStatusChanged *)e;
1069
1070		printk(KERN_INFO
1071		       "vmw_pvscsi: msg: device removed at scsi%u:%u:%u\n",
1072		       desc->bus, desc->target, desc->lun[1]);
1073
1074		if (!scsi_host_get(host))
1075			return;
1076
1077		sdev = scsi_device_lookup(host, desc->bus, desc->target,
1078					  desc->lun[1]);
1079		if (sdev) {
1080			scsi_remove_device(sdev);
1081			scsi_device_put(sdev);
1082		} else
1083			printk(KERN_INFO
1084			       "vmw_pvscsi: failed to lookup scsi%u:%u:%u\n",
1085			       desc->bus, desc->target, desc->lun[1]);
1086
1087		scsi_host_put(host);
1088	}
1089}
1090
1091static int pvscsi_msg_pending(const struct pvscsi_adapter *adapter)
1092{
1093	struct PVSCSIRingsState *s = adapter->rings_state;
1094
1095	return s->msgProdIdx != s->msgConsIdx;
1096}
1097
1098static void pvscsi_process_msg_ring(const struct pvscsi_adapter *adapter)
1099{
1100	struct PVSCSIRingsState *s = adapter->rings_state;
1101	struct PVSCSIRingMsgDesc *ring = adapter->msg_ring;
1102	u32 msg_entries = s->msgNumEntriesLog2;
1103
1104	while (pvscsi_msg_pending(adapter)) {
1105		struct PVSCSIRingMsgDesc *e = ring + (s->msgConsIdx &
1106						      MASK(msg_entries));
1107
1108		barrier();
1109		pvscsi_process_msg(adapter, e);
1110		barrier();
1111		s->msgConsIdx++;
1112	}
1113}
1114
1115static void pvscsi_msg_workqueue_handler(struct work_struct *data)
1116{
1117	struct pvscsi_adapter *adapter;
1118
1119	adapter = container_of(data, struct pvscsi_adapter, work);
1120
1121	pvscsi_process_msg_ring(adapter);
1122}
1123
1124static int pvscsi_setup_msg_workqueue(struct pvscsi_adapter *adapter)
1125{
1126	char name[32];
1127
1128	if (!pvscsi_use_msg)
1129		return 0;
1130
1131	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND,
1132			 PVSCSI_CMD_SETUP_MSG_RING);
1133
1134	if (pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_COMMAND_STATUS) == -1)
1135		return 0;
1136
1137	snprintf(name, sizeof(name),
1138		 "vmw_pvscsi_wq_%u", adapter->host->host_no);
1139
1140	adapter->workqueue =
1141		alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, name);
1142	if (!adapter->workqueue) {
1143		printk(KERN_ERR "vmw_pvscsi: failed to create work queue\n");
1144		return 0;
1145	}
1146	INIT_WORK(&adapter->work, pvscsi_msg_workqueue_handler);
1147
1148	return 1;
1149}
1150
1151static bool pvscsi_setup_req_threshold(struct pvscsi_adapter *adapter,
1152				      bool enable)
1153{
1154	u32 val;
1155
1156	if (!pvscsi_use_req_threshold)
1157		return false;
1158
1159	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND,
1160			 PVSCSI_CMD_SETUP_REQCALLTHRESHOLD);
1161	val = pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_COMMAND_STATUS);
1162	if (val == -1) {
1163		printk(KERN_INFO "vmw_pvscsi: device does not support req_threshold\n");
1164		return false;
1165	} else {
1166		struct PVSCSICmdDescSetupReqCall cmd_msg = { 0 };
1167		cmd_msg.enable = enable;
1168		printk(KERN_INFO
1169		       "vmw_pvscsi: %sabling reqCallThreshold\n",
1170			enable ? "en" : "dis");
1171		pvscsi_write_cmd_desc(adapter,
1172				      PVSCSI_CMD_SETUP_REQCALLTHRESHOLD,
1173				      &cmd_msg, sizeof(cmd_msg));
1174		return pvscsi_reg_read(adapter,
1175				       PVSCSI_REG_OFFSET_COMMAND_STATUS) != 0;
1176	}
1177}
1178
1179static irqreturn_t pvscsi_isr(int irq, void *devp)
1180{
1181	struct pvscsi_adapter *adapter = devp;
1182	unsigned long flags;
1183
1184	spin_lock_irqsave(&adapter->hw_lock, flags);
1185	pvscsi_process_completion_ring(adapter);
1186	if (adapter->use_msg && pvscsi_msg_pending(adapter))
1187		queue_work(adapter->workqueue, &adapter->work);
1188	spin_unlock_irqrestore(&adapter->hw_lock, flags);
1189
1190	return IRQ_HANDLED;
1191}
1192
1193static irqreturn_t pvscsi_shared_isr(int irq, void *devp)
1194{
1195	struct pvscsi_adapter *adapter = devp;
1196	u32 val = pvscsi_read_intr_status(adapter);
1197
1198	if (!(val & PVSCSI_INTR_ALL_SUPPORTED))
1199		return IRQ_NONE;
1200	pvscsi_write_intr_status(devp, val);
1201	return pvscsi_isr(irq, devp);
1202}
1203
1204static void pvscsi_free_sgls(const struct pvscsi_adapter *adapter)
1205{
1206	struct pvscsi_ctx *ctx = adapter->cmd_map;
1207	unsigned i;
1208
1209	for (i = 0; i < adapter->req_depth; ++i, ++ctx)
1210		free_pages((unsigned long)ctx->sgl, get_order(SGL_SIZE));
1211}
1212
1213static void pvscsi_shutdown_intr(struct pvscsi_adapter *adapter)
1214{
1215	free_irq(pci_irq_vector(adapter->dev, 0), adapter);
1216	pci_free_irq_vectors(adapter->dev);
1217}
1218
1219static void pvscsi_release_resources(struct pvscsi_adapter *adapter)
1220{
1221	if (adapter->workqueue)
1222		destroy_workqueue(adapter->workqueue);
1223
1224	if (adapter->mmioBase)
1225		pci_iounmap(adapter->dev, adapter->mmioBase);
1226
1227	pci_release_regions(adapter->dev);
1228
1229	if (adapter->cmd_map) {
1230		pvscsi_free_sgls(adapter);
1231		kfree(adapter->cmd_map);
1232	}
1233
1234	if (adapter->rings_state)
1235		dma_free_coherent(&adapter->dev->dev, PAGE_SIZE,
1236				    adapter->rings_state, adapter->ringStatePA);
1237
1238	if (adapter->req_ring)
1239		dma_free_coherent(&adapter->dev->dev,
1240				    adapter->req_pages * PAGE_SIZE,
1241				    adapter->req_ring, adapter->reqRingPA);
1242
1243	if (adapter->cmp_ring)
1244		dma_free_coherent(&adapter->dev->dev,
1245				    adapter->cmp_pages * PAGE_SIZE,
1246				    adapter->cmp_ring, adapter->cmpRingPA);
1247
1248	if (adapter->msg_ring)
1249		dma_free_coherent(&adapter->dev->dev,
1250				    adapter->msg_pages * PAGE_SIZE,
1251				    adapter->msg_ring, adapter->msgRingPA);
1252}
1253
1254/*
1255 * Allocate scatter gather lists.
1256 *
1257 * These are statically allocated.  Trying to be clever was not worth it.
1258 *
1259 * Dynamic allocation can fail, and we can't go deep into the memory
1260 * allocator, since we're a SCSI driver, and trying too hard to allocate
1261 * memory might generate disk I/O.  We also don't want to fail disk I/O
1262 * in that case because we can't get an allocation - the I/O could be
1263 * trying to swap out data to free memory.  Since that is pathological,
1264 * just use a statically allocated scatter list.
1265 *
1266 */
1267static int pvscsi_allocate_sg(struct pvscsi_adapter *adapter)
1268{
1269	struct pvscsi_ctx *ctx;
1270	int i;
1271
1272	ctx = adapter->cmd_map;
1273	BUILD_BUG_ON(sizeof(struct pvscsi_sg_list) > SGL_SIZE);
1274
1275	for (i = 0; i < adapter->req_depth; ++i, ++ctx) {
1276		ctx->sgl = (void *)__get_free_pages(GFP_KERNEL,
1277						    get_order(SGL_SIZE));
1278		ctx->sglPA = 0;
1279		BUG_ON(!IS_ALIGNED(((unsigned long)ctx->sgl), PAGE_SIZE));
1280		if (!ctx->sgl) {
1281			for (; i >= 0; --i, --ctx) {
1282				free_pages((unsigned long)ctx->sgl,
1283					   get_order(SGL_SIZE));
1284				ctx->sgl = NULL;
1285			}
1286			return -ENOMEM;
1287		}
1288	}
1289
1290	return 0;
1291}
1292
1293/*
1294 * Query the device, fetch the config info and return the
1295 * maximum number of targets on the adapter. In case of
1296 * failure due to any reason return default i.e. 16.
1297 */
1298static u32 pvscsi_get_max_targets(struct pvscsi_adapter *adapter)
1299{
1300	struct PVSCSICmdDescConfigCmd cmd;
1301	struct PVSCSIConfigPageHeader *header;
1302	struct device *dev;
1303	dma_addr_t configPagePA;
1304	void *config_page;
1305	u32 numPhys = 16;
1306
1307	dev = pvscsi_dev(adapter);
1308	config_page = dma_alloc_coherent(&adapter->dev->dev, PAGE_SIZE,
1309			&configPagePA, GFP_KERNEL);
1310	if (!config_page) {
1311		dev_warn(dev, "vmw_pvscsi: failed to allocate memory for config page\n");
1312		goto exit;
1313	}
1314	BUG_ON(configPagePA & ~PAGE_MASK);
1315
1316	/* Fetch config info from the device. */
1317	cmd.configPageAddress = ((u64)PVSCSI_CONFIG_CONTROLLER_ADDRESS) << 32;
1318	cmd.configPageNum = PVSCSI_CONFIG_PAGE_CONTROLLER;
1319	cmd.cmpAddr = configPagePA;
1320	cmd._pad = 0;
1321
1322	/*
1323	 * Mark the completion page header with error values. If the device
1324	 * completes the command successfully, it sets the status values to
1325	 * indicate success.
1326	 */
1327	header = config_page;
1328	header->hostStatus = BTSTAT_INVPARAM;
1329	header->scsiStatus = SDSTAT_CHECK;
1330
1331	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_CONFIG, &cmd, sizeof cmd);
1332
1333	if (header->hostStatus == BTSTAT_SUCCESS &&
1334	    header->scsiStatus == SDSTAT_GOOD) {
1335		struct PVSCSIConfigPageController *config;
1336
1337		config = config_page;
1338		numPhys = config->numPhys;
1339	} else
1340		dev_warn(dev, "vmw_pvscsi: PVSCSI_CMD_CONFIG failed. hostStatus = 0x%x, scsiStatus = 0x%x\n",
1341			 header->hostStatus, header->scsiStatus);
1342	dma_free_coherent(&adapter->dev->dev, PAGE_SIZE, config_page,
1343			  configPagePA);
1344exit:
1345	return numPhys;
1346}
1347
1348static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1349{
1350	unsigned int irq_flag = PCI_IRQ_ALL_TYPES;
1351	struct pvscsi_adapter *adapter;
1352	struct pvscsi_adapter adapter_temp;
1353	struct Scsi_Host *host = NULL;
1354	unsigned int i;
1355	int error;
1356	u32 max_id;
1357
1358	error = -ENODEV;
1359
1360	if (pci_enable_device(pdev))
1361		return error;
1362
1363	if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
1364		printk(KERN_INFO "vmw_pvscsi: using 64bit dma\n");
1365	} else if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) {
1366		printk(KERN_INFO "vmw_pvscsi: using 32bit dma\n");
1367	} else {
1368		printk(KERN_ERR "vmw_pvscsi: failed to set DMA mask\n");
1369		goto out_disable_device;
1370	}
1371
1372	/*
1373	 * Let's use a temp pvscsi_adapter struct until we find the number of
1374	 * targets on the adapter, after that we will switch to the real
1375	 * allocated struct.
1376	 */
1377	adapter = &adapter_temp;
1378	memset(adapter, 0, sizeof(*adapter));
1379	adapter->dev  = pdev;
1380	adapter->rev = pdev->revision;
1381
1382	if (pci_request_regions(pdev, "vmw_pvscsi")) {
1383		printk(KERN_ERR "vmw_pvscsi: pci memory selection failed\n");
1384		goto out_disable_device;
1385	}
1386
1387	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
1388		if ((pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE_IO))
1389			continue;
1390
1391		if (pci_resource_len(pdev, i) < PVSCSI_MEM_SPACE_SIZE)
1392			continue;
1393
1394		break;
1395	}
1396
1397	if (i == DEVICE_COUNT_RESOURCE) {
1398		printk(KERN_ERR
1399		       "vmw_pvscsi: adapter has no suitable MMIO region\n");
1400		goto out_release_resources_and_disable;
1401	}
1402
1403	adapter->mmioBase = pci_iomap(pdev, i, PVSCSI_MEM_SPACE_SIZE);
1404
1405	if (!adapter->mmioBase) {
1406		printk(KERN_ERR
1407		       "vmw_pvscsi: can't iomap for BAR %d memsize %lu\n",
1408		       i, PVSCSI_MEM_SPACE_SIZE);
1409		goto out_release_resources_and_disable;
1410	}
1411
1412	pci_set_master(pdev);
1413
1414	/*
1415	 * Ask the device for max number of targets before deciding the
1416	 * default pvscsi_ring_pages value.
1417	 */
1418	max_id = pvscsi_get_max_targets(adapter);
1419	printk(KERN_INFO "vmw_pvscsi: max_id: %u\n", max_id);
1420
1421	if (pvscsi_ring_pages == 0)
1422		/*
1423		 * Set the right default value. Up to 16 it is 8, above it is
1424		 * max.
1425		 */
1426		pvscsi_ring_pages = (max_id > 16) ?
1427			PVSCSI_SETUP_RINGS_MAX_NUM_PAGES :
1428			PVSCSI_DEFAULT_NUM_PAGES_PER_RING;
1429	printk(KERN_INFO
1430	       "vmw_pvscsi: setting ring_pages to %d\n",
1431	       pvscsi_ring_pages);
1432
1433	pvscsi_template.can_queue =
1434		min(PVSCSI_MAX_NUM_PAGES_REQ_RING, pvscsi_ring_pages) *
1435		PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
1436	pvscsi_template.cmd_per_lun =
1437		min(pvscsi_template.can_queue, pvscsi_cmd_per_lun);
1438	host = scsi_host_alloc(&pvscsi_template, sizeof(struct pvscsi_adapter));
1439	if (!host) {
1440		printk(KERN_ERR "vmw_pvscsi: failed to allocate host\n");
1441		goto out_release_resources_and_disable;
1442	}
1443
1444	/*
1445	 * Let's use the real pvscsi_adapter struct here onwards.
1446	 */
1447	adapter = shost_priv(host);
1448	memset(adapter, 0, sizeof(*adapter));
1449	adapter->dev  = pdev;
1450	adapter->host = host;
1451	/*
1452	 * Copy back what we already have to the allocated adapter struct.
1453	 */
1454	adapter->rev = adapter_temp.rev;
1455	adapter->mmioBase = adapter_temp.mmioBase;
1456
1457	spin_lock_init(&adapter->hw_lock);
1458	host->max_channel = 0;
1459	host->max_lun     = 1;
1460	host->max_cmd_len = 16;
1461	host->max_id      = max_id;
1462
1463	pci_set_drvdata(pdev, host);
1464
1465	ll_adapter_reset(adapter);
1466
1467	adapter->use_msg = pvscsi_setup_msg_workqueue(adapter);
1468
1469	error = pvscsi_allocate_rings(adapter);
1470	if (error) {
1471		printk(KERN_ERR "vmw_pvscsi: unable to allocate ring memory\n");
1472		goto out_release_resources;
1473	}
1474
1475	/*
1476	 * From this point on we should reset the adapter if anything goes
1477	 * wrong.
1478	 */
1479	pvscsi_setup_all_rings(adapter);
1480
1481	adapter->cmd_map = kcalloc(adapter->req_depth,
1482				   sizeof(struct pvscsi_ctx), GFP_KERNEL);
1483	if (!adapter->cmd_map) {
1484		printk(KERN_ERR "vmw_pvscsi: failed to allocate memory.\n");
1485		error = -ENOMEM;
1486		goto out_reset_adapter;
1487	}
1488
1489	INIT_LIST_HEAD(&adapter->cmd_pool);
1490	for (i = 0; i < adapter->req_depth; i++) {
1491		struct pvscsi_ctx *ctx = adapter->cmd_map + i;
1492		list_add(&ctx->list, &adapter->cmd_pool);
1493	}
1494
1495	error = pvscsi_allocate_sg(adapter);
1496	if (error) {
1497		printk(KERN_ERR "vmw_pvscsi: unable to allocate s/g table\n");
1498		goto out_reset_adapter;
1499	}
1500
1501	if (pvscsi_disable_msix)
1502		irq_flag &= ~PCI_IRQ_MSIX;
1503	if (pvscsi_disable_msi)
1504		irq_flag &= ~PCI_IRQ_MSI;
1505
1506	error = pci_alloc_irq_vectors(adapter->dev, 1, 1, irq_flag);
1507	if (error < 0)
1508		goto out_reset_adapter;
1509
1510	adapter->use_req_threshold = pvscsi_setup_req_threshold(adapter, true);
1511	printk(KERN_DEBUG "vmw_pvscsi: driver-based request coalescing %sabled\n",
1512	       adapter->use_req_threshold ? "en" : "dis");
1513
1514	if (adapter->dev->msix_enabled || adapter->dev->msi_enabled) {
1515		printk(KERN_INFO "vmw_pvscsi: using MSI%s\n",
1516			adapter->dev->msix_enabled ? "-X" : "");
1517		error = request_irq(pci_irq_vector(pdev, 0), pvscsi_isr,
1518				0, "vmw_pvscsi", adapter);
1519	} else {
1520		printk(KERN_INFO "vmw_pvscsi: using INTx\n");
1521		error = request_irq(pci_irq_vector(pdev, 0), pvscsi_shared_isr,
1522				IRQF_SHARED, "vmw_pvscsi", adapter);
1523	}
1524
1525	if (error) {
1526		printk(KERN_ERR
1527		       "vmw_pvscsi: unable to request IRQ: %d\n", error);
1528		goto out_reset_adapter;
1529	}
1530
1531	error = scsi_add_host(host, &pdev->dev);
1532	if (error) {
1533		printk(KERN_ERR
1534		       "vmw_pvscsi: scsi_add_host failed: %d\n", error);
1535		goto out_reset_adapter;
1536	}
1537
1538	dev_info(&pdev->dev, "VMware PVSCSI rev %d host #%u\n",
1539		 adapter->rev, host->host_no);
1540
1541	pvscsi_unmask_intr(adapter);
1542
1543	scsi_scan_host(host);
1544
1545	return 0;
1546
1547out_reset_adapter:
1548	ll_adapter_reset(adapter);
1549out_release_resources:
1550	pvscsi_shutdown_intr(adapter);
1551	pvscsi_release_resources(adapter);
1552	scsi_host_put(host);
1553out_disable_device:
1554	pci_disable_device(pdev);
1555
1556	return error;
1557
1558out_release_resources_and_disable:
1559	pvscsi_shutdown_intr(adapter);
1560	pvscsi_release_resources(adapter);
1561	goto out_disable_device;
1562}
1563
1564static void __pvscsi_shutdown(struct pvscsi_adapter *adapter)
1565{
1566	pvscsi_mask_intr(adapter);
1567
1568	if (adapter->workqueue)
1569		flush_workqueue(adapter->workqueue);
1570
1571	pvscsi_shutdown_intr(adapter);
1572
1573	pvscsi_process_request_ring(adapter);
1574	pvscsi_process_completion_ring(adapter);
1575	ll_adapter_reset(adapter);
1576}
1577
/*
 * PCI .shutdown callback.
 *
 * The Scsi_Host is stored as the PCI device's driver data and the
 * pvscsi_adapter is the host's private area; fetch it and run the common
 * quiesce sequence on it.
 */
static void pvscsi_shutdown(struct pci_dev *dev)
{
	struct Scsi_Host *shost = pci_get_drvdata(dev);

	__pvscsi_shutdown(shost_priv(shost));
}
1585
/*
 * PCI .remove callback: tear down one adapter instance.
 *
 * Order of operations: remove the SCSI host, quiesce the adapter with
 * __pvscsi_shutdown(), release the adapter's resources, drop the reference
 * on the host, and finally disable the PCI device.
 */
static void pvscsi_remove(struct pci_dev *pdev)
{
	struct Scsi_Host *shost;
	struct pvscsi_adapter *hba;

	shost = pci_get_drvdata(pdev);
	hba = shost_priv(shost);

	scsi_remove_host(shost);

	__pvscsi_shutdown(hba);
	pvscsi_release_resources(hba);

	/*
	 * hba is the host's private data (shost_priv), so it is not touched
	 * again once the host reference has been dropped.
	 */
	scsi_host_put(shost);

	pci_disable_device(pdev);
}
1600
1601static struct pci_driver pvscsi_pci_driver = {
1602	.name		= "vmw_pvscsi",
1603	.id_table	= pvscsi_pci_tbl,
1604	.probe		= pvscsi_probe,
1605	.remove		= pvscsi_remove,
1606	.shutdown       = pvscsi_shutdown,
1607};
1608
1609static int __init pvscsi_init(void)
1610{
1611	pr_info("%s - version %s\n",
1612		PVSCSI_LINUX_DRIVER_DESC, PVSCSI_DRIVER_VERSION_STRING);
1613	return pci_register_driver(&pvscsi_pci_driver);
1614}
1615
1616static void __exit pvscsi_exit(void)
1617{
1618	pci_unregister_driver(&pvscsi_pci_driver);
1619}
1620
/* Hook the init/exit routines above into the kernel module machinery. */
module_init(pvscsi_init);
module_exit(pvscsi_exit);