   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/**************************************************************************/
   3/*                                                                        */
   4/*  IBM System i and System p Virtual NIC Device Driver                   */
   5/*  Copyright (C) 2014 IBM Corp.                                          */
   6/*  Santiago Leon (santi_leon@yahoo.com)                                  */
   7/*  Thomas Falcon (tlfalcon@linux.vnet.ibm.com)                           */
   8/*  John Allen (jallen@linux.vnet.ibm.com)                                */
   9/*                                                                        */
  10/*                                                                        */
  11/* This module contains the implementation of a virtual ethernet device   */
  12/* for use with IBM i/p Series LPAR Linux. It utilizes the logical LAN    */
  13/* option of the RS/6000 Platform Architecture to interface with virtual  */
  14/* ethernet NICs that are presented to the partition by the hypervisor.   */
  15/*									   */
  16/* Messages are passed between the VNIC driver and the VNIC server using  */
  17/* Command/Response Queues (CRQs) and sub CRQs (sCRQs). CRQs are used to  */
  18/* issue and receive commands that initiate communication with the server */
  19/* on driver initialization. Sub CRQs (sCRQs) are similar to CRQs, but    */
  20/* are used by the driver to notify the server that a packet is           */
  21/* ready for transmission or that a buffer has been added to receive a    */
  22/* packet. Subsequently, sCRQs are used by the server to notify the       */
  23/* driver that a packet transmission has been completed or that a packet  */
  24/* has been received and placed in a waiting buffer.                      */
  25/*                                                                        */
  26/* In lieu of a more conventional "on-the-fly" DMA mapping strategy in    */
  27/* which skbs are DMA mapped and immediately unmapped when the transmit   */
  28/* or receive has been completed, the VNIC driver is required to use      */
  29/* "long term mapping". This entails that large, continuous DMA mapped    */
  30/* buffers are allocated on driver initialization and these buffers are   */
  31/* then continuously reused to pass skbs to and from the VNIC server.     */
  32/*                                                                        */
  33/**************************************************************************/
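
/* Illustrative sketch only, not part of the driver: roughly what the contrast
 * between conventional per-skb mapping and the "long term mapping" described
 * above looks like. The ltb field names mirror struct ibmvnic_long_term_buff;
 * everything else here is hypothetical.
 *
 *	// conventional: map and unmap around every packet
 *	dma = dma_map_single(dev, skb->data, skb->len, DMA_TO_DEVICE);
 *	// ... hand dma to the device, wait for completion ...
 *	dma_unmap_single(dev, dma, skb->len, DMA_TO_DEVICE);
 *
 *	// long term mapping: one large buffer allocated at init and reused
 *	ltb->buff = dma_alloc_coherent(dev, ltb->size, &ltb->addr, GFP_KERNEL);
 *	// ... tell the VNIC server about [ltb->addr, ltb->size] once ...
 *	memcpy(ltb->buff + offset, skb->data, skb->len);	// per packet
 */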
  34
  35#include <linux/module.h>
  36#include <linux/moduleparam.h>
  37#include <linux/types.h>
  38#include <linux/errno.h>
  39#include <linux/completion.h>
  40#include <linux/ioport.h>
  41#include <linux/dma-mapping.h>
  42#include <linux/kernel.h>
  43#include <linux/netdevice.h>
  44#include <linux/etherdevice.h>
  45#include <linux/skbuff.h>
  46#include <linux/init.h>
  47#include <linux/delay.h>
  48#include <linux/mm.h>
  49#include <linux/ethtool.h>
  50#include <linux/proc_fs.h>
  51#include <linux/if_arp.h>
  52#include <linux/in.h>
  53#include <linux/ip.h>
  54#include <linux/ipv6.h>
  55#include <linux/irq.h>
  56#include <linux/irqdomain.h>
  57#include <linux/kthread.h>
  58#include <linux/seq_file.h>
  59#include <linux/interrupt.h>
  60#include <net/net_namespace.h>
  61#include <asm/hvcall.h>
  62#include <linux/atomic.h>
  63#include <asm/vio.h>
  64#include <asm/xive.h>
  65#include <asm/iommu.h>
  66#include <linux/uaccess.h>
  67#include <asm/firmware.h>
  68#include <linux/workqueue.h>
  69#include <linux/if_vlan.h>
  70#include <linux/utsname.h>
  71#include <linux/cpu.h>
  72
  73#include "ibmvnic.h"
  74
  75static const char ibmvnic_driver_name[] = "ibmvnic";
  76static const char ibmvnic_driver_string[] = "IBM System i/p Virtual NIC Driver";
  77
  78MODULE_AUTHOR("Santiago Leon");
  79MODULE_DESCRIPTION("IBM System i/p Virtual NIC Driver");
  80MODULE_LICENSE("GPL");
  81MODULE_VERSION(IBMVNIC_DRIVER_VERSION);
  82
  83static int ibmvnic_version = IBMVNIC_INITIAL_VERSION;
  84static void release_sub_crqs(struct ibmvnic_adapter *, bool);
  85static int ibmvnic_reset_crq(struct ibmvnic_adapter *);
  86static int ibmvnic_send_crq_init(struct ibmvnic_adapter *);
  87static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *);
  88static int ibmvnic_send_crq(struct ibmvnic_adapter *, union ibmvnic_crq *);
  89static int send_subcrq_indirect(struct ibmvnic_adapter *, u64, u64, u64);
  90static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance);
  91static int enable_scrq_irq(struct ibmvnic_adapter *,
  92			   struct ibmvnic_sub_crq_queue *);
  93static int disable_scrq_irq(struct ibmvnic_adapter *,
  94			    struct ibmvnic_sub_crq_queue *);
  95static int pending_scrq(struct ibmvnic_adapter *,
  96			struct ibmvnic_sub_crq_queue *);
  97static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *,
  98					struct ibmvnic_sub_crq_queue *);
  99static int ibmvnic_poll(struct napi_struct *napi, int data);
 100static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter);
 101static inline void reinit_init_done(struct ibmvnic_adapter *adapter);
 102static void send_query_map(struct ibmvnic_adapter *adapter);
 103static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8);
 104static int send_request_unmap(struct ibmvnic_adapter *, u8);
 105static int send_login(struct ibmvnic_adapter *adapter);
 106static void send_query_cap(struct ibmvnic_adapter *adapter);
 107static int init_sub_crqs(struct ibmvnic_adapter *);
 108static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
 109static int ibmvnic_reset_init(struct ibmvnic_adapter *, bool reset);
 110static void release_crq_queue(struct ibmvnic_adapter *);
 111static int __ibmvnic_set_mac(struct net_device *, u8 *);
 112static int init_crq_queue(struct ibmvnic_adapter *adapter);
 113static int send_query_phys_parms(struct ibmvnic_adapter *adapter);
 114static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
 115					 struct ibmvnic_sub_crq_queue *tx_scrq);
 116static void free_long_term_buff(struct ibmvnic_adapter *adapter,
 117				struct ibmvnic_long_term_buff *ltb);
 118static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter);
 119static void flush_reset_queue(struct ibmvnic_adapter *adapter);
 120static void print_subcrq_error(struct device *dev, int rc, const char *func);
 121
 122struct ibmvnic_stat {
 123	char name[ETH_GSTRING_LEN];
 124	int offset;
 125};
 126
 127#define IBMVNIC_STAT_OFF(stat) (offsetof(struct ibmvnic_adapter, stats) + \
 128			     offsetof(struct ibmvnic_statistics, stat))
 129#define IBMVNIC_GET_STAT(a, off) (*((u64 *)(((unsigned long)(a)) + (off))))
 130
 131static const struct ibmvnic_stat ibmvnic_stats[] = {
 132	{"rx_packets", IBMVNIC_STAT_OFF(rx_packets)},
 133	{"rx_bytes", IBMVNIC_STAT_OFF(rx_bytes)},
 134	{"tx_packets", IBMVNIC_STAT_OFF(tx_packets)},
 135	{"tx_bytes", IBMVNIC_STAT_OFF(tx_bytes)},
 136	{"ucast_tx_packets", IBMVNIC_STAT_OFF(ucast_tx_packets)},
 137	{"ucast_rx_packets", IBMVNIC_STAT_OFF(ucast_rx_packets)},
 138	{"mcast_tx_packets", IBMVNIC_STAT_OFF(mcast_tx_packets)},
 139	{"mcast_rx_packets", IBMVNIC_STAT_OFF(mcast_rx_packets)},
 140	{"bcast_tx_packets", IBMVNIC_STAT_OFF(bcast_tx_packets)},
 141	{"bcast_rx_packets", IBMVNIC_STAT_OFF(bcast_rx_packets)},
 142	{"align_errors", IBMVNIC_STAT_OFF(align_errors)},
 143	{"fcs_errors", IBMVNIC_STAT_OFF(fcs_errors)},
 144	{"single_collision_frames", IBMVNIC_STAT_OFF(single_collision_frames)},
 145	{"multi_collision_frames", IBMVNIC_STAT_OFF(multi_collision_frames)},
 146	{"sqe_test_errors", IBMVNIC_STAT_OFF(sqe_test_errors)},
 147	{"deferred_tx", IBMVNIC_STAT_OFF(deferred_tx)},
 148	{"late_collisions", IBMVNIC_STAT_OFF(late_collisions)},
 149	{"excess_collisions", IBMVNIC_STAT_OFF(excess_collisions)},
 150	{"internal_mac_tx_errors", IBMVNIC_STAT_OFF(internal_mac_tx_errors)},
 151	{"carrier_sense", IBMVNIC_STAT_OFF(carrier_sense)},
 152	{"too_long_frames", IBMVNIC_STAT_OFF(too_long_frames)},
 153	{"internal_mac_rx_errors", IBMVNIC_STAT_OFF(internal_mac_rx_errors)},
 154};
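
/* Illustrative sketch, not part of the driver: roughly how the table above is
 * meant to be consumed, e.g. by an ethtool get_ethtool_stats handler. Each
 * entry's precomputed offset is passed to IBMVNIC_GET_STAT() to read the u64
 * counter out of the adapter. The function name here is hypothetical.
 */
static inline void __maybe_unused
ibmvnic_example_read_stats(struct ibmvnic_adapter *adapter, u64 *data)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++)
		data[i] = IBMVNIC_GET_STAT(adapter, ibmvnic_stats[i].offset);
}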
 155
 156static int send_crq_init_complete(struct ibmvnic_adapter *adapter)
 157{
 158	union ibmvnic_crq crq;
 159
 160	memset(&crq, 0, sizeof(crq));
 161	crq.generic.first = IBMVNIC_CRQ_INIT_CMD;
 162	crq.generic.cmd = IBMVNIC_CRQ_INIT_COMPLETE;
 163
 164	return ibmvnic_send_crq(adapter, &crq);
 165}
 166
 167static int send_version_xchg(struct ibmvnic_adapter *adapter)
 168{
 169	union ibmvnic_crq crq;
 170
 171	memset(&crq, 0, sizeof(crq));
 172	crq.version_exchange.first = IBMVNIC_CRQ_CMD;
 173	crq.version_exchange.cmd = VERSION_EXCHANGE;
 174	crq.version_exchange.version = cpu_to_be16(ibmvnic_version);
 175
 176	return ibmvnic_send_crq(adapter, &crq);
 177}
 178
 179static void ibmvnic_clean_queue_affinity(struct ibmvnic_adapter *adapter,
 180					 struct ibmvnic_sub_crq_queue *queue)
 181{
 182	if (!(queue && queue->irq))
 183		return;
 184
 185	cpumask_clear(queue->affinity_mask);
 186
 187	if (irq_set_affinity_and_hint(queue->irq, NULL))
 188		netdev_warn(adapter->netdev,
 189			    "%s: Clear affinity failed, queue addr = %p, IRQ = %d\n",
 190			    __func__, queue, queue->irq);
 191}
 192
 193static void ibmvnic_clean_affinity(struct ibmvnic_adapter *adapter)
 194{
 195	struct ibmvnic_sub_crq_queue **rxqs;
 196	struct ibmvnic_sub_crq_queue **txqs;
 197	int num_rxqs, num_txqs;
 198	int i;
 199
 200	rxqs = adapter->rx_scrq;
 201	txqs = adapter->tx_scrq;
 202	num_txqs = adapter->num_active_tx_scrqs;
 203	num_rxqs = adapter->num_active_rx_scrqs;
 204
 205	netdev_dbg(adapter->netdev, "%s: Cleaning irq affinity hints", __func__);
 206	if (txqs) {
 207		for (i = 0; i < num_txqs; i++)
 208			ibmvnic_clean_queue_affinity(adapter, txqs[i]);
 209	}
 210	if (rxqs) {
 211		for (i = 0; i < num_rxqs; i++)
 212			ibmvnic_clean_queue_affinity(adapter, rxqs[i]);
 213	}
 214}
 215
 216static int ibmvnic_set_queue_affinity(struct ibmvnic_sub_crq_queue *queue,
 217				      unsigned int *cpu, int *stragglers,
 218				      int stride)
 219{
 220	cpumask_var_t mask;
 221	int i;
 222	int rc = 0;
 223
 224	if (!(queue && queue->irq))
 225		return rc;
 226
 227	/* cpumask_var_t is either a pointer or array, allocation works here */
 228	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
 229		return -ENOMEM;
 230
  231	/* while we have extra CPUs, give one extra to this IRQ */
 232	if (*stragglers) {
 233		stride++;
 234		(*stragglers)--;
 235	}
 236	/* atomic write is safer than writing bit by bit directly */
 237	for (i = 0; i < stride; i++) {
 238		cpumask_set_cpu(*cpu, mask);
 239		*cpu = cpumask_next_wrap(*cpu, cpu_online_mask,
 240					 nr_cpu_ids, false);
 241	}
 242	/* set queue affinity mask */
 243	cpumask_copy(queue->affinity_mask, mask);
 244	rc = irq_set_affinity_and_hint(queue->irq, queue->affinity_mask);
 245	free_cpumask_var(mask);
 246
 247	return rc;
 248}
 249
 250/* assumes cpu read lock is held */
 251static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter)
 252{
 253	struct ibmvnic_sub_crq_queue **rxqs = adapter->rx_scrq;
 254	struct ibmvnic_sub_crq_queue **txqs = adapter->tx_scrq;
 255	struct ibmvnic_sub_crq_queue *queue;
 256	int num_rxqs = adapter->num_active_rx_scrqs, i_rxqs = 0;
 257	int num_txqs = adapter->num_active_tx_scrqs, i_txqs = 0;
 258	int total_queues, stride, stragglers, i;
 259	unsigned int num_cpu, cpu;
 260	bool is_rx_queue;
 261	int rc = 0;
 262
 263	netdev_dbg(adapter->netdev, "%s: Setting irq affinity hints", __func__);
 264	if (!(adapter->rx_scrq && adapter->tx_scrq)) {
 265		netdev_warn(adapter->netdev,
 266			    "%s: Set affinity failed, queues not allocated\n",
 267			    __func__);
 268		return;
 269	}
 270
 271	total_queues = num_rxqs + num_txqs;
 272	num_cpu = num_online_cpus();
  273	/* number of CPUs assigned per IRQ */
  274	stride = max_t(int, num_cpu / total_queues, 1);
  275	/* number of leftover CPUs */
  276	stragglers = num_cpu >= total_queues ? num_cpu % total_queues : 0;
  277	/* next available CPU to assign an IRQ to */
 278	cpu = cpumask_next(-1, cpu_online_mask);
 279
 280	for (i = 0; i < total_queues; i++) {
 281		is_rx_queue = false;
 282		/* balance core load by alternating rx and tx assignments
 283		 * ex: TX0 -> RX0 -> TX1 -> RX1 etc.
 284		 */
 285		if ((i % 2 == 1 && i_rxqs < num_rxqs) || i_txqs == num_txqs) {
 286			queue = rxqs[i_rxqs++];
 287			is_rx_queue = true;
 288		} else {
 289			queue = txqs[i_txqs++];
 290		}
 291
 292		rc = ibmvnic_set_queue_affinity(queue, &cpu, &stragglers,
 293						stride);
 294		if (rc)
 295			goto out;
 296
 297		if (!queue || is_rx_queue)
 298			continue;
 299
 300		rc = __netif_set_xps_queue(adapter->netdev,
 301					   cpumask_bits(queue->affinity_mask),
 302					   i_txqs - 1, XPS_CPUS);
 303		if (rc)
 304			netdev_warn(adapter->netdev, "%s: Set XPS on queue %d failed, rc = %d.\n",
 305				    __func__, i_txqs - 1, rc);
 306	}
 307
 308out:
 309	if (rc) {
 310		netdev_warn(adapter->netdev,
 311			    "%s: Set affinity failed, queue addr = %p, IRQ = %d, rc = %d.\n",
 312			    __func__, queue, queue->irq, rc);
 313		ibmvnic_clean_affinity(adapter);
 314	}
 315}
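
/* Worked example of the stride/stragglers math above (illustrative numbers):
 * with 16 online CPUs and 6 queues (3 tx + 3 rx), stride = max(16 / 6, 1) = 2
 * and stragglers = 16 % 6 = 4. The first 4 queues therefore take
 * stride + 1 = 3 CPUs each and the remaining 2 queues take 2 CPUs each:
 * 4 * 3 + 2 * 2 = 16, so every online CPU is assigned exactly once.
 */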
 316
 317static int ibmvnic_cpu_online(unsigned int cpu, struct hlist_node *node)
 318{
 319	struct ibmvnic_adapter *adapter;
 320
 321	adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node);
 322	ibmvnic_set_affinity(adapter);
 323	return 0;
 324}
 325
 326static int ibmvnic_cpu_dead(unsigned int cpu, struct hlist_node *node)
 327{
 328	struct ibmvnic_adapter *adapter;
 329
 330	adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node_dead);
 331	ibmvnic_set_affinity(adapter);
 332	return 0;
 333}
 334
 335static int ibmvnic_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
 336{
 337	struct ibmvnic_adapter *adapter;
 338
 339	adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node);
 340	ibmvnic_clean_affinity(adapter);
 341	return 0;
 342}
 343
 344static enum cpuhp_state ibmvnic_online;
 345
 346static int ibmvnic_cpu_notif_add(struct ibmvnic_adapter *adapter)
 347{
 348	int ret;
 349
 350	ret = cpuhp_state_add_instance_nocalls(ibmvnic_online, &adapter->node);
 351	if (ret)
 352		return ret;
 353	ret = cpuhp_state_add_instance_nocalls(CPUHP_IBMVNIC_DEAD,
 354					       &adapter->node_dead);
 355	if (!ret)
 356		return ret;
 357	cpuhp_state_remove_instance_nocalls(ibmvnic_online, &adapter->node);
 358	return ret;
 359}
 360
 361static void ibmvnic_cpu_notif_remove(struct ibmvnic_adapter *adapter)
 362{
 363	cpuhp_state_remove_instance_nocalls(ibmvnic_online, &adapter->node);
 364	cpuhp_state_remove_instance_nocalls(CPUHP_IBMVNIC_DEAD,
 365					    &adapter->node_dead);
 366}
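
/* For context, a sketch of how the two hotplug states used above are set up
 * at module init time. The callback pairing reflects the handlers defined
 * above, but the state name strings are assumptions; the real registration
 * lives later in this file:
 *
 *	rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "ibmvnic:online",
 *				     ibmvnic_cpu_online, ibmvnic_cpu_down_prep);
 *	if (rc < 0)
 *		return rc;
 *	ibmvnic_online = rc;
 *	rc = cpuhp_setup_state_multi(CPUHP_IBMVNIC_DEAD, "ibmvnic:dead",
 *				     NULL, ibmvnic_cpu_dead);
 *
 * Each adapter instance then attaches itself to both states via
 * ibmvnic_cpu_notif_add() above.
 */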
 367
 368static long h_reg_sub_crq(unsigned long unit_address, unsigned long token,
 369			  unsigned long length, unsigned long *number,
 370			  unsigned long *irq)
 371{
 372	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
 373	long rc;
 374
 375	rc = plpar_hcall(H_REG_SUB_CRQ, retbuf, unit_address, token, length);
 376	*number = retbuf[0];
 377	*irq = retbuf[1];
 378
 379	return rc;
 380}
 381
 382/**
 383 * ibmvnic_wait_for_completion - Check device state and wait for completion
 384 * @adapter: private device data
 385 * @comp_done: completion structure to wait for
 386 * @timeout: time to wait in milliseconds
 387 *
 388 * Wait for a completion signal or until the timeout limit is reached
 389 * while checking that the device is still active.
 390 */
 391static int ibmvnic_wait_for_completion(struct ibmvnic_adapter *adapter,
 392				       struct completion *comp_done,
 393				       unsigned long timeout)
 394{
 395	struct net_device *netdev;
 396	unsigned long div_timeout;
 397	u8 retry;
 398
 399	netdev = adapter->netdev;
 400	retry = 5;
 401	div_timeout = msecs_to_jiffies(timeout / retry);
 402	while (true) {
 403		if (!adapter->crq.active) {
 404			netdev_err(netdev, "Device down!\n");
 405			return -ENODEV;
 406		}
 407		if (!retry--)
 408			break;
 409		if (wait_for_completion_timeout(comp_done, div_timeout))
 410			return 0;
 411	}
 412	netdev_err(netdev, "Operation timed out.\n");
 413	return -ETIMEDOUT;
 414}
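
/* Example of the timing above (illustrative): a caller passing timeout =
 * 10000 ms gets retry = 5 slices of div_timeout = msecs_to_jiffies(2000)
 * each. Because adapter->crq.active is re-checked between slices, a dead
 * CRQ is reported as -ENODEV within roughly one 2 second slice rather than
 * only after the full 10 second window has expired.
 */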
 415
 416/**
 417 * reuse_ltb() - Check if a long term buffer can be reused
 418 * @ltb:  The long term buffer to be checked
 419 * @size: The size of the long term buffer.
 420 *
 421 * An LTB can be reused unless its size has changed.
 422 *
 423 * Return: Return true if the LTB can be reused, false otherwise.
 424 */
 425static bool reuse_ltb(struct ibmvnic_long_term_buff *ltb, int size)
 426{
 427	return (ltb->buff && ltb->size == size);
 428}
 429
 430/**
 431 * alloc_long_term_buff() - Allocate a long term buffer (LTB)
 432 *
  433 * @adapter: ibmvnic adapter associated with the LTB
 434 * @ltb:     container object for the LTB
 435 * @size:    size of the LTB
 436 *
 437 * Allocate an LTB of the specified size and notify VIOS.
 438 *
 439 * If the given @ltb already has the correct size, reuse it. Otherwise if
  440 * it's non-NULL, free it. Then allocate a new one of the correct size.
 441 * Notify the VIOS either way since we may now be working with a new VIOS.
 442 *
  443 * Allocating larger chunks of memory during resets, especially during LPM or
  444 * in low memory situations, can cause resets to fail/timeout and the LPAR to
 445 * lose connectivity. So hold onto the LTB even if we fail to communicate
 446 * with the VIOS and reuse it on next open. Free LTB when adapter is closed.
 447 *
 448 * Return: 0 if we were able to allocate the LTB and notify the VIOS and
 449 *	   a negative value otherwise.
 450 */
 451static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
 452				struct ibmvnic_long_term_buff *ltb, int size)
 453{
 454	struct device *dev = &adapter->vdev->dev;
 455	u64 prev = 0;
 456	int rc;
 457
 458	if (!reuse_ltb(ltb, size)) {
 459		dev_dbg(dev,
 460			"LTB size changed from 0x%llx to 0x%x, reallocating\n",
 461			 ltb->size, size);
 462		prev = ltb->size;
 463		free_long_term_buff(adapter, ltb);
 464	}
 465
 466	if (ltb->buff) {
 467		dev_dbg(dev, "Reusing LTB [map %d, size 0x%llx]\n",
 468			ltb->map_id, ltb->size);
 469	} else {
 470		ltb->buff = dma_alloc_coherent(dev, size, &ltb->addr,
 471					       GFP_KERNEL);
 472		if (!ltb->buff) {
 473			dev_err(dev, "Couldn't alloc long term buffer\n");
 474			return -ENOMEM;
 475		}
 476		ltb->size = size;
 477
 478		ltb->map_id = find_first_zero_bit(adapter->map_ids,
 479						  MAX_MAP_ID);
 480		bitmap_set(adapter->map_ids, ltb->map_id, 1);
 481
 482		dev_dbg(dev,
 483			"Allocated new LTB [map %d, size 0x%llx was 0x%llx]\n",
 484			 ltb->map_id, ltb->size, prev);
 485	}
 486
  487	/* Ensure ltb is zeroed - especially when reusing it. */
 488	memset(ltb->buff, 0, ltb->size);
 489
 490	mutex_lock(&adapter->fw_lock);
 491	adapter->fw_done_rc = 0;
 492	reinit_completion(&adapter->fw_done);
 493
 494	rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id);
 495	if (rc) {
 496		dev_err(dev, "send_request_map failed, rc = %d\n", rc);
 497		goto out;
 498	}
 499
 500	rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000);
 501	if (rc) {
 502		dev_err(dev, "LTB map request aborted or timed out, rc = %d\n",
 503			rc);
 504		goto out;
 505	}
 506
 507	if (adapter->fw_done_rc) {
 508		dev_err(dev, "Couldn't map LTB, rc = %d\n",
 509			adapter->fw_done_rc);
 510		rc = -EIO;
 511		goto out;
 512	}
 513	rc = 0;
 514out:
 515	/* don't free LTB on communication error - see function header */
 516	mutex_unlock(&adapter->fw_lock);
 517	return rc;
 518}
 519
 520static void free_long_term_buff(struct ibmvnic_adapter *adapter,
 521				struct ibmvnic_long_term_buff *ltb)
 522{
 523	struct device *dev = &adapter->vdev->dev;
 524
 525	if (!ltb->buff)
 526		return;
 527
 528	/* VIOS automatically unmaps the long term buffer at remote
 529	 * end for the following resets:
 530	 * FAILOVER, MOBILITY, TIMEOUT.
 531	 */
 532	if (adapter->reset_reason != VNIC_RESET_FAILOVER &&
 533	    adapter->reset_reason != VNIC_RESET_MOBILITY &&
 534	    adapter->reset_reason != VNIC_RESET_TIMEOUT)
 535		send_request_unmap(adapter, ltb->map_id);
 536
 537	dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
 538
 539	ltb->buff = NULL;
 540	/* mark this map_id free */
 541	bitmap_clear(adapter->map_ids, ltb->map_id, 1);
 542	ltb->map_id = 0;
 543}
 544
 545/**
 546 * free_ltb_set - free the given set of long term buffers (LTBS)
 547 * @adapter: The ibmvnic adapter containing this ltb set
 548 * @ltb_set: The ltb_set to be freed
 549 *
 550 * Free the set of LTBs in the given set.
 551 */
 552
 553static void free_ltb_set(struct ibmvnic_adapter *adapter,
 554			 struct ibmvnic_ltb_set *ltb_set)
 555{
 556	int i;
 557
 558	for (i = 0; i < ltb_set->num_ltbs; i++)
 559		free_long_term_buff(adapter, &ltb_set->ltbs[i]);
 560
 561	kfree(ltb_set->ltbs);
 562	ltb_set->ltbs = NULL;
 563	ltb_set->num_ltbs = 0;
 564}
 565
 566/**
 567 * alloc_ltb_set() - Allocate a set of long term buffers (LTBs)
 568 *
  569 * @adapter: ibmvnic adapter associated with the LTB
 570 * @ltb_set: container object for the set of LTBs
 571 * @num_buffs: Number of buffers in the LTB
 572 * @buff_size: Size of each buffer in the LTB
 573 *
 574 * Allocate a set of LTBs to accommodate @num_buffs buffers of @buff_size
  575 * each. We currently cap the size of each LTB to IBMVNIC_ONE_LTB_SIZE. If
  576 * the new set of LTBs has fewer LTBs than the old set, free the excess LTBs.
  577 * If the new set needs more than the old set, allocate the remaining ones.
 578 * Try and reuse as many LTBs as possible and avoid reallocation.
 579 *
 580 * Any changes to this allocation strategy must be reflected in
 581 * map_rxpool_buff_to_ltb() and map_txpool_buff_to_ltb().
 582 */
 583static int alloc_ltb_set(struct ibmvnic_adapter *adapter,
 584			 struct ibmvnic_ltb_set *ltb_set, int num_buffs,
 585			 int buff_size)
 586{
 587	struct device *dev = &adapter->vdev->dev;
 588	struct ibmvnic_ltb_set old_set;
 589	struct ibmvnic_ltb_set new_set;
 590	int rem_size;
 591	int tot_size;		/* size of all ltbs */
 592	int ltb_size;		/* size of one ltb */
 593	int nltbs;
 594	int rc;
 595	int n;
 596	int i;
 597
 598	dev_dbg(dev, "%s() num_buffs %d, buff_size %d\n", __func__, num_buffs,
 599		buff_size);
 600
 601	ltb_size = rounddown(IBMVNIC_ONE_LTB_SIZE, buff_size);
 602	tot_size = num_buffs * buff_size;
 603
 604	if (ltb_size > tot_size)
 605		ltb_size = tot_size;
 606
 607	nltbs = tot_size / ltb_size;
 608	if (tot_size % ltb_size)
 609		nltbs++;
 610
 611	old_set = *ltb_set;
 612
 613	if (old_set.num_ltbs == nltbs) {
 614		new_set = old_set;
 615	} else {
 616		int tmp = nltbs * sizeof(struct ibmvnic_long_term_buff);
 617
 618		new_set.ltbs = kzalloc(tmp, GFP_KERNEL);
 619		if (!new_set.ltbs)
 620			return -ENOMEM;
 621
 622		new_set.num_ltbs = nltbs;
 623
 624		/* Free any excess ltbs in old set */
 625		for (i = new_set.num_ltbs; i < old_set.num_ltbs; i++)
 626			free_long_term_buff(adapter, &old_set.ltbs[i]);
 627
 628		/* Copy remaining ltbs to new set. All LTBs except the
 629		 * last one are of the same size. alloc_long_term_buff()
 630		 * will realloc if the size changes.
 631		 */
 632		n = min(old_set.num_ltbs, new_set.num_ltbs);
 633		for (i = 0; i < n; i++)
 634			new_set.ltbs[i] = old_set.ltbs[i];
 635
 636		/* Any additional ltbs in new set will have NULL ltbs for
 637		 * now and will be allocated in alloc_long_term_buff().
 638		 */
 639
 640		/* We no longer need the old_set so free it. Note that we
 641		 * may have reused some ltbs from old set and freed excess
 642		 * ltbs above. So we only need to free the container now
  643 * not the LTBs themselves. (i.e. don't free_ltb_set()!)
 644		 */
 645		kfree(old_set.ltbs);
 646		old_set.ltbs = NULL;
 647		old_set.num_ltbs = 0;
 648
 649		/* Install the new set. If allocations fail below, we will
 650		 * retry later and know what size LTBs we need.
 651		 */
 652		*ltb_set = new_set;
 653	}
 654
 655	i = 0;
 656	rem_size = tot_size;
 657	while (rem_size) {
 658		if (ltb_size > rem_size)
 659			ltb_size = rem_size;
 660
 661		rem_size -= ltb_size;
 662
 663		rc = alloc_long_term_buff(adapter, &new_set.ltbs[i], ltb_size);
 664		if (rc)
 665			goto out;
 666		i++;
 667	}
 668
 669	WARN_ON(i != new_set.num_ltbs);
 670
 671	return 0;
 672out:
  673	/* We may have allocated one or more LTBs before failing and we
  674	 * want to try and reuse them on next reset. So don't free the ltb set.
 675	 */
 676	return rc;
 677}
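
/* Worked example of the sizing above (the numbers are illustrative and
 * IBMVNIC_ONE_LTB_SIZE is assumed to be 4 MB here): with num_buffs = 1024 and
 * buff_size = 9088, ltb_size = rounddown(4194304, 9088) = 4189568 (461
 * buffers per LTB) and tot_size = 9306112, so nltbs = 3. The first two LTBs
 * hold 461 buffers each and the last is trimmed to the remaining 926976
 * bytes, i.e. 102 buffers, for a total of 1024.
 */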
 678
 679/**
 680 * map_rxpool_buf_to_ltb - Map given rxpool buffer to offset in an LTB.
 681 * @rxpool: The receive buffer pool containing buffer
 682 * @bufidx: Index of buffer in rxpool
 683 * @ltbp: (Output) pointer to the long term buffer containing the buffer
 684 * @offset: (Output) offset of buffer in the LTB from @ltbp
 685 *
 686 * Map the given buffer identified by [rxpool, bufidx] to an LTB in the
 687 * pool and its corresponding offset. Assume for now that each LTB is of
 688 * different size but could possibly be optimized based on the allocation
 689 * strategy in alloc_ltb_set().
 690 */
 691static void map_rxpool_buf_to_ltb(struct ibmvnic_rx_pool *rxpool,
 692				  unsigned int bufidx,
 693				  struct ibmvnic_long_term_buff **ltbp,
 694				  unsigned int *offset)
 695{
 696	struct ibmvnic_long_term_buff *ltb;
 697	int nbufs;	/* # of buffers in one ltb */
 698	int i;
 699
 700	WARN_ON(bufidx >= rxpool->size);
 701
 702	for (i = 0; i < rxpool->ltb_set.num_ltbs; i++) {
 703		ltb = &rxpool->ltb_set.ltbs[i];
 704		nbufs = ltb->size / rxpool->buff_size;
 705		if (bufidx < nbufs)
 706			break;
 707		bufidx -= nbufs;
 708	}
 709
 710	*ltbp = ltb;
 711	*offset = bufidx * rxpool->buff_size;
 712}
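
/* Continuing the illustrative numbers from the alloc_ltb_set() example above
 * (LTBs of 461 + 461 + 102 buffers of 9088 bytes each): a lookup for
 * bufidx = 500 skips the first LTB (500 >= 461, bufidx becomes 39) and lands
 * in the second LTB at offset 39 * 9088 = 354432.
 */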
 713
 714/**
 715 * map_txpool_buf_to_ltb - Map given txpool buffer to offset in an LTB.
 716 * @txpool: The transmit buffer pool containing buffer
 717 * @bufidx: Index of buffer in txpool
 718 * @ltbp: (Output) pointer to the long term buffer (LTB) containing the buffer
 719 * @offset: (Output) offset of buffer in the LTB from @ltbp
 720 *
 721 * Map the given buffer identified by [txpool, bufidx] to an LTB in the
 722 * pool and its corresponding offset.
 723 */
 724static void map_txpool_buf_to_ltb(struct ibmvnic_tx_pool *txpool,
 725				  unsigned int bufidx,
 726				  struct ibmvnic_long_term_buff **ltbp,
 727				  unsigned int *offset)
 728{
 729	struct ibmvnic_long_term_buff *ltb;
 730	int nbufs;	/* # of buffers in one ltb */
 731	int i;
 732
 733	WARN_ON_ONCE(bufidx >= txpool->num_buffers);
 734
 735	for (i = 0; i < txpool->ltb_set.num_ltbs; i++) {
 736		ltb = &txpool->ltb_set.ltbs[i];
 737		nbufs = ltb->size / txpool->buf_size;
 738		if (bufidx < nbufs)
 739			break;
 740		bufidx -= nbufs;
 741	}
 742
 743	*ltbp = ltb;
 744	*offset = bufidx * txpool->buf_size;
 745}
 746
 747static void deactivate_rx_pools(struct ibmvnic_adapter *adapter)
 748{
 749	int i;
 750
 751	for (i = 0; i < adapter->num_active_rx_pools; i++)
 752		adapter->rx_pool[i].active = 0;
 753}
 754
 755static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
 756			      struct ibmvnic_rx_pool *pool)
 757{
 758	int count = pool->size - atomic_read(&pool->available);
 759	u64 handle = adapter->rx_scrq[pool->index]->handle;
 760	struct device *dev = &adapter->vdev->dev;
 761	struct ibmvnic_ind_xmit_queue *ind_bufp;
 762	struct ibmvnic_sub_crq_queue *rx_scrq;
 763	struct ibmvnic_long_term_buff *ltb;
 764	union sub_crq *sub_crq;
 765	int buffers_added = 0;
 766	unsigned long lpar_rc;
 767	struct sk_buff *skb;
 768	unsigned int offset;
 769	dma_addr_t dma_addr;
 770	unsigned char *dst;
 771	int shift = 0;
 772	int bufidx;
 773	int i;
 774
 775	if (!pool->active)
 776		return;
 777
 778	rx_scrq = adapter->rx_scrq[pool->index];
 779	ind_bufp = &rx_scrq->ind_buf;
 780
  781	/* netdev_alloc_skb() could have failed after we saved a few skbs
 782	 * in the indir_buf and we would not have sent them to VIOS yet.
 783	 * To account for them, start the loop at ind_bufp->index rather
 784	 * than 0. If we pushed all the skbs to VIOS, ind_bufp->index will
 785	 * be 0.
 786	 */
 787	for (i = ind_bufp->index; i < count; ++i) {
 788		bufidx = pool->free_map[pool->next_free];
 789
  790		/* We may be reusing the skb from earlier resets. Allocate
 791		 * only if necessary. But since the LTB may have changed
 792		 * during reset (see init_rx_pools()), update LTB below
 793		 * even if reusing skb.
 794		 */
 795		skb = pool->rx_buff[bufidx].skb;
 796		if (!skb) {
 797			skb = netdev_alloc_skb(adapter->netdev,
 798					       pool->buff_size);
 799			if (!skb) {
 800				dev_err(dev, "Couldn't replenish rx buff\n");
 801				adapter->replenish_no_mem++;
 802				break;
 803			}
 804		}
 805
 806		pool->free_map[pool->next_free] = IBMVNIC_INVALID_MAP;
 807		pool->next_free = (pool->next_free + 1) % pool->size;
 808
 809		/* Copy the skb to the long term mapped DMA buffer */
 810		map_rxpool_buf_to_ltb(pool, bufidx, &ltb, &offset);
 811		dst = ltb->buff + offset;
 812		memset(dst, 0, pool->buff_size);
 813		dma_addr = ltb->addr + offset;
 814
 815		/* add the skb to an rx_buff in the pool */
 816		pool->rx_buff[bufidx].data = dst;
 817		pool->rx_buff[bufidx].dma = dma_addr;
 818		pool->rx_buff[bufidx].skb = skb;
 819		pool->rx_buff[bufidx].pool_index = pool->index;
 820		pool->rx_buff[bufidx].size = pool->buff_size;
 821
 822		/* queue the rx_buff for the next send_subcrq_indirect */
 823		sub_crq = &ind_bufp->indir_arr[ind_bufp->index++];
 824		memset(sub_crq, 0, sizeof(*sub_crq));
 825		sub_crq->rx_add.first = IBMVNIC_CRQ_CMD;
 826		sub_crq->rx_add.correlator =
 827		    cpu_to_be64((u64)&pool->rx_buff[bufidx]);
 828		sub_crq->rx_add.ioba = cpu_to_be32(dma_addr);
 829		sub_crq->rx_add.map_id = ltb->map_id;
 830
 831		/* The length field of the sCRQ is defined to be 24 bits so the
 832		 * buffer size needs to be left shifted by a byte before it is
 833		 * converted to big endian to prevent the last byte from being
 834		 * truncated.
 835		 */
 836#ifdef __LITTLE_ENDIAN__
 837		shift = 8;
 838#endif
 839		sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift);
 840
 841		/* if send_subcrq_indirect queue is full, flush to VIOS */
 842		if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS ||
 843		    i == count - 1) {
 844			lpar_rc =
 845				send_subcrq_indirect(adapter, handle,
 846						     (u64)ind_bufp->indir_dma,
 847						     (u64)ind_bufp->index);
 848			if (lpar_rc != H_SUCCESS)
 849				goto failure;
 850			buffers_added += ind_bufp->index;
 851			adapter->replenish_add_buff_success += ind_bufp->index;
 852			ind_bufp->index = 0;
 853		}
 854	}
 855	atomic_add(buffers_added, &pool->available);
 856	return;
 857
 858failure:
 859	if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED)
 860		dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n");
 861	for (i = ind_bufp->index - 1; i >= 0; --i) {
 862		struct ibmvnic_rx_buff *rx_buff;
 863
 864		pool->next_free = pool->next_free == 0 ?
 865				  pool->size - 1 : pool->next_free - 1;
 866		sub_crq = &ind_bufp->indir_arr[i];
 867		rx_buff = (struct ibmvnic_rx_buff *)
 868				be64_to_cpu(sub_crq->rx_add.correlator);
 869		bufidx = (int)(rx_buff - pool->rx_buff);
 870		pool->free_map[pool->next_free] = bufidx;
 871		dev_kfree_skb_any(pool->rx_buff[bufidx].skb);
 872		pool->rx_buff[bufidx].skb = NULL;
 873	}
 874	adapter->replenish_add_buff_failure += ind_bufp->index;
 875	atomic_add(buffers_added, &pool->available);
 876	ind_bufp->index = 0;
 877	if (lpar_rc == H_CLOSED || adapter->failover_pending) {
 878		/* Disable buffer pool replenishment and report carrier off if
 879		 * queue is closed or pending failover.
 880		 * Firmware guarantees that a signal will be sent to the
 881		 * driver, triggering a reset.
 882		 */
 883		deactivate_rx_pools(adapter);
 884		netif_carrier_off(adapter->netdev);
 885	}
 886}
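
/* Worked example of the 24-bit length handling above (illustrative value):
 * with pool->buff_size = 0x2440 on a little endian LPAR, the driver stores
 * cpu_to_be32(0x2440 << 8) = cpu_to_be32(0x00244000). The first three bytes
 * of that big endian value are 00 24 40, so the 24-bit length field carries
 * the full 0x2440; without the shift those bytes would be 00 00 24 and the
 * low byte of the size would be truncated.
 */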
 887
 888static void replenish_pools(struct ibmvnic_adapter *adapter)
 889{
 890	int i;
 891
 892	adapter->replenish_task_cycles++;
 893	for (i = 0; i < adapter->num_active_rx_pools; i++) {
 894		if (adapter->rx_pool[i].active)
 895			replenish_rx_pool(adapter, &adapter->rx_pool[i]);
 896	}
 897
 898	netdev_dbg(adapter->netdev, "Replenished %d pools\n", i);
 899}
 900
 901static void release_stats_buffers(struct ibmvnic_adapter *adapter)
 902{
 903	kfree(adapter->tx_stats_buffers);
 904	kfree(adapter->rx_stats_buffers);
 905	adapter->tx_stats_buffers = NULL;
 906	adapter->rx_stats_buffers = NULL;
 907}
 908
 909static int init_stats_buffers(struct ibmvnic_adapter *adapter)
 910{
 911	adapter->tx_stats_buffers =
 912				kcalloc(IBMVNIC_MAX_QUEUES,
 913					sizeof(struct ibmvnic_tx_queue_stats),
 914					GFP_KERNEL);
 915	if (!adapter->tx_stats_buffers)
 916		return -ENOMEM;
 917
 918	adapter->rx_stats_buffers =
 919				kcalloc(IBMVNIC_MAX_QUEUES,
 920					sizeof(struct ibmvnic_rx_queue_stats),
 921					GFP_KERNEL);
 922	if (!adapter->rx_stats_buffers)
 923		return -ENOMEM;
 924
 925	return 0;
 926}
 927
 928static void release_stats_token(struct ibmvnic_adapter *adapter)
 929{
 930	struct device *dev = &adapter->vdev->dev;
 931
 932	if (!adapter->stats_token)
 933		return;
 934
 935	dma_unmap_single(dev, adapter->stats_token,
 936			 sizeof(struct ibmvnic_statistics),
 937			 DMA_FROM_DEVICE);
 938	adapter->stats_token = 0;
 939}
 940
 941static int init_stats_token(struct ibmvnic_adapter *adapter)
 942{
 943	struct device *dev = &adapter->vdev->dev;
 944	dma_addr_t stok;
 945	int rc;
 946
 947	stok = dma_map_single(dev, &adapter->stats,
 948			      sizeof(struct ibmvnic_statistics),
 949			      DMA_FROM_DEVICE);
 950	rc = dma_mapping_error(dev, stok);
 951	if (rc) {
 952		dev_err(dev, "Couldn't map stats buffer, rc = %d\n", rc);
 953		return rc;
 954	}
 955
 956	adapter->stats_token = stok;
 957	netdev_dbg(adapter->netdev, "Stats token initialized (%llx)\n", stok);
 958	return 0;
 959}
 960
 961/**
 962 * release_rx_pools() - Release any rx pools attached to @adapter.
 963 * @adapter: ibmvnic adapter
 964 *
 965 * Safe to call this multiple times - even if no pools are attached.
 966 */
 967static void release_rx_pools(struct ibmvnic_adapter *adapter)
 968{
 969	struct ibmvnic_rx_pool *rx_pool;
 970	int i, j;
 971
 972	if (!adapter->rx_pool)
 973		return;
 974
 975	for (i = 0; i < adapter->num_active_rx_pools; i++) {
 976		rx_pool = &adapter->rx_pool[i];
 977
 978		netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n", i);
 979
 980		kfree(rx_pool->free_map);
 981
 982		free_ltb_set(adapter, &rx_pool->ltb_set);
 983
 984		if (!rx_pool->rx_buff)
 985			continue;
 986
 987		for (j = 0; j < rx_pool->size; j++) {
 988			if (rx_pool->rx_buff[j].skb) {
 989				dev_kfree_skb_any(rx_pool->rx_buff[j].skb);
 990				rx_pool->rx_buff[j].skb = NULL;
 991			}
 992		}
 993
 994		kfree(rx_pool->rx_buff);
 995	}
 996
 997	kfree(adapter->rx_pool);
 998	adapter->rx_pool = NULL;
 999	adapter->num_active_rx_pools = 0;
1000	adapter->prev_rx_pool_size = 0;
1001}
1002
1003/**
1004 * reuse_rx_pools() - Check if the existing rx pools can be reused.
1005 * @adapter: ibmvnic adapter
1006 *
1007 * Check if the existing rx pools in the adapter can be reused. The
1008 * pools can be reused if the pool parameters (number of pools,
1009 * number of buffers in the pool and size of each buffer) have not
1010 * changed.
1011 *
1012 * NOTE: This assumes that all pools have the same number of buffers
1013 *       which is the case currently. If that changes, we must fix this.
1014 *
1015 * Return: true if the rx pools can be reused, false otherwise.
1016 */
1017static bool reuse_rx_pools(struct ibmvnic_adapter *adapter)
1018{
1019	u64 old_num_pools, new_num_pools;
1020	u64 old_pool_size, new_pool_size;
1021	u64 old_buff_size, new_buff_size;
1022
1023	if (!adapter->rx_pool)
1024		return false;
1025
1026	old_num_pools = adapter->num_active_rx_pools;
1027	new_num_pools = adapter->req_rx_queues;
1028
1029	old_pool_size = adapter->prev_rx_pool_size;
1030	new_pool_size = adapter->req_rx_add_entries_per_subcrq;
1031
1032	old_buff_size = adapter->prev_rx_buf_sz;
1033	new_buff_size = adapter->cur_rx_buf_sz;
1034
1035	if (old_buff_size != new_buff_size ||
1036	    old_num_pools != new_num_pools ||
1037	    old_pool_size != new_pool_size)
1038		return false;
1039
1040	return true;
1041}
1042
1043/**
1044 * init_rx_pools(): Initialize the set of receiver pools in the adapter.
1045 * @netdev: net device associated with the vnic interface
1046 *
1047 * Initialize the set of receiver pools in the ibmvnic adapter associated
1048 * with the net_device @netdev. If possible, reuse the existing rx pools.
 1049 * Otherwise free any existing pools and allocate a new set of pools
1050 * before initializing them.
1051 *
1052 * Return: 0 on success and negative value on error.
1053 */
1054static int init_rx_pools(struct net_device *netdev)
1055{
1056	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
1057	struct device *dev = &adapter->vdev->dev;
1058	struct ibmvnic_rx_pool *rx_pool;
1059	u64 num_pools;
1060	u64 pool_size;		/* # of buffers in one pool */
1061	u64 buff_size;
1062	int i, j, rc;
1063
1064	pool_size = adapter->req_rx_add_entries_per_subcrq;
1065	num_pools = adapter->req_rx_queues;
1066	buff_size = adapter->cur_rx_buf_sz;
1067
1068	if (reuse_rx_pools(adapter)) {
1069		dev_dbg(dev, "Reusing rx pools\n");
1070		goto update_ltb;
1071	}
1072
1073	/* Allocate/populate the pools. */
1074	release_rx_pools(adapter);
1075
1076	adapter->rx_pool = kcalloc(num_pools,
1077				   sizeof(struct ibmvnic_rx_pool),
1078				   GFP_KERNEL);
1079	if (!adapter->rx_pool) {
1080		dev_err(dev, "Failed to allocate rx pools\n");
1081		return -ENOMEM;
1082	}
1083
1084	/* Set num_active_rx_pools early. If we fail below after partial
1085	 * allocation, release_rx_pools() will know how many to look for.
1086	 */
1087	adapter->num_active_rx_pools = num_pools;
1088
1089	for (i = 0; i < num_pools; i++) {
1090		rx_pool = &adapter->rx_pool[i];
1091
1092		netdev_dbg(adapter->netdev,
1093			   "Initializing rx_pool[%d], %lld buffs, %lld bytes each\n",
1094			   i, pool_size, buff_size);
1095
1096		rx_pool->size = pool_size;
1097		rx_pool->index = i;
1098		rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
1099
1100		rx_pool->free_map = kcalloc(rx_pool->size, sizeof(int),
1101					    GFP_KERNEL);
1102		if (!rx_pool->free_map) {
1103			dev_err(dev, "Couldn't alloc free_map %d\n", i);
1104			rc = -ENOMEM;
1105			goto out_release;
1106		}
1107
1108		rx_pool->rx_buff = kcalloc(rx_pool->size,
1109					   sizeof(struct ibmvnic_rx_buff),
1110					   GFP_KERNEL);
1111		if (!rx_pool->rx_buff) {
1112			dev_err(dev, "Couldn't alloc rx buffers\n");
1113			rc = -ENOMEM;
1114			goto out_release;
1115		}
1116	}
1117
1118	adapter->prev_rx_pool_size = pool_size;
1119	adapter->prev_rx_buf_sz = adapter->cur_rx_buf_sz;
1120
1121update_ltb:
1122	for (i = 0; i < num_pools; i++) {
1123		rx_pool = &adapter->rx_pool[i];
1124		dev_dbg(dev, "Updating LTB for rx pool %d [%d, %d]\n",
1125			i, rx_pool->size, rx_pool->buff_size);
1126
1127		rc = alloc_ltb_set(adapter, &rx_pool->ltb_set,
1128				   rx_pool->size, rx_pool->buff_size);
1129		if (rc)
1130			goto out;
1131
1132		for (j = 0; j < rx_pool->size; ++j) {
1133			struct ibmvnic_rx_buff *rx_buff;
1134
1135			rx_pool->free_map[j] = j;
1136
1137			/* NOTE: Don't clear rx_buff->skb here - will leak
1138			 * memory! replenish_rx_pool() will reuse skbs or
1139			 * allocate as necessary.
1140			 */
1141			rx_buff = &rx_pool->rx_buff[j];
1142			rx_buff->dma = 0;
1143			rx_buff->data = 0;
1144			rx_buff->size = 0;
1145			rx_buff->pool_index = 0;
1146		}
1147
1148		/* Mark pool "empty" so replenish_rx_pools() will
1149		 * update the LTB info for each buffer
1150		 */
1151		atomic_set(&rx_pool->available, 0);
1152		rx_pool->next_alloc = 0;
1153		rx_pool->next_free = 0;
1154		/* replenish_rx_pool() may have called deactivate_rx_pools()
1155		 * on failover. Ensure pool is active now.
1156		 */
1157		rx_pool->active = 1;
1158	}
1159	return 0;
1160out_release:
1161	release_rx_pools(adapter);
1162out:
1163	/* We failed to allocate one or more LTBs or map them on the VIOS.
1164	 * Hold onto the pools and any LTBs that we did allocate/map.
1165	 */
1166	return rc;
1167}
1168
1169static void release_vpd_data(struct ibmvnic_adapter *adapter)
1170{
1171	if (!adapter->vpd)
1172		return;
1173
1174	kfree(adapter->vpd->buff);
1175	kfree(adapter->vpd);
1176
1177	adapter->vpd = NULL;
1178}
1179
1180static void release_one_tx_pool(struct ibmvnic_adapter *adapter,
1181				struct ibmvnic_tx_pool *tx_pool)
1182{
1183	kfree(tx_pool->tx_buff);
1184	kfree(tx_pool->free_map);
1185	free_ltb_set(adapter, &tx_pool->ltb_set);
1186}
1187
1188/**
1189 * release_tx_pools() - Release any tx pools attached to @adapter.
1190 * @adapter: ibmvnic adapter
1191 *
1192 * Safe to call this multiple times - even if no pools are attached.
1193 */
1194static void release_tx_pools(struct ibmvnic_adapter *adapter)
1195{
1196	int i;
1197
1198	/* init_tx_pools() ensures that ->tx_pool and ->tso_pool are
1199	 * both NULL or both non-NULL. So we only need to check one.
1200	 */
1201	if (!adapter->tx_pool)
1202		return;
1203
1204	for (i = 0; i < adapter->num_active_tx_pools; i++) {
1205		release_one_tx_pool(adapter, &adapter->tx_pool[i]);
1206		release_one_tx_pool(adapter, &adapter->tso_pool[i]);
1207	}
1208
1209	kfree(adapter->tx_pool);
1210	adapter->tx_pool = NULL;
1211	kfree(adapter->tso_pool);
1212	adapter->tso_pool = NULL;
1213	adapter->num_active_tx_pools = 0;
1214	adapter->prev_tx_pool_size = 0;
1215}
1216
1217static int init_one_tx_pool(struct net_device *netdev,
1218			    struct ibmvnic_tx_pool *tx_pool,
1219			    int pool_size, int buf_size)
1220{
1221	int i;
1222
1223	tx_pool->tx_buff = kcalloc(pool_size,
1224				   sizeof(struct ibmvnic_tx_buff),
1225				   GFP_KERNEL);
1226	if (!tx_pool->tx_buff)
1227		return -ENOMEM;
1228
1229	tx_pool->free_map = kcalloc(pool_size, sizeof(int), GFP_KERNEL);
1230	if (!tx_pool->free_map) {
1231		kfree(tx_pool->tx_buff);
1232		tx_pool->tx_buff = NULL;
1233		return -ENOMEM;
1234	}
1235
1236	for (i = 0; i < pool_size; i++)
1237		tx_pool->free_map[i] = i;
1238
1239	tx_pool->consumer_index = 0;
1240	tx_pool->producer_index = 0;
1241	tx_pool->num_buffers = pool_size;
1242	tx_pool->buf_size = buf_size;
1243
1244	return 0;
1245}
1246
1247/**
1248 * reuse_tx_pools() - Check if the existing tx pools can be reused.
1249 * @adapter: ibmvnic adapter
1250 *
1251 * Check if the existing tx pools in the adapter can be reused. The
1252 * pools can be reused if the pool parameters (number of pools,
1253 * number of buffers in the pool and mtu) have not changed.
1254 *
1255 * NOTE: This assumes that all pools have the same number of buffers
1256 *       which is the case currently. If that changes, we must fix this.
1257 *
1258 * Return: true if the tx pools can be reused, false otherwise.
1259 */
1260static bool reuse_tx_pools(struct ibmvnic_adapter *adapter)
1261{
1262	u64 old_num_pools, new_num_pools;
1263	u64 old_pool_size, new_pool_size;
1264	u64 old_mtu, new_mtu;
1265
1266	if (!adapter->tx_pool)
1267		return false;
1268
1269	old_num_pools = adapter->num_active_tx_pools;
1270	new_num_pools = adapter->num_active_tx_scrqs;
1271	old_pool_size = adapter->prev_tx_pool_size;
1272	new_pool_size = adapter->req_tx_entries_per_subcrq;
1273	old_mtu = adapter->prev_mtu;
1274	new_mtu = adapter->req_mtu;
1275
1276	if (old_mtu != new_mtu ||
1277	    old_num_pools != new_num_pools ||
1278	    old_pool_size != new_pool_size)
1279		return false;
1280
1281	return true;
1282}
1283
1284/**
1285 * init_tx_pools(): Initialize the set of transmit pools in the adapter.
1286 * @netdev: net device associated with the vnic interface
1287 *
1288 * Initialize the set of transmit pools in the ibmvnic adapter associated
1289 * with the net_device @netdev. If possible, reuse the existing tx pools.
 1290 * Otherwise free any existing pools and allocate a new set of pools
1291 * before initializing them.
1292 *
1293 * Return: 0 on success and negative value on error.
1294 */
1295static int init_tx_pools(struct net_device *netdev)
1296{
1297	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
1298	struct device *dev = &adapter->vdev->dev;
1299	int num_pools;
1300	u64 pool_size;		/* # of buffers in pool */
1301	u64 buff_size;
1302	int i, j, rc;
1303
1304	num_pools = adapter->req_tx_queues;
1305
1306	/* We must notify the VIOS about the LTB on all resets - but we only
1307	 * need to alloc/populate pools if either the number of buffers or
1308	 * size of each buffer in the pool has changed.
1309	 */
1310	if (reuse_tx_pools(adapter)) {
1311		netdev_dbg(netdev, "Reusing tx pools\n");
1312		goto update_ltb;
1313	}
1314
1315	/* Allocate/populate the pools. */
1316	release_tx_pools(adapter);
1317
1318	pool_size = adapter->req_tx_entries_per_subcrq;
1319	num_pools = adapter->num_active_tx_scrqs;
1320
1321	adapter->tx_pool = kcalloc(num_pools,
1322				   sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
1323	if (!adapter->tx_pool)
1324		return -ENOMEM;
1325
1326	adapter->tso_pool = kcalloc(num_pools,
1327				    sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
1328	/* To simplify release_tx_pools() ensure that ->tx_pool and
1329	 * ->tso_pool are either both NULL or both non-NULL.
1330	 */
1331	if (!adapter->tso_pool) {
1332		kfree(adapter->tx_pool);
1333		adapter->tx_pool = NULL;
1334		return -ENOMEM;
1335	}
1336
1337	/* Set num_active_tx_pools early. If we fail below after partial
1338	 * allocation, release_tx_pools() will know how many to look for.
1339	 */
1340	adapter->num_active_tx_pools = num_pools;
1341
1342	buff_size = adapter->req_mtu + VLAN_HLEN;
1343	buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
1344
1345	for (i = 0; i < num_pools; i++) {
1346		dev_dbg(dev, "Init tx pool %d [%llu, %llu]\n",
1347			i, adapter->req_tx_entries_per_subcrq, buff_size);
1348
1349		rc = init_one_tx_pool(netdev, &adapter->tx_pool[i],
1350				      pool_size, buff_size);
1351		if (rc)
1352			goto out_release;
1353
1354		rc = init_one_tx_pool(netdev, &adapter->tso_pool[i],
1355				      IBMVNIC_TSO_BUFS,
1356				      IBMVNIC_TSO_BUF_SZ);
1357		if (rc)
1358			goto out_release;
1359	}
1360
1361	adapter->prev_tx_pool_size = pool_size;
1362	adapter->prev_mtu = adapter->req_mtu;
1363
1364update_ltb:
1365	/* NOTE: All tx_pools have the same number of buffers (which is
 1366	 *       the same as pool_size). All tso_pools have IBMVNIC_TSO_BUFS
 1367	 *       buffers (see the calls to init_one_tx_pool() for these).
1368	 *       For consistency, we use tx_pool->num_buffers and
1369	 *       tso_pool->num_buffers below.
1370	 */
1371	rc = -1;
1372	for (i = 0; i < num_pools; i++) {
1373		struct ibmvnic_tx_pool *tso_pool;
1374		struct ibmvnic_tx_pool *tx_pool;
1375
1376		tx_pool = &adapter->tx_pool[i];
1377
1378		dev_dbg(dev, "Updating LTB for tx pool %d [%d, %d]\n",
1379			i, tx_pool->num_buffers, tx_pool->buf_size);
1380
1381		rc = alloc_ltb_set(adapter, &tx_pool->ltb_set,
1382				   tx_pool->num_buffers, tx_pool->buf_size);
1383		if (rc)
1384			goto out;
1385
1386		tx_pool->consumer_index = 0;
1387		tx_pool->producer_index = 0;
1388
1389		for (j = 0; j < tx_pool->num_buffers; j++)
1390			tx_pool->free_map[j] = j;
1391
1392		tso_pool = &adapter->tso_pool[i];
1393
1394		dev_dbg(dev, "Updating LTB for tso pool %d [%d, %d]\n",
1395			i, tso_pool->num_buffers, tso_pool->buf_size);
1396
1397		rc = alloc_ltb_set(adapter, &tso_pool->ltb_set,
1398				   tso_pool->num_buffers, tso_pool->buf_size);
1399		if (rc)
1400			goto out;
1401
1402		tso_pool->consumer_index = 0;
1403		tso_pool->producer_index = 0;
1404
1405		for (j = 0; j < tso_pool->num_buffers; j++)
1406			tso_pool->free_map[j] = j;
1407	}
1408
1409	return 0;
1410out_release:
1411	release_tx_pools(adapter);
1412out:
1413	/* We failed to allocate one or more LTBs or map them on the VIOS.
1414	 * Hold onto the pools and any LTBs that we did allocate/map.
1415	 */
1416	return rc;
1417}
1418
1419static void ibmvnic_napi_enable(struct ibmvnic_adapter *adapter)
1420{
1421	int i;
1422
1423	if (adapter->napi_enabled)
1424		return;
1425
1426	for (i = 0; i < adapter->req_rx_queues; i++)
1427		napi_enable(&adapter->napi[i]);
1428
1429	adapter->napi_enabled = true;
1430}
1431
1432static void ibmvnic_napi_disable(struct ibmvnic_adapter *adapter)
1433{
1434	int i;
1435
1436	if (!adapter->napi_enabled)
1437		return;
1438
1439	for (i = 0; i < adapter->req_rx_queues; i++) {
1440		netdev_dbg(adapter->netdev, "Disabling napi[%d]\n", i);
1441		napi_disable(&adapter->napi[i]);
1442	}
1443
1444	adapter->napi_enabled = false;
1445}
1446
1447static int init_napi(struct ibmvnic_adapter *adapter)
1448{
1449	int i;
1450
1451	adapter->napi = kcalloc(adapter->req_rx_queues,
1452				sizeof(struct napi_struct), GFP_KERNEL);
1453	if (!adapter->napi)
1454		return -ENOMEM;
1455
1456	for (i = 0; i < adapter->req_rx_queues; i++) {
1457		netdev_dbg(adapter->netdev, "Adding napi[%d]\n", i);
1458		netif_napi_add(adapter->netdev, &adapter->napi[i],
1459			       ibmvnic_poll);
1460	}
1461
1462	adapter->num_active_rx_napi = adapter->req_rx_queues;
1463	return 0;
1464}
1465
1466static void release_napi(struct ibmvnic_adapter *adapter)
1467{
1468	int i;
1469
1470	if (!adapter->napi)
1471		return;
1472
1473	for (i = 0; i < adapter->num_active_rx_napi; i++) {
1474		netdev_dbg(adapter->netdev, "Releasing napi[%d]\n", i);
1475		netif_napi_del(&adapter->napi[i]);
1476	}
1477
1478	kfree(adapter->napi);
1479	adapter->napi = NULL;
1480	adapter->num_active_rx_napi = 0;
1481	adapter->napi_enabled = false;
1482}
1483
1484static const char *adapter_state_to_string(enum vnic_state state)
1485{
1486	switch (state) {
1487	case VNIC_PROBING:
1488		return "PROBING";
1489	case VNIC_PROBED:
1490		return "PROBED";
1491	case VNIC_OPENING:
1492		return "OPENING";
1493	case VNIC_OPEN:
1494		return "OPEN";
1495	case VNIC_CLOSING:
1496		return "CLOSING";
1497	case VNIC_CLOSED:
1498		return "CLOSED";
1499	case VNIC_REMOVING:
1500		return "REMOVING";
1501	case VNIC_REMOVED:
1502		return "REMOVED";
1503	case VNIC_DOWN:
1504		return "DOWN";
1505	}
1506	return "UNKNOWN";
1507}
1508
1509static int ibmvnic_login(struct net_device *netdev)
1510{
1511	unsigned long flags, timeout = msecs_to_jiffies(20000);
1512	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
1513	int retry_count = 0;
1514	int retries = 10;
1515	bool retry;
1516	int rc;
1517
1518	do {
1519		retry = false;
1520		if (retry_count > retries) {
1521			netdev_warn(netdev, "Login attempts exceeded\n");
1522			return -EACCES;
1523		}
1524
1525		adapter->init_done_rc = 0;
1526		reinit_completion(&adapter->init_done);
1527		rc = send_login(adapter);
1528		if (rc)
1529			return rc;
1530
1531		if (!wait_for_completion_timeout(&adapter->init_done,
1532						 timeout)) {
1533			netdev_warn(netdev, "Login timed out\n");
1534			adapter->login_pending = false;
1535			goto partial_reset;
1536		}
1537
1538		if (adapter->init_done_rc == ABORTED) {
1539			netdev_warn(netdev, "Login aborted, retrying...\n");
1540			retry = true;
1541			adapter->init_done_rc = 0;
1542			retry_count++;
1543			/* FW or device may be busy, so
1544			 * wait a bit before retrying login
1545			 */
1546			msleep(500);
1547		} else if (adapter->init_done_rc == PARTIALSUCCESS) {
1548			retry_count++;
1549			release_sub_crqs(adapter, 1);
1550
1551			retry = true;
1552			netdev_dbg(netdev,
1553				   "Received partial success, retrying...\n");
1554			adapter->init_done_rc = 0;
1555			reinit_completion(&adapter->init_done);
1556			send_query_cap(adapter);
1557			if (!wait_for_completion_timeout(&adapter->init_done,
1558							 timeout)) {
1559				netdev_warn(netdev,
1560					    "Capabilities query timed out\n");
1561				return -ETIMEDOUT;
1562			}
1563
1564			rc = init_sub_crqs(adapter);
1565			if (rc) {
1566				netdev_warn(netdev,
1567					    "SCRQ initialization failed\n");
1568				return rc;
1569			}
1570
1571			rc = init_sub_crq_irqs(adapter);
1572			if (rc) {
1573				netdev_warn(netdev,
1574					    "SCRQ irq initialization failed\n");
1575				return rc;
1576			}
1577		/* Default/timeout error handling, reset and start fresh */
1578		} else if (adapter->init_done_rc) {
1579			netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n",
1580				    adapter->init_done_rc);
1581
1582partial_reset:
1583			/* adapter login failed, so free any CRQs or sub-CRQs
 1584			 * and register again before attempting to log in again.
 1585			 * If we don't do this then the VIOS may think that
 1586			 * we are already logged in and reject any subsequent
 1587			 * attempts.
1588			 */
1589			netdev_warn(netdev,
1590				    "Freeing and re-registering CRQs before attempting to login again\n");
1591			retry = true;
1592			adapter->init_done_rc = 0;
1593			release_sub_crqs(adapter, true);
 1594			/* Much of this is similar logic to ibmvnic_probe();
 1595			 * we are essentially re-initializing communication
 1596			 * with the server. We really should not run any
 1597			 * resets/failovers here because this is already a form
 1598			 * of reset and we do not want parallel resets occurring.
1599			 */
1600			do {
1601				reinit_init_done(adapter);
1602				/* Clear any failovers we got in the previous
1603				 * pass since we are re-initializing the CRQ
1604				 */
1605				adapter->failover_pending = false;
1606				release_crq_queue(adapter);
1607				/* If we don't sleep here then we risk an
1608				 * unnecessary failover event from the VIOS.
1609				 * This is a known VIOS issue caused by a vnic
1610				 * device freeing and registering a CRQ too
1611				 * quickly.
1612				 */
1613				msleep(1500);
1614				/* Avoid any resets, since we are currently
1615				 * resetting.
1616				 */
1617				spin_lock_irqsave(&adapter->rwi_lock, flags);
1618				flush_reset_queue(adapter);
1619				spin_unlock_irqrestore(&adapter->rwi_lock,
1620						       flags);
1621
1622				rc = init_crq_queue(adapter);
1623				if (rc) {
1624					netdev_err(netdev, "login recovery: init CRQ failed %d\n",
1625						   rc);
1626					return -EIO;
1627				}
1628
1629				rc = ibmvnic_reset_init(adapter, false);
1630				if (rc)
1631					netdev_err(netdev, "login recovery: Reset init failed %d\n",
1632						   rc);
 1633				/* IBMVNIC_CRQ_INIT will return EAGAIN if it
 1634				 * fails. Since ibmvnic_reset_init will free
 1635				 * IRQs on failure, we won't be able to receive
 1636				 * new CRQs, so we need to keep trying. probe()
1637				 * handles this similarly.
1638				 */
1639			} while (rc == -EAGAIN && retry_count++ < retries);
1640		}
1641	} while (retry);
1642
1643	__ibmvnic_set_mac(netdev, adapter->mac_addr);
1644
1645	netdev_dbg(netdev, "[S:%s] Login succeeded\n", adapter_state_to_string(adapter->state));
1646	return 0;
1647}
1648
1649static void release_login_buffer(struct ibmvnic_adapter *adapter)
1650{
1651	if (!adapter->login_buf)
1652		return;
1653
1654	dma_unmap_single(&adapter->vdev->dev, adapter->login_buf_token,
1655			 adapter->login_buf_sz, DMA_TO_DEVICE);
1656	kfree(adapter->login_buf);
1657	adapter->login_buf = NULL;
1658}
1659
1660static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
1661{
1662	if (!adapter->login_rsp_buf)
1663		return;
1664
1665	dma_unmap_single(&adapter->vdev->dev, adapter->login_rsp_buf_token,
1666			 adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
1667	kfree(adapter->login_rsp_buf);
1668	adapter->login_rsp_buf = NULL;
1669}
1670
1671static void release_resources(struct ibmvnic_adapter *adapter)
1672{
1673	release_vpd_data(adapter);
1674
1675	release_napi(adapter);
1676	release_login_buffer(adapter);
1677	release_login_rsp_buffer(adapter);
1678}
1679
1680static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
1681{
1682	struct net_device *netdev = adapter->netdev;
1683	unsigned long timeout = msecs_to_jiffies(20000);
1684	union ibmvnic_crq crq;
1685	bool resend;
1686	int rc;
1687
1688	netdev_dbg(netdev, "setting link state %d\n", link_state);
1689
1690	memset(&crq, 0, sizeof(crq));
1691	crq.logical_link_state.first = IBMVNIC_CRQ_CMD;
1692	crq.logical_link_state.cmd = LOGICAL_LINK_STATE;
1693	crq.logical_link_state.link_state = link_state;
1694
1695	do {
1696		resend = false;
1697
1698		reinit_completion(&adapter->init_done);
1699		rc = ibmvnic_send_crq(adapter, &crq);
1700		if (rc) {
1701			netdev_err(netdev, "Failed to set link state\n");
1702			return rc;
1703		}
1704
1705		if (!wait_for_completion_timeout(&adapter->init_done,
1706						 timeout)) {
1707			netdev_err(netdev, "timeout setting link state\n");
1708			return -ETIMEDOUT;
1709		}
1710
1711		if (adapter->init_done_rc == PARTIALSUCCESS) {
 1712			/* Partial success, delay and re-send */
1713			mdelay(1000);
1714			resend = true;
1715		} else if (adapter->init_done_rc) {
1716			netdev_warn(netdev, "Unable to set link state, rc=%d\n",
1717				    adapter->init_done_rc);
1718			return adapter->init_done_rc;
1719		}
1720	} while (resend);
1721
1722	return 0;
1723}
1724
1725static int set_real_num_queues(struct net_device *netdev)
1726{
1727	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
1728	int rc;
1729
1730	netdev_dbg(netdev, "Setting real tx/rx queues (%llx/%llx)\n",
1731		   adapter->req_tx_queues, adapter->req_rx_queues);
1732
1733	rc = netif_set_real_num_tx_queues(netdev, adapter->req_tx_queues);
1734	if (rc) {
1735		netdev_err(netdev, "failed to set the number of tx queues\n");
1736		return rc;
1737	}
1738
1739	rc = netif_set_real_num_rx_queues(netdev, adapter->req_rx_queues);
1740	if (rc)
1741		netdev_err(netdev, "failed to set the number of rx queues\n");
1742
1743	return rc;
1744}
1745
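/**
 * ibmvnic_get_vpd - retrieve Vital Product Data from the vnic server
 * @adapter: private device data
 *
 * Queries the VPD size with a GET_VPD_SIZE CRQ, (re)allocates and DMA maps
 * a buffer of that size, then issues a GET_VPD CRQ so that firmware can
 * fill the buffer. Each step waits on the fw_done completion.
 *
 * Return: 0 on success, negative errno on failure.
 */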
1746static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
1747{
1748	struct device *dev = &adapter->vdev->dev;
1749	union ibmvnic_crq crq;
1750	int len = 0;
1751	int rc;
1752
1753	if (adapter->vpd->buff)
1754		len = adapter->vpd->len;
1755
1756	mutex_lock(&adapter->fw_lock);
1757	adapter->fw_done_rc = 0;
1758	reinit_completion(&adapter->fw_done);
1759
1760	crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
1761	crq.get_vpd_size.cmd = GET_VPD_SIZE;
1762	rc = ibmvnic_send_crq(adapter, &crq);
1763	if (rc) {
1764		mutex_unlock(&adapter->fw_lock);
1765		return rc;
1766	}
1767
1768	rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000);
1769	if (rc) {
1770		dev_err(dev, "Could not retrieve VPD size, rc = %d\n", rc);
1771		mutex_unlock(&adapter->fw_lock);
1772		return rc;
1773	}
1774	mutex_unlock(&adapter->fw_lock);
1775
1776	if (!adapter->vpd->len)
1777		return -ENODATA;
1778
1779	if (!adapter->vpd->buff)
1780		adapter->vpd->buff = kzalloc(adapter->vpd->len, GFP_KERNEL);
1781	else if (adapter->vpd->len != len)
1782		adapter->vpd->buff =
1783			krealloc(adapter->vpd->buff,
1784				 adapter->vpd->len, GFP_KERNEL);
1785
1786	if (!adapter->vpd->buff) {
1787		dev_err(dev, "Could allocate VPD buffer\n");
1788		return -ENOMEM;
1789	}
1790
1791	adapter->vpd->dma_addr =
1792		dma_map_single(dev, adapter->vpd->buff, adapter->vpd->len,
1793			       DMA_FROM_DEVICE);
1794	if (dma_mapping_error(dev, adapter->vpd->dma_addr)) {
1795		dev_err(dev, "Could not map VPD buffer\n");
1796		kfree(adapter->vpd->buff);
1797		adapter->vpd->buff = NULL;
1798		return -ENOMEM;
1799	}
1800
1801	mutex_lock(&adapter->fw_lock);
1802	adapter->fw_done_rc = 0;
1803	reinit_completion(&adapter->fw_done);
1804
1805	crq.get_vpd.first = IBMVNIC_CRQ_CMD;
1806	crq.get_vpd.cmd = GET_VPD;
1807	crq.get_vpd.ioba = cpu_to_be32(adapter->vpd->dma_addr);
1808	crq.get_vpd.len = cpu_to_be32((u32)adapter->vpd->len);
1809	rc = ibmvnic_send_crq(adapter, &crq);
1810	if (rc) {
1811		kfree(adapter->vpd->buff);
1812		adapter->vpd->buff = NULL;
1813		mutex_unlock(&adapter->fw_lock);
1814		return rc;
1815	}
1816
1817	rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000);
1818	if (rc) {
1819		dev_err(dev, "Unable to retrieve VPD, rc = %d\n", rc);
1820		kfree(adapter->vpd->buff);
1821		adapter->vpd->buff = NULL;
1822		mutex_unlock(&adapter->fw_lock);
1823		return rc;
1824	}
1825
1826	mutex_unlock(&adapter->fw_lock);
1827	return 0;
1828}
1829
1830static int init_resources(struct ibmvnic_adapter *adapter)
1831{
1832	struct net_device *netdev = adapter->netdev;
1833	int rc;
1834
1835	rc = set_real_num_queues(netdev);
1836	if (rc)
1837		return rc;
1838
1839	adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL);
1840	if (!adapter->vpd)
1841		return -ENOMEM;
1842
1843	/* Vital Product Data (VPD) */
1844	rc = ibmvnic_get_vpd(adapter);
1845	if (rc) {
1846		netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n");
1847		return rc;
1848	}
1849
1850	rc = init_napi(adapter);
1851	if (rc)
1852		return rc;
1853
1854	send_query_map(adapter);
1855
1856	rc = init_rx_pools(netdev);
1857	if (rc)
1858		return rc;
1859
1860	rc = init_tx_pools(netdev);
1861	return rc;
1862}
1863
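/**
 * __ibmvnic_open - bring the configured interface up
 * @netdev: net device to open
 *
 * Replenishes the rx pools, enables NAPI and the sub-CRQ interrupts, asks
 * the vnic server to set the logical link state to up and then starts the
 * tx queues. Expects login and resource allocation to have already
 * completed.
 *
 * Return: 0 on success, the error code from set_link_state() otherwise.
 */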
1864static int __ibmvnic_open(struct net_device *netdev)
1865{
1866	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
1867	enum vnic_state prev_state = adapter->state;
1868	int i, rc;
1869
1870	adapter->state = VNIC_OPENING;
1871	replenish_pools(adapter);
1872	ibmvnic_napi_enable(adapter);
1873
1874	/* We're ready to receive frames, enable the sub-crq interrupts and
1875	 * set the logical link state to up
1876	 */
1877	for (i = 0; i < adapter->req_rx_queues; i++) {
1878		netdev_dbg(netdev, "Enabling rx_scrq[%d] irq\n", i);
1879		if (prev_state == VNIC_CLOSED)
1880			enable_irq(adapter->rx_scrq[i]->irq);
1881		enable_scrq_irq(adapter, adapter->rx_scrq[i]);
1882	}
1883
1884	for (i = 0; i < adapter->req_tx_queues; i++) {
1885		netdev_dbg(netdev, "Enabling tx_scrq[%d] irq\n", i);
1886		if (prev_state == VNIC_CLOSED)
1887			enable_irq(adapter->tx_scrq[i]->irq);
1888		enable_scrq_irq(adapter, adapter->tx_scrq[i]);
1889		/* netdev_tx_reset_queue will reset dql stats. During NON_FATAL
1890		 * resets, don't reset the stats because there could be batched
1891		 * skb's waiting to be sent. If we reset dql stats, we risk
1892		 * num_completed being greater than num_queued. This will cause
1893		 * a BUG_ON in dql_completed().
1894		 */
1895		if (adapter->reset_reason != VNIC_RESET_NON_FATAL)
1896			netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i));
1897	}
1898
1899	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
1900	if (rc) {
1901		ibmvnic_napi_disable(adapter);
1902		ibmvnic_disable_irqs(adapter);
1903		return rc;
1904	}
1905
1906	adapter->tx_queues_active = true;
1907
1908	/* Since queues were stopped until now, there shouldn't be
1909	 * anyone in ibmvnic_complete_tx() or ibmvnic_xmit(), so maybe we
1910	 * don't need the synchronize_rcu()? Leaving it for consistency
1911	 * with setting ->tx_queues_active = false.
1912	 */
1913	synchronize_rcu();
1914
1915	netif_tx_start_all_queues(netdev);
1916
1917	if (prev_state == VNIC_CLOSED) {
1918		for (i = 0; i < adapter->req_rx_queues; i++)
1919			napi_schedule(&adapter->napi[i]);
1920	}
1921
1922	adapter->state = VNIC_OPEN;
1923	return rc;
1924}
1925
1926static int ibmvnic_open(struct net_device *netdev)
1927{
1928	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
1929	int rc;
1930
1931	ASSERT_RTNL();
1932
1933	/* If device failover is pending or we are about to reset, just set
1934	 * device state and return. Device operation will be handled by reset
1935	 * routine.
1936	 *
1937	 * It should be safe to overwrite the adapter->state here. Since
1938	 * we hold the rtnl, either the reset has not actually started or
1939	 * the rtnl got dropped during the set_link_state() in do_reset().
1940	 * In the former case, no one else is changing the state (again we
1941	 * have the rtnl) and in the latter case, do_reset() will detect and
1942	 * honor our setting below.
1943	 */
1944	if (adapter->failover_pending || (test_bit(0, &adapter->resetting))) {
1945		netdev_dbg(netdev, "[S:%s FOP:%d] Resetting, deferring open\n",
1946			   adapter_state_to_string(adapter->state),
1947			   adapter->failover_pending);
1948		adapter->state = VNIC_OPEN;
1949		rc = 0;
1950		goto out;
1951	}
1952
1953	if (adapter->state != VNIC_CLOSED) {
1954		rc = ibmvnic_login(netdev);
1955		if (rc)
1956			goto out;
1957
1958		rc = init_resources(adapter);
1959		if (rc) {
1960			netdev_err(netdev, "failed to initialize resources\n");
1961			goto out;
1962		}
1963	}
1964
1965	rc = __ibmvnic_open(netdev);
1966
1967out:
1968	/* If open failed and there is a pending failover or in-progress reset,
1969	 * set device state and return. Device operation will be handled by
1970	 * reset routine. See also comments above regarding rtnl.
1971	 */
1972	if (rc &&
1973	    (adapter->failover_pending || (test_bit(0, &adapter->resetting)))) {
1974		adapter->state = VNIC_OPEN;
1975		rc = 0;
1976	}
1977
1978	if (rc) {
1979		release_resources(adapter);
1980		release_rx_pools(adapter);
1981		release_tx_pools(adapter);
1982	}
1983
1984	return rc;
1985}
1986
1987static void clean_rx_pools(struct ibmvnic_adapter *adapter)
1988{
1989	struct ibmvnic_rx_pool *rx_pool;
1990	struct ibmvnic_rx_buff *rx_buff;
1991	u64 rx_entries;
1992	int rx_scrqs;
1993	int i, j;
1994
1995	if (!adapter->rx_pool)
1996		return;
1997
1998	rx_scrqs = adapter->num_active_rx_pools;
1999	rx_entries = adapter->req_rx_add_entries_per_subcrq;
2000
2001	/* Free any remaining skbs in the rx buffer pools */
2002	for (i = 0; i < rx_scrqs; i++) {
2003		rx_pool = &adapter->rx_pool[i];
2004		if (!rx_pool || !rx_pool->rx_buff)
2005			continue;
2006
2007		netdev_dbg(adapter->netdev, "Cleaning rx_pool[%d]\n", i);
2008		for (j = 0; j < rx_entries; j++) {
2009			rx_buff = &rx_pool->rx_buff[j];
2010			if (rx_buff && rx_buff->skb) {
2011				dev_kfree_skb_any(rx_buff->skb);
2012				rx_buff->skb = NULL;
2013			}
2014		}
2015	}
2016}
2017
2018static void clean_one_tx_pool(struct ibmvnic_adapter *adapter,
2019			      struct ibmvnic_tx_pool *tx_pool)
2020{
2021	struct ibmvnic_tx_buff *tx_buff;
2022	u64 tx_entries;
2023	int i;
2024
2025	if (!tx_pool || !tx_pool->tx_buff)
2026		return;
2027
2028	tx_entries = tx_pool->num_buffers;
2029
2030	for (i = 0; i < tx_entries; i++) {
2031		tx_buff = &tx_pool->tx_buff[i];
2032		if (tx_buff && tx_buff->skb) {
2033			dev_kfree_skb_any(tx_buff->skb);
2034			tx_buff->skb = NULL;
2035		}
2036	}
2037}
2038
2039static void clean_tx_pools(struct ibmvnic_adapter *adapter)
2040{
2041	int tx_scrqs;
2042	int i;
2043
2044	if (!adapter->tx_pool || !adapter->tso_pool)
2045		return;
2046
2047	tx_scrqs = adapter->num_active_tx_pools;
2048
2049	/* Free any remaining skbs in the tx buffer pools */
2050	for (i = 0; i < tx_scrqs; i++) {
2051		netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i);
2052		clean_one_tx_pool(adapter, &adapter->tx_pool[i]);
2053		clean_one_tx_pool(adapter, &adapter->tso_pool[i]);
2054	}
2055}
2056
2057static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter)
2058{
2059	struct net_device *netdev = adapter->netdev;
2060	int i;
2061
2062	if (adapter->tx_scrq) {
2063		for (i = 0; i < adapter->req_tx_queues; i++)
2064			if (adapter->tx_scrq[i]->irq) {
2065				netdev_dbg(netdev,
2066					   "Disabling tx_scrq[%d] irq\n", i);
2067				disable_scrq_irq(adapter, adapter->tx_scrq[i]);
2068				disable_irq(adapter->tx_scrq[i]->irq);
2069			}
2070	}
2071
2072	if (adapter->rx_scrq) {
2073		for (i = 0; i < adapter->req_rx_queues; i++) {
2074			if (adapter->rx_scrq[i]->irq) {
2075				netdev_dbg(netdev,
2076					   "Disabling rx_scrq[%d] irq\n", i);
2077				disable_scrq_irq(adapter, adapter->rx_scrq[i]);
2078				disable_irq(adapter->rx_scrq[i]->irq);
2079			}
2080		}
2081	}
2082}
2083
2084static void ibmvnic_cleanup(struct net_device *netdev)
2085{
2086	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
2087
2088	/* ensure that transmissions are stopped if called by do_reset */
2089
2090	adapter->tx_queues_active = false;
2091
2092	/* Ensure complete_tx() and ibmvnic_xmit() see ->tx_queues_active
2093	 * update so they don't restart a queue after we stop it below.
2094	 */
2095	synchronize_rcu();
2096
2097	if (test_bit(0, &adapter->resetting))
2098		netif_tx_disable(netdev);
2099	else
2100		netif_tx_stop_all_queues(netdev);
2101
2102	ibmvnic_napi_disable(adapter);
2103	ibmvnic_disable_irqs(adapter);
2104}
2105
2106static int __ibmvnic_close(struct net_device *netdev)
2107{
2108	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
2109	int rc = 0;
2110
2111	adapter->state = VNIC_CLOSING;
2112	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
2113	adapter->state = VNIC_CLOSED;
2114	return rc;
2115}
2116
2117static int ibmvnic_close(struct net_device *netdev)
2118{
2119	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
2120	int rc;
2121
2122	netdev_dbg(netdev, "[S:%s FOP:%d FRR:%d] Closing\n",
2123		   adapter_state_to_string(adapter->state),
2124		   adapter->failover_pending,
2125		   adapter->force_reset_recovery);
2126
2127	/* If device failover is pending, just set device state and return.
2128	 * Device operation will be handled by reset routine.
2129	 */
2130	if (adapter->failover_pending) {
2131		adapter->state = VNIC_CLOSED;
2132		return 0;
2133	}
2134
2135	rc = __ibmvnic_close(netdev);
2136	ibmvnic_cleanup(netdev);
2137	clean_rx_pools(adapter);
2138	clean_tx_pools(adapter);
2139
2140	return rc;
2141}
2142
2143/**
2144 * get_hdr_lens - fills list of L2/L3/L4 hdr lens
2145 * @hdr_field: bitfield determining needed headers
2146 * @skb: socket buffer
2147 * @hdr_len: array of header lengths to be filled
2148 *
2149 * Reads hdr_field to determine which headers are needed by firmware
2150 * and saves the individual header lengths in hdr_len. The total length
2151 * of the needed headers is returned and used to build the descriptors.
2152 *
2153 * Return: total len of all headers
2154 */
2155static int get_hdr_lens(u8 hdr_field, struct sk_buff *skb,
2156			int *hdr_len)
2157{
2158	int len = 0;
2159
2161	if ((hdr_field >> 6) & 1) {
2162		hdr_len[0] = skb_mac_header_len(skb);
2163		len += hdr_len[0];
2164	}
2165
2166	if ((hdr_field >> 5) & 1) {
2167		hdr_len[1] = skb_network_header_len(skb);
2168		len += hdr_len[1];
2169	}
2170
2171	if (!((hdr_field >> 4) & 1))
2172		return len;
2173
2174	if (skb->protocol == htons(ETH_P_IP)) {
2175		if (ip_hdr(skb)->protocol == IPPROTO_TCP)
2176			hdr_len[2] = tcp_hdrlen(skb);
2177		else if (ip_hdr(skb)->protocol == IPPROTO_UDP)
2178			hdr_len[2] = sizeof(struct udphdr);
2179	} else if (skb->protocol == htons(ETH_P_IPV6)) {
2180		if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
2181			hdr_len[2] = tcp_hdrlen(skb);
2182		else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP)
2183			hdr_len[2] = sizeof(struct udphdr);
2184	}
2185
2186	return len + hdr_len[2];
2187}
2188
2189/**
2190 * create_hdr_descs - create header and header extension descriptors
2191 * @hdr_field: bitfield determining needed headers
2192 * @hdr_data: buffer containing header data
2193 * @len: length of data buffer
2194 * @hdr_len: array of individual header lengths
2195 * @scrq_arr: descriptor array
2196 *
2197 * Creates header and, if needed, header extension descriptors and
2198 * places them in a descriptor array, scrq_arr
2199 *
2200 * Return: Number of header descs
2201 */
2202
2203static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len,
2204			    union sub_crq *scrq_arr)
2205{
2206	union sub_crq *hdr_desc;
2207	int tmp_len = len;
2208	int num_descs = 0;
2209	u8 *data, *cur;
2210	int tmp;
2211
2212	while (tmp_len > 0) {
2213		cur = hdr_data + len - tmp_len;
2214
2215		hdr_desc = &scrq_arr[num_descs];
2216		if (num_descs) {
2217			data = hdr_desc->hdr_ext.data;
2218			tmp = tmp_len > 29 ? 29 : tmp_len;
2219			hdr_desc->hdr_ext.first = IBMVNIC_CRQ_CMD;
2220			hdr_desc->hdr_ext.type = IBMVNIC_HDR_EXT_DESC;
2221			hdr_desc->hdr_ext.len = tmp;
2222		} else {
2223			data = hdr_desc->hdr.data;
2224			tmp = tmp_len > 24 ? 24 : tmp_len;
2225			hdr_desc->hdr.first = IBMVNIC_CRQ_CMD;
2226			hdr_desc->hdr.type = IBMVNIC_HDR_DESC;
2227			hdr_desc->hdr.len = tmp;
2228			hdr_desc->hdr.l2_len = (u8)hdr_len[0];
2229			hdr_desc->hdr.l3_len = cpu_to_be16((u16)hdr_len[1]);
2230			hdr_desc->hdr.l4_len = (u8)hdr_len[2];
2231			hdr_desc->hdr.flag = hdr_field << 1;
2232		}
2233		memcpy(data, cur, tmp);
2234		tmp_len -= tmp;
2235		num_descs++;
2236	}
2237
2238	return num_descs;
2239}
2240
2241/**
2242 * build_hdr_descs_arr - build a header descriptor array
2243 * @skb: tx socket buffer
2244 * @indir_arr: indirect array
2245 * @num_entries: number of descriptors to be sent
2246 * @hdr_field: bit field determining which headers will be sent
2247 *
2248 * This function will build a TX descriptor array with applicable
2249 * L2/L3/L4 packet header descriptors to be sent by send_subcrq_indirect.
2250 */
2251
2252static void build_hdr_descs_arr(struct sk_buff *skb,
2253				union sub_crq *indir_arr,
2254				int *num_entries, u8 hdr_field)
2255{
2256	int hdr_len[3] = {0, 0, 0};
2257	int tot_len;
2258
2259	tot_len = get_hdr_lens(hdr_field, skb, hdr_len);
2260	*num_entries += create_hdr_descs(hdr_field, skb_mac_header(skb),
2261					 tot_len, hdr_len, indir_arr + 1);
2262}
2263
2264static int ibmvnic_xmit_workarounds(struct sk_buff *skb,
2265				    struct net_device *netdev)
2266{
2267	/* For some backing devices, mishandling of small packets
2268	 * can result in a loss of connection or TX stall. Device
2269	 * architects recommend that no packet should be smaller
2270	 * than the minimum MTU value provided to the driver, so
2271	 * pad any packets to that length
2272	 */
2273	if (skb->len < netdev->min_mtu)
2274		return skb_put_padto(skb, netdev->min_mtu);
2275
2276	return 0;
2277}
2278
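/**
 * ibmvnic_tx_scrq_clean_buffer - drop descriptors queued for indirect send
 * @adapter: private device data
 * @tx_scrq: tx sub-CRQ whose pending indirect buffer entries should be dropped
 *
 * Walks the pending entries in the indirect buffer, returns the associated
 * tx pool buffers to the free map, frees the skbs and accounts them as
 * dropped. Wakes the subqueue if enough entries were released.
 */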
2279static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
2280					 struct ibmvnic_sub_crq_queue *tx_scrq)
2281{
2282	struct ibmvnic_ind_xmit_queue *ind_bufp;
2283	struct ibmvnic_tx_buff *tx_buff;
2284	struct ibmvnic_tx_pool *tx_pool;
2285	union sub_crq tx_scrq_entry;
2286	int queue_num;
2287	int entries;
2288	int index;
2289	int i;
2290
2291	ind_bufp = &tx_scrq->ind_buf;
2292	entries = (u64)ind_bufp->index;
2293	queue_num = tx_scrq->pool_index;
2294
2295	for (i = entries - 1; i >= 0; --i) {
2296		tx_scrq_entry = ind_bufp->indir_arr[i];
2297		if (tx_scrq_entry.v1.type != IBMVNIC_TX_DESC)
2298			continue;
2299		index = be32_to_cpu(tx_scrq_entry.v1.correlator);
2300		if (index & IBMVNIC_TSO_POOL_MASK) {
2301			tx_pool = &adapter->tso_pool[queue_num];
2302			index &= ~IBMVNIC_TSO_POOL_MASK;
2303		} else {
2304			tx_pool = &adapter->tx_pool[queue_num];
2305		}
2306		tx_pool->free_map[tx_pool->consumer_index] = index;
2307		tx_pool->consumer_index = tx_pool->consumer_index == 0 ?
2308					  tx_pool->num_buffers - 1 :
2309					  tx_pool->consumer_index - 1;
2310		tx_buff = &tx_pool->tx_buff[index];
2311		adapter->netdev->stats.tx_packets--;
2312		adapter->netdev->stats.tx_bytes -= tx_buff->skb->len;
2313		adapter->tx_stats_buffers[queue_num].batched_packets--;
2314		adapter->tx_stats_buffers[queue_num].bytes -=
2315						tx_buff->skb->len;
2316		dev_kfree_skb_any(tx_buff->skb);
2317		tx_buff->skb = NULL;
2318		adapter->netdev->stats.tx_dropped++;
2319	}
2320
2321	ind_bufp->index = 0;
2322
2323	if (atomic_sub_return(entries, &tx_scrq->used) <=
2324	    (adapter->req_tx_entries_per_subcrq / 2) &&
2325	    __netif_subqueue_stopped(adapter->netdev, queue_num)) {
2326		rcu_read_lock();
2327
2328		if (adapter->tx_queues_active) {
2329			netif_wake_subqueue(adapter->netdev, queue_num);
2330			netdev_dbg(adapter->netdev, "Started queue %d\n",
2331				   queue_num);
2332		}
2333
2334		rcu_read_unlock();
2335	}
2336}
2337
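/**
 * send_subcrq_direct - send a single descriptor without an indirect buffer
 * @adapter: private device data
 * @remote_handle: handle of the target sub-CRQ
 * @entry: the four 64-bit words of the descriptor
 *
 * Passes one fully formed descriptor to the hypervisor via H_SEND_SUB_CRQ,
 * bypassing the DMA-mapped indirect descriptor buffer.
 *
 * Return: 0 on success, the hypervisor return code otherwise.
 */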
2338static int send_subcrq_direct(struct ibmvnic_adapter *adapter,
2339			      u64 remote_handle, u64 *entry)
2340{
2341	unsigned int ua = adapter->vdev->unit_address;
2342	struct device *dev = &adapter->vdev->dev;
2343	int rc;
2344
2345	/* Make sure the hypervisor sees the complete request */
2346	dma_wmb();
2347	rc = plpar_hcall_norets(H_SEND_SUB_CRQ, ua,
2348				cpu_to_be64(remote_handle),
2349				cpu_to_be64(entry[0]), cpu_to_be64(entry[1]),
2350				cpu_to_be64(entry[2]), cpu_to_be64(entry[3]));
2351
2352	if (rc)
2353		print_subcrq_error(dev, rc, __func__);
2354
2355	return rc;
2356}
2357
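/**
 * ibmvnic_tx_scrq_flush - hand queued tx descriptors to the hypervisor
 * @adapter: private device data
 * @tx_scrq: tx sub-CRQ whose indirect buffer should be flushed
 * @indirect: use the indirect send path rather than a direct send
 *
 * Submits any descriptors accumulated in the sub-CRQ's indirect buffer,
 * either as one indirect request or as a single direct descriptor. On
 * failure the queued buffers are cleaned up and accounted as dropped.
 *
 * Return: 0 if nothing was pending or the send succeeded, otherwise the
 * return code from the hypervisor call.
 */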
2358static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter,
2359				 struct ibmvnic_sub_crq_queue *tx_scrq,
2360				 bool indirect)
2361{
2362	struct ibmvnic_ind_xmit_queue *ind_bufp;
2363	u64 dma_addr;
2364	u64 entries;
2365	u64 handle;
2366	int rc;
2367
2368	ind_bufp = &tx_scrq->ind_buf;
2369	dma_addr = (u64)ind_bufp->indir_dma;
2370	entries = (u64)ind_bufp->index;
2371	handle = tx_scrq->handle;
2372
2373	if (!entries)
2374		return 0;
2375
2376	if (indirect)
2377		rc = send_subcrq_indirect(adapter, handle, dma_addr, entries);
2378	else
2379		rc = send_subcrq_direct(adapter, handle,
2380					(u64 *)ind_bufp->indir_arr);
2381
2382	if (rc)
2383		ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq);
2384	else
2385		ind_bufp->index = 0;
2386	return rc;
2387}
2388
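/**
 * ibmvnic_xmit - transmit an skb on a vnic tx sub-CRQ
 * @skb: packet to transmit
 * @netdev: net device
 *
 * Copies the skb into a long term mapped buffer of the selected tx pool,
 * builds the tx descriptor (plus optional header descriptors for LSO and
 * checksum offload) and queues it in the sub-CRQ's indirect buffer, which
 * is flushed to the hypervisor when full or when no more packets are
 * pending. When firmware needs no header descriptors and nothing else is
 * queued, the descriptor may be sent directly instead.
 *
 * Return: NETDEV_TX_OK; packets that cannot be sent are dropped and counted.
 */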
2389static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
2390{
2391	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
2392	int queue_num = skb_get_queue_mapping(skb);
2393	u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req;
2394	struct device *dev = &adapter->vdev->dev;
2395	struct ibmvnic_ind_xmit_queue *ind_bufp;
2396	struct ibmvnic_tx_buff *tx_buff = NULL;
2397	struct ibmvnic_sub_crq_queue *tx_scrq;
2398	struct ibmvnic_long_term_buff *ltb;
2399	struct ibmvnic_tx_pool *tx_pool;
2400	unsigned int tx_send_failed = 0;
2401	netdev_tx_t ret = NETDEV_TX_OK;
2402	unsigned int tx_map_failed = 0;
2403	union sub_crq indir_arr[16];
2404	unsigned int tx_dropped = 0;
2405	unsigned int tx_dpackets = 0;
2406	unsigned int tx_bpackets = 0;
2407	unsigned int tx_bytes = 0;
2408	dma_addr_t data_dma_addr;
2409	struct netdev_queue *txq;
2410	unsigned long lpar_rc;
2411	unsigned int skblen;
2412	union sub_crq tx_crq;
2413	unsigned int offset;
2414	bool use_scrq_send_direct = false;
2415	int num_entries = 1;
2416	unsigned char *dst;
2417	int bufidx = 0;
2418	u8 proto = 0;
2419
2420	/* If a reset is in progress, drop the packet since
2421	 * the scrqs may get torn down. Otherwise use the
2422	 * rcu to ensure reset waits for us to complete.
2423	 */
2424	rcu_read_lock();
2425	if (!adapter->tx_queues_active) {
2426		dev_kfree_skb_any(skb);
2427
2428		tx_send_failed++;
2429		tx_dropped++;
2430		ret = NETDEV_TX_OK;
2431		goto out;
2432	}
2433
2434	tx_scrq = adapter->tx_scrq[queue_num];
2435	txq = netdev_get_tx_queue(netdev, queue_num);
2436	ind_bufp = &tx_scrq->ind_buf;
2437
2438	if (ibmvnic_xmit_workarounds(skb, netdev)) {
2439		tx_dropped++;
2440		tx_send_failed++;
2441		ret = NETDEV_TX_OK;
2442		lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
2443		if (lpar_rc != H_SUCCESS)
2444			goto tx_err;
2445		goto out;
2446	}
2447
2448	if (skb_is_gso(skb))
2449		tx_pool = &adapter->tso_pool[queue_num];
2450	else
2451		tx_pool = &adapter->tx_pool[queue_num];
2452
2453	bufidx = tx_pool->free_map[tx_pool->consumer_index];
2454
2455	if (bufidx == IBMVNIC_INVALID_MAP) {
2456		dev_kfree_skb_any(skb);
2457		tx_send_failed++;
2458		tx_dropped++;
2459		ret = NETDEV_TX_OK;
2460		lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
2461		if (lpar_rc != H_SUCCESS)
2462			goto tx_err;
2463		goto out;
2464	}
2465
2466	tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP;
2467
2468	map_txpool_buf_to_ltb(tx_pool, bufidx, &ltb, &offset);
2469
2470	dst = ltb->buff + offset;
2471	memset(dst, 0, tx_pool->buf_size);
2472	data_dma_addr = ltb->addr + offset;
2473
2474	/* If we are going to send this skb via send_subcrq_direct(), update
2475	 * the checksum before copying the data into the LTB. Essentially
2476	 * these packets force-disable CSO so that we can guarantee that FW
2477	 * does not need header info and we can send direct. Also, the vnic
2478	 * server must be able to xmit standard packets without header data.
2479	 */
2480	if (*hdrs == 0 && !skb_is_gso(skb) &&
2481	    !ind_bufp->index && !netdev_xmit_more()) {
2482		use_scrq_send_direct = true;
2483		if (skb->ip_summed == CHECKSUM_PARTIAL &&
2484		    skb_checksum_help(skb))
2485			use_scrq_send_direct = false;
2486	}
2487
2488	if (skb_shinfo(skb)->nr_frags) {
2489		int cur, i;
2490
2491		/* Copy the head */
2492		skb_copy_from_linear_data(skb, dst, skb_headlen(skb));
2493		cur = skb_headlen(skb);
2494
2495		/* Copy the frags */
2496		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2497			const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2498
2499			memcpy(dst + cur, skb_frag_address(frag),
2500			       skb_frag_size(frag));
2501			cur += skb_frag_size(frag);
2502		}
2503	} else {
2504		skb_copy_from_linear_data(skb, dst, skb->len);
2505	}
2506
2507	tx_pool->consumer_index =
2508	    (tx_pool->consumer_index + 1) % tx_pool->num_buffers;
2509
2510	tx_buff = &tx_pool->tx_buff[bufidx];
2511
2512	/* Sanity checks on our free map to make sure it points to an index
2513	 * that is not being occupied by another skb. If skb memory is
2514	 * not freed then we see congestion control kick in and halt tx.
2515	 */
2516	if (unlikely(tx_buff->skb)) {
2517		dev_warn_ratelimited(dev, "TX free map points to untracked skb (%s %d idx=%d)\n",
2518				     skb_is_gso(skb) ? "tso_pool" : "tx_pool",
2519				     queue_num, bufidx);
2520		dev_kfree_skb_any(tx_buff->skb);
2521	}
2522
2523	tx_buff->skb = skb;
2524	tx_buff->index = bufidx;
2525	tx_buff->pool_index = queue_num;
2526	skblen = skb->len;
2527
2528	memset(&tx_crq, 0, sizeof(tx_crq));
2529	tx_crq.v1.first = IBMVNIC_CRQ_CMD;
2530	tx_crq.v1.type = IBMVNIC_TX_DESC;
2531	tx_crq.v1.n_crq_elem = 1;
2532	tx_crq.v1.n_sge = 1;
2533	tx_crq.v1.flags1 = IBMVNIC_TX_COMP_NEEDED;
2534
2535	if (skb_is_gso(skb))
2536		tx_crq.v1.correlator =
2537			cpu_to_be32(bufidx | IBMVNIC_TSO_POOL_MASK);
2538	else
2539		tx_crq.v1.correlator = cpu_to_be32(bufidx);
2540	tx_crq.v1.dma_reg = cpu_to_be16(ltb->map_id);
2541	tx_crq.v1.sge_len = cpu_to_be32(skb->len);
2542	tx_crq.v1.ioba = cpu_to_be64(data_dma_addr);
2543
2544	if (adapter->vlan_header_insertion && skb_vlan_tag_present(skb)) {
2545		tx_crq.v1.flags2 |= IBMVNIC_TX_VLAN_INSERT;
2546		tx_crq.v1.vlan_id = cpu_to_be16(skb->vlan_tci);
2547	}
2548
2549	if (skb->protocol == htons(ETH_P_IP)) {
2550		tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV4;
2551		proto = ip_hdr(skb)->protocol;
2552	} else if (skb->protocol == htons(ETH_P_IPV6)) {
2553		tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV6;
2554		proto = ipv6_hdr(skb)->nexthdr;
2555	}
2556
2557	if (proto == IPPROTO_TCP)
2558		tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_TCP;
2559	else if (proto == IPPROTO_UDP)
2560		tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_UDP;
2561
2562	if (skb->ip_summed == CHECKSUM_PARTIAL) {
2563		tx_crq.v1.flags1 |= IBMVNIC_TX_CHKSUM_OFFLOAD;
2564		hdrs += 2;
2565	}
2566	if (skb_is_gso(skb)) {
2567		tx_crq.v1.flags1 |= IBMVNIC_TX_LSO;
2568		tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
2569		hdrs += 2;
2570	} else if (use_scrq_send_direct) {
2571		/* See above comment, CSO disabled with direct xmit */
2572		tx_crq.v1.flags1 &= ~(IBMVNIC_TX_CHKSUM_OFFLOAD);
2573		ind_bufp->index = 1;
2574		tx_buff->num_entries = 1;
2575		netdev_tx_sent_queue(txq, skb->len);
2576		ind_bufp->indir_arr[0] = tx_crq;
2577		lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, false);
2578		if (lpar_rc != H_SUCCESS)
2579			goto tx_err;
2580
2581		tx_dpackets++;
2582		goto early_exit;
2583	}
2584
2585	if ((*hdrs >> 7) & 1)
2586		build_hdr_descs_arr(skb, indir_arr, &num_entries, *hdrs);
2587
2588	tx_crq.v1.n_crq_elem = num_entries;
2589	tx_buff->num_entries = num_entries;
2590	/* flush the buffer if the current entry cannot fit */
2591	if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) {
2592		lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
2593		if (lpar_rc != H_SUCCESS)
2594			goto tx_flush_err;
2595	}
2596
2597	indir_arr[0] = tx_crq;
2598	memcpy(&ind_bufp->indir_arr[ind_bufp->index], &indir_arr[0],
2599	       num_entries * sizeof(struct ibmvnic_generic_scrq));
2600
2601	ind_bufp->index += num_entries;
2602	if (__netdev_tx_sent_queue(txq, skb->len,
2603				   netdev_xmit_more() &&
2604				   ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) {
2605		lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
2606		if (lpar_rc != H_SUCCESS)
2607			goto tx_err;
2608	}
2609
2610	tx_bpackets++;
2611
2612early_exit:
2613	if (atomic_add_return(num_entries, &tx_scrq->used)
2614					>= adapter->req_tx_entries_per_subcrq) {
2615		netdev_dbg(netdev, "Stopping queue %d\n", queue_num);
2616		netif_stop_subqueue(netdev, queue_num);
2617	}
2618
2619	tx_bytes += skblen;
2620	txq_trans_cond_update(txq);
2621	ret = NETDEV_TX_OK;
2622	goto out;
2623
2624tx_flush_err:
2625	dev_kfree_skb_any(skb);
2626	tx_buff->skb = NULL;
2627	tx_pool->consumer_index = tx_pool->consumer_index == 0 ?
2628				  tx_pool->num_buffers - 1 :
2629				  tx_pool->consumer_index - 1;
2630	tx_dropped++;
2631tx_err:
2632	if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER)
2633		dev_err_ratelimited(dev, "tx: send failed\n");
2634
2635	if (lpar_rc == H_CLOSED || adapter->failover_pending) {
2636		/* Disable TX and report carrier off if queue is closed
2637		 * or pending failover.
2638		 * Firmware guarantees that a signal will be sent to the
2639		 * driver, triggering a reset or some other action.
2640		 */
2641		netif_tx_stop_all_queues(netdev);
2642		netif_carrier_off(netdev);
2643	}
2644out:
2645	rcu_read_unlock();
2646	netdev->stats.tx_dropped += tx_dropped;
2647	netdev->stats.tx_bytes += tx_bytes;
2648	netdev->stats.tx_packets += tx_bpackets + tx_dpackets;
2649	adapter->tx_send_failed += tx_send_failed;
2650	adapter->tx_map_failed += tx_map_failed;
2651	adapter->tx_stats_buffers[queue_num].batched_packets += tx_bpackets;
2652	adapter->tx_stats_buffers[queue_num].direct_packets += tx_dpackets;
2653	adapter->tx_stats_buffers[queue_num].bytes += tx_bytes;
2654	adapter->tx_stats_buffers[queue_num].dropped_packets += tx_dropped;
2655
2656	return ret;
2657}
2658
2659static void ibmvnic_set_multi(struct net_device *netdev)
2660{
2661	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
2662	struct netdev_hw_addr *ha;
2663	union ibmvnic_crq crq;
2664
2665	memset(&crq, 0, sizeof(crq));
2666	crq.request_capability.first = IBMVNIC_CRQ_CMD;
2667	crq.request_capability.cmd = REQUEST_CAPABILITY;
2668
2669	if (netdev->flags & IFF_PROMISC) {
2670		if (!adapter->promisc_supported)
2671			return;
2672	} else {
2673		if (netdev->flags & IFF_ALLMULTI) {
2674			/* Accept all multicast */
2675			memset(&crq, 0, sizeof(crq));
2676			crq.multicast_ctrl.first = IBMVNIC_CRQ_CMD;
2677			crq.multicast_ctrl.cmd = MULTICAST_CTRL;
2678			crq.multicast_ctrl.flags = IBMVNIC_ENABLE_ALL;
2679			ibmvnic_send_crq(adapter, &crq);
2680		} else if (netdev_mc_empty(netdev)) {
2681			/* Reject all multicast */
2682			memset(&crq, 0, sizeof(crq));
2683			crq.multicast_ctrl.first = IBMVNIC_CRQ_CMD;
2684			crq.multicast_ctrl.cmd = MULTICAST_CTRL;
2685			crq.multicast_ctrl.flags = IBMVNIC_DISABLE_ALL;
2686			ibmvnic_send_crq(adapter, &crq);
2687		} else {
2688			/* Accept one or more multicast(s) */
2689			netdev_for_each_mc_addr(ha, netdev) {
2690				memset(&crq, 0, sizeof(crq));
2691				crq.multicast_ctrl.first = IBMVNIC_CRQ_CMD;
2692				crq.multicast_ctrl.cmd = MULTICAST_CTRL;
2693				crq.multicast_ctrl.flags = IBMVNIC_ENABLE_MC;
2694				ether_addr_copy(&crq.multicast_ctrl.mac_addr[0],
2695						ha->addr);
2696				ibmvnic_send_crq(adapter, &crq);
2697			}
2698		}
2699	}
2700}
2701
2702static int __ibmvnic_set_mac(struct net_device *netdev, u8 *dev_addr)
2703{
2704	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
2705	union ibmvnic_crq crq;
2706	int rc;
2707
2708	if (!is_valid_ether_addr(dev_addr)) {
2709		rc = -EADDRNOTAVAIL;
2710		goto err;
2711	}
2712
2713	memset(&crq, 0, sizeof(crq));
2714	crq.change_mac_addr.first = IBMVNIC_CRQ_CMD;
2715	crq.change_mac_addr.cmd = CHANGE_MAC_ADDR;
2716	ether_addr_copy(&crq.change_mac_addr.mac_addr[0], dev_addr);
2717
2718	mutex_lock(&adapter->fw_lock);
2719	adapter->fw_done_rc = 0;
2720	reinit_completion(&adapter->fw_done);
2721
2722	rc = ibmvnic_send_crq(adapter, &crq);
2723	if (rc) {
2724		rc = -EIO;
2725		mutex_unlock(&adapter->fw_lock);
2726		goto err;
2727	}
2728
2729	rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000);
2730	/* netdev->dev_addr is changed in handle_change_mac_rsp function */
2731	if (rc || adapter->fw_done_rc) {
2732		rc = -EIO;
2733		mutex_unlock(&adapter->fw_lock);
2734		goto err;
2735	}
2736	mutex_unlock(&adapter->fw_lock);
2737	return 0;
2738err:
2739	ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
2740	return rc;
2741}
2742
2743static int ibmvnic_set_mac(struct net_device *netdev, void *p)
2744{
2745	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
2746	struct sockaddr *addr = p;
2747	int rc;
2748
2749	rc = 0;
2750	if (!is_valid_ether_addr(addr->sa_data))
2751		return -EADDRNOTAVAIL;
2752
2753	ether_addr_copy(adapter->mac_addr, addr->sa_data);
2754	if (adapter->state != VNIC_PROBED)
2755		rc = __ibmvnic_set_mac(netdev, addr->sa_data);
2756
2757	return rc;
2758}
2759
2760static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason)
2761{
2762	switch (reason) {
2763	case VNIC_RESET_FAILOVER:
2764		return "FAILOVER";
2765	case VNIC_RESET_MOBILITY:
2766		return "MOBILITY";
2767	case VNIC_RESET_FATAL:
2768		return "FATAL";
2769	case VNIC_RESET_NON_FATAL:
2770		return "NON_FATAL";
2771	case VNIC_RESET_TIMEOUT:
2772		return "TIMEOUT";
2773	case VNIC_RESET_CHANGE_PARAM:
2774		return "CHANGE_PARAM";
2775	case VNIC_RESET_PASSIVE_INIT:
2776		return "PASSIVE_INIT";
2777	}
2778	return "UNKNOWN";
2779}
2780
2781/*
2782 * Initialize the init_done completion and return code values. We
2783 * can get a transport event just after registering the CRQ and the
2784 * tasklet will use this to communicate the transport event. To ensure
2785 * we don't miss the notification/error, initialize these _before_
2786 * registering the CRQ.
2787 */
2788static inline void reinit_init_done(struct ibmvnic_adapter *adapter)
2789{
2790	reinit_completion(&adapter->init_done);
2791	adapter->init_done_rc = 0;
2792}
2793
2794/*
2795 * do_reset returns zero if we are able to keep processing reset events, or
2796 * non-zero if we hit a fatal error and must halt.
2797 */
2798static int do_reset(struct ibmvnic_adapter *adapter,
2799		    struct ibmvnic_rwi *rwi, u32 reset_state)
2800{
2801	struct net_device *netdev = adapter->netdev;
2802	u64 old_num_rx_queues, old_num_tx_queues;
2803	u64 old_num_rx_slots, old_num_tx_slots;
2804	int rc;
2805
2806	netdev_dbg(adapter->netdev,
2807		   "[S:%s FOP:%d] Reset reason: %s, reset_state: %s\n",
2808		   adapter_state_to_string(adapter->state),
2809		   adapter->failover_pending,
2810		   reset_reason_to_string(rwi->reset_reason),
2811		   adapter_state_to_string(reset_state));
2812
2813	adapter->reset_reason = rwi->reset_reason;
2814	/* requestor of VNIC_RESET_CHANGE_PARAM already has the rtnl lock */
2815	if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM))
2816		rtnl_lock();
2817
2818	/* Now that we have the rtnl lock, clear any pending failover.
2819	 * This will ensure ibmvnic_open() has either completed or will
2820	 * block until failover is complete.
2821	 */
2822	if (rwi->reset_reason == VNIC_RESET_FAILOVER)
2823		adapter->failover_pending = false;
2824
2825	/* read the state and check (again) after getting rtnl */
2826	reset_state = adapter->state;
2827
2828	if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) {
2829		rc = -EBUSY;
2830		goto out;
2831	}
2832
2833	netif_carrier_off(netdev);
2834
2835	old_num_rx_queues = adapter->req_rx_queues;
2836	old_num_tx_queues = adapter->req_tx_queues;
2837	old_num_rx_slots = adapter->req_rx_add_entries_per_subcrq;
2838	old_num_tx_slots = adapter->req_tx_entries_per_subcrq;
2839
2840	ibmvnic_cleanup(netdev);
2841
2842	if (reset_state == VNIC_OPEN &&
2843	    adapter->reset_reason != VNIC_RESET_MOBILITY &&
2844	    adapter->reset_reason != VNIC_RESET_FAILOVER) {
2845		if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
2846			rc = __ibmvnic_close(netdev);
2847			if (rc)
2848				goto out;
2849		} else {
2850			adapter->state = VNIC_CLOSING;
2851
2852			/* Release the RTNL lock before link state change and
2853			 * re-acquire after the link state change to allow
2854			 * linkwatch_event to grab the RTNL lock and run during
2855			 * a reset.
2856			 */
2857			rtnl_unlock();
2858			rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
2859			rtnl_lock();
2860			if (rc)
2861				goto out;
2862
2863			if (adapter->state == VNIC_OPEN) {
2864				/* When we dropped rtnl, ibmvnic_open() got
2865				 * it and noticed that we are resetting and
2866				 * set the adapter state to OPEN. Update our
2867				 * new "target" state, and resume the reset
2868				 * from VNIC_CLOSING state.
2869				 */
2870				netdev_dbg(netdev,
2871					   "Open changed state from %s, updating.\n",
2872					   adapter_state_to_string(reset_state));
2873				reset_state = VNIC_OPEN;
2874				adapter->state = VNIC_CLOSING;
2875			}
2876
2877			if (adapter->state != VNIC_CLOSING) {
2878				/* If someone else changed the adapter state
2879				 * when we dropped the rtnl, fail the reset
2880				 */
2881				rc = -EAGAIN;
2882				goto out;
2883			}
2884			adapter->state = VNIC_CLOSED;
2885		}
2886	}
2887
2888	if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
2889		release_resources(adapter);
2890		release_sub_crqs(adapter, 1);
2891		release_crq_queue(adapter);
2892	}
2893
2894	if (adapter->reset_reason != VNIC_RESET_NON_FATAL) {
2895		/* remove the closed state so when we call open it appears
2896		 * we are coming from the probed state.
2897		 */
2898		adapter->state = VNIC_PROBED;
2899
2900		reinit_init_done(adapter);
2901
2902		if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
2903			rc = init_crq_queue(adapter);
2904		} else if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
2905			rc = ibmvnic_reenable_crq_queue(adapter);
2906			release_sub_crqs(adapter, 1);
2907		} else {
2908			rc = ibmvnic_reset_crq(adapter);
2909			if (rc == H_CLOSED || rc == H_SUCCESS) {
2910				rc = vio_enable_interrupts(adapter->vdev);
2911				if (rc)
2912					netdev_err(adapter->netdev,
2913						   "Reset failed to enable interrupts. rc=%d\n",
2914						   rc);
2915			}
2916		}
2917
2918		if (rc) {
2919			netdev_err(adapter->netdev,
2920				   "Reset couldn't initialize crq. rc=%d\n", rc);
2921			goto out;
2922		}
2923
2924		rc = ibmvnic_reset_init(adapter, true);
2925		if (rc)
2926			goto out;
2927
2928		/* If the adapter was in PROBE or DOWN state prior to the reset,
2929		 * exit here.
2930		 */
2931		if (reset_state == VNIC_PROBED || reset_state == VNIC_DOWN) {
2932			rc = 0;
2933			goto out;
2934		}
2935
2936		rc = ibmvnic_login(netdev);
2937		if (rc)
2938			goto out;
2939
2940		if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
2941			rc = init_resources(adapter);
2942			if (rc)
2943				goto out;
2944		} else if (adapter->req_rx_queues != old_num_rx_queues ||
2945		    adapter->req_tx_queues != old_num_tx_queues ||
2946		    adapter->req_rx_add_entries_per_subcrq !=
2947		    old_num_rx_slots ||
2948		    adapter->req_tx_entries_per_subcrq !=
2949		    old_num_tx_slots ||
2950		    !adapter->rx_pool ||
2951		    !adapter->tso_pool ||
2952		    !adapter->tx_pool) {
2953			release_napi(adapter);
2954			release_vpd_data(adapter);
2955
2956			rc = init_resources(adapter);
2957			if (rc)
2958				goto out;
2959
2960		} else {
2961			rc = init_tx_pools(netdev);
2962			if (rc) {
2963				netdev_dbg(netdev,
2964					   "init tx pools failed (%d)\n",
2965					   rc);
2966				goto out;
2967			}
2968
2969			rc = init_rx_pools(netdev);
2970			if (rc) {
2971				netdev_dbg(netdev,
2972					   "init rx pools failed (%d)\n",
2973					   rc);
2974				goto out;
2975			}
2976		}
2977		ibmvnic_disable_irqs(adapter);
2978	}
2979	adapter->state = VNIC_CLOSED;
2980
2981	if (reset_state == VNIC_CLOSED) {
2982		rc = 0;
2983		goto out;
2984	}
2985
2986	rc = __ibmvnic_open(netdev);
2987	if (rc) {
2988		rc = IBMVNIC_OPEN_FAILED;
2989		goto out;
2990	}
2991
2992	/* refresh device's multicast list */
2993	ibmvnic_set_multi(netdev);
2994
2995	if (adapter->reset_reason == VNIC_RESET_FAILOVER ||
2996	    adapter->reset_reason == VNIC_RESET_MOBILITY)
2997		__netdev_notify_peers(netdev);
2998
2999	rc = 0;
3000
3001out:
3002	/* restore the adapter state if reset failed */
3003	if (rc)
3004		adapter->state = reset_state;
3005	/* requestor of VNIC_RESET_CHANGE_PARAM should still hold the rtnl lock */
3006	if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM))
3007		rtnl_unlock();
3008
3009	netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Reset done, rc %d\n",
3010		   adapter_state_to_string(adapter->state),
3011		   adapter->failover_pending, rc);
3012	return rc;
3013}
3014
3015static int do_hard_reset(struct ibmvnic_adapter *adapter,
3016			 struct ibmvnic_rwi *rwi, u32 reset_state)
3017{
3018	struct net_device *netdev = adapter->netdev;
3019	int rc;
3020
3021	netdev_dbg(adapter->netdev, "Hard resetting driver (%s)\n",
3022		   reset_reason_to_string(rwi->reset_reason));
3023
3024	/* read the state and check (again) after getting rtnl */
3025	reset_state = adapter->state;
3026
3027	if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) {
3028		rc = -EBUSY;
3029		goto out;
3030	}
3031
3032	netif_carrier_off(netdev);
3033	adapter->reset_reason = rwi->reset_reason;
3034
3035	ibmvnic_cleanup(netdev);
3036	release_resources(adapter);
3037	release_sub_crqs(adapter, 0);
3038	release_crq_queue(adapter);
3039
3040	/* remove the closed state so when we call open it appears
3041	 * we are coming from the probed state.
3042	 */
3043	adapter->state = VNIC_PROBED;
3044
3045	reinit_init_done(adapter);
3046
3047	rc = init_crq_queue(adapter);
3048	if (rc) {
3049		netdev_err(adapter->netdev,
3050			   "Couldn't initialize crq. rc=%d\n", rc);
3051		goto out;
3052	}
3053
3054	rc = ibmvnic_reset_init(adapter, false);
3055	if (rc)
3056		goto out;
3057
3058	/* If the adapter was in PROBE or DOWN state prior to the reset,
3059	 * exit here.
3060	 */
3061	if (reset_state == VNIC_PROBED || reset_state == VNIC_DOWN)
3062		goto out;
3063
3064	rc = ibmvnic_login(netdev);
3065	if (rc)
3066		goto out;
3067
3068	rc = init_resources(adapter);
3069	if (rc)
3070		goto out;
3071
3072	ibmvnic_disable_irqs(adapter);
3073	adapter->state = VNIC_CLOSED;
3074
3075	if (reset_state == VNIC_CLOSED)
3076		goto out;
3077
3078	rc = __ibmvnic_open(netdev);
3079	if (rc) {
3080		rc = IBMVNIC_OPEN_FAILED;
3081		goto out;
3082	}
3083
3084	__netdev_notify_peers(netdev);
3085out:
3086	/* restore adapter state if reset failed */
3087	if (rc)
3088		adapter->state = reset_state;
3089	netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Hard reset done, rc %d\n",
3090		   adapter_state_to_string(adapter->state),
3091		   adapter->failover_pending, rc);
3092	return rc;
3093}
3094
3095static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter)
3096{
3097	struct ibmvnic_rwi *rwi;
3098	unsigned long flags;
3099
3100	spin_lock_irqsave(&adapter->rwi_lock, flags);
3101
3102	if (!list_empty(&adapter->rwi_list)) {
3103		rwi = list_first_entry(&adapter->rwi_list, struct ibmvnic_rwi,
3104				       list);
3105		list_del(&rwi->list);
3106	} else {
3107		rwi = NULL;
3108	}
3109
3110	spin_unlock_irqrestore(&adapter->rwi_lock, flags);
3111	return rwi;
3112}
3113
3114/**
3115 * do_passive_init - complete probing when partner device is detected.
3116 * @adapter: ibmvnic_adapter struct
3117 *
3118 * If the ibmvnic device does not have a partner device to communicate with at boot
3119 * and that partner device comes online at a later time, this function is called
3120 * to complete the initialization process of the ibmvnic device.
3121 * Caller is expected to hold rtnl_lock().
3122 *
3123 * Returns non-zero if the sub-CRQs are not initialized properly, leaving
3124 * the device in the down state.
3125 * Returns 0 upon success, with the device in the PROBED state.
3126 */
3127
3128static int do_passive_init(struct ibmvnic_adapter *adapter)
3129{
3130	unsigned long timeout = msecs_to_jiffies(30000);
3131	struct net_device *netdev = adapter->netdev;
3132	struct device *dev = &adapter->vdev->dev;
3133	int rc;
3134
3135	netdev_dbg(netdev, "Partner device found, probing.\n");
3136
3137	adapter->state = VNIC_PROBING;
3138	reinit_completion(&adapter->init_done);
3139	adapter->init_done_rc = 0;
3140	adapter->crq.active = true;
3141
3142	rc = send_crq_init_complete(adapter);
3143	if (rc)
3144		goto out;
3145
3146	rc = send_version_xchg(adapter);
3147	if (rc)
3148		netdev_dbg(adapter->netdev, "send_version_xchg failed, rc=%d\n", rc);
3149
3150	if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
3151		dev_err(dev, "Initialization sequence timed out\n");
3152		rc = -ETIMEDOUT;
3153		goto out;
3154	}
3155
3156	rc = init_sub_crqs(adapter);
3157	if (rc) {
3158		dev_err(dev, "Initialization of sub crqs failed, rc=%d\n", rc);
3159		goto out;
3160	}
3161
3162	rc = init_sub_crq_irqs(adapter);
3163	if (rc) {
3164		dev_err(dev, "Failed to initialize sub crq irqs\n, rc=%d", rc);
3165		goto init_failed;
3166	}
3167
3168	netdev->mtu = adapter->req_mtu - ETH_HLEN;
3169	netdev->min_mtu = adapter->min_mtu - ETH_HLEN;
3170	netdev->max_mtu = adapter->max_mtu - ETH_HLEN;
3171
3172	adapter->state = VNIC_PROBED;
3173	netdev_dbg(netdev, "Probed successfully. Waiting for signal from partner device.\n");
3174
3175	return 0;
3176
3177init_failed:
3178	release_sub_crqs(adapter, 1);
3179out:
3180	adapter->state = VNIC_DOWN;
3181	return rc;
3182}
3183
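/**
 * __ibmvnic_reset - process all queued reset work items
 * @work: the adapter's ibmvnic_reset work_struct
 *
 * Runs from system_long_wq. Takes the ->resetting bit only if there is
 * work queued, then drains the rwi list, dispatching each entry to
 * do_passive_init(), do_hard_reset() or do_reset() as appropriate. A
 * failed reset with an empty queue is retried as a hard reset.
 */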
3184static void __ibmvnic_reset(struct work_struct *work)
3185{
3186	struct ibmvnic_adapter *adapter;
3187	unsigned int timeout = 5000;
3188	struct ibmvnic_rwi *tmprwi;
3189	bool saved_state = false;
3190	struct ibmvnic_rwi *rwi;
3191	unsigned long flags;
3192	struct device *dev;
3193	bool need_reset;
3194	int num_fails = 0;
3195	u32 reset_state;
3196	int rc = 0;
3197
3198	adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
3199		dev = &adapter->vdev->dev;
3200
3201	/* Wait for ibmvnic_probe() to complete. If probe is taking too long
3202	 * or if another reset is in progress, defer work for now. If probe
3203	 * eventually fails it will flush and terminate our work.
3204	 *
3205	 * Three possibilities here:
3206	 * 1. Adapter being removed - just return
3207	 * 2. Timed out on probe or another reset in progress - delay the work
3208	 * 3. Completed probe - perform any resets in queue
3209	 */
3210	if (adapter->state == VNIC_PROBING &&
3211	    !wait_for_completion_timeout(&adapter->probe_done, timeout)) {
3212		dev_err(dev, "Reset thread timed out on probe");
3213		queue_delayed_work(system_long_wq,
3214				   &adapter->ibmvnic_delayed_reset,
3215				   IBMVNIC_RESET_DELAY);
3216		return;
3217	}
3218
3219	/* adapter is done with probe (i.e state is never VNIC_PROBING now) */
3220	if (adapter->state == VNIC_REMOVING)
3221		return;
3222
3223	/* ->rwi_list is stable now (no one else is removing entries) */
3224
3225	/* ibmvnic_probe() may have purged the reset queue after we were
3226	 * scheduled to process a reset so there may be no resets to process.
3227	 * Before setting the ->resetting bit though, we have to make sure
3228	 * that there is in fact a reset to process. Otherwise we may race
3229	 * with ibmvnic_open() and end up leaving the vnic down:
3230	 *
3231	 *	__ibmvnic_reset()	    ibmvnic_open()
3232	 *	-----------------	    --------------
3233	 *
3234	 *  set ->resetting bit
3235	 *  				find ->resetting bit is set
3236	 *  				set ->state to IBMVNIC_OPEN (i.e
3237	 *  				assume reset will open device)
3238	 *  				return
3239	 *  find reset queue empty
3240	 *  return
3241	 *
3242	 *  	Neither performed vnic login/open and vnic stays down
3243	 *
3244	 * If we hold the lock and conditionally set the bit, either we
3245	 * or ibmvnic_open() will complete the open.
3246	 */
3247	need_reset = false;
3248	spin_lock(&adapter->rwi_lock);
3249	if (!list_empty(&adapter->rwi_list)) {
3250		if (test_and_set_bit_lock(0, &adapter->resetting)) {
3251			queue_delayed_work(system_long_wq,
3252					   &adapter->ibmvnic_delayed_reset,
3253					   IBMVNIC_RESET_DELAY);
3254		} else {
3255			need_reset = true;
3256		}
3257	}
3258	spin_unlock(&adapter->rwi_lock);
3259
3260	if (!need_reset)
3261		return;
3262
3263	rwi = get_next_rwi(adapter);
3264	while (rwi) {
3265		spin_lock_irqsave(&adapter->state_lock, flags);
3266
3267		if (adapter->state == VNIC_REMOVING ||
3268		    adapter->state == VNIC_REMOVED) {
3269			spin_unlock_irqrestore(&adapter->state_lock, flags);
3270			kfree(rwi);
3271			rc = EBUSY;
3272			break;
3273		}
3274
3275		if (!saved_state) {
3276			reset_state = adapter->state;
3277			saved_state = true;
3278		}
3279		spin_unlock_irqrestore(&adapter->state_lock, flags);
3280
3281		if (rwi->reset_reason == VNIC_RESET_PASSIVE_INIT) {
3282			rtnl_lock();
3283			rc = do_passive_init(adapter);
3284			rtnl_unlock();
3285			if (!rc)
3286				netif_carrier_on(adapter->netdev);
3287		} else if (adapter->force_reset_recovery) {
3288			/* Since we are doing a hard reset now, clear the
3289			 * failover_pending flag so we don't ignore any
3290			 * future MOBILITY or other resets.
3291			 */
3292			adapter->failover_pending = false;
3293
3294			/* Transport event occurred during previous reset */
3295			if (adapter->wait_for_reset) {
3296				/* Previous was CHANGE_PARAM; caller locked */
3297				adapter->force_reset_recovery = false;
3298				rc = do_hard_reset(adapter, rwi, reset_state);
3299			} else {
3300				rtnl_lock();
3301				adapter->force_reset_recovery = false;
3302				rc = do_hard_reset(adapter, rwi, reset_state);
3303				rtnl_unlock();
3304			}
3305			if (rc)
3306				num_fails++;
3307			else
3308				num_fails = 0;
3309
3310			/* If auto-priority-failover is enabled we can get
3311			 * back to back failovers during resets, resulting
3312			 * in at least two failed resets (from high-priority
3313			 * backing device to low-priority one and then back).
3314			 * If resets continue to fail beyond that, give the
3315			 * adapter some time to settle down before retrying.
3316			 */
3317			if (num_fails >= 3) {
3318				netdev_dbg(adapter->netdev,
3319					   "[S:%s] Hard reset failed %d times, waiting 60 secs\n",
3320					   adapter_state_to_string(adapter->state),
3321					   num_fails);
3322				set_current_state(TASK_UNINTERRUPTIBLE);
3323				schedule_timeout(60 * HZ);
3324			}
3325		} else {
3326			rc = do_reset(adapter, rwi, reset_state);
3327		}
3328		tmprwi = rwi;
3329		adapter->last_reset_time = jiffies;
3330
3331		if (rc)
3332			netdev_dbg(adapter->netdev, "Reset failed, rc=%d\n", rc);
3333
3334		rwi = get_next_rwi(adapter);
3335
3336		/*
3337		 * If there are no resets queued and the previous reset failed,
3338		 * the adapter would be in an undefined state. So retry the
3339		 * previous reset as a hard reset.
3340		 *
3341		 * Else, free the previous rwi and, if there is another reset
3342		 * queued, process the new reset even if previous reset failed
3343		 * (the previous reset could have failed because of a fail
3344		 * over for instance, so process the fail over).
3345		 */
3346		if (!rwi && rc)
3347			rwi = tmprwi;
3348		else
3349			kfree(tmprwi);
3350
3351		if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER ||
3352			    rwi->reset_reason == VNIC_RESET_MOBILITY || rc))
3353			adapter->force_reset_recovery = true;
3354	}
3355
3356	if (adapter->wait_for_reset) {
3357		adapter->reset_done_rc = rc;
3358		complete(&adapter->reset_done);
3359	}
3360
3361	clear_bit_unlock(0, &adapter->resetting);
3362
3363	netdev_dbg(adapter->netdev,
3364		   "[S:%s FRR:%d WFR:%d] Done processing resets\n",
3365		   adapter_state_to_string(adapter->state),
3366		   adapter->force_reset_recovery,
3367		   adapter->wait_for_reset);
3368}
3369
3370static void __ibmvnic_delayed_reset(struct work_struct *work)
3371{
3372	struct ibmvnic_adapter *adapter;
3373
3374	adapter = container_of(work, struct ibmvnic_adapter,
3375			       ibmvnic_delayed_reset.work);
3376	__ibmvnic_reset(&adapter->ibmvnic_reset);
3377}
3378
3379static void flush_reset_queue(struct ibmvnic_adapter *adapter)
3380{
3381	struct list_head *entry, *tmp_entry;
3382
3383	if (!list_empty(&adapter->rwi_list)) {
3384		list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) {
3385			list_del(entry);
3386			kfree(list_entry(entry, struct ibmvnic_rwi, list));
3387		}
3388	}
3389}
3390
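/**
 * ibmvnic_reset - queue a reset work item and schedule the reset worker
 * @adapter: private device data
 * @reason: why the reset is being requested
 *
 * Skips the request if the adapter is being removed, a failover is pending
 * (unless this is the failover reset itself) or an identical reset is
 * already queued. After a transport event the existing queue is flushed so
 * only the new reset is processed.
 *
 * Return: 0 on success, -EBUSY or -ENOMEM otherwise.
 */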
3391static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
3392			 enum ibmvnic_reset_reason reason)
3393{
3394	struct net_device *netdev = adapter->netdev;
3395	struct ibmvnic_rwi *rwi, *tmp;
3396	unsigned long flags;
3397	int ret;
3398
3399	spin_lock_irqsave(&adapter->rwi_lock, flags);
3400
3401	/* If failover is pending don't schedule any other reset.
3402	 * Instead let the failover complete. If there is already a
3403	 * failover reset scheduled, we will detect and drop the
3404	 * duplicate reset when walking the ->rwi_list below.
3405	 */
3406	if (adapter->state == VNIC_REMOVING ||
3407	    adapter->state == VNIC_REMOVED ||
3408	    (adapter->failover_pending && reason != VNIC_RESET_FAILOVER)) {
3409		ret = EBUSY;
3410		netdev_dbg(netdev, "Adapter removing or pending failover, skipping reset\n");
3411		goto err;
3412	}
3413
3414	list_for_each_entry(tmp, &adapter->rwi_list, list) {
3415		if (tmp->reset_reason == reason) {
3416			netdev_dbg(netdev, "Skipping matching reset, reason=%s\n",
3417				   reset_reason_to_string(reason));
3418			ret = EBUSY;
3419			goto err;
3420		}
3421	}
3422
3423	rwi = kzalloc(sizeof(*rwi), GFP_ATOMIC);
3424	if (!rwi) {
3425		ret = ENOMEM;
3426		goto err;
3427	}
3428	/* if we just received a transport event,
3429	 * flush reset queue and process this reset
3430	 */
3431	if (adapter->force_reset_recovery)
3432		flush_reset_queue(adapter);
3433
3434	rwi->reset_reason = reason;
3435	list_add_tail(&rwi->list, &adapter->rwi_list);
3436	netdev_dbg(adapter->netdev, "Scheduling reset (reason %s)\n",
3437		   reset_reason_to_string(reason));
3438	queue_work(system_long_wq, &adapter->ibmvnic_reset);
3439
3440	ret = 0;
3441err:
3442	/* ibmvnic_close() below can block, so drop the lock first */
3443	spin_unlock_irqrestore(&adapter->rwi_lock, flags);
3444
3445	if (ret == ENOMEM)
3446		ibmvnic_close(netdev);
3447
3448	return -ret;
3449}
3450
3451static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue)
3452{
3453	struct ibmvnic_adapter *adapter = netdev_priv(dev);
3454
3455	if (test_bit(0, &adapter->resetting)) {
3456		netdev_err(adapter->netdev,
3457			   "Adapter is resetting, skip timeout reset\n");
3458		return;
3459	}
3460	/* Don't queue up a reset until at least 5 seconds (the default
3461	 * watchdog value) after the last reset
3462	 */
3463	if (time_before(jiffies, (adapter->last_reset_time + dev->watchdog_timeo))) {
3464		netdev_dbg(dev, "Not yet time to tx timeout.\n");
3465		return;
3466	}
3467	ibmvnic_reset(adapter, VNIC_RESET_TIMEOUT);
3468}
3469
3470static void remove_buff_from_pool(struct ibmvnic_adapter *adapter,
3471				  struct ibmvnic_rx_buff *rx_buff)
3472{
3473	struct ibmvnic_rx_pool *pool = &adapter->rx_pool[rx_buff->pool_index];
3474
3475	rx_buff->skb = NULL;
3476
3477	pool->free_map[pool->next_alloc] = (int)(rx_buff - pool->rx_buff);
3478	pool->next_alloc = (pool->next_alloc + 1) % pool->size;
3479
3480	atomic_dec(&pool->available);
3481}
3482
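/**
 * ibmvnic_poll - NAPI receive poll routine
 * @napi: napi context for this rx queue
 * @budget: maximum number of frames to process
 *
 * Pulls completed rx descriptors off the rx sub-CRQ, copies the frame data
 * out of the long term buffer into the skb and passes it up via
 * napi_gro_receive(). Replenishes the rx pool when it runs low and
 * re-enables the sub-CRQ interrupt when the budget is not exhausted.
 *
 * Return: number of frames processed.
 */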
3483static int ibmvnic_poll(struct napi_struct *napi, int budget)
3484{
3485	struct ibmvnic_sub_crq_queue *rx_scrq;
3486	struct ibmvnic_adapter *adapter;
3487	struct net_device *netdev;
3488	int frames_processed;
3489	int scrq_num;
3490
3491	netdev = napi->dev;
3492	adapter = netdev_priv(netdev);
3493	scrq_num = (int)(napi - adapter->napi);
3494	frames_processed = 0;
3495	rx_scrq = adapter->rx_scrq[scrq_num];
3496
3497restart_poll:
3498	while (frames_processed < budget) {
3499		struct sk_buff *skb;
3500		struct ibmvnic_rx_buff *rx_buff;
3501		union sub_crq *next;
3502		u32 length;
3503		u16 offset;
3504		u8 flags = 0;
3505
3506		if (unlikely(test_bit(0, &adapter->resetting) &&
3507			     adapter->reset_reason != VNIC_RESET_NON_FATAL)) {
3508			enable_scrq_irq(adapter, rx_scrq);
3509			napi_complete_done(napi, frames_processed);
3510			return frames_processed;
3511		}
3512
3513		if (!pending_scrq(adapter, rx_scrq))
3514			break;
3515		next = ibmvnic_next_scrq(adapter, rx_scrq);
3516		rx_buff = (struct ibmvnic_rx_buff *)
3517			  be64_to_cpu(next->rx_comp.correlator);
3518		/* do error checking */
3519		if (next->rx_comp.rc) {
3520			netdev_dbg(netdev, "rx buffer returned with rc %x\n",
3521				   be16_to_cpu(next->rx_comp.rc));
3522			/* free the entry */
3523			next->rx_comp.first = 0;
3524			dev_kfree_skb_any(rx_buff->skb);
3525			remove_buff_from_pool(adapter, rx_buff);
3526			continue;
3527		} else if (!rx_buff->skb) {
3528			/* free the entry */
3529			next->rx_comp.first = 0;
3530			remove_buff_from_pool(adapter, rx_buff);
3531			continue;
3532		}
3533
3534		length = be32_to_cpu(next->rx_comp.len);
3535		offset = be16_to_cpu(next->rx_comp.off_frame_data);
3536		flags = next->rx_comp.flags;
3537		skb = rx_buff->skb;
3538		/* load long_term_buff before copying to skb */
3539		dma_rmb();
3540		skb_copy_to_linear_data(skb, rx_buff->data + offset,
3541					length);
3542
3543		/* VLAN Header has been stripped by the system firmware and
3544		 * needs to be inserted by the driver
3545		 */
3546		if (adapter->rx_vlan_header_insertion &&
3547		    (flags & IBMVNIC_VLAN_STRIPPED))
3548			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
3549					       ntohs(next->rx_comp.vlan_tci));
3550
3551		/* free the entry */
3552		next->rx_comp.first = 0;
3553		remove_buff_from_pool(adapter, rx_buff);
3554
3555		skb_put(skb, length);
3556		skb->protocol = eth_type_trans(skb, netdev);
3557		skb_record_rx_queue(skb, scrq_num);
3558
3559		if (flags & IBMVNIC_IP_CHKSUM_GOOD &&
3560		    flags & IBMVNIC_TCP_UDP_CHKSUM_GOOD) {
3561			skb->ip_summed = CHECKSUM_UNNECESSARY;
3562		}
3563
3564		length = skb->len;
3565		napi_gro_receive(napi, skb); /* send it up */
3566		netdev->stats.rx_packets++;
3567		netdev->stats.rx_bytes += length;
3568		adapter->rx_stats_buffers[scrq_num].packets++;
3569		adapter->rx_stats_buffers[scrq_num].bytes += length;
3570		frames_processed++;
3571	}
3572
3573	if (adapter->state != VNIC_CLOSING &&
3574	    (atomic_read(&adapter->rx_pool[scrq_num].available) <
3575	      adapter->req_rx_add_entries_per_subcrq / 2))
3576		replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]);
3577	if (frames_processed < budget) {
3578		if (napi_complete_done(napi, frames_processed)) {
3579			enable_scrq_irq(adapter, rx_scrq);
3580			if (pending_scrq(adapter, rx_scrq)) {
3581				if (napi_schedule(napi)) {
3582					disable_scrq_irq(adapter, rx_scrq);
3583					goto restart_poll;
3584				}
3585			}
3586		}
3587	}
3588	return frames_processed;
3589}
3590
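/* Apply the adapter->desired settings by triggering a CHANGE_PARAM
 * reset and waiting for it to complete; if the reset fails, restore
 * the saved fallback values and retry the reset once.
 */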
3591static int wait_for_reset(struct ibmvnic_adapter *adapter)
3592{
3593	int rc, ret;
3594
3595	adapter->fallback.mtu = adapter->req_mtu;
3596	adapter->fallback.rx_queues = adapter->req_rx_queues;
3597	adapter->fallback.tx_queues = adapter->req_tx_queues;
3598	adapter->fallback.rx_entries = adapter->req_rx_add_entries_per_subcrq;
3599	adapter->fallback.tx_entries = adapter->req_tx_entries_per_subcrq;
3600
3601	reinit_completion(&adapter->reset_done);
3602	adapter->wait_for_reset = true;
3603	rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
3604
3605	if (rc) {
3606		ret = rc;
3607		goto out;
3608	}
3609	rc = ibmvnic_wait_for_completion(adapter, &adapter->reset_done, 60000);
3610	if (rc) {
3611		ret = -ENODEV;
3612		goto out;
3613	}
3614
3615	ret = 0;
3616	if (adapter->reset_done_rc) {
3617		ret = -EIO;
3618		adapter->desired.mtu = adapter->fallback.mtu;
3619		adapter->desired.rx_queues = adapter->fallback.rx_queues;
3620		adapter->desired.tx_queues = adapter->fallback.tx_queues;
3621		adapter->desired.rx_entries = adapter->fallback.rx_entries;
3622		adapter->desired.tx_entries = adapter->fallback.tx_entries;
3623
3624		reinit_completion(&adapter->reset_done);
3625		adapter->wait_for_reset = true;
3626		rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
3627		if (rc) {
3628			ret = rc;
3629			goto out;
3630		}
3631		rc = ibmvnic_wait_for_completion(adapter, &adapter->reset_done,
3632						 60000);
3633		if (rc) {
3634			ret = -ENODEV;
3635			goto out;
3636		}
3637	}
3638out:
3639	adapter->wait_for_reset = false;
3640
3641	return ret;
3642}
3643
3644static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu)
3645{
3646	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3647
3648	adapter->desired.mtu = new_mtu + ETH_HLEN;
3649
3650	return wait_for_reset(adapter);
3651}
3652
3653static netdev_features_t ibmvnic_features_check(struct sk_buff *skb,
3654						struct net_device *dev,
3655						netdev_features_t features)
3656{
3657	/* Some backing hardware adapters cannot
3658	 * handle packets with an MSS less than 224
3659	 * or with only one segment.
3660	 */
3661	if (skb_is_gso(skb)) {
3662		if (skb_shinfo(skb)->gso_size < 224 ||
3663		    skb_shinfo(skb)->gso_segs == 1)
3664			features &= ~NETIF_F_GSO_MASK;
3665	}
3666
3667	return features;
3668}
3669
3670static const struct net_device_ops ibmvnic_netdev_ops = {
3671	.ndo_open		= ibmvnic_open,
3672	.ndo_stop		= ibmvnic_close,
3673	.ndo_start_xmit		= ibmvnic_xmit,
3674	.ndo_set_rx_mode	= ibmvnic_set_multi,
3675	.ndo_set_mac_address	= ibmvnic_set_mac,
3676	.ndo_validate_addr	= eth_validate_addr,
3677	.ndo_tx_timeout		= ibmvnic_tx_timeout,
3678	.ndo_change_mtu		= ibmvnic_change_mtu,
3679	.ndo_features_check     = ibmvnic_features_check,
3680};
3681
3682/* ethtool functions */
3683
3684static int ibmvnic_get_link_ksettings(struct net_device *netdev,
3685				      struct ethtool_link_ksettings *cmd)
3686{
3687	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3688	int rc;
3689
3690	rc = send_query_phys_parms(adapter);
3691	if (rc) {
3692		adapter->speed = SPEED_UNKNOWN;
3693		adapter->duplex = DUPLEX_UNKNOWN;
3694	}
3695	cmd->base.speed = adapter->speed;
3696	cmd->base.duplex = adapter->duplex;
3697	cmd->base.port = PORT_FIBRE;
3698	cmd->base.phy_address = 0;
3699	cmd->base.autoneg = AUTONEG_ENABLE;
3700
3701	return 0;
3702}
3703
3704static void ibmvnic_get_drvinfo(struct net_device *netdev,
3705				struct ethtool_drvinfo *info)
3706{
3707	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3708
3709	strscpy(info->driver, ibmvnic_driver_name, sizeof(info->driver));
3710	strscpy(info->version, IBMVNIC_DRIVER_VERSION, sizeof(info->version));
3711	strscpy(info->fw_version, adapter->fw_version,
3712		sizeof(info->fw_version));
3713}
3714
3715static u32 ibmvnic_get_msglevel(struct net_device *netdev)
3716{
3717	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3718
3719	return adapter->msg_enable;
3720}
3721
3722static void ibmvnic_set_msglevel(struct net_device *netdev, u32 data)
3723{
3724	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3725
3726	adapter->msg_enable = data;
3727}
3728
3729static u32 ibmvnic_get_link(struct net_device *netdev)
3730{
3731	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3732
3733	/* Don't need to send a query because we request a logical link up at
3734	 * init and then we wait for link state indications
3735	 */
3736	return adapter->logical_link_state;
3737}
3738
3739static void ibmvnic_get_ringparam(struct net_device *netdev,
3740				  struct ethtool_ringparam *ring,
3741				  struct kernel_ethtool_ringparam *kernel_ring,
3742				  struct netlink_ext_ack *extack)
3743{
3744	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3745
3746	ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
3747	ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
3748	ring->rx_mini_max_pending = 0;
3749	ring->rx_jumbo_max_pending = 0;
3750	ring->rx_pending = adapter->req_rx_add_entries_per_subcrq;
3751	ring->tx_pending = adapter->req_tx_entries_per_subcrq;
3752	ring->rx_mini_pending = 0;
3753	ring->rx_jumbo_pending = 0;
3754}
3755
3756static int ibmvnic_set_ringparam(struct net_device *netdev,
3757				 struct ethtool_ringparam *ring,
3758				 struct kernel_ethtool_ringparam *kernel_ring,
3759				 struct netlink_ext_ack *extack)
3760{
3761	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3762
3763	if (ring->rx_pending > adapter->max_rx_add_entries_per_subcrq  ||
3764	    ring->tx_pending > adapter->max_tx_entries_per_subcrq) {
3765		netdev_err(netdev, "Invalid request.\n");
3766		netdev_err(netdev, "Max tx buffers = %llu\n",
3767			   adapter->max_tx_entries_per_subcrq);
3768		netdev_err(netdev, "Max rx buffers = %llu\n",
3769			   adapter->max_rx_add_entries_per_subcrq);
3770		return -EINVAL;
3771	}
3772
3773	adapter->desired.rx_entries = ring->rx_pending;
3774	adapter->desired.tx_entries = ring->tx_pending;
3775
3776	return wait_for_reset(adapter);
3777}
3778
3779static void ibmvnic_get_channels(struct net_device *netdev,
3780				 struct ethtool_channels *channels)
3781{
3782	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3783
3784	channels->max_rx = adapter->max_rx_queues;
3785	channels->max_tx = adapter->max_tx_queues;
3786	channels->max_other = 0;
3787	channels->max_combined = 0;
3788	channels->rx_count = adapter->req_rx_queues;
3789	channels->tx_count = adapter->req_tx_queues;
3790	channels->other_count = 0;
3791	channels->combined_count = 0;
3792}
3793
3794static int ibmvnic_set_channels(struct net_device *netdev,
3795				struct ethtool_channels *channels)
3796{
3797	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3798
3799	adapter->desired.rx_queues = channels->rx_count;
3800	adapter->desired.tx_queues = channels->tx_count;
3801
3802	return wait_for_reset(adapter);
3803}
3804
3805static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
3806{
3807	struct ibmvnic_adapter *adapter = netdev_priv(dev);
3808	int i;
3809
3810	if (stringset != ETH_SS_STATS)
3811		return;
3812
3813	for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++)
3814		ethtool_puts(&data, ibmvnic_stats[i].name);
3815
3816	for (i = 0; i < adapter->req_tx_queues; i++) {
3817		ethtool_sprintf(&data, "tx%d_batched_packets", i);
3818		ethtool_sprintf(&data, "tx%d_direct_packets", i);
3819		ethtool_sprintf(&data, "tx%d_bytes", i);
3820		ethtool_sprintf(&data, "tx%d_dropped_packets", i);
3821	}
3822
3823	for (i = 0; i < adapter->req_rx_queues; i++) {
3824		ethtool_sprintf(&data, "rx%d_packets", i);
3825		ethtool_sprintf(&data, "rx%d_bytes", i);
3826		ethtool_sprintf(&data, "rx%d_interrupts", i);
3827	}
3828}
3829
3830static int ibmvnic_get_sset_count(struct net_device *dev, int sset)
3831{
3832	struct ibmvnic_adapter *adapter = netdev_priv(dev);
3833
3834	switch (sset) {
3835	case ETH_SS_STATS:
3836		return ARRAY_SIZE(ibmvnic_stats) +
3837		       adapter->req_tx_queues * NUM_TX_STATS +
3838		       adapter->req_rx_queues * NUM_RX_STATS;
3839	default:
3840		return -EOPNOTSUPP;
3841	}
3842}
3843
3844static void ibmvnic_get_ethtool_stats(struct net_device *dev,
3845				      struct ethtool_stats *stats, u64 *data)
3846{
3847	struct ibmvnic_adapter *adapter = netdev_priv(dev);
3848	union ibmvnic_crq crq;
3849	int i, j;
3850	int rc;
3851
3852	memset(&crq, 0, sizeof(crq));
3853	crq.request_statistics.first = IBMVNIC_CRQ_CMD;
3854	crq.request_statistics.cmd = REQUEST_STATISTICS;
3855	crq.request_statistics.ioba = cpu_to_be32(adapter->stats_token);
3856	crq.request_statistics.len =
3857	    cpu_to_be32(sizeof(struct ibmvnic_statistics));
3858
3859	/* Wait for data to be written */
3860	reinit_completion(&adapter->stats_done);
3861	rc = ibmvnic_send_crq(adapter, &crq);
3862	if (rc)
3863		return;
3864	rc = ibmvnic_wait_for_completion(adapter, &adapter->stats_done, 10000);
3865	if (rc)
3866		return;
3867
3868	for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++)
3869		data[i] = be64_to_cpu(IBMVNIC_GET_STAT
3870				      (adapter, ibmvnic_stats[i].offset));
3871
3872	for (j = 0; j < adapter->req_tx_queues; j++) {
3873		data[i] = adapter->tx_stats_buffers[j].batched_packets;
3874		i++;
3875		data[i] = adapter->tx_stats_buffers[j].direct_packets;
3876		i++;
3877		data[i] = adapter->tx_stats_buffers[j].bytes;
3878		i++;
3879		data[i] = adapter->tx_stats_buffers[j].dropped_packets;
3880		i++;
3881	}
3882
3883	for (j = 0; j < adapter->req_rx_queues; j++) {
3884		data[i] = adapter->rx_stats_buffers[j].packets;
3885		i++;
3886		data[i] = adapter->rx_stats_buffers[j].bytes;
3887		i++;
3888		data[i] = adapter->rx_stats_buffers[j].interrupts;
3889		i++;
3890	}
3891}
3892
3893static const struct ethtool_ops ibmvnic_ethtool_ops = {
3894	.get_drvinfo		= ibmvnic_get_drvinfo,
3895	.get_msglevel		= ibmvnic_get_msglevel,
3896	.set_msglevel		= ibmvnic_set_msglevel,
3897	.get_link		= ibmvnic_get_link,
3898	.get_ringparam		= ibmvnic_get_ringparam,
3899	.set_ringparam		= ibmvnic_set_ringparam,
3900	.get_channels		= ibmvnic_get_channels,
3901	.set_channels		= ibmvnic_set_channels,
3902	.get_strings            = ibmvnic_get_strings,
3903	.get_sset_count         = ibmvnic_get_sset_count,
3904	.get_ethtool_stats	= ibmvnic_get_ethtool_stats,
3905	.get_link_ksettings	= ibmvnic_get_link_ksettings,
3906};
3907
3908	/* Routines for managing CRQs/sCRQs */
3909
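/* Reset a single sub-CRQ: release its IRQ mapping, zero the message
 * queue and bookkeeping, then re-register the queue with the
 * hypervisor via H_REG_SUB_CRQ.
 */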
3910static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter,
3911				   struct ibmvnic_sub_crq_queue *scrq)
3912{
3913	int rc;
3914
3915	if (!scrq) {
3916		netdev_dbg(adapter->netdev, "Invalid scrq reset.\n");
3917		return -EINVAL;
3918	}
3919
3920	if (scrq->irq) {
3921		free_irq(scrq->irq, scrq);
3922		irq_dispose_mapping(scrq->irq);
3923		scrq->irq = 0;
3924	}
3925
3926	if (scrq->msgs) {
3927		memset(scrq->msgs, 0, 4 * PAGE_SIZE);
3928		atomic_set(&scrq->used, 0);
3929		scrq->cur = 0;
3930		scrq->ind_buf.index = 0;
3931	} else {
3932		netdev_dbg(adapter->netdev, "Invalid scrq reset\n");
3933		return -EINVAL;
3934	}
3935
3936	rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token,
3937			   4 * PAGE_SIZE, &scrq->crq_num, &scrq->hw_irq);
3938	return rc;
3939}
3940
3941static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter)
3942{
3943	int i, rc;
3944
3945	if (!adapter->tx_scrq || !adapter->rx_scrq)
3946		return -EINVAL;
3947
3948	ibmvnic_clean_affinity(adapter);
3949
3950	for (i = 0; i < adapter->req_tx_queues; i++) {
3951		netdev_dbg(adapter->netdev, "Re-setting tx_scrq[%d]\n", i);
3952		rc = reset_one_sub_crq_queue(adapter, adapter->tx_scrq[i]);
3953		if (rc)
3954			return rc;
3955	}
3956
3957	for (i = 0; i < adapter->req_rx_queues; i++) {
3958		netdev_dbg(adapter->netdev, "Re-setting rx_scrq[%d]\n", i);
3959		rc = reset_one_sub_crq_queue(adapter, adapter->rx_scrq[i]);
3960		if (rc)
3961			return rc;
3962	}
3963
3964	return rc;
3965}
3966
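/* Free one sub-CRQ: optionally deregister it from the hypervisor with
 * H_FREE_SUB_CRQ (retrying while busy), then free the indirect
 * descriptor buffer, unmap and free the message pages, and free the
 * queue structure itself.
 */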
3967static void release_sub_crq_queue(struct ibmvnic_adapter *adapter,
3968				  struct ibmvnic_sub_crq_queue *scrq,
3969				  bool do_h_free)
3970{
3971	struct device *dev = &adapter->vdev->dev;
3972	long rc;
3973
3974	netdev_dbg(adapter->netdev, "Releasing sub-CRQ\n");
3975
3976	if (do_h_free) {
3977		/* Close the sub-crqs */
3978		do {
3979			rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
3980						adapter->vdev->unit_address,
3981						scrq->crq_num);
3982		} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
3983
3984		if (rc) {
3985			netdev_err(adapter->netdev,
3986				   "Failed to release sub-CRQ %16lx, rc = %ld\n",
3987				   scrq->crq_num, rc);
3988		}
3989	}
3990
3991	dma_free_coherent(dev,
3992			  IBMVNIC_IND_ARR_SZ,
3993			  scrq->ind_buf.indir_arr,
3994			  scrq->ind_buf.indir_dma);
3995
3996	dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
3997			 DMA_BIDIRECTIONAL);
3998	free_pages((unsigned long)scrq->msgs, 2);
3999	free_cpumask_var(scrq->affinity_mask);
4000	kfree(scrq);
4001}
4002
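/* Allocate and register one sub-CRQ: four pages of queue messages are
 * DMA mapped and registered with H_REG_SUB_CRQ, and an indirect
 * descriptor buffer is allocated for batched submissions.
 */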
4003static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
4004							*adapter)
4005{
4006	struct device *dev = &adapter->vdev->dev;
4007	struct ibmvnic_sub_crq_queue *scrq;
4008	int rc;
4009
4010	scrq = kzalloc(sizeof(*scrq), GFP_KERNEL);
4011	if (!scrq)
4012		return NULL;
4013
4014	scrq->msgs =
4015		(union sub_crq *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 2);
4016	if (!scrq->msgs) {
4017		dev_warn(dev, "Couldn't allocate crq queue messages page\n");
4018		goto zero_page_failed;
4019	}
4020	if (!zalloc_cpumask_var(&scrq->affinity_mask, GFP_KERNEL))
4021		goto cpumask_alloc_failed;
4022
4023	scrq->msg_token = dma_map_single(dev, scrq->msgs, 4 * PAGE_SIZE,
4024					 DMA_BIDIRECTIONAL);
4025	if (dma_mapping_error(dev, scrq->msg_token)) {
4026		dev_warn(dev, "Couldn't map crq queue messages page\n");
4027		goto map_failed;
4028	}
4029
4030	rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token,
4031			   4 * PAGE_SIZE, &scrq->crq_num, &scrq->hw_irq);
4032
4033	if (rc == H_RESOURCE)
4034		rc = ibmvnic_reset_crq(adapter);
4035
4036	if (rc == H_CLOSED) {
4037		dev_warn(dev, "Partner adapter not ready, waiting.\n");
4038	} else if (rc) {
4039		dev_warn(dev, "Error %d registering sub-crq\n", rc);
4040		goto reg_failed;
4041	}
4042
4043	scrq->adapter = adapter;
4044	scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs);
4045	scrq->ind_buf.index = 0;
4046
4047	scrq->ind_buf.indir_arr =
4048		dma_alloc_coherent(dev,
4049				   IBMVNIC_IND_ARR_SZ,
4050				   &scrq->ind_buf.indir_dma,
4051				   GFP_KERNEL);
4052
4053	if (!scrq->ind_buf.indir_arr)
4054		goto indir_failed;
4055
4056	spin_lock_init(&scrq->lock);
4057
4058	netdev_dbg(adapter->netdev,
4059		   "sub-crq initialized, num %lx, hw_irq=%lx, irq=%x\n",
4060		   scrq->crq_num, scrq->hw_irq, scrq->irq);
4061
4062	return scrq;
4063
4064indir_failed:
4065	do {
4066		rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
4067					adapter->vdev->unit_address,
4068					scrq->crq_num);
4069	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
4070reg_failed:
4071	dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
4072			 DMA_BIDIRECTIONAL);
4073map_failed:
4074	free_cpumask_var(scrq->affinity_mask);
4075cpumask_alloc_failed:
4076	free_pages((unsigned long)scrq->msgs, 2);
4077zero_page_failed:
4078	kfree(scrq);
4079
4080	return NULL;
4081}
4082
4083static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
4084{
4085	int i;
4086
4087	ibmvnic_clean_affinity(adapter);
4088	if (adapter->tx_scrq) {
4089		for (i = 0; i < adapter->num_active_tx_scrqs; i++) {
4090			if (!adapter->tx_scrq[i])
4091				continue;
4092
4093			netdev_dbg(adapter->netdev, "Releasing tx_scrq[%d]\n",
4094				   i);
4095			ibmvnic_tx_scrq_clean_buffer(adapter, adapter->tx_scrq[i]);
4096			if (adapter->tx_scrq[i]->irq) {
4097				free_irq(adapter->tx_scrq[i]->irq,
4098					 adapter->tx_scrq[i]);
4099				irq_dispose_mapping(adapter->tx_scrq[i]->irq);
4100				adapter->tx_scrq[i]->irq = 0;
4101			}
4102
4103			release_sub_crq_queue(adapter, adapter->tx_scrq[i],
4104					      do_h_free);
4105		}
4106
4107		kfree(adapter->tx_scrq);
4108		adapter->tx_scrq = NULL;
4109		adapter->num_active_tx_scrqs = 0;
4110	}
4111
4112	/* Clean up any remaining outstanding SKBs;
4113	 * the IRQs have been freed above, so no further
4114	 * completions will arrive for them.
4115	 */
4116	clean_tx_pools(adapter);
4117
4118	if (adapter->rx_scrq) {
4119		for (i = 0; i < adapter->num_active_rx_scrqs; i++) {
4120			if (!adapter->rx_scrq[i])
4121				continue;
4122
4123			netdev_dbg(adapter->netdev, "Releasing rx_scrq[%d]\n",
4124				   i);
4125			if (adapter->rx_scrq[i]->irq) {
4126				free_irq(adapter->rx_scrq[i]->irq,
4127					 adapter->rx_scrq[i]);
4128				irq_dispose_mapping(adapter->rx_scrq[i]->irq);
4129				adapter->rx_scrq[i]->irq = 0;
4130			}
4131
4132			release_sub_crq_queue(adapter, adapter->rx_scrq[i],
4133					      do_h_free);
4134		}
4135
4136		kfree(adapter->rx_scrq);
4137		adapter->rx_scrq = NULL;
4138		adapter->num_active_rx_scrqs = 0;
4139	}
4140}
4141
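/* Mask a sub-CRQ's interrupt source via the H_VIOCTL
 * H_DISABLE_VIO_INTERRUPT subfunction.
 */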
4142static int disable_scrq_irq(struct ibmvnic_adapter *adapter,
4143			    struct ibmvnic_sub_crq_queue *scrq)
4144{
4145	struct device *dev = &adapter->vdev->dev;
4146	unsigned long rc;
4147
4148	rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,
4149				H_DISABLE_VIO_INTERRUPT, scrq->hw_irq, 0, 0);
4150	if (rc)
4151		dev_err(dev, "Couldn't disable scrq irq 0x%lx. rc=%ld\n",
4152			scrq->hw_irq, rc);
4153	return rc;
4154}
4155
4156	/* We cannot use the IRQ chip EOI handler because that has the
4157 * unintended effect of changing the interrupt priority.
4158 */
4159static void ibmvnic_xics_eoi(struct device *dev, struct ibmvnic_sub_crq_queue *scrq)
4160{
4161	u64 val = 0xff000000 | scrq->hw_irq;
4162	unsigned long rc;
4163
4164	rc = plpar_hcall_norets(H_EOI, val);
4165	if (rc)
4166		dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", val, rc);
4167}
4168
4169/* Due to a firmware bug, the hypervisor can send an interrupt to a
4170 * transmit or receive queue just prior to a partition migration.
4171 * Force an EOI after migration.
4172 */
4173static void ibmvnic_clear_pending_interrupt(struct device *dev,
4174					    struct ibmvnic_sub_crq_queue *scrq)
4175{
4176	if (!xive_enabled())
4177		ibmvnic_xics_eoi(dev, scrq);
4178}
4179
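/* Unmask a sub-CRQ's interrupt source via the H_VIOCTL
 * H_ENABLE_VIO_INTERRUPT subfunction, first clearing any interrupt
 * left pending across a partition migration.
 */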
4180static int enable_scrq_irq(struct ibmvnic_adapter *adapter,
4181			   struct ibmvnic_sub_crq_queue *scrq)
4182{
4183	struct device *dev = &adapter->vdev->dev;
4184	unsigned long rc;
4185
4186	if (scrq->hw_irq > 0x100000000ULL) {
4187		dev_err(dev, "bad hw_irq = %lx\n", scrq->hw_irq);
4188		return 1;
4189	}
4190
4191	if (test_bit(0, &adapter->resetting) &&
4192	    adapter->reset_reason == VNIC_RESET_MOBILITY) {
4193		ibmvnic_clear_pending_interrupt(dev, scrq);
4194	}
4195
4196	rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,
4197				H_ENABLE_VIO_INTERRUPT, scrq->hw_irq, 0, 0);
4198	if (rc)
4199		dev_err(dev, "Couldn't enable scrq irq 0x%lx. rc=%ld\n",
4200			scrq->hw_irq, rc);
4201	return rc;
4202}
4203
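/* Process tx completions for one sub-CRQ: return completed buffers to
 * their tx pool, wake a stopped subqueue once at least half of the
 * ring entries are free again, and report the completed work to BQL.
 */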
4204static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
4205			       struct ibmvnic_sub_crq_queue *scrq)
4206{
4207	struct device *dev = &adapter->vdev->dev;
4208	int num_packets = 0, total_bytes = 0;
4209	struct ibmvnic_tx_pool *tx_pool;
4210	struct ibmvnic_tx_buff *txbuff;
4211	struct netdev_queue *txq;
4212	union sub_crq *next;
4213	int index, i;
4214
4215restart_loop:
4216	while (pending_scrq(adapter, scrq)) {
4217		unsigned int pool = scrq->pool_index;
4218		int num_entries = 0;
4219		next = ibmvnic_next_scrq(adapter, scrq);
4220		for (i = 0; i < next->tx_comp.num_comps; i++) {
4221			index = be32_to_cpu(next->tx_comp.correlators[i]);
4222			if (index & IBMVNIC_TSO_POOL_MASK) {
4223				tx_pool = &adapter->tso_pool[pool];
4224				index &= ~IBMVNIC_TSO_POOL_MASK;
4225			} else {
4226				tx_pool = &adapter->tx_pool[pool];
4227			}
4228
4229			txbuff = &tx_pool->tx_buff[index];
4230			num_packets++;
4231			num_entries += txbuff->num_entries;
4232			if (txbuff->skb) {
4233				total_bytes += txbuff->skb->len;
4234				if (next->tx_comp.rcs[i]) {
4235					dev_err(dev, "tx error %x\n",
4236						next->tx_comp.rcs[i]);
4237					dev_kfree_skb_irq(txbuff->skb);
4238				} else {
4239					dev_consume_skb_irq(txbuff->skb);
4240				}
4241				txbuff->skb = NULL;
4242			} else {
4243				netdev_warn(adapter->netdev,
4244					    "TX completion received with NULL socket buffer\n");
4245			}
4246			tx_pool->free_map[tx_pool->producer_index] = index;
4247			tx_pool->producer_index =
4248				(tx_pool->producer_index + 1) %
4249					tx_pool->num_buffers;
4250		}
4251		/* remove tx_comp scrq*/
4252		next->tx_comp.first = 0;
4253
4254
4255		if (atomic_sub_return(num_entries, &scrq->used) <=
4256		    (adapter->req_tx_entries_per_subcrq / 2) &&
4257		    __netif_subqueue_stopped(adapter->netdev,
4258					     scrq->pool_index)) {
4259			rcu_read_lock();
4260			if (adapter->tx_queues_active) {
4261				netif_wake_subqueue(adapter->netdev,
4262						    scrq->pool_index);
4263				netdev_dbg(adapter->netdev,
4264					   "Started queue %d\n",
4265					   scrq->pool_index);
4266			}
4267			rcu_read_unlock();
4268		}
4269	}
4270
4271	enable_scrq_irq(adapter, scrq);
4272
4273	if (pending_scrq(adapter, scrq)) {
4274		disable_scrq_irq(adapter, scrq);
4275		goto restart_loop;
4276	}
4277
4278	txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index);
4279	netdev_tx_completed_queue(txq, num_packets, total_bytes);
4280
4281	return 0;
4282}
4283
4284static irqreturn_t ibmvnic_interrupt_tx(int irq, void *instance)
4285{
4286	struct ibmvnic_sub_crq_queue *scrq = instance;
4287	struct ibmvnic_adapter *adapter = scrq->adapter;
4288
4289	disable_scrq_irq(adapter, scrq);
4290	ibmvnic_complete_tx(adapter, scrq);
4291
4292	return IRQ_HANDLED;
4293}
4294
4295static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance)
4296{
4297	struct ibmvnic_sub_crq_queue *scrq = instance;
4298	struct ibmvnic_adapter *adapter = scrq->adapter;
4299
4300	/* When booting a kdump kernel we can hit pending interrupts
4301	 * prior to completing driver initialization.
4302	 */
4303	if (unlikely(adapter->state != VNIC_OPEN))
4304		return IRQ_NONE;
4305
4306	adapter->rx_stats_buffers[scrq->scrq_num].interrupts++;
4307
4308	if (napi_schedule_prep(&adapter->napi[scrq->scrq_num])) {
4309		disable_scrq_irq(adapter, scrq);
4310		__napi_schedule(&adapter->napi[scrq->scrq_num]);
4311	}
4312
4313	return IRQ_HANDLED;
4314}
4315
4316static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter)
4317{
4318	struct device *dev = &adapter->vdev->dev;
4319	struct ibmvnic_sub_crq_queue *scrq;
4320	int i = 0, j = 0;
4321	int rc = 0;
4322
4323	for (i = 0; i < adapter->req_tx_queues; i++) {
4324		netdev_dbg(adapter->netdev, "Initializing tx_scrq[%d] irq\n",
4325			   i);
4326		scrq = adapter->tx_scrq[i];
4327		scrq->irq = irq_create_mapping(NULL, scrq->hw_irq);
4328
4329		if (!scrq->irq) {
4330			rc = -EINVAL;
4331			dev_err(dev, "Error mapping irq\n");
4332			goto req_tx_irq_failed;
4333		}
4334
4335		snprintf(scrq->name, sizeof(scrq->name), "ibmvnic-%x-tx%d",
4336			 adapter->vdev->unit_address, i);
4337		rc = request_irq(scrq->irq, ibmvnic_interrupt_tx,
4338				 0, scrq->name, scrq);
4339
4340		if (rc) {
4341			dev_err(dev, "Couldn't register tx irq 0x%x. rc=%d\n",
4342				scrq->irq, rc);
4343			irq_dispose_mapping(scrq->irq);
4344			goto req_tx_irq_failed;
4345		}
4346	}
4347
4348	for (i = 0; i < adapter->req_rx_queues; i++) {
4349		netdev_dbg(adapter->netdev, "Initializing rx_scrq[%d] irq\n",
4350			   i);
4351		scrq = adapter->rx_scrq[i];
4352		scrq->irq = irq_create_mapping(NULL, scrq->hw_irq);
4353		if (!scrq->irq) {
4354			rc = -EINVAL;
4355			dev_err(dev, "Error mapping irq\n");
4356			goto req_rx_irq_failed;
4357		}
4358		snprintf(scrq->name, sizeof(scrq->name), "ibmvnic-%x-rx%d",
4359			 adapter->vdev->unit_address, i);
4360		rc = request_irq(scrq->irq, ibmvnic_interrupt_rx,
4361				 0, scrq->name, scrq);
4362		if (rc) {
4363			dev_err(dev, "Couldn't register rx irq 0x%x. rc=%d\n",
4364				scrq->irq, rc);
4365			irq_dispose_mapping(scrq->irq);
4366			goto req_rx_irq_failed;
4367		}
4368	}
4369
4370	cpus_read_lock();
4371	ibmvnic_set_affinity(adapter);
4372	cpus_read_unlock();
4373
4374	return rc;
4375
4376req_rx_irq_failed:
4377	for (j = 0; j < i; j++) {
4378		free_irq(adapter->rx_scrq[j]->irq, adapter->rx_scrq[j]);
4379		irq_dispose_mapping(adapter->rx_scrq[j]->irq);
4380	}
4381	i = adapter->req_tx_queues;
4382req_tx_irq_failed:
4383	for (j = 0; j < i; j++) {
4384		free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]);
4385		irq_dispose_mapping(adapter->tx_scrq[j]->irq);
4386	}
4387	release_sub_crqs(adapter, 1);
4388	return rc;
4389}
4390
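/* Allocate all requested tx and rx sub-CRQs. If fewer queues could be
 * registered than requested, the tx/rx requests are scaled down (but
 * not below the advertised minimums) before the queues are assigned.
 */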
4391static int init_sub_crqs(struct ibmvnic_adapter *adapter)
4392{
4393	struct device *dev = &adapter->vdev->dev;
4394	struct ibmvnic_sub_crq_queue **allqueues;
4395	int registered_queues = 0;
4396	int total_queues;
4397	int more = 0;
4398	int i;
4399
4400	total_queues = adapter->req_tx_queues + adapter->req_rx_queues;
4401
4402	allqueues = kcalloc(total_queues, sizeof(*allqueues), GFP_KERNEL);
4403	if (!allqueues)
4404		return -ENOMEM;
4405
4406	for (i = 0; i < total_queues; i++) {
4407		allqueues[i] = init_sub_crq_queue(adapter);
4408		if (!allqueues[i]) {
4409			dev_warn(dev, "Couldn't allocate all sub-crqs\n");
4410			break;
4411		}
4412		registered_queues++;
4413	}
4414
4415	/* Make sure we were able to register the minimum number of queues */
4416	if (registered_queues <
4417	    adapter->min_tx_queues + adapter->min_rx_queues) {
4418		dev_err(dev, "Fatal: Couldn't init min number of sub-crqs\n");
4419		goto tx_failed;
4420	}
4421
4422	/* Reduce the requested tx/rx queue counts to absorb the allocation shortfall */
4423	for (i = 0; i < total_queues - registered_queues + more; i++) {
4424		netdev_dbg(adapter->netdev, "Reducing number of queues\n");
4425		switch (i % 3) {
4426		case 0:
4427			if (adapter->req_rx_queues > adapter->min_rx_queues)
4428				adapter->req_rx_queues--;
4429			else
4430				more++;
4431			break;
4432		case 1:
4433			if (adapter->req_tx_queues > adapter->min_tx_queues)
4434				adapter->req_tx_queues--;
4435			else
4436				more++;
4437			break;
4438		}
4439	}
4440
4441	adapter->tx_scrq = kcalloc(adapter->req_tx_queues,
4442				   sizeof(*adapter->tx_scrq), GFP_KERNEL);
4443	if (!adapter->tx_scrq)
4444		goto tx_failed;
4445
4446	for (i = 0; i < adapter->req_tx_queues; i++) {
4447		adapter->tx_scrq[i] = allqueues[i];
4448		adapter->tx_scrq[i]->pool_index = i;
4449		adapter->num_active_tx_scrqs++;
4450	}
4451
4452	adapter->rx_scrq = kcalloc(adapter->req_rx_queues,
4453				   sizeof(*adapter->rx_scrq), GFP_KERNEL);
4454	if (!adapter->rx_scrq)
4455		goto rx_failed;
4456
4457	for (i = 0; i < adapter->req_rx_queues; i++) {
4458		adapter->rx_scrq[i] = allqueues[i + adapter->req_tx_queues];
4459		adapter->rx_scrq[i]->scrq_num = i;
4460		adapter->num_active_rx_scrqs++;
4461	}
4462
4463	kfree(allqueues);
4464	return 0;
4465
4466rx_failed:
4467	kfree(adapter->tx_scrq);
4468	adapter->tx_scrq = NULL;
4469tx_failed:
4470	for (i = 0; i < registered_queues; i++)
4471		release_sub_crq_queue(adapter, allqueues[i], 1);
4472	kfree(allqueues);
4473	return -ENOMEM;
4474}
4475
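/* Send the REQUEST_CAPABILITY CRQs that negotiate queue counts, ring
 * sizes, MTU and promiscuous mode with the VNIC server. On the first
 * attempt, the requested ring sizes are clamped so that a full ring of
 * buffers still fits within one long term buffer set.
 */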
4476static void send_request_cap(struct ibmvnic_adapter *adapter, int retry)
4477{
4478	struct device *dev = &adapter->vdev->dev;
4479	union ibmvnic_crq crq;
4480	int max_entries;
4481	int cap_reqs;
4482
4483	/* We send out 6 or 7 REQUEST_CAPABILITY CRQs below (depending on
4484	 * the PROMISC flag). Initialize this count upfront. When the tasklet
4485	 * receives a response to all of these, it will send the next protocol
4486	 * message (QUERY_IP_OFFLOAD).
4487	 */
4488	if (!(adapter->netdev->flags & IFF_PROMISC) ||
4489	    adapter->promisc_supported)
4490		cap_reqs = 7;
4491	else
4492		cap_reqs = 6;
4493
4494	if (!retry) {
4495		/* Sub-CRQ entries are 32 bytes long */
4496		int entries_page = 4 * PAGE_SIZE / (sizeof(u64) * 4);
4497
4498		atomic_set(&adapter->running_cap_crqs, cap_reqs);
4499
4500		if (adapter->min_tx_entries_per_subcrq > entries_page ||
4501		    adapter->min_rx_add_entries_per_subcrq > entries_page) {
4502			dev_err(dev, "Fatal, invalid entries per sub-crq\n");
4503			return;
4504		}
4505
4506		if (adapter->desired.mtu)
4507			adapter->req_mtu = adapter->desired.mtu;
4508		else
4509			adapter->req_mtu = adapter->netdev->mtu + ETH_HLEN;
4510
4511		if (!adapter->desired.tx_entries)
4512			adapter->desired.tx_entries =
4513					adapter->max_tx_entries_per_subcrq;
4514		if (!adapter->desired.rx_entries)
4515			adapter->desired.rx_entries =
4516					adapter->max_rx_add_entries_per_subcrq;
4517
4518		max_entries = IBMVNIC_LTB_SET_SIZE /
4519			      (adapter->req_mtu + IBMVNIC_BUFFER_HLEN);
4520
4521		if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) *
4522			adapter->desired.tx_entries > IBMVNIC_LTB_SET_SIZE) {
4523			adapter->desired.tx_entries = max_entries;
4524		}
4525
4526		if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) *
4527			adapter->desired.rx_entries > IBMVNIC_LTB_SET_SIZE) {
4528			adapter->desired.rx_entries = max_entries;
4529		}
4530
4531		if (adapter->desired.tx_entries)
4532			adapter->req_tx_entries_per_subcrq =
4533					adapter->desired.tx_entries;
4534		else
4535			adapter->req_tx_entries_per_subcrq =
4536					adapter->max_tx_entries_per_subcrq;
4537
4538		if (adapter->desired.rx_entries)
4539			adapter->req_rx_add_entries_per_subcrq =
4540					adapter->desired.rx_entries;
4541		else
4542			adapter->req_rx_add_entries_per_subcrq =
4543					adapter->max_rx_add_entries_per_subcrq;
4544
4545		if (adapter->desired.tx_queues)
4546			adapter->req_tx_queues =
4547					adapter->desired.tx_queues;
4548		else
4549			adapter->req_tx_queues =
4550					adapter->opt_tx_comp_sub_queues;
4551
4552		if (adapter->desired.rx_queues)
4553			adapter->req_rx_queues =
4554					adapter->desired.rx_queues;
4555		else
4556			adapter->req_rx_queues =
4557					adapter->opt_rx_comp_queues;
4558
4559		adapter->req_rx_add_queues = adapter->max_rx_add_queues;
4560	} else {
4561		atomic_add(cap_reqs, &adapter->running_cap_crqs);
4562	}
4563	memset(&crq, 0, sizeof(crq));
4564	crq.request_capability.first = IBMVNIC_CRQ_CMD;
4565	crq.request_capability.cmd = REQUEST_CAPABILITY;
4566
4567	crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES);
4568	crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues);
4569	cap_reqs--;
4570	ibmvnic_send_crq(adapter, &crq);
4571
4572	crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES);
4573	crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues);
4574	cap_reqs--;
4575	ibmvnic_send_crq(adapter, &crq);
4576
4577	crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES);
4578	crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_queues);
4579	cap_reqs--;
4580	ibmvnic_send_crq(adapter, &crq);
4581
4582	crq.request_capability.capability =
4583	    cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ);
4584	crq.request_capability.number =
4585	    cpu_to_be64(adapter->req_tx_entries_per_subcrq);
4586	cap_reqs--;
4587	ibmvnic_send_crq(adapter, &crq);
4588
4589	crq.request_capability.capability =
4590	    cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ);
4591	crq.request_capability.number =
4592	    cpu_to_be64(adapter->req_rx_add_entries_per_subcrq);
4593	cap_reqs--;
4594	ibmvnic_send_crq(adapter, &crq);
4595
4596	crq.request_capability.capability = cpu_to_be16(REQ_MTU);
4597	crq.request_capability.number = cpu_to_be64(adapter->req_mtu);
4598	cap_reqs--;
4599	ibmvnic_send_crq(adapter, &crq);
4600
4601	if (adapter->netdev->flags & IFF_PROMISC) {
4602		if (adapter->promisc_supported) {
4603			crq.request_capability.capability =
4604			    cpu_to_be16(PROMISC_REQUESTED);
4605			crq.request_capability.number = cpu_to_be64(1);
4606			cap_reqs--;
4607			ibmvnic_send_crq(adapter, &crq);
4608		}
4609	} else {
4610		crq.request_capability.capability =
4611		    cpu_to_be16(PROMISC_REQUESTED);
4612		crq.request_capability.number = cpu_to_be64(0);
4613		cap_reqs--;
4614		ibmvnic_send_crq(adapter, &crq);
4615	}
4616
4617	/* Keep at end to catch any discrepancy between expected and actual
4618	 * CRQs sent.
4619	 */
4620	WARN_ON(cap_reqs != 0);
4621}
4622
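/* Return true if the next descriptor in the sub-CRQ has been marked
 * valid by the server, i.e. there is work pending on this queue.
 */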
4623static int pending_scrq(struct ibmvnic_adapter *adapter,
4624			struct ibmvnic_sub_crq_queue *scrq)
4625{
4626	union sub_crq *entry = &scrq->msgs[scrq->cur];
4627	int rc;
4628
4629	rc = !!(entry->generic.first & IBMVNIC_CRQ_CMD_RSP);
4630
4631	/* Ensure that the SCRQ valid flag is loaded prior to loading the
4632	 * contents of the SCRQ descriptor
4633	 */
4634	dma_rmb();
4635
4636	return rc;
4637}
4638
4639static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter,
4640					struct ibmvnic_sub_crq_queue *scrq)
4641{
4642	union sub_crq *entry;
4643	unsigned long flags;
4644
4645	spin_lock_irqsave(&scrq->lock, flags);
4646	entry = &scrq->msgs[scrq->cur];
4647	if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP) {
4648		if (++scrq->cur == scrq->size)
4649			scrq->cur = 0;
4650	} else {
4651		entry = NULL;
4652	}
4653	spin_unlock_irqrestore(&scrq->lock, flags);
4654
4655	/* Ensure that the SCRQ valid flag is loaded prior to loading the
4656	 * contents of the SCRQ descriptor
4657	 */
4658	dma_rmb();
4659
4660	return entry;
4661}
4662
4663static union ibmvnic_crq *ibmvnic_next_crq(struct ibmvnic_adapter *adapter)
4664{
4665	struct ibmvnic_crq_queue *queue = &adapter->crq;
4666	union ibmvnic_crq *crq;
4667
4668	crq = &queue->msgs[queue->cur];
4669	if (crq->generic.first & IBMVNIC_CRQ_CMD_RSP) {
4670		if (++queue->cur == queue->size)
4671			queue->cur = 0;
4672	} else {
4673		crq = NULL;
4674	}
4675
4676	return crq;
4677}
4678
4679static void print_subcrq_error(struct device *dev, int rc, const char *func)
4680{
4681	switch (rc) {
4682	case H_PARAMETER:
4683		dev_warn_ratelimited(dev,
4684				     "%s failed: Send request is malformed or adapter failover pending. (rc=%d)\n",
4685				     func, rc);
4686		break;
4687	case H_CLOSED:
4688		dev_warn_ratelimited(dev,
4689				     "%s failed: Backing queue closed. Adapter is down or failover pending. (rc=%d)\n",
4690				     func, rc);
4691		break;
4692	default:
4693		dev_err_ratelimited(dev, "%s failed: (rc=%d)\n", func, rc);
4694		break;
4695	}
4696}
4697
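/* Submit a batch of num_entries descriptors located at ioba to the
 * given sub-CRQ using the H_SEND_SUB_CRQ_INDIRECT hypercall.
 */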
4698static int send_subcrq_indirect(struct ibmvnic_adapter *adapter,
4699				u64 remote_handle, u64 ioba, u64 num_entries)
4700{
4701	unsigned int ua = adapter->vdev->unit_address;
4702	struct device *dev = &adapter->vdev->dev;
4703	int rc;
4704
4705	/* Make sure the hypervisor sees the complete request */
4706	dma_wmb();
4707	rc = plpar_hcall_norets(H_SEND_SUB_CRQ_INDIRECT, ua,
4708				cpu_to_be64(remote_handle),
4709				ioba, num_entries);
4710
4711	if (rc)
4712		print_subcrq_error(dev, rc, __func__);
4713
4714	return rc;
4715}
4716
4717static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter,
4718			    union ibmvnic_crq *crq)
4719{
4720	unsigned int ua = adapter->vdev->unit_address;
4721	struct device *dev = &adapter->vdev->dev;
4722	u64 *u64_crq = (u64 *)crq;
4723	int rc;
4724
4725	netdev_dbg(adapter->netdev, "Sending CRQ: %016lx %016lx\n",
4726		   (unsigned long)cpu_to_be64(u64_crq[0]),
4727		   (unsigned long)cpu_to_be64(u64_crq[1]));
4728
4729	if (!adapter->crq.active &&
4730	    crq->generic.first != IBMVNIC_CRQ_INIT_CMD) {
4731		dev_warn(dev, "Invalid request detected while CRQ is inactive, possible device state change during reset\n");
4732		return -EINVAL;
4733	}
4734
4735	/* Make sure the hypervisor sees the complete request */
4736	dma_wmb();
4737
4738	rc = plpar_hcall_norets(H_SEND_CRQ, ua,
4739				cpu_to_be64(u64_crq[0]),
4740				cpu_to_be64(u64_crq[1]));
4741
4742	if (rc) {
4743		if (rc == H_CLOSED) {
4744			dev_warn(dev, "CRQ Queue closed\n");
4745			/* do not reset; report the failure and wait for passive init from the server */
4746		}
4747
4748		dev_warn(dev, "Send error (rc=%d)\n", rc);
4749	}
4750
4751	return rc;
4752}
4753
4754static int ibmvnic_send_crq_init(struct ibmvnic_adapter *adapter)
4755{
4756	struct device *dev = &adapter->vdev->dev;
4757	union ibmvnic_crq crq;
4758	int retries = 100;
4759	int rc;
4760
4761	memset(&crq, 0, sizeof(crq));
4762	crq.generic.first = IBMVNIC_CRQ_INIT_CMD;
4763	crq.generic.cmd = IBMVNIC_CRQ_INIT;
4764	netdev_dbg(adapter->netdev, "Sending CRQ init\n");
4765
4766	do {
4767		rc = ibmvnic_send_crq(adapter, &crq);
4768		if (rc != H_CLOSED)
4769			break;
4770		retries--;
4771		msleep(50);
4772
4773	} while (retries > 0);
4774
4775	if (rc) {
4776		dev_err(dev, "Failed to send init request, rc = %d\n", rc);
4777		return rc;
4778	}
4779
4780	return 0;
4781}
4782
4783struct vnic_login_client_data {
4784	u8	type;
4785	__be16	len;
4786	char	name[];
4787} __packed;
4788
4789static int vnic_client_data_len(struct ibmvnic_adapter *adapter)
4790{
4791	int len;
4792
4793	/* Calculate the amount of buffer space needed for the
4794	 * vnic client data in the login buffer. There are four entries,
4795	 * OS name, LPAR name, device name, and a null last entry.
4796	 */
4797	len = 4 * sizeof(struct vnic_login_client_data);
4798	len += 6; /* "Linux" plus NULL */
4799	len += strlen(utsname()->nodename) + 1;
4800	len += strlen(adapter->netdev->name) + 1;
4801
4802	return len;
4803}
4804
4805static void vnic_add_client_data(struct ibmvnic_adapter *adapter,
4806				 struct vnic_login_client_data *vlcd)
4807{
4808	const char *os_name = "Linux";
4809	int len;
4810
4811	/* Type 1 - LPAR OS */
4812	vlcd->type = 1;
4813	len = strlen(os_name) + 1;
4814	vlcd->len = cpu_to_be16(len);
4815	strscpy(vlcd->name, os_name, len);
4816	vlcd = (struct vnic_login_client_data *)(vlcd->name + len);
4817
4818	/* Type 2 - LPAR name */
4819	vlcd->type = 2;
4820	len = strlen(utsname()->nodename) + 1;
4821	vlcd->len = cpu_to_be16(len);
4822	strscpy(vlcd->name, utsname()->nodename, len);
4823	vlcd = (struct vnic_login_client_data *)(vlcd->name + len);
4824
4825	/* Type 3 - device name */
4826	vlcd->type = 3;
4827	len = strlen(adapter->netdev->name) + 1;
4828	vlcd->len = cpu_to_be16(len);
4829	strscpy(vlcd->name, adapter->netdev->name, len);
4830}
4831
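/* Build and send the LOGIN request. The login buffer carries the
 * sub-CRQ numbers of all tx/rx queues plus the client data (OS, LPAR
 * and device names), and a response buffer is mapped for the server's
 * reply.
 */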
4832static int send_login(struct ibmvnic_adapter *adapter)
4833{
4834	struct ibmvnic_login_rsp_buffer *login_rsp_buffer;
4835	struct ibmvnic_login_buffer *login_buffer;
4836	struct device *dev = &adapter->vdev->dev;
4837	struct vnic_login_client_data *vlcd;
4838	dma_addr_t rsp_buffer_token;
4839	dma_addr_t buffer_token;
4840	size_t rsp_buffer_size;
4841	union ibmvnic_crq crq;
4842	int client_data_len;
4843	size_t buffer_size;
4844	__be64 *tx_list_p;
4845	__be64 *rx_list_p;
4846	int rc;
4847	int i;
4848
4849	if (!adapter->tx_scrq || !adapter->rx_scrq) {
4850		netdev_err(adapter->netdev,
4851			   "RX or TX queues are not allocated, device login failed\n");
4852		return -ENOMEM;
4853	}
4854
4855	release_login_buffer(adapter);
4856	release_login_rsp_buffer(adapter);
4857
4858	client_data_len = vnic_client_data_len(adapter);
4859
4860	buffer_size =
4861	    sizeof(struct ibmvnic_login_buffer) +
4862	    sizeof(u64) * (adapter->req_tx_queues + adapter->req_rx_queues) +
4863	    client_data_len;
4864
4865	login_buffer = kzalloc(buffer_size, GFP_ATOMIC);
4866	if (!login_buffer)
4867		goto buf_alloc_failed;
4868
4869	buffer_token = dma_map_single(dev, login_buffer, buffer_size,
4870				      DMA_TO_DEVICE);
4871	if (dma_mapping_error(dev, buffer_token)) {
4872		dev_err(dev, "Couldn't map login buffer\n");
4873		goto buf_map_failed;
4874	}
4875
4876	rsp_buffer_size = sizeof(struct ibmvnic_login_rsp_buffer) +
4877			  sizeof(u64) * adapter->req_tx_queues +
4878			  sizeof(u64) * adapter->req_rx_queues +
4879			  sizeof(u64) * adapter->req_rx_queues +
4880			  sizeof(u8) * IBMVNIC_TX_DESC_VERSIONS;
4881
4882	login_rsp_buffer = kmalloc(rsp_buffer_size, GFP_ATOMIC);
4883	if (!login_rsp_buffer)
4884		goto buf_rsp_alloc_failed;
4885
4886	rsp_buffer_token = dma_map_single(dev, login_rsp_buffer,
4887					  rsp_buffer_size, DMA_FROM_DEVICE);
4888	if (dma_mapping_error(dev, rsp_buffer_token)) {
4889		dev_err(dev, "Couldn't map login rsp buffer\n");
4890		goto buf_rsp_map_failed;
4891	}
4892
4893	adapter->login_buf = login_buffer;
4894	adapter->login_buf_token = buffer_token;
4895	adapter->login_buf_sz = buffer_size;
4896	adapter->login_rsp_buf = login_rsp_buffer;
4897	adapter->login_rsp_buf_token = rsp_buffer_token;
4898	adapter->login_rsp_buf_sz = rsp_buffer_size;
4899
4900	login_buffer->len = cpu_to_be32(buffer_size);
4901	login_buffer->version = cpu_to_be32(INITIAL_VERSION_LB);
4902	login_buffer->num_txcomp_subcrqs = cpu_to_be32(adapter->req_tx_queues);
4903	login_buffer->off_txcomp_subcrqs =
4904	    cpu_to_be32(sizeof(struct ibmvnic_login_buffer));
4905	login_buffer->num_rxcomp_subcrqs = cpu_to_be32(adapter->req_rx_queues);
4906	login_buffer->off_rxcomp_subcrqs =
4907	    cpu_to_be32(sizeof(struct ibmvnic_login_buffer) +
4908			sizeof(u64) * adapter->req_tx_queues);
4909	login_buffer->login_rsp_ioba = cpu_to_be32(rsp_buffer_token);
4910	login_buffer->login_rsp_len = cpu_to_be32(rsp_buffer_size);
4911
4912	tx_list_p = (__be64 *)((char *)login_buffer +
4913				      sizeof(struct ibmvnic_login_buffer));
4914	rx_list_p = (__be64 *)((char *)login_buffer +
4915				      sizeof(struct ibmvnic_login_buffer) +
4916				      sizeof(u64) * adapter->req_tx_queues);
4917
4918	for (i = 0; i < adapter->req_tx_queues; i++) {
4919		if (adapter->tx_scrq[i]) {
4920			tx_list_p[i] =
4921				cpu_to_be64(adapter->tx_scrq[i]->crq_num);
4922		}
4923	}
4924
4925	for (i = 0; i < adapter->req_rx_queues; i++) {
4926		if (adapter->rx_scrq[i]) {
4927			rx_list_p[i] =
4928				cpu_to_be64(adapter->rx_scrq[i]->crq_num);
4929		}
4930	}
4931
4932	/* Insert vNIC login client data */
4933	vlcd = (struct vnic_login_client_data *)
4934		((char *)rx_list_p + (sizeof(u64) * adapter->req_rx_queues));
4935	login_buffer->client_data_offset =
4936			cpu_to_be32((char *)vlcd - (char *)login_buffer);
4937	login_buffer->client_data_len = cpu_to_be32(client_data_len);
4938
4939	vnic_add_client_data(adapter, vlcd);
4940
4941	netdev_dbg(adapter->netdev, "Login Buffer:\n");
4942	for (i = 0; i < (adapter->login_buf_sz - 1) / 8 + 1; i++) {
4943		netdev_dbg(adapter->netdev, "%016lx\n",
4944			   ((unsigned long *)(adapter->login_buf))[i]);
4945	}
4946
4947	memset(&crq, 0, sizeof(crq));
4948	crq.login.first = IBMVNIC_CRQ_CMD;
4949	crq.login.cmd = LOGIN;
4950	crq.login.ioba = cpu_to_be32(buffer_token);
4951	crq.login.len = cpu_to_be32(buffer_size);
4952
4953	adapter->login_pending = true;
4954	rc = ibmvnic_send_crq(adapter, &crq);
4955	if (rc) {
4956		adapter->login_pending = false;
4957		netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc);
4958		goto buf_send_failed;
4959	}
4960
4961	return 0;
4962
4963buf_send_failed:
4964	dma_unmap_single(dev, rsp_buffer_token, rsp_buffer_size,
4965			 DMA_FROM_DEVICE);
4966buf_rsp_map_failed:
4967	kfree(login_rsp_buffer);
4968	adapter->login_rsp_buf = NULL;
4969buf_rsp_alloc_failed:
4970	dma_unmap_single(dev, buffer_token, buffer_size, DMA_TO_DEVICE);
4971buf_map_failed:
4972	kfree(login_buffer);
4973	adapter->login_buf = NULL;
4974buf_alloc_failed:
4975	return -ENOMEM;
4976}
4977
4978static int send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
4979			    u32 len, u8 map_id)
4980{
4981	union ibmvnic_crq crq;
4982
4983	memset(&crq, 0, sizeof(crq));
4984	crq.request_map.first = IBMVNIC_CRQ_CMD;
4985	crq.request_map.cmd = REQUEST_MAP;
4986	crq.request_map.map_id = map_id;
4987	crq.request_map.ioba = cpu_to_be32(addr);
4988	crq.request_map.len = cpu_to_be32(len);
4989	return ibmvnic_send_crq(adapter, &crq);
4990}
4991
4992static int send_request_unmap(struct ibmvnic_adapter *adapter, u8 map_id)
4993{
4994	union ibmvnic_crq crq;
4995
4996	memset(&crq, 0, sizeof(crq));
4997	crq.request_unmap.first = IBMVNIC_CRQ_CMD;
4998	crq.request_unmap.cmd = REQUEST_UNMAP;
4999	crq.request_unmap.map_id = map_id;
5000	return ibmvnic_send_crq(adapter, &crq);
5001}
5002
5003static void send_query_map(struct ibmvnic_adapter *adapter)
5004{
5005	union ibmvnic_crq crq;
5006
5007	memset(&crq, 0, sizeof(crq));
5008	crq.query_map.first = IBMVNIC_CRQ_CMD;
5009	crq.query_map.cmd = QUERY_MAP;
5010	ibmvnic_send_crq(adapter, &crq);
5011}
5012
5013/* Send a series of CRQs requesting various capabilities of the VNIC server */
5014static void send_query_cap(struct ibmvnic_adapter *adapter)
5015{
5016	union ibmvnic_crq crq;
5017	int cap_reqs;
5018
5019	/* We send out 25 QUERY_CAPABILITY CRQs below.  Initialize this count
5020	 * upfront. When the tasklet receives a response to all of these, it
5021	 * can send out the next protocol message (REQUEST_CAPABILITY).
5022	 */
5023	cap_reqs = 25;
5024
5025	atomic_set(&adapter->running_cap_crqs, cap_reqs);
5026
5027	memset(&crq, 0, sizeof(crq));
5028	crq.query_capability.first = IBMVNIC_CRQ_CMD;
5029	crq.query_capability.cmd = QUERY_CAPABILITY;
5030
5031	crq.query_capability.capability = cpu_to_be16(MIN_TX_QUEUES);
5032	ibmvnic_send_crq(adapter, &crq);
5033	cap_reqs--;
5034
5035	crq.query_capability.capability = cpu_to_be16(MIN_RX_QUEUES);
5036	ibmvnic_send_crq(adapter, &crq);
5037	cap_reqs--;
5038
5039	crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_QUEUES);
5040	ibmvnic_send_crq(adapter, &crq);
5041	cap_reqs--;
5042
5043	crq.query_capability.capability = cpu_to_be16(MAX_TX_QUEUES);
5044	ibmvnic_send_crq(adapter, &crq);
5045	cap_reqs--;
5046
5047	crq.query_capability.capability = cpu_to_be16(MAX_RX_QUEUES);
5048	ibmvnic_send_crq(adapter, &crq);
5049	cap_reqs--;
5050
5051	crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_QUEUES);
5052	ibmvnic_send_crq(adapter, &crq);
5053	cap_reqs--;
5054
5055	crq.query_capability.capability =
5056	    cpu_to_be16(MIN_TX_ENTRIES_PER_SUBCRQ);
5057	ibmvnic_send_crq(adapter, &crq);
5058	cap_reqs--;
5059
5060	crq.query_capability.capability =
5061	    cpu_to_be16(MIN_RX_ADD_ENTRIES_PER_SUBCRQ);
5062	ibmvnic_send_crq(adapter, &crq);
5063	cap_reqs--;
5064
5065	crq.query_capability.capability =
5066	    cpu_to_be16(MAX_TX_ENTRIES_PER_SUBCRQ);
5067	ibmvnic_send_crq(adapter, &crq);
5068	cap_reqs--;
5069
5070	crq.query_capability.capability =
5071	    cpu_to_be16(MAX_RX_ADD_ENTRIES_PER_SUBCRQ);
5072	ibmvnic_send_crq(adapter, &crq);
5073	cap_reqs--;
5074
5075	crq.query_capability.capability = cpu_to_be16(TCP_IP_OFFLOAD);
5076	ibmvnic_send_crq(adapter, &crq);
5077	cap_reqs--;
5078
5079	crq.query_capability.capability = cpu_to_be16(PROMISC_SUPPORTED);
5080	ibmvnic_send_crq(adapter, &crq);
5081	cap_reqs--;
5082
5083	crq.query_capability.capability = cpu_to_be16(MIN_MTU);
5084	ibmvnic_send_crq(adapter, &crq);
5085	cap_reqs--;
5086
5087	crq.query_capability.capability = cpu_to_be16(MAX_MTU);
5088	ibmvnic_send_crq(adapter, &crq);
5089	cap_reqs--;
5090
5091	crq.query_capability.capability = cpu_to_be16(MAX_MULTICAST_FILTERS);
5092	ibmvnic_send_crq(adapter, &crq);
5093	cap_reqs--;
5094
5095	crq.query_capability.capability = cpu_to_be16(VLAN_HEADER_INSERTION);
5096	ibmvnic_send_crq(adapter, &crq);
5097	cap_reqs--;
5098
5099	crq.query_capability.capability = cpu_to_be16(RX_VLAN_HEADER_INSERTION);
5100	ibmvnic_send_crq(adapter, &crq);
5101	cap_reqs--;
5102
5103	crq.query_capability.capability = cpu_to_be16(MAX_TX_SG_ENTRIES);
5104	ibmvnic_send_crq(adapter, &crq);
5105	cap_reqs--;
5106
5107	crq.query_capability.capability = cpu_to_be16(RX_SG_SUPPORTED);
5108	ibmvnic_send_crq(adapter, &crq);
5109	cap_reqs--;
5110
5111	crq.query_capability.capability = cpu_to_be16(OPT_TX_COMP_SUB_QUEUES);
5112	ibmvnic_send_crq(adapter, &crq);
5113	cap_reqs--;
5114
5115	crq.query_capability.capability = cpu_to_be16(OPT_RX_COMP_QUEUES);
5116	ibmvnic_send_crq(adapter, &crq);
5117	cap_reqs--;
5118
5119	crq.query_capability.capability =
5120			cpu_to_be16(OPT_RX_BUFADD_Q_PER_RX_COMP_Q);
5121	ibmvnic_send_crq(adapter, &crq);
5122	cap_reqs--;
5123
5124	crq.query_capability.capability =
5125			cpu_to_be16(OPT_TX_ENTRIES_PER_SUBCRQ);
5126	ibmvnic_send_crq(adapter, &crq);
5127	cap_reqs--;
5128
5129	crq.query_capability.capability =
5130			cpu_to_be16(OPT_RXBA_ENTRIES_PER_SUBCRQ);
5131	ibmvnic_send_crq(adapter, &crq);
5132	cap_reqs--;
5133
5134	crq.query_capability.capability = cpu_to_be16(TX_RX_DESC_REQ);
5135
5136	ibmvnic_send_crq(adapter, &crq);
5137	cap_reqs--;
5138
5139	/* Keep at end to catch any discrepancy between expected and actual
5140	 * CRQs sent.
5141	 */
5142	WARN_ON(cap_reqs != 0);
5143}
5144
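/* Map the IP offload query buffer for DMA and send a QUERY_IP_OFFLOAD
 * CRQ asking the server to fill it in; the reply is processed when the
 * response CRQ arrives.
 */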
5145static void send_query_ip_offload(struct ibmvnic_adapter *adapter)
5146{
5147	int buf_sz = sizeof(struct ibmvnic_query_ip_offload_buffer);
5148	struct device *dev = &adapter->vdev->dev;
5149	union ibmvnic_crq crq;
5150
5151	adapter->ip_offload_tok =
5152		dma_map_single(dev,
5153			       &adapter->ip_offload_buf,
5154			       buf_sz,
5155			       DMA_FROM_DEVICE);
5156
5157	if (dma_mapping_error(dev, adapter->ip_offload_tok)) {
5158		if (!firmware_has_feature(FW_FEATURE_CMO))
5159			dev_err(dev, "Couldn't map offload buffer\n");
5160		return;
5161	}
5162
5163	memset(&crq, 0, sizeof(crq));
5164	crq.query_ip_offload.first = IBMVNIC_CRQ_CMD;
5165	crq.query_ip_offload.cmd = QUERY_IP_OFFLOAD;
5166	crq.query_ip_offload.len = cpu_to_be32(buf_sz);
5167	crq.query_ip_offload.ioba =
5168	    cpu_to_be32(adapter->ip_offload_tok);
5169
5170	ibmvnic_send_crq(adapter, &crq);
5171}
5172
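/* Translate the queried offload capabilities into netdev feature flags
 * and send a CONTROL_IP_OFFLOAD CRQ enabling the checksum and TSO
 * offloads the server supports (large receive remains disabled).
 */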
5173static void send_control_ip_offload(struct ibmvnic_adapter *adapter)
5174{
5175	struct ibmvnic_control_ip_offload_buffer *ctrl_buf = &adapter->ip_offload_ctrl;
5176	struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf;
5177	struct device *dev = &adapter->vdev->dev;
5178	netdev_features_t old_hw_features = 0;
5179	union ibmvnic_crq crq;
5180
5181	adapter->ip_offload_ctrl_tok =
5182		dma_map_single(dev,
5183			       ctrl_buf,
5184			       sizeof(adapter->ip_offload_ctrl),
5185			       DMA_TO_DEVICE);
5186
5187	if (dma_mapping_error(dev, adapter->ip_offload_ctrl_tok)) {
5188		dev_err(dev, "Couldn't map ip offload control buffer\n");
5189		return;
5190	}
5191
5192	ctrl_buf->len = cpu_to_be32(sizeof(adapter->ip_offload_ctrl));
5193	ctrl_buf->version = cpu_to_be32(INITIAL_VERSION_IOB);
5194	ctrl_buf->ipv4_chksum = buf->ipv4_chksum;
5195	ctrl_buf->ipv6_chksum = buf->ipv6_chksum;
5196	ctrl_buf->tcp_ipv4_chksum = buf->tcp_ipv4_chksum;
5197	ctrl_buf->udp_ipv4_chksum = buf->udp_ipv4_chksum;
5198	ctrl_buf->tcp_ipv6_chksum = buf->tcp_ipv6_chksum;
5199	ctrl_buf->udp_ipv6_chksum = buf->udp_ipv6_chksum;
5200	ctrl_buf->large_tx_ipv4 = buf->large_tx_ipv4;
5201	ctrl_buf->large_tx_ipv6 = buf->large_tx_ipv6;
5202
5203	/* large_rx disabled for now, additional features needed */
5204	ctrl_buf->large_rx_ipv4 = 0;
5205	ctrl_buf->large_rx_ipv6 = 0;
5206
5207	if (adapter->state != VNIC_PROBING) {
5208		old_hw_features = adapter->netdev->hw_features;
5209		adapter->netdev->hw_features = 0;
5210	}
5211
5212	adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO;
5213
5214	if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum)
5215		adapter->netdev->hw_features |= NETIF_F_IP_CSUM;
5216
5217	if (buf->tcp_ipv6_chksum || buf->udp_ipv6_chksum)
5218		adapter->netdev->hw_features |= NETIF_F_IPV6_CSUM;
5219
5220	if ((adapter->netdev->features &
5221	    (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)))
5222		adapter->netdev->hw_features |= NETIF_F_RXCSUM;
5223
5224	if (buf->large_tx_ipv4)
5225		adapter->netdev->hw_features |= NETIF_F_TSO;
5226	if (buf->large_tx_ipv6)
5227		adapter->netdev->hw_features |= NETIF_F_TSO6;
5228
5229	if (adapter->state == VNIC_PROBING) {
5230		adapter->netdev->features |= adapter->netdev->hw_features;
5231	} else if (old_hw_features != adapter->netdev->hw_features) {
5232		netdev_features_t tmp = 0;
5233
5234		/* disable features no longer supported */
5235		adapter->netdev->features &= adapter->netdev->hw_features;
5236		/* turn on features now supported if previously enabled */
5237		tmp = (old_hw_features ^ adapter->netdev->hw_features) &
5238			adapter->netdev->hw_features;
5239		adapter->netdev->features |=
5240				tmp & adapter->netdev->wanted_features;
5241	}
5242
5243	memset(&crq, 0, sizeof(crq));
5244	crq.control_ip_offload.first = IBMVNIC_CRQ_CMD;
5245	crq.control_ip_offload.cmd = CONTROL_IP_OFFLOAD;
5246	crq.control_ip_offload.len =
5247	    cpu_to_be32(sizeof(adapter->ip_offload_ctrl));
5248	crq.control_ip_offload.ioba = cpu_to_be32(adapter->ip_offload_ctrl_tok);
5249	ibmvnic_send_crq(adapter, &crq);
5250}
5251
5252static void handle_vpd_size_rsp(union ibmvnic_crq *crq,
5253				struct ibmvnic_adapter *adapter)
5254{
5255	struct device *dev = &adapter->vdev->dev;
5256
5257	if (crq->get_vpd_size_rsp.rc.code) {
5258		dev_err(dev, "Error retrieving VPD size, rc=%x\n",
5259			crq->get_vpd_size_rsp.rc.code);
5260		complete(&adapter->fw_done);
5261		return;
5262	}
5263
5264	adapter->vpd->len = be64_to_cpu(crq->get_vpd_size_rsp.len);
5265	complete(&adapter->fw_done);
5266}
5267
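/* Parse the VPD response: locate the "RM" keyword and copy the
 * firmware level string that follows it into adapter->fw_version,
 * falling back to "N/A" if none is found.
 */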
5268static void handle_vpd_rsp(union ibmvnic_crq *crq,
5269			   struct ibmvnic_adapter *adapter)
5270{
5271	struct device *dev = &adapter->vdev->dev;
5272	unsigned char *substr = NULL;
5273	u8 fw_level_len = 0;
5274
5275	memset(adapter->fw_version, 0, 32);
5276
5277	dma_unmap_single(dev, adapter->vpd->dma_addr, adapter->vpd->len,
5278			 DMA_FROM_DEVICE);
5279
5280	if (crq->get_vpd_rsp.rc.code) {
5281		dev_err(dev, "Error retrieving VPD from device, rc=%x\n",
5282			crq->get_vpd_rsp.rc.code);
5283		goto complete;
5284	}
5285
5286	/* get the position of the firmware version info
5287	 * located after the ASCII 'RM' substring in the buffer
5288	 */
5289	substr = strnstr(adapter->vpd->buff, "RM", adapter->vpd->len);
5290	if (!substr) {
5291		dev_info(dev, "Warning - No FW level has been provided in the VPD buffer by the VIOS Server\n");
5292		goto complete;
5293	}
5294
5295	/* get length of firmware level ASCII substring */
5296	if ((substr + 2) < (adapter->vpd->buff + adapter->vpd->len)) {
5297		fw_level_len = *(substr + 2);
5298	} else {
5299		dev_info(dev, "Length of FW substr extends beyond the VPD buffer\n");
5300		goto complete;
5301	}
5302
5303	/* copy firmware version string from vpd into adapter */
5304	if ((substr + 3 + fw_level_len) <
5305	    (adapter->vpd->buff + adapter->vpd->len)) {
5306		strscpy(adapter->fw_version, substr + 3,
5307			sizeof(adapter->fw_version));
5308	} else {
5309		dev_info(dev, "FW substr extends beyond the VPD buffer\n");
5310	}
5311
5312complete:
5313	if (adapter->fw_version[0] == '\0')
5314		strscpy((char *)adapter->fw_version, "N/A", sizeof(adapter->fw_version));
5315	complete(&adapter->fw_done);
5316}
5317
5318static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter)
5319{
5320	struct device *dev = &adapter->vdev->dev;
5321	struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf;
5322	int i;
5323
5324	dma_unmap_single(dev, adapter->ip_offload_tok,
5325			 sizeof(adapter->ip_offload_buf), DMA_FROM_DEVICE);
5326
5327	netdev_dbg(adapter->netdev, "Query IP Offload Buffer:\n");
5328	for (i = 0; i < (sizeof(adapter->ip_offload_buf) - 1) / 8 + 1; i++)
5329		netdev_dbg(adapter->netdev, "%016lx\n",
5330			   ((unsigned long *)(buf))[i]);
5331
5332	netdev_dbg(adapter->netdev, "ipv4_chksum = %d\n", buf->ipv4_chksum);
5333	netdev_dbg(adapter->netdev, "ipv6_chksum = %d\n", buf->ipv6_chksum);
5334	netdev_dbg(adapter->netdev, "tcp_ipv4_chksum = %d\n",
5335		   buf->tcp_ipv4_chksum);
5336	netdev_dbg(adapter->netdev, "tcp_ipv6_chksum = %d\n",
5337		   buf->tcp_ipv6_chksum);
5338	netdev_dbg(adapter->netdev, "udp_ipv4_chksum = %d\n",
5339		   buf->udp_ipv4_chksum);
5340	netdev_dbg(adapter->netdev, "udp_ipv6_chksum = %d\n",
5341		   buf->udp_ipv6_chksum);
5342	netdev_dbg(adapter->netdev, "large_tx_ipv4 = %d\n",
5343		   buf->large_tx_ipv4);
5344	netdev_dbg(adapter->netdev, "large_tx_ipv6 = %d\n",
5345		   buf->large_tx_ipv6);
5346	netdev_dbg(adapter->netdev, "large_rx_ipv4 = %d\n",
5347		   buf->large_rx_ipv4);
5348	netdev_dbg(adapter->netdev, "large_rx_ipv6 = %d\n",
5349		   buf->large_rx_ipv6);
5350	netdev_dbg(adapter->netdev, "max_ipv4_hdr_sz = %d\n",
5351		   buf->max_ipv4_header_size);
5352	netdev_dbg(adapter->netdev, "max_ipv6_hdr_sz = %d\n",
5353		   buf->max_ipv6_header_size);
5354	netdev_dbg(adapter->netdev, "max_tcp_hdr_size = %d\n",
5355		   buf->max_tcp_header_size);
5356	netdev_dbg(adapter->netdev, "max_udp_hdr_size = %d\n",
5357		   buf->max_udp_header_size);
5358	netdev_dbg(adapter->netdev, "max_large_tx_size = %d\n",
5359		   buf->max_large_tx_size);
5360	netdev_dbg(adapter->netdev, "max_large_rx_size = %d\n",
5361		   buf->max_large_rx_size);
5362	netdev_dbg(adapter->netdev, "ipv6_ext_hdr = %d\n",
5363		   buf->ipv6_extension_header);
5364	netdev_dbg(adapter->netdev, "tcp_pseudosum_req = %d\n",
5365		   buf->tcp_pseudosum_req);
5366	netdev_dbg(adapter->netdev, "num_ipv6_ext_hd = %d\n",
5367		   buf->num_ipv6_ext_headers);
5368	netdev_dbg(adapter->netdev, "off_ipv6_ext_hd = %d\n",
5369		   buf->off_ipv6_ext_headers);
5370
5371	send_control_ip_offload(adapter);
5372}
5373
5374static const char *ibmvnic_fw_err_cause(u16 cause)
5375{
5376	switch (cause) {
5377	case ADAPTER_PROBLEM:
5378		return "adapter problem";
5379	case BUS_PROBLEM:
5380		return "bus problem";
5381	case FW_PROBLEM:
5382		return "firmware problem";
5383	case DD_PROBLEM:
5384		return "device driver problem";
5385	case EEH_RECOVERY:
5386		return "EEH recovery";
5387	case FW_UPDATED:
5388		return "firmware updated";
5389	case LOW_MEMORY:
5390		return "low memory";
5391	default:
5392		return "unknown";
5393	}
5394}
5395
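/* Handle ERROR_INDICATION: log the reported cause (rate limited) and
 * schedule a fatal or non-fatal reset depending on the error flags.
 */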
5396static void handle_error_indication(union ibmvnic_crq *crq,
5397				    struct ibmvnic_adapter *adapter)
5398{
5399	struct device *dev = &adapter->vdev->dev;
5400	u16 cause;
5401
5402	cause = be16_to_cpu(crq->error_indication.error_cause);
5403
5404	dev_warn_ratelimited(dev,
5405			     "Firmware reports %serror, cause: %s. Starting recovery...\n",
5406			     crq->error_indication.flags
5407				& IBMVNIC_FATAL_ERROR ? "FATAL " : "",
5408			     ibmvnic_fw_err_cause(cause));
5409
5410	if (crq->error_indication.flags & IBMVNIC_FATAL_ERROR)
5411		ibmvnic_reset(adapter, VNIC_RESET_FATAL);
5412	else
5413		ibmvnic_reset(adapter, VNIC_RESET_NON_FATAL);
5414}
5415
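/* Handle CHANGE_MAC_ADDR_RSP: on success, adopt the MAC address returned
 * by the server (which may differ from the one requested) and complete
 * fw_done so the waiting caller can proceed.
 */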
5416static int handle_change_mac_rsp(union ibmvnic_crq *crq,
5417				 struct ibmvnic_adapter *adapter)
5418{
5419	struct net_device *netdev = adapter->netdev;
5420	struct device *dev = &adapter->vdev->dev;
5421	long rc;
5422
5423	rc = crq->change_mac_addr_rsp.rc.code;
5424	if (rc) {
5425		dev_err(dev, "Error %ld in CHANGE_MAC_ADDR_RSP\n", rc);
5426		goto out;
5427	}
5428	/* crq->change_mac_addr.mac_addr is the requested one
5429	 * crq->change_mac_addr_rsp.mac_addr is the returned valid one.
5430	 */
5431	eth_hw_addr_set(netdev, &crq->change_mac_addr_rsp.mac_addr[0]);
5432	ether_addr_copy(adapter->mac_addr,
5433			&crq->change_mac_addr_rsp.mac_addr[0]);
5434out:
5435	complete(&adapter->fw_done);
5436	return rc;
5437}
5438
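/* Handle REQUEST_CAPABILITY_RSP: record the value granted by the server,
 * retrying with the server-suggested value on PARTIALSUCCESS, and once all
 * outstanding capability requests have completed, query IP offload
 * support.
 */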
5439static void handle_request_cap_rsp(union ibmvnic_crq *crq,
5440				   struct ibmvnic_adapter *adapter)
5441{
5442	struct device *dev = &adapter->vdev->dev;
5443	u64 *req_value;
5444	char *name;
5445
5446	atomic_dec(&adapter->running_cap_crqs);
5447	netdev_dbg(adapter->netdev, "Outstanding request-caps: %d\n",
5448		   atomic_read(&adapter->running_cap_crqs));
5449	switch (be16_to_cpu(crq->request_capability_rsp.capability)) {
5450	case REQ_TX_QUEUES:
5451		req_value = &adapter->req_tx_queues;
5452		name = "tx";
5453		break;
5454	case REQ_RX_QUEUES:
5455		req_value = &adapter->req_rx_queues;
5456		name = "rx";
5457		break;
5458	case REQ_RX_ADD_QUEUES:
5459		req_value = &adapter->req_rx_add_queues;
5460		name = "rx_add";
5461		break;
5462	case REQ_TX_ENTRIES_PER_SUBCRQ:
5463		req_value = &adapter->req_tx_entries_per_subcrq;
5464		name = "tx_entries_per_subcrq";
5465		break;
5466	case REQ_RX_ADD_ENTRIES_PER_SUBCRQ:
5467		req_value = &adapter->req_rx_add_entries_per_subcrq;
5468		name = "rx_add_entries_per_subcrq";
5469		break;
5470	case REQ_MTU:
5471		req_value = &adapter->req_mtu;
5472		name = "mtu";
5473		break;
5474	case PROMISC_REQUESTED:
5475		req_value = &adapter->promisc;
5476		name = "promisc";
5477		break;
5478	default:
5479		dev_err(dev, "Got invalid cap request rsp %d\n",
5480			crq->request_capability.capability);
5481		return;
5482	}
5483
5484	switch (crq->request_capability_rsp.rc.code) {
5485	case SUCCESS:
5486		break;
5487	case PARTIALSUCCESS:
5488		dev_info(dev, "req=%lld, rsp=%ld in %s queue, retrying.\n",
5489			 *req_value,
5490			 (long)be64_to_cpu(crq->request_capability_rsp.number),
5491			 name);
5492
5493		if (be16_to_cpu(crq->request_capability_rsp.capability) ==
5494		    REQ_MTU) {
5495			pr_err("mtu of %llu is not supported. Reverting.\n",
5496			       *req_value);
5497			*req_value = adapter->fallback.mtu;
5498		} else {
5499			*req_value =
5500				be64_to_cpu(crq->request_capability_rsp.number);
5501		}
5502
5503		send_request_cap(adapter, 1);
5504		return;
5505	default:
5506		dev_err(dev, "Error %d in request cap rsp\n",
5507			crq->request_capability_rsp.rc.code);
5508		return;
5509	}
5510
5511	/* Done receiving requested capabilities, query IP offload support */
5512	if (atomic_read(&adapter->running_cap_crqs) == 0)
5513		send_query_ip_offload(adapter);
5514}
5515
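/* Handle LOGIN_RSP: validate the response buffer against the login
 * request, cache the rx buffer size and the tx/rx sub-CRQ handles, then
 * complete init_done. Errors and pending failovers are reported back
 * through init_done_rc instead.
 */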
5516static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
5517			    struct ibmvnic_adapter *adapter)
5518{
5519	struct device *dev = &adapter->vdev->dev;
5520	struct net_device *netdev = adapter->netdev;
5521	struct ibmvnic_login_rsp_buffer *login_rsp = adapter->login_rsp_buf;
5522	struct ibmvnic_login_buffer *login = adapter->login_buf;
5523	u64 *tx_handle_array;
5524	u64 *rx_handle_array;
5525	int num_tx_pools;
5526	int num_rx_pools;
5527	u64 *size_array;
5528	u32 rsp_len;
5529	int i;
5530
5531	/* CHECK: Test/set of login_pending does not need to be atomic
5532	 * because only ibmvnic_tasklet tests/clears this.
5533	 */
5534	if (!adapter->login_pending) {
5535		netdev_warn(netdev, "Ignoring unexpected login response\n");
5536		return 0;
5537	}
5538	adapter->login_pending = false;
5539
5540	/* If the number of queues requested can't be allocated by the
5541	 * server, the login response will return with code 1. We will need
5542	 * to resend the login buffer with fewer queues requested.
5543	 */
5544	if (login_rsp_crq->generic.rc.code) {
5545		adapter->init_done_rc = login_rsp_crq->generic.rc.code;
5546		complete(&adapter->init_done);
5547		return 0;
5548	}
5549
5550	if (adapter->failover_pending) {
5551		adapter->init_done_rc = -EAGAIN;
5552		netdev_dbg(netdev, "Failover pending, ignoring login response\n");
5553		complete(&adapter->init_done);
5554		/* login response buffer will be released on reset */
5555		return 0;
5556	}
5557
5558	netdev->mtu = adapter->req_mtu - ETH_HLEN;
5559
5560	netdev_dbg(adapter->netdev, "Login Response Buffer:\n");
5561	for (i = 0; i < (adapter->login_rsp_buf_sz - 1) / 8 + 1; i++) {
5562		netdev_dbg(adapter->netdev, "%016lx\n",
5563			   ((unsigned long *)(adapter->login_rsp_buf))[i]);
5564	}
5565
5566	/* Sanity checks */
5567	if (login->num_txcomp_subcrqs != login_rsp->num_txsubm_subcrqs ||
5568	    (be32_to_cpu(login->num_rxcomp_subcrqs) *
5569	     adapter->req_rx_add_queues !=
5570	     be32_to_cpu(login_rsp->num_rxadd_subcrqs))) {
5571		dev_err(dev, "FATAL: Inconsistent login and login rsp\n");
5572		ibmvnic_reset(adapter, VNIC_RESET_FATAL);
5573		return -EIO;
5574	}
5575
5576	rsp_len = be32_to_cpu(login_rsp->len);
5577	if (be32_to_cpu(login->login_rsp_len) < rsp_len ||
5578	    rsp_len <= be32_to_cpu(login_rsp->off_txsubm_subcrqs) ||
5579	    rsp_len <= be32_to_cpu(login_rsp->off_rxadd_subcrqs) ||
5580	    rsp_len <= be32_to_cpu(login_rsp->off_rxadd_buff_size) ||
5581	    rsp_len <= be32_to_cpu(login_rsp->off_supp_tx_desc)) {
5582		/* This can happen if a login request times out and there are
5583		 * two outstanding login requests: the LOGIN_RSP CRQ may have
5584		 * been for the older login request, so we would be parsing
5585		 * the newer response buffer, which may be incomplete.
5586		 */
5587		dev_err(dev, "FATAL: Login rsp offsets/lengths invalid\n");
5588		ibmvnic_reset(adapter, VNIC_RESET_FATAL);
5589		return -EIO;
5590	}
5591
5592	size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
5593		be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
5594	/* variable buffer sizes are not supported, so just read the
5595	 * first entry.
5596	 */
5597	adapter->cur_rx_buf_sz = be64_to_cpu(size_array[0]);
5598
5599	num_tx_pools = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
5600	num_rx_pools = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
5601
5602	tx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
5603				  be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs));
5604	rx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
5605				  be32_to_cpu(adapter->login_rsp_buf->off_rxadd_subcrqs));
5606
5607	for (i = 0; i < num_tx_pools; i++)
5608		adapter->tx_scrq[i]->handle = tx_handle_array[i];
5609
5610	for (i = 0; i < num_rx_pools; i++)
5611		adapter->rx_scrq[i]->handle = rx_handle_array[i];
5612
5613	adapter->num_active_tx_scrqs = num_tx_pools;
5614	adapter->num_active_rx_scrqs = num_rx_pools;
5615	release_login_rsp_buffer(adapter);
5616	release_login_buffer(adapter);
5617	complete(&adapter->init_done);
5618
5619	return 0;
5620}
5621
5622static void handle_request_unmap_rsp(union ibmvnic_crq *crq,
5623				     struct ibmvnic_adapter *adapter)
5624{
5625	struct device *dev = &adapter->vdev->dev;
5626	long rc;
5627
5628	rc = crq->request_unmap_rsp.rc.code;
5629	if (rc)
5630		dev_err(dev, "Error %ld in REQUEST_UNMAP_RSP\n", rc);
5631}
5632
5633static void handle_query_map_rsp(union ibmvnic_crq *crq,
5634				 struct ibmvnic_adapter *adapter)
5635{
5636	struct net_device *netdev = adapter->netdev;
5637	struct device *dev = &adapter->vdev->dev;
5638	long rc;
5639
5640	rc = crq->query_map_rsp.rc.code;
5641	if (rc) {
5642		dev_err(dev, "Error %ld in QUERY_MAP_RSP\n", rc);
5643		return;
5644	}
5645	netdev_dbg(netdev, "page_size = %d\ntot_pages = %u\nfree_pages = %u\n",
5646		   crq->query_map_rsp.page_size,
5647		   __be32_to_cpu(crq->query_map_rsp.tot_pages),
5648		   __be32_to_cpu(crq->query_map_rsp.free_pages));
5649}
5650
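/* Handle QUERY_CAPABILITY_RSP: store the reported capability value in the
 * adapter and, once every outstanding query has been answered, move on to
 * requesting capabilities with send_request_cap().
 */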
5651static void handle_query_cap_rsp(union ibmvnic_crq *crq,
5652				 struct ibmvnic_adapter *adapter)
5653{
5654	struct net_device *netdev = adapter->netdev;
5655	struct device *dev = &adapter->vdev->dev;
5656	long rc;
5657
5658	atomic_dec(&adapter->running_cap_crqs);
5659	netdev_dbg(netdev, "Outstanding queries: %d\n",
5660		   atomic_read(&adapter->running_cap_crqs));
5661	rc = crq->query_capability.rc.code;
5662	if (rc) {
5663		dev_err(dev, "Error %ld in QUERY_CAP_RSP\n", rc);
5664		goto out;
5665	}
5666
5667	switch (be16_to_cpu(crq->query_capability.capability)) {
5668	case MIN_TX_QUEUES:
5669		adapter->min_tx_queues =
5670		    be64_to_cpu(crq->query_capability.number);
5671		netdev_dbg(netdev, "min_tx_queues = %lld\n",
5672			   adapter->min_tx_queues);
5673		break;
5674	case MIN_RX_QUEUES:
5675		adapter->min_rx_queues =
5676		    be64_to_cpu(crq->query_capability.number);
5677		netdev_dbg(netdev, "min_rx_queues = %lld\n",
5678			   adapter->min_rx_queues);
5679		break;
5680	case MIN_RX_ADD_QUEUES:
5681		adapter->min_rx_add_queues =
5682		    be64_to_cpu(crq->query_capability.number);
5683		netdev_dbg(netdev, "min_rx_add_queues = %lld\n",
5684			   adapter->min_rx_add_queues);
5685		break;
5686	case MAX_TX_QUEUES:
5687		adapter->max_tx_queues =
5688		    be64_to_cpu(crq->query_capability.number);
5689		netdev_dbg(netdev, "max_tx_queues = %lld\n",
5690			   adapter->max_tx_queues);
5691		break;
5692	case MAX_RX_QUEUES:
5693		adapter->max_rx_queues =
5694		    be64_to_cpu(crq->query_capability.number);
5695		netdev_dbg(netdev, "max_rx_queues = %lld\n",
5696			   adapter->max_rx_queues);
5697		break;
5698	case MAX_RX_ADD_QUEUES:
5699		adapter->max_rx_add_queues =
5700		    be64_to_cpu(crq->query_capability.number);
5701		netdev_dbg(netdev, "max_rx_add_queues = %lld\n",
5702			   adapter->max_rx_add_queues);
5703		break;
5704	case MIN_TX_ENTRIES_PER_SUBCRQ:
5705		adapter->min_tx_entries_per_subcrq =
5706		    be64_to_cpu(crq->query_capability.number);
5707		netdev_dbg(netdev, "min_tx_entries_per_subcrq = %lld\n",
5708			   adapter->min_tx_entries_per_subcrq);
5709		break;
5710	case MIN_RX_ADD_ENTRIES_PER_SUBCRQ:
5711		adapter->min_rx_add_entries_per_subcrq =
5712		    be64_to_cpu(crq->query_capability.number);
5713		netdev_dbg(netdev, "min_rx_add_entrs_per_subcrq = %lld\n",
5714			   adapter->min_rx_add_entries_per_subcrq);
5715		break;
5716	case MAX_TX_ENTRIES_PER_SUBCRQ:
5717		adapter->max_tx_entries_per_subcrq =
5718		    be64_to_cpu(crq->query_capability.number);
5719		netdev_dbg(netdev, "max_tx_entries_per_subcrq = %lld\n",
5720			   adapter->max_tx_entries_per_subcrq);
5721		break;
5722	case MAX_RX_ADD_ENTRIES_PER_SUBCRQ:
5723		adapter->max_rx_add_entries_per_subcrq =
5724		    be64_to_cpu(crq->query_capability.number);
5725		netdev_dbg(netdev, "max_rx_add_entrs_per_subcrq = %lld\n",
5726			   adapter->max_rx_add_entries_per_subcrq);
5727		break;
5728	case TCP_IP_OFFLOAD:
5729		adapter->tcp_ip_offload =
5730		    be64_to_cpu(crq->query_capability.number);
5731		netdev_dbg(netdev, "tcp_ip_offload = %lld\n",
5732			   adapter->tcp_ip_offload);
5733		break;
5734	case PROMISC_SUPPORTED:
5735		adapter->promisc_supported =
5736		    be64_to_cpu(crq->query_capability.number);
5737		netdev_dbg(netdev, "promisc_supported = %lld\n",
5738			   adapter->promisc_supported);
5739		break;
5740	case MIN_MTU:
5741		adapter->min_mtu = be64_to_cpu(crq->query_capability.number);
5742		netdev->min_mtu = adapter->min_mtu - ETH_HLEN;
5743		netdev_dbg(netdev, "min_mtu = %lld\n", adapter->min_mtu);
5744		break;
5745	case MAX_MTU:
5746		adapter->max_mtu = be64_to_cpu(crq->query_capability.number);
5747		netdev->max_mtu = adapter->max_mtu - ETH_HLEN;
5748		netdev_dbg(netdev, "max_mtu = %lld\n", adapter->max_mtu);
5749		break;
5750	case MAX_MULTICAST_FILTERS:
5751		adapter->max_multicast_filters =
5752		    be64_to_cpu(crq->query_capability.number);
5753		netdev_dbg(netdev, "max_multicast_filters = %lld\n",
5754			   adapter->max_multicast_filters);
5755		break;
5756	case VLAN_HEADER_INSERTION:
5757		adapter->vlan_header_insertion =
5758		    be64_to_cpu(crq->query_capability.number);
5759		if (adapter->vlan_header_insertion)
5760			netdev->features |= NETIF_F_HW_VLAN_STAG_TX;
5761		netdev_dbg(netdev, "vlan_header_insertion = %lld\n",
5762			   adapter->vlan_header_insertion);
5763		break;
5764	case RX_VLAN_HEADER_INSERTION:
5765		adapter->rx_vlan_header_insertion =
5766		    be64_to_cpu(crq->query_capability.number);
5767		netdev_dbg(netdev, "rx_vlan_header_insertion = %lld\n",
5768			   adapter->rx_vlan_header_insertion);
5769		break;
5770	case MAX_TX_SG_ENTRIES:
5771		adapter->max_tx_sg_entries =
5772		    be64_to_cpu(crq->query_capability.number);
5773		netdev_dbg(netdev, "max_tx_sg_entries = %lld\n",
5774			   adapter->max_tx_sg_entries);
5775		break;
5776	case RX_SG_SUPPORTED:
5777		adapter->rx_sg_supported =
5778		    be64_to_cpu(crq->query_capability.number);
5779		netdev_dbg(netdev, "rx_sg_supported = %lld\n",
5780			   adapter->rx_sg_supported);
5781		break;
5782	case OPT_TX_COMP_SUB_QUEUES:
5783		adapter->opt_tx_comp_sub_queues =
5784		    be64_to_cpu(crq->query_capability.number);
5785		netdev_dbg(netdev, "opt_tx_comp_sub_queues = %lld\n",
5786			   adapter->opt_tx_comp_sub_queues);
5787		break;
5788	case OPT_RX_COMP_QUEUES:
5789		adapter->opt_rx_comp_queues =
5790		    be64_to_cpu(crq->query_capability.number);
5791		netdev_dbg(netdev, "opt_rx_comp_queues = %lld\n",
5792			   adapter->opt_rx_comp_queues);
5793		break;
5794	case OPT_RX_BUFADD_Q_PER_RX_COMP_Q:
5795		adapter->opt_rx_bufadd_q_per_rx_comp_q =
5796		    be64_to_cpu(crq->query_capability.number);
5797		netdev_dbg(netdev, "opt_rx_bufadd_q_per_rx_comp_q = %lld\n",
5798			   adapter->opt_rx_bufadd_q_per_rx_comp_q);
5799		break;
5800	case OPT_TX_ENTRIES_PER_SUBCRQ:
5801		adapter->opt_tx_entries_per_subcrq =
5802		    be64_to_cpu(crq->query_capability.number);
5803		netdev_dbg(netdev, "opt_tx_entries_per_subcrq = %lld\n",
5804			   adapter->opt_tx_entries_per_subcrq);
5805		break;
5806	case OPT_RXBA_ENTRIES_PER_SUBCRQ:
5807		adapter->opt_rxba_entries_per_subcrq =
5808		    be64_to_cpu(crq->query_capability.number);
5809		netdev_dbg(netdev, "opt_rxba_entries_per_subcrq = %lld\n",
5810			   adapter->opt_rxba_entries_per_subcrq);
5811		break;
5812	case TX_RX_DESC_REQ:
5813		adapter->tx_rx_desc_req = crq->query_capability.number;
5814		netdev_dbg(netdev, "tx_rx_desc_req = %llx\n",
5815			   adapter->tx_rx_desc_req);
5816		break;
5817
5818	default:
5819		netdev_err(netdev, "Got invalid cap rsp %d\n",
5820			   crq->query_capability.capability);
5821	}
5822
5823out:
5824	if (atomic_read(&adapter->running_cap_crqs) == 0)
5825		send_request_cap(adapter, 0);
5826}
5827
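/* Send a QUERY_PHYS_PARMS CRQ and wait, under fw_lock, for the response,
 * which the CRQ tasklet delivers by completing fw_done.
 */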
5828static int send_query_phys_parms(struct ibmvnic_adapter *adapter)
5829{
5830	union ibmvnic_crq crq;
5831	int rc;
5832
5833	memset(&crq, 0, sizeof(crq));
5834	crq.query_phys_parms.first = IBMVNIC_CRQ_CMD;
5835	crq.query_phys_parms.cmd = QUERY_PHYS_PARMS;
5836
5837	mutex_lock(&adapter->fw_lock);
5838	adapter->fw_done_rc = 0;
5839	reinit_completion(&adapter->fw_done);
5840
5841	rc = ibmvnic_send_crq(adapter, &crq);
5842	if (rc) {
5843		mutex_unlock(&adapter->fw_lock);
5844		return rc;
5845	}
5846
5847	rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000);
5848	if (rc) {
5849		mutex_unlock(&adapter->fw_lock);
5850		return rc;
5851	}
5852
5853	mutex_unlock(&adapter->fw_lock);
5854	return adapter->fw_done_rc ? -EIO : 0;
5855}
5856
5857static int handle_query_phys_parms_rsp(union ibmvnic_crq *crq,
5858				       struct ibmvnic_adapter *adapter)
5859{
5860	struct net_device *netdev = adapter->netdev;
5861	int rc;
5862	__be32 rspeed = cpu_to_be32(crq->query_phys_parms_rsp.speed);
5863
5864	rc = crq->query_phys_parms_rsp.rc.code;
5865	if (rc) {
5866		netdev_err(netdev, "Error %d in QUERY_PHYS_PARMS\n", rc);
5867		return rc;
5868	}
5869	switch (rspeed) {
5870	case IBMVNIC_10MBPS:
5871		adapter->speed = SPEED_10;
5872		break;
5873	case IBMVNIC_100MBPS:
5874		adapter->speed = SPEED_100;
5875		break;
5876	case IBMVNIC_1GBPS:
5877		adapter->speed = SPEED_1000;
5878		break;
5879	case IBMVNIC_10GBPS:
5880		adapter->speed = SPEED_10000;
5881		break;
5882	case IBMVNIC_25GBPS:
5883		adapter->speed = SPEED_25000;
5884		break;
5885	case IBMVNIC_40GBPS:
5886		adapter->speed = SPEED_40000;
5887		break;
5888	case IBMVNIC_50GBPS:
5889		adapter->speed = SPEED_50000;
5890		break;
5891	case IBMVNIC_100GBPS:
5892		adapter->speed = SPEED_100000;
5893		break;
5894	case IBMVNIC_200GBPS:
5895		adapter->speed = SPEED_200000;
5896		break;
5897	default:
5898		if (netif_carrier_ok(netdev))
5899			netdev_warn(netdev, "Unknown speed 0x%08x\n", rspeed);
5900		adapter->speed = SPEED_UNKNOWN;
5901	}
5902	if (crq->query_phys_parms_rsp.flags1 & IBMVNIC_FULL_DUPLEX)
5903		adapter->duplex = DUPLEX_FULL;
5904	else if (crq->query_phys_parms_rsp.flags1 & IBMVNIC_HALF_DUPLEX)
5905		adapter->duplex = DUPLEX_HALF;
5906	else
5907		adapter->duplex = DUPLEX_UNKNOWN;
5908
5909	return rc;
5910}
5911
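/* Central CRQ dispatcher, called from the tasklet for each message pulled
 * off the queue: initialization and transport events are handled inline,
 * command responses are routed to their specific handlers.
 */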
5912static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
5913			       struct ibmvnic_adapter *adapter)
5914{
5915	struct ibmvnic_generic_crq *gen_crq = &crq->generic;
5916	struct net_device *netdev = adapter->netdev;
5917	struct device *dev = &adapter->vdev->dev;
5918	u64 *u64_crq = (u64 *)crq;
5919	long rc;
5920
5921	netdev_dbg(netdev, "Handling CRQ: %016lx %016lx\n",
5922		   (unsigned long)cpu_to_be64(u64_crq[0]),
5923		   (unsigned long)cpu_to_be64(u64_crq[1]));
5924	switch (gen_crq->first) {
5925	case IBMVNIC_CRQ_INIT_RSP:
5926		switch (gen_crq->cmd) {
5927		case IBMVNIC_CRQ_INIT:
5928			dev_info(dev, "Partner initialized\n");
5929			adapter->from_passive_init = true;
5930			/* Discard any stale login responses from prev reset.
5931			 * CHECK: should we clear even on INIT_COMPLETE?
5932			 */
5933			adapter->login_pending = false;
5934
5935			if (adapter->state == VNIC_DOWN)
5936				rc = ibmvnic_reset(adapter, VNIC_RESET_PASSIVE_INIT);
5937			else
5938				rc = ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
5939
5940			if (rc && rc != -EBUSY) {
5941				/* We were unable to schedule the failover
5942				 * reset either because the adapter was still
5943				 * probing (e.g. during kexec) or we could not
5944				 * allocate memory. Clear the failover_pending
5945				 * flag since no one else will. We ignore
5946				 * EBUSY because it means either FAILOVER reset
5947				 * is already scheduled or the adapter is
5948				 * being removed.
5949				 */
5950				netdev_err(netdev,
5951					   "Error %ld scheduling failover reset\n",
5952					   rc);
5953				adapter->failover_pending = false;
5954			}
5955
5956			if (!completion_done(&adapter->init_done)) {
5957				if (!adapter->init_done_rc)
5958					adapter->init_done_rc = -EAGAIN;
5959				complete(&adapter->init_done);
5960			}
5961
5962			break;
5963		case IBMVNIC_CRQ_INIT_COMPLETE:
5964			dev_info(dev, "Partner initialization complete\n");
5965			adapter->crq.active = true;
5966			send_version_xchg(adapter);
5967			break;
5968		default:
5969			dev_err(dev, "Unknown crq cmd: %d\n", gen_crq->cmd);
5970		}
5971		return;
5972	case IBMVNIC_CRQ_XPORT_EVENT:
5973		netif_carrier_off(netdev);
5974		adapter->crq.active = false;
5975		/* terminate any thread waiting for a response
5976		 * from the device
5977		 */
5978		if (!completion_done(&adapter->fw_done)) {
5979			adapter->fw_done_rc = -EIO;
5980			complete(&adapter->fw_done);
5981		}
5982
5983		/* if we got here during crq-init, retry crq-init */
5984		if (!completion_done(&adapter->init_done)) {
5985			adapter->init_done_rc = -EAGAIN;
5986			complete(&adapter->init_done);
5987		}
5988
5989		if (!completion_done(&adapter->stats_done))
5990			complete(&adapter->stats_done);
5991		if (test_bit(0, &adapter->resetting))
5992			adapter->force_reset_recovery = true;
5993		if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) {
5994			dev_info(dev, "Migrated, re-enabling adapter\n");
5995			ibmvnic_reset(adapter, VNIC_RESET_MOBILITY);
5996		} else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) {
5997			dev_info(dev, "Backing device failover detected\n");
5998			adapter->failover_pending = true;
5999		} else {
6000			/* The adapter lost the connection */
6001			dev_err(dev, "Virtual Adapter failed (rc=%d)\n",
6002				gen_crq->cmd);
6003			ibmvnic_reset(adapter, VNIC_RESET_FATAL);
6004		}
6005		return;
6006	case IBMVNIC_CRQ_CMD_RSP:
6007		break;
6008	default:
6009		dev_err(dev, "Got an invalid msg type 0x%02x\n",
6010			gen_crq->first);
6011		return;
6012	}
6013
6014	switch (gen_crq->cmd) {
6015	case VERSION_EXCHANGE_RSP:
6016		rc = crq->version_exchange_rsp.rc.code;
6017		if (rc) {
6018			dev_err(dev, "Error %ld in VERSION_EXCHG_RSP\n", rc);
6019			break;
6020		}
6021		ibmvnic_version =
6022			    be16_to_cpu(crq->version_exchange_rsp.version);
6023		dev_info(dev, "Partner protocol version is %d\n",
6024			 ibmvnic_version);
6025		send_query_cap(adapter);
6026		break;
6027	case QUERY_CAPABILITY_RSP:
6028		handle_query_cap_rsp(crq, adapter);
6029		break;
6030	case QUERY_MAP_RSP:
6031		handle_query_map_rsp(crq, adapter);
6032		break;
6033	case REQUEST_MAP_RSP:
6034		adapter->fw_done_rc = crq->request_map_rsp.rc.code;
6035		complete(&adapter->fw_done);
6036		break;
6037	case REQUEST_UNMAP_RSP:
6038		handle_request_unmap_rsp(crq, adapter);
6039		break;
6040	case REQUEST_CAPABILITY_RSP:
6041		handle_request_cap_rsp(crq, adapter);
6042		break;
6043	case LOGIN_RSP:
6044		netdev_dbg(netdev, "Got Login Response\n");
6045		handle_login_rsp(crq, adapter);
6046		break;
6047	case LOGICAL_LINK_STATE_RSP:
6048		netdev_dbg(netdev,
6049			   "Got Logical Link State Response, state: %d rc: %d\n",
6050			   crq->logical_link_state_rsp.link_state,
6051			   crq->logical_link_state_rsp.rc.code);
6052		adapter->logical_link_state =
6053		    crq->logical_link_state_rsp.link_state;
6054		adapter->init_done_rc = crq->logical_link_state_rsp.rc.code;
6055		complete(&adapter->init_done);
6056		break;
6057	case LINK_STATE_INDICATION:
6058		netdev_dbg(netdev, "Got Logical Link State Indication\n");
6059		adapter->phys_link_state =
6060		    crq->link_state_indication.phys_link_state;
6061		adapter->logical_link_state =
6062		    crq->link_state_indication.logical_link_state;
6063		if (adapter->phys_link_state && adapter->logical_link_state)
6064			netif_carrier_on(netdev);
6065		else
6066			netif_carrier_off(netdev);
6067		break;
6068	case CHANGE_MAC_ADDR_RSP:
6069		netdev_dbg(netdev, "Got MAC address change Response\n");
6070		adapter->fw_done_rc = handle_change_mac_rsp(crq, adapter);
6071		break;
6072	case ERROR_INDICATION:
6073		netdev_dbg(netdev, "Got Error Indication\n");
6074		handle_error_indication(crq, adapter);
6075		break;
6076	case REQUEST_STATISTICS_RSP:
6077		netdev_dbg(netdev, "Got Statistics Response\n");
6078		complete(&adapter->stats_done);
6079		break;
6080	case QUERY_IP_OFFLOAD_RSP:
6081		netdev_dbg(netdev, "Got Query IP offload Response\n");
6082		handle_query_ip_offload_rsp(adapter);
6083		break;
6084	case MULTICAST_CTRL_RSP:
6085		netdev_dbg(netdev, "Got multicast control Response\n");
6086		break;
6087	case CONTROL_IP_OFFLOAD_RSP:
6088		netdev_dbg(netdev, "Got Control IP offload Response\n");
6089		dma_unmap_single(dev, adapter->ip_offload_ctrl_tok,
6090				 sizeof(adapter->ip_offload_ctrl),
6091				 DMA_TO_DEVICE);
6092		complete(&adapter->init_done);
6093		break;
6094	case COLLECT_FW_TRACE_RSP:
6095		netdev_dbg(netdev, "Got Collect firmware trace Response\n");
6096		complete(&adapter->fw_done);
6097		break;
6098	case GET_VPD_SIZE_RSP:
6099		handle_vpd_size_rsp(crq, adapter);
6100		break;
6101	case GET_VPD_RSP:
6102		handle_vpd_rsp(crq, adapter);
6103		break;
6104	case QUERY_PHYS_PARMS_RSP:
6105		adapter->fw_done_rc = handle_query_phys_parms_rsp(crq, adapter);
6106		complete(&adapter->fw_done);
6107		break;
6108	default:
6109		netdev_err(netdev, "Got an invalid cmd type 0x%02x\n",
6110			   gen_crq->cmd);
6111	}
6112}
6113
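/* CRQ interrupt handler: all processing is deferred to the tasklet. */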
6114static irqreturn_t ibmvnic_interrupt(int irq, void *instance)
6115{
6116	struct ibmvnic_adapter *adapter = instance;
6117
6118	tasklet_schedule(&adapter->tasklet);
6119	return IRQ_HANDLED;
6120}
6121
6122static void ibmvnic_tasklet(struct tasklet_struct *t)
6123{
6124	struct ibmvnic_adapter *adapter = from_tasklet(adapter, t, tasklet);
6125	struct ibmvnic_crq_queue *queue = &adapter->crq;
6126	union ibmvnic_crq *crq;
6127	unsigned long flags;
6128
6129	spin_lock_irqsave(&queue->lock, flags);
6130
6131	/* Pull all the valid messages off the CRQ */
6132	while ((crq = ibmvnic_next_crq(adapter)) != NULL) {
6133		/* This barrier makes sure ibmvnic_next_crq()'s
6134		 * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded
6135		 * before ibmvnic_handle_crq()'s
6136		 * switch(gen_crq->first) and switch(gen_crq->cmd).
6137		 */
6138		dma_rmb();
6139		ibmvnic_handle_crq(crq, adapter);
6140		crq->generic.first = 0;
6141	}
6142
6143	spin_unlock_irqrestore(&queue->lock, flags);
6144}
6145
6146static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *adapter)
6147{
6148	struct vio_dev *vdev = adapter->vdev;
6149	int rc;
6150
6151	do {
6152		rc = plpar_hcall_norets(H_ENABLE_CRQ, vdev->unit_address);
6153	} while (rc == H_IN_PROGRESS || rc == H_BUSY || H_IS_LONG_BUSY(rc));
6154
6155	if (rc)
6156		dev_err(&vdev->dev, "Error enabling adapter (rc=%d)\n", rc);
6157
6158	return rc;
6159}
6160
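/* Reset the CRQ: free it with H_FREE_CRQ, clear the message page, then
 * re-register it with the hypervisor via H_REG_CRQ.
 */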
6161static int ibmvnic_reset_crq(struct ibmvnic_adapter *adapter)
6162{
6163	struct ibmvnic_crq_queue *crq = &adapter->crq;
6164	struct device *dev = &adapter->vdev->dev;
6165	struct vio_dev *vdev = adapter->vdev;
6166	int rc;
6167
6168	/* Close the CRQ */
6169	do {
6170		rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
6171	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
6172
6173	/* Clean out the queue */
6174	if (!crq->msgs)
6175		return -EINVAL;
6176
6177	memset(crq->msgs, 0, PAGE_SIZE);
6178	crq->cur = 0;
6179	crq->active = false;
6180
6181	/* And re-open it again */
6182	rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address,
6183				crq->msg_token, PAGE_SIZE);
6184
6185	if (rc == H_CLOSED)
6186		/* Adapter is good, but other end is not ready */
6187		dev_warn(dev, "Partner adapter not ready\n");
6188	else if (rc != 0)
6189		dev_warn(dev, "Couldn't register crq (rc=%d)\n", rc);
6190
6191	return rc;
6192}
6193
6194static void release_crq_queue(struct ibmvnic_adapter *adapter)
6195{
6196	struct ibmvnic_crq_queue *crq = &adapter->crq;
6197	struct vio_dev *vdev = adapter->vdev;
6198	long rc;
6199
6200	if (!crq->msgs)
6201		return;
6202
6203	netdev_dbg(adapter->netdev, "Releasing CRQ\n");
6204	free_irq(vdev->irq, adapter);
6205	tasklet_kill(&adapter->tasklet);
6206	do {
6207		rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
6208	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
6209
6210	dma_unmap_single(&vdev->dev, crq->msg_token, PAGE_SIZE,
6211			 DMA_BIDIRECTIONAL);
6212	free_page((unsigned long)crq->msgs);
6213	crq->msgs = NULL;
6214	crq->active = false;
6215}
6216
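/* Allocate and register the CRQ: map a page of messages, register it with
 * the hypervisor, set up the tasklet and the CRQ interrupt handler, and
 * enable VIO interrupts.
 */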
6217static int init_crq_queue(struct ibmvnic_adapter *adapter)
6218{
6219	struct ibmvnic_crq_queue *crq = &adapter->crq;
6220	struct device *dev = &adapter->vdev->dev;
6221	struct vio_dev *vdev = adapter->vdev;
6222	int rc, retrc = -ENOMEM;
6223
6224	if (crq->msgs)
6225		return 0;
6226
6227	crq->msgs = (union ibmvnic_crq *)get_zeroed_page(GFP_KERNEL);
6228	/* Should we allocate more than one page? */
6229
6230	if (!crq->msgs)
6231		return -ENOMEM;
6232
6233	crq->size = PAGE_SIZE / sizeof(*crq->msgs);
6234	crq->msg_token = dma_map_single(dev, crq->msgs, PAGE_SIZE,
6235					DMA_BIDIRECTIONAL);
6236	if (dma_mapping_error(dev, crq->msg_token))
6237		goto map_failed;
6238
6239	rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address,
6240				crq->msg_token, PAGE_SIZE);
6241
6242	if (rc == H_RESOURCE)
6243		/* maybe kexecing and resource is busy. try a reset */
6244		rc = ibmvnic_reset_crq(adapter);
6245	retrc = rc;
6246
6247	if (rc == H_CLOSED) {
6248		dev_warn(dev, "Partner adapter not ready\n");
6249	} else if (rc) {
6250		dev_warn(dev, "Error %d opening adapter\n", rc);
6251		goto reg_crq_failed;
6252	}
6253
6254	retrc = 0;
6255
6256	tasklet_setup(&adapter->tasklet, (void *)ibmvnic_tasklet);
6257
6258	netdev_dbg(adapter->netdev, "registering irq 0x%x\n", vdev->irq);
6259	snprintf(crq->name, sizeof(crq->name), "ibmvnic-%x",
6260		 adapter->vdev->unit_address);
6261	rc = request_irq(vdev->irq, ibmvnic_interrupt, 0, crq->name, adapter);
6262	if (rc) {
6263		dev_err(dev, "Couldn't register irq 0x%x. rc=%d\n",
6264			vdev->irq, rc);
6265		goto req_irq_failed;
6266	}
6267
6268	rc = vio_enable_interrupts(vdev);
6269	if (rc) {
6270		dev_err(dev, "Error %d enabling interrupts\n", rc);
6271		goto req_irq_failed;
6272	}
6273
6274	crq->cur = 0;
6275	spin_lock_init(&crq->lock);
6276
6277	/* process any CRQs that were queued before we enabled interrupts */
6278	tasklet_schedule(&adapter->tasklet);
6279
6280	return retrc;
6281
6282req_irq_failed:
6283	tasklet_kill(&adapter->tasklet);
6284	do {
6285		rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
6286	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
6287reg_crq_failed:
6288	dma_unmap_single(dev, crq->msg_token, PAGE_SIZE, DMA_BIDIRECTIONAL);
6289map_failed:
6290	free_page((unsigned long)crq->msgs);
6291	crq->msgs = NULL;
6292	return retrc;
6293}
6294
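/* Drive the CRQ initialization handshake and (re)build the sub-CRQs.
 * During a reset, sub-CRQs are fully re-created only when the requested
 * queue counts changed; otherwise they are reset in place after cleaning
 * up in-flight transmits.
 */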
6295static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
6296{
6297	struct device *dev = &adapter->vdev->dev;
6298	unsigned long timeout = msecs_to_jiffies(20000);
6299	u64 old_num_rx_queues = adapter->req_rx_queues;
6300	u64 old_num_tx_queues = adapter->req_tx_queues;
6301	int rc;
6302
6303	adapter->from_passive_init = false;
6304
6305	rc = ibmvnic_send_crq_init(adapter);
6306	if (rc) {
6307		dev_err(dev, "Send crq init failed with error %d\n", rc);
6308		return rc;
6309	}
6310
6311	if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
6312		dev_err(dev, "Initialization sequence timed out\n");
6313		return -ETIMEDOUT;
6314	}
6315
6316	if (adapter->init_done_rc) {
6317		release_crq_queue(adapter);
6318		dev_err(dev, "CRQ-init failed, %d\n", adapter->init_done_rc);
6319		return adapter->init_done_rc;
6320	}
6321
6322	if (adapter->from_passive_init) {
6323		adapter->state = VNIC_OPEN;
6324		adapter->from_passive_init = false;
6325		dev_err(dev, "CRQ-init failed, passive-init\n");
6326		return -EINVAL;
6327	}
6328
6329	if (reset &&
6330	    test_bit(0, &adapter->resetting) && !adapter->wait_for_reset &&
6331	    adapter->reset_reason != VNIC_RESET_MOBILITY) {
6332		if (adapter->req_rx_queues != old_num_rx_queues ||
6333		    adapter->req_tx_queues != old_num_tx_queues) {
6334			release_sub_crqs(adapter, 0);
6335			rc = init_sub_crqs(adapter);
6336		} else {
6337			/* no need to reinitialize completely, but we do
6338			 * need to clean up transmits that were in flight
6339			 * when we processed the reset.  Failure to do so
6340			 * will confound the upper layer, usually TCP, by
6341			 * creating the illusion of transmits that are
6342			 * awaiting completion.
6343			 */
6344			clean_tx_pools(adapter);
6345
6346			rc = reset_sub_crq_queues(adapter);
6347		}
6348	} else {
6349		rc = init_sub_crqs(adapter);
6350	}
6351
6352	if (rc) {
6353		dev_err(dev, "Initialization of sub crqs failed\n");
6354		release_crq_queue(adapter);
6355		return rc;
6356	}
6357
6358	rc = init_sub_crq_irqs(adapter);
6359	if (rc) {
6360		dev_err(dev, "Failed to initialize sub crq irqs\n");
6361		release_crq_queue(adapter);
6362	}
6363
6364	return rc;
6365}
6366
6367static struct device_attribute dev_attr_failover;
6368
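/* Probe a VNIC device: allocate the net_device, initialize adapter state,
 * locks and completions, bring up the CRQ (retrying while init returns
 * -EAGAIN), create the failover sysfs attribute and register the netdev.
 * If the partner is not yet ready, the device is left in VNIC_DOWN and
 * waits for a passive init from the server.
 */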
6369static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
6370{
6371	struct ibmvnic_adapter *adapter;
6372	struct net_device *netdev;
6373	unsigned char *mac_addr_p;
6374	unsigned long flags;
6375	bool init_success;
6376	int rc;
6377
6378	dev_dbg(&dev->dev, "entering ibmvnic_probe for UA 0x%x\n",
6379		dev->unit_address);
6380
6381	mac_addr_p = (unsigned char *)vio_get_attribute(dev,
6382							VETH_MAC_ADDR, NULL);
6383	if (!mac_addr_p) {
6384		dev_err(&dev->dev,
6385			"(%s:%3.3d) ERROR: Can't find MAC_ADDR attribute\n",
6386			__FILE__, __LINE__);
6387		return 0;
6388	}
6389
6390	netdev = alloc_etherdev_mq(sizeof(struct ibmvnic_adapter),
6391				   IBMVNIC_MAX_QUEUES);
6392	if (!netdev)
6393		return -ENOMEM;
6394
6395	adapter = netdev_priv(netdev);
6396	adapter->state = VNIC_PROBING;
6397	dev_set_drvdata(&dev->dev, netdev);
6398	adapter->vdev = dev;
6399	adapter->netdev = netdev;
6400	adapter->login_pending = false;
6401	memset(&adapter->map_ids, 0, sizeof(adapter->map_ids));
6402	/* map_ids start at 1, so ensure map_id 0 is always "in-use" */
6403	bitmap_set(adapter->map_ids, 0, 1);
6404
6405	ether_addr_copy(adapter->mac_addr, mac_addr_p);
6406	eth_hw_addr_set(netdev, adapter->mac_addr);
6407	netdev->irq = dev->irq;
6408	netdev->netdev_ops = &ibmvnic_netdev_ops;
6409	netdev->ethtool_ops = &ibmvnic_ethtool_ops;
6410	SET_NETDEV_DEV(netdev, &dev->dev);
6411
6412	INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
6413	INIT_DELAYED_WORK(&adapter->ibmvnic_delayed_reset,
6414			  __ibmvnic_delayed_reset);
6415	INIT_LIST_HEAD(&adapter->rwi_list);
6416	spin_lock_init(&adapter->rwi_lock);
6417	spin_lock_init(&adapter->state_lock);
6418	mutex_init(&adapter->fw_lock);
6419	init_completion(&adapter->probe_done);
6420	init_completion(&adapter->init_done);
6421	init_completion(&adapter->fw_done);
6422	init_completion(&adapter->reset_done);
6423	init_completion(&adapter->stats_done);
6424	clear_bit(0, &adapter->resetting);
6425	adapter->prev_rx_buf_sz = 0;
6426	adapter->prev_mtu = 0;
6427
6428	init_success = false;
6429	do {
6430		reinit_init_done(adapter);
6431
6432		/* clear any failovers we got in the previous pass
6433		 * since we are reinitializing the CRQ
6434		 */
6435		adapter->failover_pending = false;
6436
6437		/* If we had already initialized CRQ, we may have one or
6438		 * more resets queued already. Discard those and release
6439		 * the CRQ before initializing the CRQ again.
6440		 */
6441		release_crq_queue(adapter);
6442
6443		/* Since we are still in PROBING state, __ibmvnic_reset()
6444		 * will not access the ->rwi_list and since we released CRQ,
6445		 * we won't get _new_ transport events. But there may be an
6446		 * ongoing ibmvnic_reset() call. So serialize access to
6447		 * rwi_list. If we win the race, ibmvnic_reset() could add
6448		 * a reset after we purged, but that's ok - we just may end
6449		 * up with an extra reset (i.e. similar to having two or more
6450		 * resets in the queue at once).
6451		 * CHECK.
6452		 */
6453		spin_lock_irqsave(&adapter->rwi_lock, flags);
6454		flush_reset_queue(adapter);
6455		spin_unlock_irqrestore(&adapter->rwi_lock, flags);
6456
6457		rc = init_crq_queue(adapter);
6458		if (rc) {
6459			dev_err(&dev->dev, "Couldn't initialize crq. rc=%d\n",
6460				rc);
6461			goto ibmvnic_init_fail;
6462		}
6463
6464		rc = ibmvnic_reset_init(adapter, false);
6465	} while (rc == -EAGAIN);
6466
6467	/* We are ignoring the error from ibmvnic_reset_init() assuming that the
6468	 * partner is not ready. CRQ is not active. When the partner becomes
6469	 * ready, we will do the passive init reset.
6470	 */
6471
6472	if (!rc)
6473		init_success = true;
6474
6475	rc = init_stats_buffers(adapter);
6476	if (rc)
6477		goto ibmvnic_init_fail;
6478
6479	rc = init_stats_token(adapter);
6480	if (rc)
6481		goto ibmvnic_stats_fail;
6482
6483	rc = device_create_file(&dev->dev, &dev_attr_failover);
6484	if (rc)
6485		goto ibmvnic_dev_file_err;
6486
6487	netif_carrier_off(netdev);
6488
6489	if (init_success) {
6490		adapter->state = VNIC_PROBED;
6491		netdev->mtu = adapter->req_mtu - ETH_HLEN;
6492		netdev->min_mtu = adapter->min_mtu - ETH_HLEN;
6493		netdev->max_mtu = adapter->max_mtu - ETH_HLEN;
6494	} else {
6495		adapter->state = VNIC_DOWN;
6496	}
6497
6498	adapter->wait_for_reset = false;
6499	adapter->last_reset_time = jiffies;
6500
6501	rc = register_netdev(netdev);
6502	if (rc) {
6503		dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
6504		goto ibmvnic_register_fail;
6505	}
6506	dev_info(&dev->dev, "ibmvnic registered\n");
6507
6508	rc = ibmvnic_cpu_notif_add(adapter);
6509	if (rc) {
6510		netdev_err(netdev, "Registering cpu notifier failed\n");
6511		goto cpu_notif_add_failed;
6512	}
6513
6514	complete(&adapter->probe_done);
6515
6516	return 0;
6517
6518cpu_notif_add_failed:
6519	unregister_netdev(netdev);
6520
6521ibmvnic_register_fail:
6522	device_remove_file(&dev->dev, &dev_attr_failover);
6523
6524ibmvnic_dev_file_err:
6525	release_stats_token(adapter);
6526
6527ibmvnic_stats_fail:
6528	release_stats_buffers(adapter);
6529
6530ibmvnic_init_fail:
6531	release_sub_crqs(adapter, 1);
6532	release_crq_queue(adapter);
6533
6534	/* Clean up the worker thread after releasing the CRQ so we don't get
6535	 * transport events (i.e. new work items for the worker thread).
6536	 */
6537	adapter->state = VNIC_REMOVING;
6538	complete(&adapter->probe_done);
6539	flush_work(&adapter->ibmvnic_reset);
6540	flush_delayed_work(&adapter->ibmvnic_delayed_reset);
6541
6542	flush_reset_queue(adapter);
6543
6544	mutex_destroy(&adapter->fw_lock);
6545	free_netdev(netdev);
6546
6547	return rc;
6548}
6549
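/* Remove a VNIC device: block further resets, flush any outstanding reset
 * work, unregister the netdev and release the queues, pools, CRQs and
 * statistics resources.
 */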
6550static void ibmvnic_remove(struct vio_dev *dev)
6551{
6552	struct net_device *netdev = dev_get_drvdata(&dev->dev);
6553	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
6554	unsigned long flags;
6555
6556	spin_lock_irqsave(&adapter->state_lock, flags);
6557
6558	/* If ibmvnic_reset() is scheduling a reset, wait for it to
6559	 * finish. Then, set the state to REMOVING to prevent it from
6560	 * scheduling any more work and to have reset functions ignore
6561	 * any resets that have already been scheduled. Drop the lock
6562	 * after setting state, so __ibmvnic_reset() which is called
6563	 * from the flush_work() below, can make progress.
6564	 */
6565	spin_lock(&adapter->rwi_lock);
6566	adapter->state = VNIC_REMOVING;
6567	spin_unlock(&adapter->rwi_lock);
6568
6569	spin_unlock_irqrestore(&adapter->state_lock, flags);
6570
6571	ibmvnic_cpu_notif_remove(adapter);
6572
6573	flush_work(&adapter->ibmvnic_reset);
6574	flush_delayed_work(&adapter->ibmvnic_delayed_reset);
6575
6576	rtnl_lock();
6577	unregister_netdevice(netdev);
6578
6579	release_resources(adapter);
6580	release_rx_pools(adapter);
6581	release_tx_pools(adapter);
6582	release_sub_crqs(adapter, 1);
6583	release_crq_queue(adapter);
6584
6585	release_stats_token(adapter);
6586	release_stats_buffers(adapter);
6587
6588	adapter->state = VNIC_REMOVED;
6589
6590	rtnl_unlock();
6591	mutex_destroy(&adapter->fw_lock);
6592	device_remove_file(&dev->dev, &dev_attr_failover);
6593	free_netdev(netdev);
6594	dev_set_drvdata(&dev->dev, NULL);
6595}
6596
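/* sysfs "failover" attribute: writing 1 asks the hypervisor to fail the
 * session over via H_VIOCTL; if that is not possible, fall back to
 * scheduling a FAILOVER reset directly.
 */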
6597static ssize_t failover_store(struct device *dev, struct device_attribute *attr,
6598			      const char *buf, size_t count)
6599{
6600	struct net_device *netdev = dev_get_drvdata(dev);
6601	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
6602	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
6603	__be64 session_token;
6604	long rc;
6605
6606	if (!sysfs_streq(buf, "1"))
6607		return -EINVAL;
6608
6609	rc = plpar_hcall(H_VIOCTL, retbuf, adapter->vdev->unit_address,
6610			 H_GET_SESSION_TOKEN, 0, 0, 0);
6611	if (rc) {
6612		netdev_err(netdev, "Couldn't retrieve session token, rc %ld\n",
6613			   rc);
6614		goto last_resort;
6615	}
6616
6617	session_token = (__be64)retbuf[0];
6618	netdev_dbg(netdev, "Initiating client failover, session id %llx\n",
6619		   be64_to_cpu(session_token));
6620	rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,
6621				H_SESSION_ERR_DETECTED, session_token, 0, 0);
6622	if (rc) {
6623		netdev_err(netdev,
6624			   "H_VIOCTL initiated failover failed, rc %ld\n",
6625			   rc);
6626		goto last_resort;
6627	}
6628
6629	return count;
6630
6631last_resort:
6632	netdev_dbg(netdev, "Trying to send CRQ_CMD, the last resort\n");
6633	ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
6634
6635	return count;
6636}
6637static DEVICE_ATTR_WO(failover);
6638
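/* Estimate the IO entitlement the device needs: the CRQ page, the
 * statistics buffer, the sub-CRQ message queues and every rx pool buffer.
 */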
6639static unsigned long ibmvnic_get_desired_dma(struct vio_dev *vdev)
6640{
6641	struct net_device *netdev = dev_get_drvdata(&vdev->dev);
6642	struct ibmvnic_adapter *adapter;
6643	struct iommu_table *tbl;
6644	unsigned long ret = 0;
6645	int i;
6646
6647	tbl = get_iommu_table_base(&vdev->dev);
6648
6649	/* netdev inits at probe time along with the structures we need below */
6650	if (!netdev)
6651		return IOMMU_PAGE_ALIGN(IBMVNIC_IO_ENTITLEMENT_DEFAULT, tbl);
6652
6653	adapter = netdev_priv(netdev);
6654
6655	ret += PAGE_SIZE; /* the crq message queue */
6656	ret += IOMMU_PAGE_ALIGN(sizeof(struct ibmvnic_statistics), tbl);
6657
6658	for (i = 0; i < adapter->req_tx_queues + adapter->req_rx_queues; i++)
6659		ret += 4 * PAGE_SIZE; /* the scrq message queue */
6660
6661	for (i = 0; i < adapter->num_active_rx_pools; i++)
6662		ret += adapter->rx_pool[i].size *
6663		    IOMMU_PAGE_ALIGN(adapter->rx_pool[i].buff_size, tbl);
6664
6665	return ret;
6666}
6667
6668static int ibmvnic_resume(struct device *dev)
6669{
6670	struct net_device *netdev = dev_get_drvdata(dev);
6671	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
6672
6673	if (adapter->state != VNIC_OPEN)
6674		return 0;
6675
6676	tasklet_schedule(&adapter->tasklet);
6677
6678	return 0;
6679}
6680
6681static const struct vio_device_id ibmvnic_device_table[] = {
6682	{"network", "IBM,vnic"},
6683	{"", "" }
6684};
6685MODULE_DEVICE_TABLE(vio, ibmvnic_device_table);
6686
6687static const struct dev_pm_ops ibmvnic_pm_ops = {
6688	.resume = ibmvnic_resume
6689};
6690
6691static struct vio_driver ibmvnic_driver = {
6692	.id_table       = ibmvnic_device_table,
6693	.probe          = ibmvnic_probe,
6694	.remove         = ibmvnic_remove,
6695	.get_desired_dma = ibmvnic_get_desired_dma,
6696	.name		= ibmvnic_driver_name,
6697	.pm		= &ibmvnic_pm_ops,
6698};
6699
6700/* module functions */
6701static int __init ibmvnic_module_init(void)
6702{
6703	int ret;
6704
6705	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/ibmvnic:online",
6706				      ibmvnic_cpu_online,
6707				      ibmvnic_cpu_down_prep);
6708	if (ret < 0)
6709		goto out;
6710	ibmvnic_online = ret;
6711	ret = cpuhp_setup_state_multi(CPUHP_IBMVNIC_DEAD, "net/ibmvnic:dead",
6712				      NULL, ibmvnic_cpu_dead);
6713	if (ret)
6714		goto err_dead;
6715
6716	ret = vio_register_driver(&ibmvnic_driver);
6717	if (ret)
6718		goto err_vio_register;
6719
6720	pr_info("%s: %s %s\n", ibmvnic_driver_name, ibmvnic_driver_string,
6721		IBMVNIC_DRIVER_VERSION);
6722
6723	return 0;
6724err_vio_register:
6725	cpuhp_remove_multi_state(CPUHP_IBMVNIC_DEAD);
6726err_dead:
6727	cpuhp_remove_multi_state(ibmvnic_online);
6728out:
6729	return ret;
6730}
6731
6732static void __exit ibmvnic_module_exit(void)
6733{
6734	vio_unregister_driver(&ibmvnic_driver);
6735	cpuhp_remove_multi_state(CPUHP_IBMVNIC_DEAD);
6736	cpuhp_remove_multi_state(ibmvnic_online);
6737}
6738
6739module_init(ibmvnic_module_init);
6740module_exit(ibmvnic_module_exit);