v6.8
   1// SPDX-License-Identifier: GPL-2.0-only
   2/****************************************************************************
   3 * Driver for Solarflare network controllers and boards
   4 * Copyright 2005-2006 Fen Systems Ltd.
   5 * Copyright 2005-2013 Solarflare Communications Inc.
   6 */
   7
   8#include <linux/filter.h>
   9#include <linux/module.h>
  10#include <linux/pci.h>
  11#include <linux/netdevice.h>
  12#include <linux/etherdevice.h>
  13#include <linux/delay.h>
  14#include <linux/notifier.h>
  15#include <linux/ip.h>
  16#include <linux/tcp.h>
  17#include <linux/in.h>
  18#include <linux/ethtool.h>
  19#include <linux/topology.h>
  20#include <linux/gfp.h>
  21#include <linux/interrupt.h>
  22#include "net_driver.h"
  23#include <net/gre.h>
  24#include <net/udp_tunnel.h>
  25#include "efx.h"
  26#include "efx_common.h"
  27#include "efx_channels.h"
  28#include "ef100.h"
  29#include "rx_common.h"
  30#include "tx_common.h"
  31#include "nic.h"
  32#include "io.h"
  33#include "selftest.h"
  34#include "sriov.h"
  35#include "efx_devlink.h"
  36
  37#include "mcdi_port_common.h"
  38#include "mcdi_pcol.h"
  39#include "workarounds.h"
  40
  41/**************************************************************************
  42 *
  43 * Configurable values
  44 *
  45 *************************************************************************/
  46
  47module_param_named(interrupt_mode, efx_interrupt_mode, uint, 0444);
  48MODULE_PARM_DESC(interrupt_mode,
  49		 "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)");
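/* Illustrative usage, not part of the driver source: assuming the
 * module's usual "sfc" name, the mode can be chosen at load time, e.g.
 *
 *   modprobe sfc interrupt_mode=1   (fall back to MSI instead of MSI-X)
 */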
  50
  51module_param(rss_cpus, uint, 0444);
  52MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");
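/* Illustrative usage, not part of the driver source: rss_cpus=0 (the
 * default) spreads receive interrupts across all cores; a fixed count
 * can be requested at load time, e.g.
 *
 *   modprobe sfc rss_cpus=4
 */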
  53
  54/*
  55 * Use separate channels for TX and RX events
  56 *
  57 * Set this to 1 to use separate channels for TX and RX. It allows us
  58 * to control interrupt affinity separately for TX and RX.
  59 *
  60 * This is only used in MSI-X interrupt mode
  61 */
  62bool efx_separate_tx_channels;
  63module_param(efx_separate_tx_channels, bool, 0444);
  64MODULE_PARM_DESC(efx_separate_tx_channels,
  65		 "Use separate channels for TX and RX");
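/* Illustrative usage, not part of the driver source: separate TX/RX
 * channels can be requested at load time (effective in MSI-X mode
 * only, per the comment above):
 *
 *   modprobe sfc efx_separate_tx_channels=1
 */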
  66
  67/* Initial interrupt moderation settings.  They can be modified after
  68 * module load with ethtool.
  69 *
  70 * The default for RX should strike a balance between increasing the
  71 * round-trip latency and reducing overhead.
  72 */
  73static unsigned int rx_irq_mod_usec = 60;
  74
  75/* Initial interrupt moderation settings.  They can be modified after
  76 * module load with ethtool.
  77 *
  78 * This default is chosen to ensure that a 10G link does not go idle
  79 * while a TX queue is stopped after it has become full.  A queue is
  80 * restarted when it drops below half full.  The time this takes (assuming
  81 * worst case 3 descriptors per packet and 1024 descriptors) is
  82 *   512 / 3 * 1.2 = 205 usec.
  83 */
  84static unsigned int tx_irq_mod_usec = 150;
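/* For reference, the arithmetic behind the figure above: half of the
 * 1024 descriptors is 512; at the worst case of 3 descriptors per
 * packet that is ~171 packets, and the 1.2 factor is presumably the
 * ~1.23 usec a full-size (1538-byte) frame takes on a 10G link, so
 * 512 / 3 * 1.2 ~= 205 usec to drain back to the restart threshold.
 * The 150 usec default therefore fires before the queue can go idle.
 */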
  85
  86static bool phy_flash_cfg;
  87module_param(phy_flash_cfg, bool, 0644);
  88MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially");
  89
  90static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
  91			 NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
  92			 NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
  93			 NETIF_MSG_TX_ERR | NETIF_MSG_HW);
  94module_param(debug, uint, 0);
  95MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");
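/* Illustrative usage, not part of the driver source: the same bitmap
 * is exposed as the netdev message level, so classes can also be
 * toggled at runtime via ethtool, e.g. (interface name is an example):
 *
 *   ethtool -s eth0 msglvl link on rx_err on
 */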
  96
  97/**************************************************************************
  98 *
  99 * Utility functions and prototypes
 100 *
 101 *************************************************************************/
 102
 103static void efx_remove_port(struct efx_nic *efx);
 104static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog);
 105static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp);
 106static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
 107			u32 flags);
 108
 109/**************************************************************************
 110 *
 111 * Port handling
 112 *
 113 **************************************************************************/
 114
 115static void efx_fini_port(struct efx_nic *efx);
 116
 117static int efx_probe_port(struct efx_nic *efx)
 118{
 119	int rc;
 120
 121	netif_dbg(efx, probe, efx->net_dev, "create port\n");
 122
 123	if (phy_flash_cfg)
 124		efx->phy_mode = PHY_MODE_SPECIAL;
 125
 126	/* Connect up MAC/PHY operations table */
 127	rc = efx->type->probe_port(efx);
 128	if (rc)
 129		return rc;
 130
 131	/* Initialise MAC address to permanent address */
 132	eth_hw_addr_set(efx->net_dev, efx->net_dev->perm_addr);
 133
 134	return 0;
 135}
 136
 137static int efx_init_port(struct efx_nic *efx)
 138{
 139	int rc;
 140
 141	netif_dbg(efx, drv, efx->net_dev, "init port\n");
 142
 143	mutex_lock(&efx->mac_lock);
 144
 145	efx->port_initialized = true;
 146
 147	/* Ensure the PHY advertises the correct flow control settings */
 148	rc = efx_mcdi_port_reconfigure(efx);
 149	if (rc && rc != -EPERM)
 150		goto fail;
 151
 152	mutex_unlock(&efx->mac_lock);
 153	return 0;
 154
 155fail:
 156	mutex_unlock(&efx->mac_lock);
 157	return rc;
 158}
 159
 160static void efx_fini_port(struct efx_nic *efx)
 161{
 162	netif_dbg(efx, drv, efx->net_dev, "shut down port\n");
 163
 164	if (!efx->port_initialized)
 165		return;
 166
 167	efx->port_initialized = false;
 168
 169	efx->link_state.up = false;
 170	efx_link_status_changed(efx);
 171}
 172
 173static void efx_remove_port(struct efx_nic *efx)
 174{
 175	netif_dbg(efx, drv, efx->net_dev, "destroying port\n");
 176
 177	efx->type->remove_port(efx);
 178}
 179
 180/**************************************************************************
 181 *
 182 * NIC handling
 183 *
 184 **************************************************************************/
 185
 186static LIST_HEAD(efx_primary_list);
 187static LIST_HEAD(efx_unassociated_list);
 188
 189static bool efx_same_controller(struct efx_nic *left, struct efx_nic *right)
 190{
 191	return left->type == right->type &&
 192		left->vpd_sn && right->vpd_sn &&
 193		!strcmp(left->vpd_sn, right->vpd_sn);
 194}
 195
 196static void efx_associate(struct efx_nic *efx)
 197{
 198	struct efx_nic *other, *next;
 199
 200	if (efx->primary == efx) {
 201		/* Adding primary function; look for secondaries */
 202
 203		netif_dbg(efx, probe, efx->net_dev, "adding to primary list\n");
 204		list_add_tail(&efx->node, &efx_primary_list);
 205
 206		list_for_each_entry_safe(other, next, &efx_unassociated_list,
 207					 node) {
 208			if (efx_same_controller(efx, other)) {
 209				list_del(&other->node);
 210				netif_dbg(other, probe, other->net_dev,
 211					  "moving to secondary list of %s %s\n",
 212					  pci_name(efx->pci_dev),
 213					  efx->net_dev->name);
 214				list_add_tail(&other->node,
 215					      &efx->secondary_list);
 216				other->primary = efx;
 217			}
 218		}
 219	} else {
 220		/* Adding secondary function; look for primary */
 221
 222		list_for_each_entry(other, &efx_primary_list, node) {
 223			if (efx_same_controller(efx, other)) {
 224				netif_dbg(efx, probe, efx->net_dev,
 225					  "adding to secondary list of %s %s\n",
 226					  pci_name(other->pci_dev),
 227					  other->net_dev->name);
 228				list_add_tail(&efx->node,
 229					      &other->secondary_list);
 230				efx->primary = other;
 231				return;
 232			}
 233		}
 234
 235		netif_dbg(efx, probe, efx->net_dev,
 236			  "adding to unassociated list\n");
 237		list_add_tail(&efx->node, &efx_unassociated_list);
 238	}
 239}
 240
 241static void efx_dissociate(struct efx_nic *efx)
 242{
 243	struct efx_nic *other, *next;
 244
 245	list_del(&efx->node);
 246	efx->primary = NULL;
 247
 248	list_for_each_entry_safe(other, next, &efx->secondary_list, node) {
 249		list_del(&other->node);
 250		netif_dbg(other, probe, other->net_dev,
 251			  "moving to unassociated list\n");
 252		list_add_tail(&other->node, &efx_unassociated_list);
 253		other->primary = NULL;
 254	}
 255}
 256
 257static int efx_probe_nic(struct efx_nic *efx)
 258{
 259	int rc;
 260
 261	netif_dbg(efx, probe, efx->net_dev, "creating NIC\n");
 262
 263	/* Carry out hardware-type specific initialisation */
 264	rc = efx->type->probe(efx);
 265	if (rc)
 266		return rc;
 267
 268	do {
 269		if (!efx->max_channels || !efx->max_tx_channels) {
 270			netif_err(efx, drv, efx->net_dev,
 271				  "Insufficient resources to allocate"
 272				  " any channels\n");
 273			rc = -ENOSPC;
 274			goto fail1;
 275		}
 276
 277		/* Determine the number of channels and queues by trying
 278		 * to hook in MSI-X interrupts.
 279		 */
 280		rc = efx_probe_interrupts(efx);
 281		if (rc)
 282			goto fail1;
 283
 284		rc = efx_set_channels(efx);
 285		if (rc)
 286			goto fail1;
 287
 288		/* dimension_resources can fail with EAGAIN */
 289		rc = efx->type->dimension_resources(efx);
 290		if (rc != 0 && rc != -EAGAIN)
 291			goto fail2;
 292
 293		if (rc == -EAGAIN)
 294			/* try again with new max_channels */
 295			efx_remove_interrupts(efx);
 296
 297	} while (rc == -EAGAIN);
 298
 299	if (efx->n_channels > 1)
 300		netdev_rss_key_fill(efx->rss_context.rx_hash_key,
 301				    sizeof(efx->rss_context.rx_hash_key));
 302	efx_set_default_rx_indir_table(efx, &efx->rss_context);
 303
 304	/* Initialise the interrupt moderation settings */
 305	efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000);
 306	efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true,
 307				true);
 308
 309	return 0;
 310
 311fail2:
 312	efx_remove_interrupts(efx);
 313fail1:
 314	efx->type->remove(efx);
 315	return rc;
 316}
 317
 318static void efx_remove_nic(struct efx_nic *efx)
 319{
 320	netif_dbg(efx, drv, efx->net_dev, "destroying NIC\n");
 321
 322	efx_remove_interrupts(efx);
 323	efx->type->remove(efx);
 324}
 325
 326/**************************************************************************
 327 *
 328 * NIC startup/shutdown
 329 *
 330 *************************************************************************/
 331
 332static int efx_probe_all(struct efx_nic *efx)
 333{
 334	int rc;
 335
 336	rc = efx_probe_nic(efx);
 337	if (rc) {
 338		netif_err(efx, probe, efx->net_dev, "failed to create NIC\n");
 339		goto fail1;
 340	}
 341
 342	rc = efx_probe_port(efx);
 343	if (rc) {
 344		netif_err(efx, probe, efx->net_dev, "failed to create port\n");
 345		goto fail2;
 346	}
 347
 348	BUILD_BUG_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_RXQ_MIN_ENT);
 349	if (WARN_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_TXQ_MIN_ENT(efx))) {
 350		rc = -EINVAL;
 351		goto fail3;
 352	}
 353
 354#ifdef CONFIG_SFC_SRIOV
 355	rc = efx->type->vswitching_probe(efx);
 356	if (rc) /* not fatal; the PF will still work fine */
 357		netif_warn(efx, probe, efx->net_dev,
 358			   "failed to setup vswitching rc=%d;"
 359			   " VFs may not function\n", rc);
 360#endif
 361
 362	rc = efx_probe_filters(efx);
 363	if (rc) {
 364		netif_err(efx, probe, efx->net_dev,
 365			  "failed to create filter tables\n");
 366		goto fail4;
 367	}
 368
 369	rc = efx_probe_channels(efx);
 370	if (rc)
 371		goto fail5;
 372
 373	efx->state = STATE_NET_DOWN;
 374
 375	return 0;
 376
 377 fail5:
 378	efx_remove_filters(efx);
 379 fail4:
 380#ifdef CONFIG_SFC_SRIOV
 381	efx->type->vswitching_remove(efx);
 382#endif
 383 fail3:
 384	efx_remove_port(efx);
 385 fail2:
 386	efx_remove_nic(efx);
 387 fail1:
 388	return rc;
 389}
 390
 391static void efx_remove_all(struct efx_nic *efx)
 392{
 393	rtnl_lock();
 394	efx_xdp_setup_prog(efx, NULL);
 395	rtnl_unlock();
 396
 397	efx_remove_channels(efx);
 398	efx_remove_filters(efx);
 399#ifdef CONFIG_SFC_SRIOV
 400	efx->type->vswitching_remove(efx);
 401#endif
 402	efx_remove_port(efx);
 403	efx_remove_nic(efx);
 404}
 405
 406/**************************************************************************
 407 *
 408 * Interrupt moderation
 409 *
 410 **************************************************************************/
 411unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs)
 412{
 413	if (usecs == 0)
 414		return 0;
 415	if (usecs * 1000 < efx->timer_quantum_ns)
 416		return 1; /* never round down to 0 */
 417	return usecs * 1000 / efx->timer_quantum_ns;
 418}
 419
 420unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks)
 421{
 422	/* We must round up when converting ticks to microseconds
 423	 * because we round down when converting the other way.
 424	 */
 425	return DIV_ROUND_UP(ticks * efx->timer_quantum_ns, 1000);
 426}
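/* A quick worked round trip, assuming a hypothetical timer quantum of
 * 2048 ns: 60 usec -> 60000 / 2048 = 29 ticks (rounded down), and
 * 29 ticks -> DIV_ROUND_UP(29 * 2048, 1000) = 60 usec again; hence
 * the two helpers round in opposite directions.
 */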
 427
 428/* Set interrupt moderation parameters */
 429int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
 430			    unsigned int rx_usecs, bool rx_adaptive,
 431			    bool rx_may_override_tx)
 432{
 433	struct efx_channel *channel;
 434	unsigned int timer_max_us;
 435
 436	EFX_ASSERT_RESET_SERIALISED(efx);
 437
 438	timer_max_us = efx->timer_max_ns / 1000;
 439
 440	if (tx_usecs > timer_max_us || rx_usecs > timer_max_us)
 441		return -EINVAL;
 442
 443	if (tx_usecs != rx_usecs && efx->tx_channel_offset == 0 &&
 444	    !rx_may_override_tx) {
 445		netif_err(efx, drv, efx->net_dev, "Channels are shared. "
 446			  "RX and TX IRQ moderation must be equal\n");
 447		return -EINVAL;
 448	}
 449
 450	efx->irq_rx_adaptive = rx_adaptive;
 451	efx->irq_rx_moderation_us = rx_usecs;
 452	efx_for_each_channel(channel, efx) {
 453		if (efx_channel_has_rx_queue(channel))
 454			channel->irq_moderation_us = rx_usecs;
 455		else if (efx_channel_has_tx_queues(channel))
 456			channel->irq_moderation_us = tx_usecs;
 457		else if (efx_channel_is_xdp_tx(channel))
 458			channel->irq_moderation_us = tx_usecs;
 459	}
 460
 461	return 0;
 462}
 463
 464void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs,
 465			    unsigned int *rx_usecs, bool *rx_adaptive)
 466{
 467	*rx_adaptive = efx->irq_rx_adaptive;
 468	*rx_usecs = efx->irq_rx_moderation_us;
 469
 470	/* If channels are shared between RX and TX, so is IRQ
 471	 * moderation.  Otherwise, IRQ moderation is the same for all
 472	 * TX channels and is not adaptive.
 473	 */
 474	if (efx->tx_channel_offset == 0) {
 475		*tx_usecs = *rx_usecs;
 476	} else {
 477		struct efx_channel *tx_channel;
 478
 479		tx_channel = efx->channel[efx->tx_channel_offset];
 480		*tx_usecs = tx_channel->irq_moderation_us;
 481	}
 482}
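/* Illustrative usage, not part of the driver source: these values map
 * onto ethtool's interrupt-coalescing controls, e.g.
 *
 *   ethtool -C eth0 adaptive-rx on rx-usecs 60 tx-usecs 150
 */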
 483
 484/**************************************************************************
 485 *
 486 * ioctls
 487 *
 488 *************************************************************************/
 489
 490/* Net device ioctl
 491 * Context: process, rtnl_lock() held.
 492 */
 493static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
 494{
 495	struct efx_nic *efx = efx_netdev_priv(net_dev);
 496	struct mii_ioctl_data *data = if_mii(ifr);
 497
 498	/* Convert phy_id from older PRTAD/DEVAD format */
 499	if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) &&
 500	    (data->phy_id & 0xfc00) == 0x0400)
 501		data->phy_id ^= MDIO_PHY_ID_C45 | 0x0400;
 502
 503	return mdio_mii_ioctl(&efx->mdio, data, cmd);
 504}
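/* Worked example of the phy_id conversion above, assuming the standard
 * MDIO_PHY_ID_C45 value of 0x8000: an old-format id of 0x0401 matches
 * (0x0401 & 0xfc00) == 0x0400, so it is XORed with 0x8400, giving
 * 0x8001, i.e. the clause-45 flag set and the 0x0400 marker cleared.
 */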
 505
 506/**************************************************************************
 507 *
 508 * Kernel net device interface
 509 *
 510 *************************************************************************/
 511
 512/* Context: process, rtnl_lock() held. */
 513int efx_net_open(struct net_device *net_dev)
 514{
 515	struct efx_nic *efx = efx_netdev_priv(net_dev);
 516	int rc;
 517
 518	netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n",
 519		  raw_smp_processor_id());
 520
 521	rc = efx_check_disabled(efx);
 522	if (rc)
 523		return rc;
 524	if (efx->phy_mode & PHY_MODE_SPECIAL)
 525		return -EBUSY;
 526	if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL))
 527		return -EIO;
 528
 529	/* Notify the kernel of the link state polled during driver load,
 530	 * before the monitor starts running */
 531	efx_link_status_changed(efx);
 532
 533	efx_start_all(efx);
 534	if (efx->state == STATE_DISABLED || efx->reset_pending)
 535		netif_device_detach(efx->net_dev);
 536	else
 537		efx->state = STATE_NET_UP;
 538
 539	return 0;
 540}
 541
 542/* Context: process, rtnl_lock() held.
 543 * Note that the kernel will ignore our return code; this method
  544 * should really return void.
 545 */
 546int efx_net_stop(struct net_device *net_dev)
 547{
 548	struct efx_nic *efx = efx_netdev_priv(net_dev);
 549
 550	netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n",
 551		  raw_smp_processor_id());
 552
 553	/* Stop the device and flush all the channels */
 554	efx_stop_all(efx);
 555
 556	return 0;
 557}
 558
 559static int efx_vlan_rx_add_vid(struct net_device *net_dev, __be16 proto, u16 vid)
 560{
 561	struct efx_nic *efx = efx_netdev_priv(net_dev);
 562
 563	if (efx->type->vlan_rx_add_vid)
 564		return efx->type->vlan_rx_add_vid(efx, proto, vid);
 565	else
 566		return -EOPNOTSUPP;
 567}
 568
 569static int efx_vlan_rx_kill_vid(struct net_device *net_dev, __be16 proto, u16 vid)
 570{
 571	struct efx_nic *efx = efx_netdev_priv(net_dev);
 572
 573	if (efx->type->vlan_rx_kill_vid)
 574		return efx->type->vlan_rx_kill_vid(efx, proto, vid);
 575	else
 576		return -EOPNOTSUPP;
 577}
 578
 579static int efx_hwtstamp_set(struct net_device *net_dev,
 580			    struct kernel_hwtstamp_config *config,
 581			    struct netlink_ext_ack *extack)
 582{
 583	struct efx_nic *efx = efx_netdev_priv(net_dev);
 584
 585	return efx_ptp_set_ts_config(efx, config, extack);
 586}
 587
 588static int efx_hwtstamp_get(struct net_device *net_dev,
 589			    struct kernel_hwtstamp_config *config)
 590{
 591	struct efx_nic *efx = efx_netdev_priv(net_dev);
 592
 593	return efx_ptp_get_ts_config(efx, config);
 594}
 595
 596static const struct net_device_ops efx_netdev_ops = {
 597	.ndo_open		= efx_net_open,
 598	.ndo_stop		= efx_net_stop,
 599	.ndo_get_stats64	= efx_net_stats,
 600	.ndo_tx_timeout		= efx_watchdog,
 601	.ndo_start_xmit		= efx_hard_start_xmit,
 602	.ndo_validate_addr	= eth_validate_addr,
 603	.ndo_eth_ioctl		= efx_ioctl,
 604	.ndo_change_mtu		= efx_change_mtu,
 605	.ndo_set_mac_address	= efx_set_mac_address,
 606	.ndo_set_rx_mode	= efx_set_rx_mode,
 607	.ndo_set_features	= efx_set_features,
 608	.ndo_features_check	= efx_features_check,
 609	.ndo_vlan_rx_add_vid	= efx_vlan_rx_add_vid,
 610	.ndo_vlan_rx_kill_vid	= efx_vlan_rx_kill_vid,
 611	.ndo_hwtstamp_set	= efx_hwtstamp_set,
 612	.ndo_hwtstamp_get	= efx_hwtstamp_get,
 613#ifdef CONFIG_SFC_SRIOV
 614	.ndo_set_vf_mac		= efx_sriov_set_vf_mac,
 615	.ndo_set_vf_vlan	= efx_sriov_set_vf_vlan,
 616	.ndo_set_vf_spoofchk	= efx_sriov_set_vf_spoofchk,
 617	.ndo_get_vf_config	= efx_sriov_get_vf_config,
 618	.ndo_set_vf_link_state  = efx_sriov_set_vf_link_state,
 619#endif
 620	.ndo_get_phys_port_id   = efx_get_phys_port_id,
 621	.ndo_get_phys_port_name	= efx_get_phys_port_name,
 622#ifdef CONFIG_RFS_ACCEL
 623	.ndo_rx_flow_steer	= efx_filter_rfs,
 624#endif
 625	.ndo_xdp_xmit		= efx_xdp_xmit,
 626	.ndo_bpf		= efx_xdp
 627};
 628
 629static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog)
 630{
 631	struct bpf_prog *old_prog;
 632
 633	if (efx->xdp_rxq_info_failed) {
 634		netif_err(efx, drv, efx->net_dev,
 635			  "Unable to bind XDP program due to previous failure of rxq_info\n");
 636		return -EINVAL;
 637	}
 638
 639	if (prog && efx->net_dev->mtu > efx_xdp_max_mtu(efx)) {
 640		netif_err(efx, drv, efx->net_dev,
 641			  "Unable to configure XDP with MTU of %d (max: %d)\n",
 642			  efx->net_dev->mtu, efx_xdp_max_mtu(efx));
 643		return -EINVAL;
 644	}
 645
 646	old_prog = rtnl_dereference(efx->xdp_prog);
 647	rcu_assign_pointer(efx->xdp_prog, prog);
 648	/* Release the reference that was originally passed by the caller. */
 649	if (old_prog)
 650		bpf_prog_put(old_prog);
 651
 652	return 0;
 653}
 654
 655/* Context: process, rtnl_lock() held. */
 656static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 657{
 658	struct efx_nic *efx = efx_netdev_priv(dev);
 659
 660	switch (xdp->command) {
 661	case XDP_SETUP_PROG:
 662		return efx_xdp_setup_prog(efx, xdp->prog);
 663	default:
 664		return -EINVAL;
 665	}
 666}
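/* Illustrative usage, not part of the driver source: XDP_SETUP_PROG is
 * what the core hands to this callback when a program is attached from
 * userspace, e.g.
 *
 *   ip link set dev eth0 xdp obj prog.o sec xdp
 */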
 667
 668static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
 669			u32 flags)
 670{
 671	struct efx_nic *efx = efx_netdev_priv(dev);
 672
 673	if (!netif_running(dev))
 674		return -EINVAL;
 675
 676	return efx_xdp_tx_buffers(efx, n, xdpfs, flags & XDP_XMIT_FLUSH);
 677}
 678
 679static void efx_update_name(struct efx_nic *efx)
 680{
 681	strcpy(efx->name, efx->net_dev->name);
 682	efx_mtd_rename(efx);
 683	efx_set_channel_names(efx);
 684}
 685
 686static int efx_netdev_event(struct notifier_block *this,
 687			    unsigned long event, void *ptr)
 688{
 689	struct net_device *net_dev = netdev_notifier_info_to_dev(ptr);
 690
 691	if ((net_dev->netdev_ops == &efx_netdev_ops) &&
 692	    event == NETDEV_CHANGENAME)
 693		efx_update_name(efx_netdev_priv(net_dev));
 694
 695	return NOTIFY_DONE;
 696}
 697
 698static struct notifier_block efx_netdev_notifier = {
 699	.notifier_call = efx_netdev_event,
 700};
 701
 702static ssize_t phy_type_show(struct device *dev,
 703			     struct device_attribute *attr, char *buf)
 704{
 705	struct efx_nic *efx = dev_get_drvdata(dev);
 706	return sprintf(buf, "%d\n", efx->phy_type);
 707}
 708static DEVICE_ATTR_RO(phy_type);
 709
 710static int efx_register_netdev(struct efx_nic *efx)
 711{
 712	struct net_device *net_dev = efx->net_dev;
 713	struct efx_channel *channel;
 714	int rc;
 715
 716	net_dev->watchdog_timeo = 5 * HZ;
 717	net_dev->irq = efx->pci_dev->irq;
 718	net_dev->netdev_ops = &efx_netdev_ops;
 719	if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
 720		net_dev->priv_flags |= IFF_UNICAST_FLT;
 721	net_dev->ethtool_ops = &efx_ethtool_ops;
 722	netif_set_tso_max_segs(net_dev, EFX_TSO_MAX_SEGS);
 723	net_dev->min_mtu = EFX_MIN_MTU;
 724	net_dev->max_mtu = EFX_MAX_MTU;
 725
 726	rtnl_lock();
 727
 728	/* Enable resets to be scheduled and check whether any were
 729	 * already requested.  If so, the NIC is probably hosed so we
 730	 * abort.
 731	 */
 732	if (efx->reset_pending) {
 733		pci_err(efx->pci_dev, "aborting probe due to scheduled reset\n");
 734		rc = -EIO;
 735		goto fail_locked;
 736	}
 737
 738	rc = dev_alloc_name(net_dev, net_dev->name);
 739	if (rc < 0)
 740		goto fail_locked;
 741	efx_update_name(efx);
 742
 743	/* Always start with carrier off; PHY events will detect the link */
 744	netif_carrier_off(net_dev);
 745
 746	rc = register_netdevice(net_dev);
 747	if (rc)
 748		goto fail_locked;
 749
 750	efx_for_each_channel(channel, efx) {
 751		struct efx_tx_queue *tx_queue;
 752		efx_for_each_channel_tx_queue(tx_queue, channel)
 753			efx_init_tx_queue_core_txq(tx_queue);
 754	}
 755
 756	efx_associate(efx);
 757
 758	efx->state = STATE_NET_DOWN;
 759
 760	rtnl_unlock();
 761
 762	rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type);
 763	if (rc) {
 764		netif_err(efx, drv, efx->net_dev,
 765			  "failed to init net dev attributes\n");
 766		goto fail_registered;
 767	}
 768
 769	efx_init_mcdi_logging(efx);
 770
 771	return 0;
 772
 773fail_registered:
 774	rtnl_lock();
 775	efx_dissociate(efx);
 776	unregister_netdevice(net_dev);
 777fail_locked:
 778	efx->state = STATE_UNINIT;
 779	rtnl_unlock();
 780	netif_err(efx, drv, efx->net_dev, "could not register net dev\n");
 781	return rc;
 782}
 783
 784static void efx_unregister_netdev(struct efx_nic *efx)
 785{
 786	if (!efx->net_dev)
 787		return;
 788
 789	if (WARN_ON(efx_netdev_priv(efx->net_dev) != efx))
 790		return;
 791
 792	if (efx_dev_registered(efx)) {
 793		strscpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name));
 794		efx_fini_mcdi_logging(efx);
 795		device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
 796		unregister_netdev(efx->net_dev);
 797	}
 798}
 799
 800/**************************************************************************
 801 *
 802 * List of NICs we support
 803 *
 804 **************************************************************************/
 805
 806/* PCI device ID table */
 807static const struct pci_device_id efx_pci_table[] = {
 808	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0903),  /* SFC9120 PF */
 809	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
 810	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1903),  /* SFC9120 VF */
 811	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
 812	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923),  /* SFC9140 PF */
 813	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
 814	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1923),  /* SFC9140 VF */
 815	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
 816	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0a03),  /* SFC9220 PF */
 817	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
 818	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1a03),  /* SFC9220 VF */
 819	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
 820	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0b03),  /* SFC9250 PF */
 821	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
 822	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1b03),  /* SFC9250 VF */
 823	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
 824	{0}			/* end of list */
 825};
 826
 827/**************************************************************************
 828 *
 829 * Data housekeeping
 830 *
 831 **************************************************************************/
 832
 833void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
 834{
 835	u64 n_rx_nodesc_trunc = 0;
 836	struct efx_channel *channel;
 837
 838	efx_for_each_channel(channel, efx)
 839		n_rx_nodesc_trunc += channel->n_rx_nodesc_trunc;
 840	stats[GENERIC_STAT_rx_nodesc_trunc] = n_rx_nodesc_trunc;
 841	stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops);
 842}
 843
 844/**************************************************************************
 845 *
 846 * PCI interface
 847 *
 848 **************************************************************************/
 849
 850/* Main body of final NIC shutdown code
 851 * This is called only at module unload (or hotplug removal).
 852 */
 853static void efx_pci_remove_main(struct efx_nic *efx)
 854{
 855	/* Flush reset_work. It can no longer be scheduled since we
 856	 * are not READY.
 857	 */
 858	WARN_ON(efx_net_active(efx->state));
 859	efx_flush_reset_workqueue(efx);
 860
 861	efx_disable_interrupts(efx);
 862	efx_clear_interrupt_affinity(efx);
 863	efx_nic_fini_interrupt(efx);
 864	efx_fini_port(efx);
 865	efx->type->fini(efx);
 866	efx_fini_napi(efx);
 867	efx_remove_all(efx);
 868}
 869
 870/* Final NIC shutdown
 871 * This is called only at module unload (or hotplug removal).  A PF can call
 872 * this on its VFs to ensure they are unbound first.
 873 */
 874static void efx_pci_remove(struct pci_dev *pci_dev)
 875{
 876	struct efx_probe_data *probe_data;
 877	struct efx_nic *efx;
 878
 879	efx = pci_get_drvdata(pci_dev);
 880	if (!efx)
 881		return;
 882
 883	/* Mark the NIC as fini, then stop the interface */
 884	rtnl_lock();
 885	efx_dissociate(efx);
 886	dev_close(efx->net_dev);
 887	efx_disable_interrupts(efx);
 888	efx->state = STATE_UNINIT;
 889	rtnl_unlock();
 890
 891	if (efx->type->sriov_fini)
 892		efx->type->sriov_fini(efx);
 893
 894	efx_fini_devlink_lock(efx);
 895	efx_unregister_netdev(efx);
 896
 897	efx_mtd_remove(efx);
 898
 899	efx_pci_remove_main(efx);
 900
 901	efx_fini_io(efx);
 902	pci_dbg(efx->pci_dev, "shutdown successful\n");
 903
 904	efx_fini_devlink_and_unlock(efx);
 905	efx_fini_struct(efx);
 906	free_netdev(efx->net_dev);
 907	probe_data = container_of(efx, struct efx_probe_data, efx);
 908	kfree(probe_data);
  909}
 910
 911/* NIC VPD information
  912 * Called during probe to display the part number and capture the
  913 * serial number of the installed NIC.
  914 */
 915static void efx_probe_vpd_strings(struct efx_nic *efx)
 916{
 917	struct pci_dev *dev = efx->pci_dev;
 918	unsigned int vpd_size, kw_len;
 919	u8 *vpd_data;
 920	int start;
 921
 922	vpd_data = pci_vpd_alloc(dev, &vpd_size);
 923	if (IS_ERR(vpd_data)) {
 924		pci_warn(dev, "Unable to read VPD\n");
 925		return;
 926	}
 927
 928	start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
 929					     PCI_VPD_RO_KEYWORD_PARTNO, &kw_len);
 930	if (start < 0)
 931		pci_err(dev, "Part number not found or incomplete\n");
 932	else
 933		pci_info(dev, "Part Number : %.*s\n", kw_len, vpd_data + start);
 934
 935	start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
 936					     PCI_VPD_RO_KEYWORD_SERIALNO, &kw_len);
 937	if (start < 0)
 938		pci_err(dev, "Serial number not found or incomplete\n");
 939	else
 940		efx->vpd_sn = kmemdup_nul(vpd_data + start, kw_len, GFP_KERNEL);
 941
 942	kfree(vpd_data);
 943}
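/* Illustrative cross-check, not part of the driver source: the same
 * VPD keywords are visible from userspace, e.g.
 *
 *   lspci -vv -s <bdf>    (look for "[PN] Part number" and
 *                          "[SN] Serial number")
 */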
 944
 945
 946/* Main body of NIC initialisation
 947 * This is called at module load (or hotplug insertion, theoretically).
 948 */
 949static int efx_pci_probe_main(struct efx_nic *efx)
 950{
 951	int rc;
 952
 953	/* Do start-of-day initialisation */
 954	rc = efx_probe_all(efx);
 955	if (rc)
 956		goto fail1;
 957
 958	efx_init_napi(efx);
 959
 960	down_write(&efx->filter_sem);
 961	rc = efx->type->init(efx);
 962	up_write(&efx->filter_sem);
 963	if (rc) {
 964		pci_err(efx->pci_dev, "failed to initialise NIC\n");
 965		goto fail3;
 966	}
 967
 968	rc = efx_init_port(efx);
 969	if (rc) {
 970		netif_err(efx, probe, efx->net_dev,
 971			  "failed to initialise port\n");
 972		goto fail4;
 973	}
 974
 975	rc = efx_nic_init_interrupt(efx);
 976	if (rc)
 977		goto fail5;
 978
 979	efx_set_interrupt_affinity(efx);
 980	rc = efx_enable_interrupts(efx);
 981	if (rc)
 982		goto fail6;
 983
 984	return 0;
 985
 986 fail6:
 987	efx_clear_interrupt_affinity(efx);
 988	efx_nic_fini_interrupt(efx);
 989 fail5:
 990	efx_fini_port(efx);
 991 fail4:
 992	efx->type->fini(efx);
 993 fail3:
 994	efx_fini_napi(efx);
 995	efx_remove_all(efx);
 996 fail1:
 997	return rc;
 998}
 999
1000static int efx_pci_probe_post_io(struct efx_nic *efx)
1001{
1002	struct net_device *net_dev = efx->net_dev;
1003	int rc = efx_pci_probe_main(efx);
1004
1005	if (rc)
1006		return rc;
1007
1008	if (efx->type->sriov_init) {
1009		rc = efx->type->sriov_init(efx);
1010		if (rc)
1011			pci_err(efx->pci_dev, "SR-IOV can't be enabled rc %d\n",
1012				rc);
1013	}
1014
1015	/* Determine netdevice features */
1016	net_dev->features |= efx->type->offload_features;
1017
1018	/* Add TSO features */
1019	if (efx->type->tso_versions && efx->type->tso_versions(efx))
1020		net_dev->features |= NETIF_F_TSO | NETIF_F_TSO6;
1021
1022	/* Mask for features that also apply to VLAN devices */
1023	net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG |
1024				   NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
1025				   NETIF_F_RXCSUM);
1026
1027	/* Determine user configurable features */
1028	net_dev->hw_features |= net_dev->features & ~efx->fixed_features;
1029
1030	/* Disable receiving frames with bad FCS, by default. */
1031	net_dev->features &= ~NETIF_F_RXALL;
1032
1033	/* Disable VLAN filtering by default.  It may be enforced if
1034	 * the feature is fixed (i.e. VLAN filters are required to
1035	 * receive VLAN tagged packets due to vPort restrictions).
1036	 */
1037	net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
1038	net_dev->features |= efx->fixed_features;
1039
1040	net_dev->xdp_features = NETDEV_XDP_ACT_BASIC |
1041				NETDEV_XDP_ACT_REDIRECT |
1042				NETDEV_XDP_ACT_NDO_XMIT;
1043
1044	/* devlink creation, registration and lock */
1045	rc = efx_probe_devlink_and_lock(efx);
1046	if (rc)
 1047		pci_err(efx->pci_dev, "devlink registration failed\n");
1048
1049	rc = efx_register_netdev(efx);
1050	efx_probe_devlink_unlock(efx);
1051	if (!rc)
1052		return 0;
1053
1054	efx_pci_remove_main(efx);
1055	return rc;
1056}
1057
1058/* NIC initialisation
1059 *
1060 * This is called at module load (or hotplug insertion,
1061 * theoretically).  It sets up PCI mappings, resets the NIC,
1062 * sets up and registers the network devices with the kernel and hooks
1063 * the interrupt service routine.  It does not prepare the device for
1064 * transmission; this is left to the first time one of the network
1065 * interfaces is brought up (i.e. efx_net_open).
1066 */
1067static int efx_pci_probe(struct pci_dev *pci_dev,
1068			 const struct pci_device_id *entry)
1069{
1070	struct efx_probe_data *probe_data, **probe_ptr;
1071	struct net_device *net_dev;
1072	struct efx_nic *efx;
1073	int rc;
1074
1075	/* Allocate probe data and struct efx_nic */
1076	probe_data = kzalloc(sizeof(*probe_data), GFP_KERNEL);
1077	if (!probe_data)
1078		return -ENOMEM;
1079	probe_data->pci_dev = pci_dev;
1080	efx = &probe_data->efx;
1081
1082	/* Allocate and initialise a struct net_device */
1083	net_dev = alloc_etherdev_mq(sizeof(probe_data), EFX_MAX_CORE_TX_QUEUES);
1084	if (!net_dev) {
1085		rc = -ENOMEM;
1086		goto fail0;
1087	}
1088	probe_ptr = netdev_priv(net_dev);
1089	*probe_ptr = probe_data;
1090	efx->net_dev = net_dev;
1091	efx->type = (const struct efx_nic_type *) entry->driver_data;
1092	efx->fixed_features |= NETIF_F_HIGHDMA;
1093
1094	pci_set_drvdata(pci_dev, efx);
1095	SET_NETDEV_DEV(net_dev, &pci_dev->dev);
1096	rc = efx_init_struct(efx, pci_dev);
1097	if (rc)
1098		goto fail1;
1099	efx->mdio.dev = net_dev;
1100
1101	pci_info(pci_dev, "Solarflare NIC detected\n");
1102
1103	if (!efx->type->is_vf)
1104		efx_probe_vpd_strings(efx);
1105
1106	/* Set up basic I/O (BAR mappings etc) */
1107	rc = efx_init_io(efx, efx->type->mem_bar(efx), efx->type->max_dma_mask,
1108			 efx->type->mem_map_size(efx));
1109	if (rc)
1110		goto fail2;
1111
1112	rc = efx_pci_probe_post_io(efx);
1113	if (rc) {
1114		/* On failure, retry once immediately.
1115		 * If we aborted probe due to a scheduled reset, dismiss it.
1116		 */
1117		efx->reset_pending = 0;
1118		rc = efx_pci_probe_post_io(efx);
1119		if (rc) {
1120			/* On another failure, retry once more
1121			 * after a 50-305ms delay.
1122			 */
1123			unsigned char r;
1124
1125			get_random_bytes(&r, 1);
1126			msleep((unsigned int)r + 50);
1127			efx->reset_pending = 0;
1128			rc = efx_pci_probe_post_io(efx);
1129		}
1130	}
1131	if (rc)
1132		goto fail3;
1133
1134	netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");
1135
1136	/* Try to create MTDs, but allow this to fail */
1137	rtnl_lock();
1138	rc = efx_mtd_probe(efx);
1139	rtnl_unlock();
1140	if (rc && rc != -EPERM)
1141		netif_warn(efx, probe, efx->net_dev,
1142			   "failed to create MTDs (%d)\n", rc);
1143
1144	if (efx->type->udp_tnl_push_ports)
1145		efx->type->udp_tnl_push_ports(efx);
1146
1147	return 0;
1148
1149 fail3:
1150	efx_fini_io(efx);
1151 fail2:
1152	efx_fini_struct(efx);
1153 fail1:
1154	WARN_ON(rc > 0);
1155	netif_dbg(efx, drv, efx->net_dev, "initialisation failed. rc=%d\n", rc);
1156	free_netdev(net_dev);
1157 fail0:
1158	kfree(probe_data);
1159	return rc;
1160}
1161
1162/* efx_pci_sriov_configure returns the actual number of Virtual Functions
1163 * enabled on success
1164 */
1165#ifdef CONFIG_SFC_SRIOV
1166static int efx_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
1167{
1168	int rc;
1169	struct efx_nic *efx = pci_get_drvdata(dev);
1170
1171	if (efx->type->sriov_configure) {
1172		rc = efx->type->sriov_configure(efx, num_vfs);
1173		if (rc)
1174			return rc;
1175		else
1176			return num_vfs;
1177	} else
1178		return -EOPNOTSUPP;
1179}
1180#endif
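/* Illustrative usage, not part of the driver source: the PCI core
 * routes the standard sysfs knob to this callback, e.g.
 *
 *   echo 4 > /sys/bus/pci/devices/<bdf>/sriov_numvfs
 */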
1181
1182static int efx_pm_freeze(struct device *dev)
1183{
1184	struct efx_nic *efx = dev_get_drvdata(dev);
1185
1186	rtnl_lock();
1187
1188	if (efx_net_active(efx->state)) {
1189		efx_device_detach_sync(efx);
1190
1191		efx_stop_all(efx);
1192		efx_disable_interrupts(efx);
1193
1194		efx->state = efx_freeze(efx->state);
1195	}
1196
1197	rtnl_unlock();
1198
1199	return 0;
1200}
1201
1202static void efx_pci_shutdown(struct pci_dev *pci_dev)
1203{
1204	struct efx_nic *efx = pci_get_drvdata(pci_dev);
1205
1206	if (!efx)
1207		return;
1208
1209	efx_pm_freeze(&pci_dev->dev);
1210	pci_disable_device(pci_dev);
1211}
1212
1213static int efx_pm_thaw(struct device *dev)
1214{
1215	int rc;
1216	struct efx_nic *efx = dev_get_drvdata(dev);
1217
1218	rtnl_lock();
1219
1220	if (efx_frozen(efx->state)) {
1221		rc = efx_enable_interrupts(efx);
1222		if (rc)
1223			goto fail;
1224
1225		mutex_lock(&efx->mac_lock);
1226		efx_mcdi_port_reconfigure(efx);
1227		mutex_unlock(&efx->mac_lock);
1228
1229		efx_start_all(efx);
1230
1231		efx_device_attach_if_not_resetting(efx);
1232
1233		efx->state = efx_thaw(efx->state);
1234
1235		efx->type->resume_wol(efx);
1236	}
1237
1238	rtnl_unlock();
1239
1240	/* Reschedule any quenched resets scheduled during efx_pm_freeze() */
1241	efx_queue_reset_work(efx);
1242
1243	return 0;
1244
1245fail:
1246	rtnl_unlock();
1247
1248	return rc;
1249}
1250
1251static int efx_pm_poweroff(struct device *dev)
1252{
1253	struct pci_dev *pci_dev = to_pci_dev(dev);
1254	struct efx_nic *efx = pci_get_drvdata(pci_dev);
1255
1256	efx->type->fini(efx);
1257
1258	efx->reset_pending = 0;
1259
1260	pci_save_state(pci_dev);
1261	return pci_set_power_state(pci_dev, PCI_D3hot);
1262}
1263
1264/* Used for both resume and restore */
1265static int efx_pm_resume(struct device *dev)
1266{
1267	struct pci_dev *pci_dev = to_pci_dev(dev);
1268	struct efx_nic *efx = pci_get_drvdata(pci_dev);
1269	int rc;
1270
1271	rc = pci_set_power_state(pci_dev, PCI_D0);
1272	if (rc)
1273		return rc;
1274	pci_restore_state(pci_dev);
1275	rc = pci_enable_device(pci_dev);
1276	if (rc)
1277		return rc;
1278	pci_set_master(efx->pci_dev);
1279	rc = efx->type->reset(efx, RESET_TYPE_ALL);
1280	if (rc)
1281		return rc;
1282	down_write(&efx->filter_sem);
1283	rc = efx->type->init(efx);
1284	up_write(&efx->filter_sem);
1285	if (rc)
1286		return rc;
1287	rc = efx_pm_thaw(dev);
1288	return rc;
1289}
1290
1291static int efx_pm_suspend(struct device *dev)
1292{
1293	int rc;
1294
1295	efx_pm_freeze(dev);
1296	rc = efx_pm_poweroff(dev);
1297	if (rc)
1298		efx_pm_resume(dev);
1299	return rc;
1300}
1301
1302static const struct dev_pm_ops efx_pm_ops = {
1303	.suspend	= efx_pm_suspend,
1304	.resume		= efx_pm_resume,
1305	.freeze		= efx_pm_freeze,
1306	.thaw		= efx_pm_thaw,
1307	.poweroff	= efx_pm_poweroff,
1308	.restore	= efx_pm_resume,
1309};
1310
1311static struct pci_driver efx_pci_driver = {
1312	.name		= KBUILD_MODNAME,
1313	.id_table	= efx_pci_table,
1314	.probe		= efx_pci_probe,
1315	.remove		= efx_pci_remove,
1316	.driver.pm	= &efx_pm_ops,
1317	.shutdown	= efx_pci_shutdown,
1318	.err_handler	= &efx_err_handlers,
1319#ifdef CONFIG_SFC_SRIOV
1320	.sriov_configure = efx_pci_sriov_configure,
1321#endif
1322};
1323
1324/**************************************************************************
1325 *
1326 * Kernel module interface
1327 *
1328 *************************************************************************/
1329
1330static int __init efx_init_module(void)
1331{
1332	int rc;
1333
1334	printk(KERN_INFO "Solarflare NET driver\n");
1335
1336	rc = register_netdevice_notifier(&efx_netdev_notifier);
1337	if (rc)
1338		goto err_notifier;
1339
1340	rc = efx_create_reset_workqueue();
1341	if (rc)
1342		goto err_reset;
1343
1344	rc = pci_register_driver(&efx_pci_driver);
1345	if (rc < 0)
1346		goto err_pci;
1347
1348	rc = pci_register_driver(&ef100_pci_driver);
1349	if (rc < 0)
1350		goto err_pci_ef100;
1351
1352	return 0;
1353
1354 err_pci_ef100:
1355	pci_unregister_driver(&efx_pci_driver);
1356 err_pci:
1357	efx_destroy_reset_workqueue();
1358 err_reset:
1359	unregister_netdevice_notifier(&efx_netdev_notifier);
1360 err_notifier:
1361	return rc;
1362}
1363
1364static void __exit efx_exit_module(void)
1365{
1366	printk(KERN_INFO "Solarflare NET driver unloading\n");
1367
1368	pci_unregister_driver(&ef100_pci_driver);
1369	pci_unregister_driver(&efx_pci_driver);
1370	efx_destroy_reset_workqueue();
1371	unregister_netdevice_notifier(&efx_netdev_notifier);
1372
1373}
1374
1375module_init(efx_init_module);
1376module_exit(efx_exit_module);
1377
1378MODULE_AUTHOR("Solarflare Communications and "
1379	      "Michael Brown <mbrown@fensystems.co.uk>");
1380MODULE_DESCRIPTION("Solarflare network driver");
1381MODULE_LICENSE("GPL");
1382MODULE_DEVICE_TABLE(pci, efx_pci_table);
v4.17
 
   1/****************************************************************************
   2 * Driver for Solarflare network controllers and boards
   3 * Copyright 2005-2006 Fen Systems Ltd.
   4 * Copyright 2005-2013 Solarflare Communications Inc.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License version 2 as published
   8 * by the Free Software Foundation, incorporated herein by reference.
   9 */
  10
 
  11#include <linux/module.h>
  12#include <linux/pci.h>
  13#include <linux/netdevice.h>
  14#include <linux/etherdevice.h>
  15#include <linux/delay.h>
  16#include <linux/notifier.h>
  17#include <linux/ip.h>
  18#include <linux/tcp.h>
  19#include <linux/in.h>
  20#include <linux/ethtool.h>
  21#include <linux/topology.h>
  22#include <linux/gfp.h>
  23#include <linux/aer.h>
  24#include <linux/interrupt.h>
  25#include "net_driver.h"
  26#include <net/gre.h>
  27#include <net/udp_tunnel.h>
  28#include "efx.h"
 
 
 
 
 
  29#include "nic.h"
  30#include "io.h"
  31#include "selftest.h"
  32#include "sriov.h"
 
  33
  34#include "mcdi.h"
  35#include "mcdi_pcol.h"
  36#include "workarounds.h"
  37
  38/**************************************************************************
  39 *
  40 * Type name strings
  41 *
  42 **************************************************************************
  43 */
  44
  45/* Loopback mode names (see LOOPBACK_MODE()) */
  46const unsigned int efx_loopback_mode_max = LOOPBACK_MAX;
  47const char *const efx_loopback_mode_names[] = {
  48	[LOOPBACK_NONE]		= "NONE",
  49	[LOOPBACK_DATA]		= "DATAPATH",
  50	[LOOPBACK_GMAC]		= "GMAC",
  51	[LOOPBACK_XGMII]	= "XGMII",
  52	[LOOPBACK_XGXS]		= "XGXS",
  53	[LOOPBACK_XAUI]		= "XAUI",
  54	[LOOPBACK_GMII]		= "GMII",
  55	[LOOPBACK_SGMII]	= "SGMII",
  56	[LOOPBACK_XGBR]		= "XGBR",
  57	[LOOPBACK_XFI]		= "XFI",
  58	[LOOPBACK_XAUI_FAR]	= "XAUI_FAR",
  59	[LOOPBACK_GMII_FAR]	= "GMII_FAR",
  60	[LOOPBACK_SGMII_FAR]	= "SGMII_FAR",
  61	[LOOPBACK_XFI_FAR]	= "XFI_FAR",
  62	[LOOPBACK_GPHY]		= "GPHY",
  63	[LOOPBACK_PHYXS]	= "PHYXS",
  64	[LOOPBACK_PCS]		= "PCS",
  65	[LOOPBACK_PMAPMD]	= "PMA/PMD",
  66	[LOOPBACK_XPORT]	= "XPORT",
  67	[LOOPBACK_XGMII_WS]	= "XGMII_WS",
  68	[LOOPBACK_XAUI_WS]	= "XAUI_WS",
  69	[LOOPBACK_XAUI_WS_FAR]  = "XAUI_WS_FAR",
  70	[LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR",
  71	[LOOPBACK_GMII_WS]	= "GMII_WS",
  72	[LOOPBACK_XFI_WS]	= "XFI_WS",
  73	[LOOPBACK_XFI_WS_FAR]	= "XFI_WS_FAR",
  74	[LOOPBACK_PHYXS_WS]	= "PHYXS_WS",
  75};
  76
  77const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
  78const char *const efx_reset_type_names[] = {
  79	[RESET_TYPE_INVISIBLE]          = "INVISIBLE",
  80	[RESET_TYPE_ALL]                = "ALL",
  81	[RESET_TYPE_RECOVER_OR_ALL]     = "RECOVER_OR_ALL",
  82	[RESET_TYPE_WORLD]              = "WORLD",
  83	[RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
  84	[RESET_TYPE_DATAPATH]           = "DATAPATH",
  85	[RESET_TYPE_MC_BIST]		= "MC_BIST",
  86	[RESET_TYPE_DISABLE]            = "DISABLE",
  87	[RESET_TYPE_TX_WATCHDOG]        = "TX_WATCHDOG",
  88	[RESET_TYPE_INT_ERROR]          = "INT_ERROR",
  89	[RESET_TYPE_DMA_ERROR]          = "DMA_ERROR",
  90	[RESET_TYPE_TX_SKIP]            = "TX_SKIP",
  91	[RESET_TYPE_MC_FAILURE]         = "MC_FAILURE",
  92	[RESET_TYPE_MCDI_TIMEOUT]	= "MCDI_TIMEOUT (FLR)",
  93};
  94
  95/* UDP tunnel type names */
  96static const char *const efx_udp_tunnel_type_names[] = {
  97	[TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN] = "vxlan",
  98	[TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE] = "geneve",
  99};
 100
 101void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen)
 102{
 103	if (type < ARRAY_SIZE(efx_udp_tunnel_type_names) &&
 104	    efx_udp_tunnel_type_names[type] != NULL)
 105		snprintf(buf, buflen, "%s", efx_udp_tunnel_type_names[type]);
 106	else
 107		snprintf(buf, buflen, "type %d", type);
 108}
 109
 110/* Reset workqueue. If any NIC has a hardware failure then a reset will be
 111 * queued onto this work queue. This is not a per-nic work queue, because
 112 * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
 113 */
 114static struct workqueue_struct *reset_workqueue;
 115
 116/* How often and how many times to poll for a reset while waiting for a
 117 * BIST that another function started to complete.
 118 */
 119#define BIST_WAIT_DELAY_MS	100
 120#define BIST_WAIT_DELAY_COUNT	100
 121
 122/**************************************************************************
 123 *
 124 * Configurable values
 125 *
 126 *************************************************************************/
 127
 128/*
 129 * Use separate channels for TX and RX events
 130 *
 131 * Set this to 1 to use separate channels for TX and RX. It allows us
 132 * to control interrupt affinity separately for TX and RX.
 133 *
 134 * This is only used in MSI-X interrupt mode
 135 */
 136bool efx_separate_tx_channels;
 137module_param(efx_separate_tx_channels, bool, 0444);
 138MODULE_PARM_DESC(efx_separate_tx_channels,
 139		 "Use separate channels for TX and RX");
 140
 141/* This is the weight assigned to each of the (per-channel) virtual
 142 * NAPI devices.
 143 */
 144static int napi_weight = 64;
 145
 146/* This is the time (in jiffies) between invocations of the hardware
 147 * monitor.
 148 * On Falcon-based NICs, this will:
 149 * - Check the on-board hardware monitor;
 150 * - Poll the link state and reconfigure the hardware as necessary.
 151 * On Siena-based NICs for power systems with EEH support, this will give EEH a
 152 * chance to start.
 153 */
 154static unsigned int efx_monitor_interval = 1 * HZ;
 155
 156/* Initial interrupt moderation settings.  They can be modified after
 157 * module load with ethtool.
 158 *
 159 * The default for RX should strike a balance between increasing the
 160 * round-trip latency and reducing overhead.
 161 */
 162static unsigned int rx_irq_mod_usec = 60;
 163
 164/* Initial interrupt moderation settings.  They can be modified after
 165 * module load with ethtool.
 166 *
 167 * This default is chosen to ensure that a 10G link does not go idle
 168 * while a TX queue is stopped after it has become full.  A queue is
 169 * restarted when it drops below half full.  The time this takes (assuming
 170 * worst case 3 descriptors per packet and 1024 descriptors) is
 171 *   512 / 3 * 1.2 = 205 usec.
 172 */
 173static unsigned int tx_irq_mod_usec = 150;
 174
 175/* This is the first interrupt mode to try out of:
 176 * 0 => MSI-X
 177 * 1 => MSI
 178 * 2 => legacy
 179 */
 180static unsigned int interrupt_mode;
 181
 182/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
 183 * i.e. the number of CPUs among which we may distribute simultaneous
 184 * interrupt handling.
 185 *
 186 * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
 187 * The default (0) means to assign an interrupt to each core.
 188 */
 189static unsigned int rss_cpus;
 190module_param(rss_cpus, uint, 0444);
 191MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");
 192
 193static bool phy_flash_cfg;
 194module_param(phy_flash_cfg, bool, 0644);
 195MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially");
 196
 197static unsigned irq_adapt_low_thresh = 8000;
 198module_param(irq_adapt_low_thresh, uint, 0644);
 199MODULE_PARM_DESC(irq_adapt_low_thresh,
 200		 "Threshold score for reducing IRQ moderation");
 201
 202static unsigned irq_adapt_high_thresh = 16000;
 203module_param(irq_adapt_high_thresh, uint, 0644);
 204MODULE_PARM_DESC(irq_adapt_high_thresh,
 205		 "Threshold score for increasing IRQ moderation");
 206
 207static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
 208			 NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
 209			 NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
 210			 NETIF_MSG_TX_ERR | NETIF_MSG_HW);
 211module_param(debug, uint, 0);
 212MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");
 213
 214/**************************************************************************
 215 *
 216 * Utility functions and prototypes
 217 *
 218 *************************************************************************/
 219
 220static int efx_soft_enable_interrupts(struct efx_nic *efx);
 221static void efx_soft_disable_interrupts(struct efx_nic *efx);
 222static void efx_remove_channel(struct efx_channel *channel);
 223static void efx_remove_channels(struct efx_nic *efx);
 224static const struct efx_channel_type efx_default_channel_type;
 225static void efx_remove_port(struct efx_nic *efx);
 226static void efx_init_napi_channel(struct efx_channel *channel);
 227static void efx_fini_napi(struct efx_nic *efx);
 228static void efx_fini_napi_channel(struct efx_channel *channel);
 229static void efx_fini_struct(struct efx_nic *efx);
 230static void efx_start_all(struct efx_nic *efx);
 231static void efx_stop_all(struct efx_nic *efx);
 232
 233#define EFX_ASSERT_RESET_SERIALISED(efx)		\
 234	do {						\
 235		if ((efx->state == STATE_READY) ||	\
 236		    (efx->state == STATE_RECOVERY) ||	\
 237		    (efx->state == STATE_DISABLED))	\
 238			ASSERT_RTNL();			\
 239	} while (0)
 240
 241static int efx_check_disabled(struct efx_nic *efx)
 242{
 243	if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) {
 244		netif_err(efx, drv, efx->net_dev,
 245			  "device is disabled due to earlier errors\n");
 246		return -EIO;
 247	}
 248	return 0;
 249}
 250
 251/**************************************************************************
 252 *
 253 * Event queue processing
 254 *
 255 *************************************************************************/
 256
 257/* Process channel's event queue
 258 *
 259 * This function is responsible for processing the event queue of a
 260 * single channel.  The caller must guarantee that this function will
 261 * never be concurrently called more than once on the same channel,
 262 * though different channels may be being processed concurrently.
 263 */
 264static int efx_process_channel(struct efx_channel *channel, int budget)
 265{
 266	struct efx_tx_queue *tx_queue;
 267	int spent;
 268
 269	if (unlikely(!channel->enabled))
 270		return 0;
 271
 272	efx_for_each_channel_tx_queue(tx_queue, channel) {
 273		tx_queue->pkts_compl = 0;
 274		tx_queue->bytes_compl = 0;
 275	}
 276
 277	spent = efx_nic_process_eventq(channel, budget);
 278	if (spent && efx_channel_has_rx_queue(channel)) {
 279		struct efx_rx_queue *rx_queue =
 280			efx_channel_get_rx_queue(channel);
 281
 282		efx_rx_flush_packet(channel);
 283		efx_fast_push_rx_descriptors(rx_queue, true);
 284	}
 285
 286	/* Update BQL */
 287	efx_for_each_channel_tx_queue(tx_queue, channel) {
 288		if (tx_queue->bytes_compl) {
 289			netdev_tx_completed_queue(tx_queue->core_txq,
 290				tx_queue->pkts_compl, tx_queue->bytes_compl);
 291		}
 292	}
 293
 294	return spent;
 295}
 296
 297/* NAPI poll handler
 298 *
 299 * NAPI guarantees serialisation of polls of the same device, which
 300 * provides the guarantee required by efx_process_channel().
 301 */
 302static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
 303{
 304	int step = efx->irq_mod_step_us;
 305
 306	if (channel->irq_mod_score < irq_adapt_low_thresh) {
 307		if (channel->irq_moderation_us > step) {
 308			channel->irq_moderation_us -= step;
 309			efx->type->push_irq_moderation(channel);
 310		}
 311	} else if (channel->irq_mod_score > irq_adapt_high_thresh) {
 312		if (channel->irq_moderation_us <
 313		    efx->irq_rx_moderation_us) {
 314			channel->irq_moderation_us += step;
 315			efx->type->push_irq_moderation(channel);
 316		}
 317	}
 318
 319	channel->irq_count = 0;
 320	channel->irq_mod_score = 0;
 321}
 322
 323static int efx_poll(struct napi_struct *napi, int budget)
 324{
 325	struct efx_channel *channel =
 326		container_of(napi, struct efx_channel, napi_str);
 327	struct efx_nic *efx = channel->efx;
 328	int spent;
 329
 330	netif_vdbg(efx, intr, efx->net_dev,
 331		   "channel %d NAPI poll executing on CPU %d\n",
 332		   channel->channel, raw_smp_processor_id());
 333
 334	spent = efx_process_channel(channel, budget);
 335
 336	if (spent < budget) {
 337		if (efx_channel_has_rx_queue(channel) &&
 338		    efx->irq_rx_adaptive &&
 339		    unlikely(++channel->irq_count == 1000)) {
 340			efx_update_irq_mod(efx, channel);
 341		}
 342
 343#ifdef CONFIG_RFS_ACCEL
 344		/* Perhaps expire some ARFS filters */
 345		schedule_work(&channel->filter_work);
 346#endif
 347
 348		/* There is no race here; although napi_disable() will
 349		 * only wait for napi_complete(), this isn't a problem
 350		 * since efx_nic_eventq_read_ack() will have no effect if
 351		 * interrupts have already been disabled.
 352		 */
 353		if (napi_complete_done(napi, spent))
 354			efx_nic_eventq_read_ack(channel);
 355	}
 356
 357	return spent;
 358}
 359
 360/* Create event queue
 361 * Event queue memory allocations are done only once.  If the channel
 362 * is reset, the memory buffer will be reused; this guards against
 363 * errors during channel reset and also simplifies interrupt handling.
 364 */
 365static int efx_probe_eventq(struct efx_channel *channel)
 366{
 367	struct efx_nic *efx = channel->efx;
 368	unsigned long entries;
 369
 370	netif_dbg(efx, probe, efx->net_dev,
 371		  "chan %d create event queue\n", channel->channel);
 372
 373	/* Build an event queue with room for one event per tx and rx buffer,
 374	 * plus some extra for link state events and MCDI completions. */
 375	entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
 376	EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
 377	channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;
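    	/* Worked example (illustrative sizes): with 1024 RX and 1024 TX
    	 * descriptors, entries = roundup_pow_of_two(2176) = 4096, so
    	 * eventq_mask = 4095 (assuming EFX_MIN_EVQ_SIZE is smaller).
    	 */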
 378
 379	return efx_nic_probe_eventq(channel);
 380}
 381
 382/* Prepare channel's event queue */
 383static int efx_init_eventq(struct efx_channel *channel)
 384{
 385	struct efx_nic *efx = channel->efx;
 386	int rc;
 387
 388	EFX_WARN_ON_PARANOID(channel->eventq_init);
 389
 390	netif_dbg(efx, drv, efx->net_dev,
 391		  "chan %d init event queue\n", channel->channel);
 392
 393	rc = efx_nic_init_eventq(channel);
 394	if (rc == 0) {
 395		efx->type->push_irq_moderation(channel);
 396		channel->eventq_read_ptr = 0;
 397		channel->eventq_init = true;
 398	}
 399	return rc;
 400}
 401
 402/* Enable event queue processing and NAPI */
 403void efx_start_eventq(struct efx_channel *channel)
 404{
 405	netif_dbg(channel->efx, ifup, channel->efx->net_dev,
 406		  "chan %d start event queue\n", channel->channel);
 407
 408	/* Make sure the NAPI handler sees the enabled flag set */
 409	channel->enabled = true;
 410	smp_wmb();
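    	/* This barrier pairs with the read of channel->enabled at the
    	 * top of efx_process_channel(), so a NAPI poll started below
    	 * observes the flag.
    	 */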
 411
 412	napi_enable(&channel->napi_str);
 413	efx_nic_eventq_read_ack(channel);
 414}
 415
 416/* Disable event queue processing and NAPI */
 417void efx_stop_eventq(struct efx_channel *channel)
 418{
 419	if (!channel->enabled)
 420		return;
 421
 422	napi_disable(&channel->napi_str);
 423	channel->enabled = false;
 424}
 425
 426static void efx_fini_eventq(struct efx_channel *channel)
 427{
 428	if (!channel->eventq_init)
 429		return;
 430
 431	netif_dbg(channel->efx, drv, channel->efx->net_dev,
 432		  "chan %d fini event queue\n", channel->channel);
 433
 434	efx_nic_fini_eventq(channel);
 435	channel->eventq_init = false;
 436}
 437
 438static void efx_remove_eventq(struct efx_channel *channel)
 439{
 440	netif_dbg(channel->efx, drv, channel->efx->net_dev,
 441		  "chan %d remove event queue\n", channel->channel);
 442
 443	efx_nic_remove_eventq(channel);
 444}
 445
 446/**************************************************************************
 447 *
 448 * Channel handling
 449 *
 450 *************************************************************************/
 451
 452/* Allocate and initialise a channel structure. */
 453static struct efx_channel *
 454efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
 455{
 456	struct efx_channel *channel;
 457	struct efx_rx_queue *rx_queue;
 458	struct efx_tx_queue *tx_queue;
 459	int j;
 460
 461	channel = kzalloc(sizeof(*channel), GFP_KERNEL);
 462	if (!channel)
 463		return NULL;
 464
 465	channel->efx = efx;
 466	channel->channel = i;
 467	channel->type = &efx_default_channel_type;
 468
 469	for (j = 0; j < EFX_TXQ_TYPES; j++) {
 470		tx_queue = &channel->tx_queue[j];
 471		tx_queue->efx = efx;
 472		tx_queue->queue = i * EFX_TXQ_TYPES + j;
 473		tx_queue->channel = channel;
 474	}
 475
 476#ifdef CONFIG_RFS_ACCEL
 477	INIT_WORK(&channel->filter_work, efx_filter_rfs_expire);
 478#endif
 479
 480	rx_queue = &channel->rx_queue;
 481	rx_queue->efx = efx;
 482	timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
 483
 484	return channel;
 485}
 486
 487/* Allocate and initialise a channel structure, copying parameters
 488 * (but not resources) from an old channel structure.
 489 */
 490static struct efx_channel *
 491efx_copy_channel(const struct efx_channel *old_channel)
 492{
 493	struct efx_channel *channel;
 494	struct efx_rx_queue *rx_queue;
 495	struct efx_tx_queue *tx_queue;
 496	int j;
 497
 498	channel = kmalloc(sizeof(*channel), GFP_KERNEL);
 499	if (!channel)
 500		return NULL;
 501
 502	*channel = *old_channel;
 503
 504	channel->napi_dev = NULL;
 505	INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
 506	channel->napi_str.napi_id = 0;
 507	channel->napi_str.state = 0;
 508	memset(&channel->eventq, 0, sizeof(channel->eventq));
 509
 510	for (j = 0; j < EFX_TXQ_TYPES; j++) {
 511		tx_queue = &channel->tx_queue[j];
 512		if (tx_queue->channel)
 513			tx_queue->channel = channel;
 514		tx_queue->buffer = NULL;
 515		memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
 516	}
 517
 518	rx_queue = &channel->rx_queue;
 519	rx_queue->buffer = NULL;
 520	memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
 521	timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
 522#ifdef CONFIG_RFS_ACCEL
 523	INIT_WORK(&channel->filter_work, efx_filter_rfs_expire);
 524#endif
 525
 526	return channel;
 527}
 528
 529static int efx_probe_channel(struct efx_channel *channel)
 530{
 531	struct efx_tx_queue *tx_queue;
 532	struct efx_rx_queue *rx_queue;
 533	int rc;
 534
 535	netif_dbg(channel->efx, probe, channel->efx->net_dev,
 536		  "creating channel %d\n", channel->channel);
 537
 538	rc = channel->type->pre_probe(channel);
 539	if (rc)
 540		goto fail;
 541
 542	rc = efx_probe_eventq(channel);
 543	if (rc)
 544		goto fail;
 545
 546	efx_for_each_channel_tx_queue(tx_queue, channel) {
 547		rc = efx_probe_tx_queue(tx_queue);
 548		if (rc)
 549			goto fail;
 550	}
 551
 552	efx_for_each_channel_rx_queue(rx_queue, channel) {
 553		rc = efx_probe_rx_queue(rx_queue);
 554		if (rc)
 555			goto fail;
 556	}
 557
 558	return 0;
 559
 560fail:
 561	efx_remove_channel(channel);
 562	return rc;
 563}
 564
 565static void
 566efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
 567{
 568	struct efx_nic *efx = channel->efx;
 569	const char *type;
 570	int number;
 571
 572	number = channel->channel;
 573	if (efx->tx_channel_offset == 0) {
 574		type = "";
 575	} else if (channel->channel < efx->tx_channel_offset) {
 576		type = "-rx";
 577	} else {
 578		type = "-tx";
 579		number -= efx->tx_channel_offset;
 580	}
 581	snprintf(buf, len, "%s%s-%d", efx->name, type, number);
 582}
 583
 584static void efx_set_channel_names(struct efx_nic *efx)
 585{
 586	struct efx_channel *channel;
 587
 588	efx_for_each_channel(channel, efx)
 589		channel->type->get_name(channel,
 590					efx->msi_context[channel->channel].name,
 591					sizeof(efx->msi_context[0].name));
 592}
 593
 594static int efx_probe_channels(struct efx_nic *efx)
 595{
 596	struct efx_channel *channel;
 597	int rc;
 598
 599	/* Restart special buffer allocation */
 600	efx->next_buffer_table = 0;
 601
 602	/* Probe channels in reverse, so that any 'extra' channels
 603	 * use the start of the buffer table. This allows the traffic
 604	 * channels to be resized without moving them or wasting the
 605	 * entries before them.
 606	 */
 607	efx_for_each_channel_rev(channel, efx) {
 608		rc = efx_probe_channel(channel);
 609		if (rc) {
 610			netif_err(efx, probe, efx->net_dev,
 611				  "failed to create channel %d\n",
 612				  channel->channel);
 613			goto fail;
 614		}
 615	}
 616	efx_set_channel_names(efx);
 617
 618	return 0;
 619
 620fail:
 621	efx_remove_channels(efx);
 622	return rc;
 623}
 624
 625/* Channels are shut down and reinitialised whilst the NIC is running
 626 * to propagate configuration changes (mtu, checksum offload), or
 627 * to clear hardware error conditions.
 628 */
 629static void efx_start_datapath(struct efx_nic *efx)
 630{
 631	netdev_features_t old_features = efx->net_dev->features;
 632	bool old_rx_scatter = efx->rx_scatter;
 633	struct efx_tx_queue *tx_queue;
 634	struct efx_rx_queue *rx_queue;
 635	struct efx_channel *channel;
 636	size_t rx_buf_len;
 637
 638	/* Calculate the rx buffer allocation parameters required to
 639	 * support the current MTU, including padding for header
 640	 * alignment and overruns.
 641	 */
 642	efx->rx_dma_len = (efx->rx_prefix_size +
 643			   EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
 644			   efx->type->rx_buffer_padding);
 645	rx_buf_len = (sizeof(struct efx_rx_page_state) +
 646		      efx->rx_ip_align + efx->rx_dma_len);
 647	if (rx_buf_len <= PAGE_SIZE) {
 648		efx->rx_scatter = efx->type->always_rx_scatter;
 649		efx->rx_buffer_order = 0;
 650	} else if (efx->type->can_rx_scatter) {
 651		BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES);
 652		BUILD_BUG_ON(sizeof(struct efx_rx_page_state) +
 653			     2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE,
 654				       EFX_RX_BUF_ALIGNMENT) >
 655			     PAGE_SIZE);
 656		efx->rx_scatter = true;
 657		efx->rx_dma_len = EFX_RX_USR_BUF_SIZE;
 658		efx->rx_buffer_order = 0;
 659	} else {
 660		efx->rx_scatter = false;
 661		efx->rx_buffer_order = get_order(rx_buf_len);
 662	}
 663
 664	efx_rx_config_page_split(efx);
 665	if (efx->rx_buffer_order)
 666		netif_dbg(efx, drv, efx->net_dev,
 667			  "RX buf len=%u; page order=%u batch=%u\n",
 668			  efx->rx_dma_len, efx->rx_buffer_order,
 669			  efx->rx_pages_per_batch);
 670	else
 671		netif_dbg(efx, drv, efx->net_dev,
 672			  "RX buf len=%u step=%u bpp=%u; page batch=%u\n",
 673			  efx->rx_dma_len, efx->rx_page_buf_step,
 674			  efx->rx_bufs_per_page, efx->rx_pages_per_batch);
 675
 676	/* Restore previously fixed features in hw_features and remove
 677	 * features which are fixed now
 678	 */
 679	efx->net_dev->hw_features |= efx->net_dev->features;
 680	efx->net_dev->hw_features &= ~efx->fixed_features;
 681	efx->net_dev->features |= efx->fixed_features;
 682	if (efx->net_dev->features != old_features)
 683		netdev_features_change(efx->net_dev);
 684
 685	/* RX filters may also have scatter-enabled flags */
 686	if (efx->rx_scatter != old_rx_scatter)
 687		efx->type->filter_update_rx_scatter(efx);
 688
 689	/* We must keep at least one descriptor in a TX ring empty.
 690	 * We could avoid this when the queue size does not exactly
 691	 * match the hardware ring size, but it's not that important.
 692	 * Therefore we stop the queue when one more skb might fill
 693	 * the ring completely.  We wake it when half way back to
 694	 * empty.
 695	 */
 696	efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx);
 697	efx->txq_wake_thresh = efx->txq_stop_thresh / 2;
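    	/* Illustrative example: with 1024 TX entries and a worst case
    	 * of (say) 24 descriptors per skb, the queue is stopped at
    	 * 1000 used entries and woken again at 500.
    	 */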
 698
 699	/* Initialise the channels */
 700	efx_for_each_channel(channel, efx) {
 701		efx_for_each_channel_tx_queue(tx_queue, channel) {
 702			efx_init_tx_queue(tx_queue);
 703			atomic_inc(&efx->active_queues);
 704		}
 705
 706		efx_for_each_channel_rx_queue(rx_queue, channel) {
 707			efx_init_rx_queue(rx_queue);
 708			atomic_inc(&efx->active_queues);
 709			efx_stop_eventq(channel);
 710			efx_fast_push_rx_descriptors(rx_queue, false);
 711			efx_start_eventq(channel);
 712		}
 713
 714		WARN_ON(channel->rx_pkt_n_frags);
 715	}
 716
 717	efx_ptp_start_datapath(efx);
 718
 719	if (netif_device_present(efx->net_dev))
 720		netif_tx_wake_all_queues(efx->net_dev);
 721}
 722
 723static void efx_stop_datapath(struct efx_nic *efx)
 724{
 725	struct efx_channel *channel;
 726	struct efx_tx_queue *tx_queue;
 727	struct efx_rx_queue *rx_queue;
 728	int rc;
 729
 730	EFX_ASSERT_RESET_SERIALISED(efx);
 731	BUG_ON(efx->port_enabled);
 732
 733	efx_ptp_stop_datapath(efx);
 734
 735	/* Stop RX refill */
 736	efx_for_each_channel(channel, efx) {
 737		efx_for_each_channel_rx_queue(rx_queue, channel)
 738			rx_queue->refill_enabled = false;
 739	}
 740
 741	efx_for_each_channel(channel, efx) {
 742		/* RX packet processing is pipelined, so wait for the
 743		 * NAPI handler to complete.  At least event queue 0
 744		 * might be kept active by non-data events, so don't
 745		 * use napi_synchronize() but actually disable NAPI
 746		 * temporarily.
 747		 */
 748		if (efx_channel_has_rx_queue(channel)) {
 749			efx_stop_eventq(channel);
 750			efx_start_eventq(channel);
 751		}
 752	}
 753
 754	rc = efx->type->fini_dmaq(efx);
 755	if (rc) {
 756		netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
 757	} else {
 758		netif_dbg(efx, drv, efx->net_dev,
 759			  "successfully flushed all queues\n");
 760	}
 761
 762	efx_for_each_channel(channel, efx) {
 763		efx_for_each_channel_rx_queue(rx_queue, channel)
 764			efx_fini_rx_queue(rx_queue);
 765		efx_for_each_possible_channel_tx_queue(tx_queue, channel)
 766			efx_fini_tx_queue(tx_queue);
 767	}
 768}
 769
 770static void efx_remove_channel(struct efx_channel *channel)
 771{
 772	struct efx_tx_queue *tx_queue;
 773	struct efx_rx_queue *rx_queue;
 774
 775	netif_dbg(channel->efx, drv, channel->efx->net_dev,
 776		  "destroy chan %d\n", channel->channel);
 777
 778	efx_for_each_channel_rx_queue(rx_queue, channel)
 779		efx_remove_rx_queue(rx_queue);
 780	efx_for_each_possible_channel_tx_queue(tx_queue, channel)
 781		efx_remove_tx_queue(tx_queue);
 782	efx_remove_eventq(channel);
 783	channel->type->post_remove(channel);
 784}
 785
 786static void efx_remove_channels(struct efx_nic *efx)
 787{
 788	struct efx_channel *channel;
 789
 790	efx_for_each_channel(channel, efx)
 791		efx_remove_channel(channel);
 792}
 793
 794int
 795efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
 796{
 797	struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
 798	u32 old_rxq_entries, old_txq_entries;
 799	unsigned int i, next_buffer_table = 0;
 800	int rc, rc2;
 801
 802	rc = efx_check_disabled(efx);
 803	if (rc)
 804		return rc;
 805
 806	/* Not all channels should be reallocated. We must avoid
 807	 * reallocating their buffer table entries.
 808	 */
 809	efx_for_each_channel(channel, efx) {
 810		struct efx_rx_queue *rx_queue;
 811		struct efx_tx_queue *tx_queue;
 812
 813		if (channel->type->copy)
 814			continue;
 815		next_buffer_table = max(next_buffer_table,
 816					channel->eventq.index +
 817					channel->eventq.entries);
 818		efx_for_each_channel_rx_queue(rx_queue, channel)
 819			next_buffer_table = max(next_buffer_table,
 820						rx_queue->rxd.index +
 821						rx_queue->rxd.entries);
 822		efx_for_each_channel_tx_queue(tx_queue, channel)
 823			next_buffer_table = max(next_buffer_table,
 824						tx_queue->txd.index +
 825						tx_queue->txd.entries);
 826	}
 827
 828	efx_device_detach_sync(efx);
 829	efx_stop_all(efx);
 830	efx_soft_disable_interrupts(efx);
 831
 832	/* Clone channels (where possible) */
 833	memset(other_channel, 0, sizeof(other_channel));
 834	for (i = 0; i < efx->n_channels; i++) {
 835		channel = efx->channel[i];
 836		if (channel->type->copy)
 837			channel = channel->type->copy(channel);
 838		if (!channel) {
 839			rc = -ENOMEM;
 840			goto out;
 841		}
 842		other_channel[i] = channel;
 843	}
 844
 845	/* Swap entry counts and channel pointers */
 846	old_rxq_entries = efx->rxq_entries;
 847	old_txq_entries = efx->txq_entries;
 848	efx->rxq_entries = rxq_entries;
 849	efx->txq_entries = txq_entries;
 850	for (i = 0; i < efx->n_channels; i++) {
 851		channel = efx->channel[i];
 852		efx->channel[i] = other_channel[i];
 853		other_channel[i] = channel;
 854	}
 855
 856	/* Restart buffer table allocation */
 857	efx->next_buffer_table = next_buffer_table;
 858
 859	for (i = 0; i < efx->n_channels; i++) {
 860		channel = efx->channel[i];
 861		if (!channel->type->copy)
 862			continue;
 863		rc = efx_probe_channel(channel);
 864		if (rc)
 865			goto rollback;
 866		efx_init_napi_channel(efx->channel[i]);
 867	}
 868
 869out:
 870	/* Destroy unused channel structures */
 871	for (i = 0; i < efx->n_channels; i++) {
 872		channel = other_channel[i];
 873		if (channel && channel->type->copy) {
 874			efx_fini_napi_channel(channel);
 875			efx_remove_channel(channel);
 876			kfree(channel);
 877		}
 878	}
 879
 880	rc2 = efx_soft_enable_interrupts(efx);
 881	if (rc2) {
 882		rc = rc ? rc : rc2;
 883		netif_err(efx, drv, efx->net_dev,
 884			  "unable to restart interrupts on channel reallocation\n");
 885		efx_schedule_reset(efx, RESET_TYPE_DISABLE);
 886	} else {
 887		efx_start_all(efx);
 888		efx_device_attach_if_not_resetting(efx);
 889	}
 890	return rc;
 891
 892rollback:
 893	/* Swap back */
 894	efx->rxq_entries = old_rxq_entries;
 895	efx->txq_entries = old_txq_entries;
 896	for (i = 0; i < efx->n_channels; i++) {
 897		channel = efx->channel[i];
 898		efx->channel[i] = other_channel[i];
 899		other_channel[i] = channel;
 900	}
 901	goto out;
 902}
 903
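    /* Arm the slow fill timer: efx_rx_slow_fill() retries RX buffer
     * allocation in 100 ms, for when an immediate refill attempt could
     * not fill the ring.
     */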
 904void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
 905{
 906	mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100));
 907}
 908
 909static bool efx_default_channel_want_txqs(struct efx_channel *channel)
 910{
 911	return channel->channel - channel->efx->tx_channel_offset <
 912		channel->efx->n_tx_channels;
 913}
 914
 915static const struct efx_channel_type efx_default_channel_type = {
 916	.pre_probe		= efx_channel_dummy_op_int,
 917	.post_remove		= efx_channel_dummy_op_void,
 918	.get_name		= efx_get_channel_name,
 919	.copy			= efx_copy_channel,
 920	.want_txqs		= efx_default_channel_want_txqs,
 921	.keep_eventq		= false,
 922	.want_pio		= true,
 923};
 924
 925int efx_channel_dummy_op_int(struct efx_channel *channel)
 926{
 927	return 0;
 928}
 929
 930void efx_channel_dummy_op_void(struct efx_channel *channel)
 931{
 932}
 933
 934/**************************************************************************
 935 *
 936 * Port handling
 937 *
 938 **************************************************************************/
 939
 940/* This ensures that the kernel is kept informed (via
 941 * netif_carrier_on/off) of the link status, and also keeps the
 942 * port's TX queue stopped while the link is down.
 943 */
 944void efx_link_status_changed(struct efx_nic *efx)
 945{
 946	struct efx_link_state *link_state = &efx->link_state;
 947
 948	/* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
 949	 * that no events are triggered between unregister_netdev() and the
 950	 * driver unloading. A more general condition is that NETDEV_CHANGE
 951	 * can only be generated between NETDEV_UP and NETDEV_DOWN. */
 952	if (!netif_running(efx->net_dev))
 953		return;
 954
 955	if (link_state->up != netif_carrier_ok(efx->net_dev)) {
 956		efx->n_link_state_changes++;
 957
 958		if (link_state->up)
 959			netif_carrier_on(efx->net_dev);
 960		else
 961			netif_carrier_off(efx->net_dev);
 962	}
 963
 964	/* Status message for kernel log */
 965	if (link_state->up)
 966		netif_info(efx, link, efx->net_dev,
 967			   "link up at %uMbps %s-duplex (MTU %d)\n",
 968			   link_state->speed, link_state->fd ? "full" : "half",
 969			   efx->net_dev->mtu);
 970	else
 971		netif_info(efx, link, efx->net_dev, "link down\n");
 972}
 973
 974void efx_link_set_advertising(struct efx_nic *efx,
 975			      const unsigned long *advertising)
 976{
 977	memcpy(efx->link_advertising, advertising,
 978	       sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK()));
 979
 980	efx->link_advertising[0] |= ADVERTISED_Autoneg;
 981	if (advertising[0] & ADVERTISED_Pause)
 982		efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX);
 983	else
 984		efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
 985	if (advertising[0] & ADVERTISED_Asym_Pause)
 986		efx->wanted_fc ^= EFX_FC_TX;
 987}
 988
 989/* Equivalent to efx_link_set_advertising with all-zeroes, except does not
 990 * force the Autoneg bit on.
 991 */
 992void efx_link_clear_advertising(struct efx_nic *efx)
 993{
 994	bitmap_zero(efx->link_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS);
 995	efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
 996}
 997
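    /* Map the wanted flow-control mode onto the standard 802.3 pause
     * advertising bits: RX pause advertises Pause|Asym_Pause and TX
     * pause then toggles Asym_Pause, so symmetric RX+TX yields Pause,
     * RX-only yields Pause|Asym_Pause and TX-only yields Asym_Pause.
     */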
 998void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
 999{
1000	efx->wanted_fc = wanted_fc;
1001	if (efx->link_advertising[0]) {
1002		if (wanted_fc & EFX_FC_RX)
1003			efx->link_advertising[0] |= (ADVERTISED_Pause |
1004						     ADVERTISED_Asym_Pause);
1005		else
1006			efx->link_advertising[0] &= ~(ADVERTISED_Pause |
1007						      ADVERTISED_Asym_Pause);
1008		if (wanted_fc & EFX_FC_TX)
1009			efx->link_advertising[0] ^= ADVERTISED_Asym_Pause;
1010	}
1011}
1012
1013static void efx_fini_port(struct efx_nic *efx);
1014
1015/* We assume that efx->type->reconfigure_mac will always try to sync RX
1016 * filters and therefore needs to read-lock the filter table against freeing
1017 */
1018void efx_mac_reconfigure(struct efx_nic *efx)
1019{
1020	down_read(&efx->filter_sem);
1021	efx->type->reconfigure_mac(efx);
1022	up_read(&efx->filter_sem);
1023}
1024
1025/* Push loopback/power/transmit disable settings to the PHY, and reconfigure
1026 * the MAC appropriately. All other PHY configuration changes are pushed
1027 * through phy_op->set_settings(), and pushed asynchronously to the MAC
1028 * through efx_monitor().
1029 *
1030 * Callers must hold the mac_lock
1031 */
1032int __efx_reconfigure_port(struct efx_nic *efx)
1033{
1034	enum efx_phy_mode phy_mode;
1035	int rc;
1036
1037	WARN_ON(!mutex_is_locked(&efx->mac_lock));
1038
1039	/* Disable PHY transmit in mac level loopbacks */
1040	phy_mode = efx->phy_mode;
1041	if (LOOPBACK_INTERNAL(efx))
1042		efx->phy_mode |= PHY_MODE_TX_DISABLED;
1043	else
1044		efx->phy_mode &= ~PHY_MODE_TX_DISABLED;
1045
1046	rc = efx->type->reconfigure_port(efx);
1047
1048	if (rc)
1049		efx->phy_mode = phy_mode;
1050
1051	return rc;
1052}
1053
1054/* Reinitialise the MAC to pick up new PHY settings, even if the port is
1055 * disabled. */
1056int efx_reconfigure_port(struct efx_nic *efx)
1057{
1058	int rc;
1059
1060	EFX_ASSERT_RESET_SERIALISED(efx);
1061
1062	mutex_lock(&efx->mac_lock);
1063	rc = __efx_reconfigure_port(efx);
1064	mutex_unlock(&efx->mac_lock);
1065
1066	return rc;
1067}
1068
1069/* Asynchronous work item for changing MAC promiscuity and multicast
1070 * hash.  Avoid a drain/rx_ingress enable by reconfiguring the current
1071 * MAC directly. */
1072static void efx_mac_work(struct work_struct *data)
1073{
1074	struct efx_nic *efx = container_of(data, struct efx_nic, mac_work);
1075
1076	mutex_lock(&efx->mac_lock);
1077	if (efx->port_enabled)
1078		efx_mac_reconfigure(efx);
1079	mutex_unlock(&efx->mac_lock);
1080}
1081
1082static int efx_probe_port(struct efx_nic *efx)
1083{
1084	int rc;
1085
1086	netif_dbg(efx, probe, efx->net_dev, "create port\n");
1087
1088	if (phy_flash_cfg)
1089		efx->phy_mode = PHY_MODE_SPECIAL;
1090
1091	/* Connect up MAC/PHY operations table */
1092	rc = efx->type->probe_port(efx);
1093	if (rc)
1094		return rc;
1095
1096	/* Initialise MAC address to permanent address */
1097	ether_addr_copy(efx->net_dev->dev_addr, efx->net_dev->perm_addr);
1098
1099	return 0;
1100}
1101
1102static int efx_init_port(struct efx_nic *efx)
1103{
1104	int rc;
1105
1106	netif_dbg(efx, drv, efx->net_dev, "init port\n");
1107
1108	mutex_lock(&efx->mac_lock);
1109
1110	rc = efx->phy_op->init(efx);
1111	if (rc)
1112		goto fail1;
1113
1114	efx->port_initialized = true;
1115
1116	/* Reconfigure the MAC before creating dma queues (required for
1117	 * Falcon/A1 where RX_INGR_EN/TX_DRAIN_EN isn't supported) */
1118	efx_mac_reconfigure(efx);
1119
1120	/* Ensure the PHY advertises the correct flow control settings */
1121	rc = efx->phy_op->reconfigure(efx);
1122	if (rc && rc != -EPERM)
1123		goto fail2;
1124
1125	mutex_unlock(&efx->mac_lock);
1126	return 0;
1127
1128fail2:
1129	efx->phy_op->fini(efx);
1130fail1:
1131	mutex_unlock(&efx->mac_lock);
1132	return rc;
1133}
1134
1135static void efx_start_port(struct efx_nic *efx)
1136{
1137	netif_dbg(efx, ifup, efx->net_dev, "start port\n");
1138	BUG_ON(efx->port_enabled);
1139
1140	mutex_lock(&efx->mac_lock);
1141	efx->port_enabled = true;
1142
1143	/* Ensure MAC ingress/egress is enabled */
1144	efx_mac_reconfigure(efx);
1145
1146	mutex_unlock(&efx->mac_lock);
1147}
1148
1149/* Cancel work for MAC reconfiguration, periodic hardware monitoring
1150 * and the async self-test, wait for them to finish and prevent them
1151 * being scheduled again.  This doesn't cover online resets, which
1152 * should only be cancelled when removing the device.
1153 */
1154static void efx_stop_port(struct efx_nic *efx)
1155{
1156	netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");
1157
1158	EFX_ASSERT_RESET_SERIALISED(efx);
1159
1160	mutex_lock(&efx->mac_lock);
1161	efx->port_enabled = false;
1162	mutex_unlock(&efx->mac_lock);
1163
1164	/* Serialise against efx_set_multicast_list() */
1165	netif_addr_lock_bh(efx->net_dev);
1166	netif_addr_unlock_bh(efx->net_dev);
1167
1168	cancel_delayed_work_sync(&efx->monitor_work);
1169	efx_selftest_async_cancel(efx);
1170	cancel_work_sync(&efx->mac_work);
1171}
1172
1173static void efx_fini_port(struct efx_nic *efx)
1174{
1175	netif_dbg(efx, drv, efx->net_dev, "shut down port\n");
1176
1177	if (!efx->port_initialized)
1178		return;
1179
1180	efx->phy_op->fini(efx);
1181	efx->port_initialized = false;
1182
1183	efx->link_state.up = false;
1184	efx_link_status_changed(efx);
1185}
1186
1187static void efx_remove_port(struct efx_nic *efx)
1188{
1189	netif_dbg(efx, drv, efx->net_dev, "destroying port\n");
1190
1191	efx->type->remove_port(efx);
1192}
1193
1194/**************************************************************************
1195 *
1196 * NIC handling
1197 *
1198 **************************************************************************/
1199
1200static LIST_HEAD(efx_primary_list);
1201static LIST_HEAD(efx_unassociated_list);
1202
1203static bool efx_same_controller(struct efx_nic *left, struct efx_nic *right)
1204{
1205	return left->type == right->type &&
1206		left->vpd_sn && right->vpd_sn &&
1207		!strcmp(left->vpd_sn, right->vpd_sn);
1208}
1209
1210static void efx_associate(struct efx_nic *efx)
1211{
1212	struct efx_nic *other, *next;
1213
1214	if (efx->primary == efx) {
1215		/* Adding primary function; look for secondaries */
1216
1217		netif_dbg(efx, probe, efx->net_dev, "adding to primary list\n");
1218		list_add_tail(&efx->node, &efx_primary_list);
1219
1220		list_for_each_entry_safe(other, next, &efx_unassociated_list,
1221					 node) {
1222			if (efx_same_controller(efx, other)) {
1223				list_del(&other->node);
1224				netif_dbg(other, probe, other->net_dev,
1225					  "moving to secondary list of %s %s\n",
1226					  pci_name(efx->pci_dev),
1227					  efx->net_dev->name);
1228				list_add_tail(&other->node,
1229					      &efx->secondary_list);
1230				other->primary = efx;
1231			}
1232		}
1233	} else {
1234		/* Adding secondary function; look for primary */
1235
1236		list_for_each_entry(other, &efx_primary_list, node) {
1237			if (efx_same_controller(efx, other)) {
1238				netif_dbg(efx, probe, efx->net_dev,
1239					  "adding to secondary list of %s %s\n",
1240					  pci_name(other->pci_dev),
1241					  other->net_dev->name);
1242				list_add_tail(&efx->node,
1243					      &other->secondary_list);
1244				efx->primary = other;
1245				return;
1246			}
1247		}
1248
1249		netif_dbg(efx, probe, efx->net_dev,
1250			  "adding to unassociated list\n");
1251		list_add_tail(&efx->node, &efx_unassociated_list);
1252	}
1253}
1254
1255static void efx_dissociate(struct efx_nic *efx)
1256{
1257	struct efx_nic *other, *next;
1258
1259	list_del(&efx->node);
1260	efx->primary = NULL;
1261
1262	list_for_each_entry_safe(other, next, &efx->secondary_list, node) {
1263		list_del(&other->node);
1264		netif_dbg(other, probe, other->net_dev,
1265			  "moving to unassociated list\n");
1266		list_add_tail(&other->node, &efx_unassociated_list);
1267		other->primary = NULL;
1268	}
1269}
1270
1271/* This configures the PCI device to enable I/O and DMA. */
1272static int efx_init_io(struct efx_nic *efx)
1273{
1274	struct pci_dev *pci_dev = efx->pci_dev;
1275	dma_addr_t dma_mask = efx->type->max_dma_mask;
1276	unsigned int mem_map_size = efx->type->mem_map_size(efx);
1277	int rc, bar;
1278
1279	netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");
1280
1281	bar = efx->type->mem_bar(efx);
1282
1283	rc = pci_enable_device(pci_dev);
1284	if (rc) {
1285		netif_err(efx, probe, efx->net_dev,
1286			  "failed to enable PCI device\n");
1287		goto fail1;
1288	}
1289
1290	pci_set_master(pci_dev);
1291
1292	/* Set the PCI DMA mask.  Try all possibilities from our
1293	 * genuine mask down to 32 bits, because some architectures
1294	 * (e.g. x86_64 with iommu_sac_force set) will allow 40-bit
1295	 * masks even though they reject 46-bit masks.
1296	 */
1297	while (dma_mask > 0x7fffffffUL) {
1298		rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
1299		if (rc == 0)
1300			break;
1301		dma_mask >>= 1;
1302	}
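    	/* Illustrative walk-through: from a hypothetical 46-bit
    	 * max_dma_mask, the loop offers 46-, 45-, ... down to 32-bit
    	 * masks until dma_set_mask_and_coherent() accepts one; anything
    	 * smaller falls through to the error path below.
    	 */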
1303	if (rc) {
1304		netif_err(efx, probe, efx->net_dev,
1305			  "could not find a suitable DMA mask\n");
1306		goto fail2;
1307	}
1308	netif_dbg(efx, probe, efx->net_dev,
1309		  "using DMA mask %llx\n", (unsigned long long) dma_mask);
1310
1311	efx->membase_phys = pci_resource_start(efx->pci_dev, bar);
1312	rc = pci_request_region(pci_dev, bar, "sfc");
1313	if (rc) {
1314		netif_err(efx, probe, efx->net_dev,
1315			  "request for memory BAR failed\n");
1316		rc = -EIO;
1317		goto fail3;
1318	}
1319	efx->membase = ioremap(efx->membase_phys, mem_map_size);
1320	if (!efx->membase) {
1321		netif_err(efx, probe, efx->net_dev,
1322			  "could not map memory BAR at %llx+%x\n",
1323			  (unsigned long long)efx->membase_phys, mem_map_size);
1324		rc = -ENOMEM;
1325		goto fail4;
1326	}
1327	netif_dbg(efx, probe, efx->net_dev,
1328		  "memory BAR at %llx+%x (virtual %p)\n",
1329		  (unsigned long long)efx->membase_phys, mem_map_size,
1330		  efx->membase);
1331
1332	return 0;
1333
1334 fail4:
1335	pci_release_region(efx->pci_dev, bar);
1336 fail3:
1337	efx->membase_phys = 0;
1338 fail2:
1339	pci_disable_device(efx->pci_dev);
1340 fail1:
1341	return rc;
1342}
1343
1344static void efx_fini_io(struct efx_nic *efx)
1345{
1346	int bar;
1347
1348	netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n");
1349
1350	if (efx->membase) {
1351		iounmap(efx->membase);
1352		efx->membase = NULL;
1353	}
1354
1355	if (efx->membase_phys) {
1356		bar = efx->type->mem_bar(efx);
1357		pci_release_region(efx->pci_dev, bar);
1358		efx->membase_phys = 0;
1359	}
1360
1361	/* Don't disable bus-mastering if VFs are assigned */
1362	if (!pci_vfs_assigned(efx->pci_dev))
1363		pci_disable_device(efx->pci_dev);
1364}
1365
1366void efx_set_default_rx_indir_table(struct efx_nic *efx,
1367				    struct efx_rss_context *ctx)
1368{
1369	size_t i;
1370
1371	for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
1372		ctx->rx_indir_table[i] =
1373			ethtool_rxfh_indir_default(i, efx->rss_spread);
1374}
1375
1376static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
1377{
1378	cpumask_var_t thread_mask;
1379	unsigned int count;
1380	int cpu;
1381
1382	if (rss_cpus) {
1383		count = rss_cpus;
1384	} else {
1385		if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
1386			netif_warn(efx, probe, efx->net_dev,
1387				   "RSS disabled due to allocation failure\n");
1388			return 1;
1389		}
1390
1391		count = 0;
1392		for_each_online_cpu(cpu) {
1393			if (!cpumask_test_cpu(cpu, thread_mask)) {
1394				++count;
1395				cpumask_or(thread_mask, thread_mask,
1396					   topology_sibling_cpumask(cpu));
1397			}
1398		}
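    		/* Each physical core contributes one channel: on a
    		 * hypothetical 8-core/16-thread machine this counts 8,
    		 * since topology_sibling_cpumask() folds hyperthreads
    		 * together.
    		 */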
1399
1400		free_cpumask_var(thread_mask);
1401	}
1402
1403	if (count > EFX_MAX_RX_QUEUES) {
1404		netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
1405			       "Reducing number of rx queues from %u to %u.\n",
1406			       count, EFX_MAX_RX_QUEUES);
1407		count = EFX_MAX_RX_QUEUES;
1408	}
1409
1410	/* If RSS is requested for the PF *and* VFs then we can't write RSS
1411	 * table entries that are inaccessible to VFs
1412	 */
1413#ifdef CONFIG_SFC_SRIOV
1414	if (efx->type->sriov_wanted) {
1415		if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
1416		    count > efx_vf_size(efx)) {
1417			netif_warn(efx, probe, efx->net_dev,
1418				   "Reducing number of RSS channels from %u to %u for "
1419				   "VF support. Increase vf-msix-limit to use more "
1420				   "channels on the PF.\n",
1421				   count, efx_vf_size(efx));
1422			count = efx_vf_size(efx);
1423		}
1424	}
1425#endif
1426
1427	return count;
1428}
1429
1430/* Probe the number and type of interrupts we are able to obtain, and
1431 * the resulting numbers of channels and RX queues.
1432 */
1433static int efx_probe_interrupts(struct efx_nic *efx)
1434{
1435	unsigned int extra_channels = 0;
1436	unsigned int i, j;
1437	int rc;
1438
1439	for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++)
1440		if (efx->extra_channel_type[i])
1441			++extra_channels;
1442
1443	if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
1444		struct msix_entry xentries[EFX_MAX_CHANNELS];
1445		unsigned int n_channels;
1446
1447		n_channels = efx_wanted_parallelism(efx);
1448		if (efx_separate_tx_channels)
1449			n_channels *= 2;
1450		n_channels += extra_channels;
1451		n_channels = min(n_channels, efx->max_channels);
1452
1453		for (i = 0; i < n_channels; i++)
1454			xentries[i].entry = i;
1455		rc = pci_enable_msix_range(efx->pci_dev,
1456					   xentries, 1, n_channels);
1457		if (rc < 0) {
1458			/* Fall back to single channel MSI */
1459			netif_err(efx, drv, efx->net_dev,
1460				  "could not enable MSI-X\n");
1461			if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI)
1462				efx->interrupt_mode = EFX_INT_MODE_MSI;
1463			else
1464				return rc;
1465		} else if (rc < n_channels) {
1466			netif_err(efx, drv, efx->net_dev,
1467				  "WARNING: Insufficient MSI-X vectors"
1468				  " available (%d < %u).\n", rc, n_channels);
1469			netif_err(efx, drv, efx->net_dev,
1470				  "WARNING: Performance may be reduced.\n");
1471			n_channels = rc;
1472		}
1473
1474		if (rc > 0) {
1475			efx->n_channels = n_channels;
1476			if (n_channels > extra_channels)
1477				n_channels -= extra_channels;
1478			if (efx_separate_tx_channels) {
1479				efx->n_tx_channels = min(max(n_channels / 2,
1480							     1U),
1481							 efx->max_tx_channels);
1482				efx->n_rx_channels = max(n_channels -
1483							 efx->n_tx_channels,
1484							 1U);
1485			} else {
1486				efx->n_tx_channels = min(n_channels,
1487							 efx->max_tx_channels);
1488				efx->n_rx_channels = n_channels;
1489			}
1490			for (i = 0; i < efx->n_channels; i++)
1491				efx_get_channel(efx, i)->irq =
1492					xentries[i].vector;
1493		}
1494	}
1495
1496	/* Try single interrupt MSI */
1497	if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
1498		efx->n_channels = 1;
1499		efx->n_rx_channels = 1;
1500		efx->n_tx_channels = 1;
1501		rc = pci_enable_msi(efx->pci_dev);
1502		if (rc == 0) {
1503			efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
1504		} else {
1505			netif_err(efx, drv, efx->net_dev,
1506				  "could not enable MSI\n");
1507			if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY)
1508				efx->interrupt_mode = EFX_INT_MODE_LEGACY;
1509			else
1510				return rc;
1511		}
1512	}
1513
1514	/* Assume legacy interrupts */
1515	if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
1516		efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
1517		efx->n_rx_channels = 1;
1518		efx->n_tx_channels = 1;
1519		efx->legacy_irq = efx->pci_dev->irq;
1520	}
1521
1522	/* Assign extra channels if possible */
1523	efx->n_extra_tx_channels = 0;
1524	j = efx->n_channels;
1525	for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
1526		if (!efx->extra_channel_type[i])
1527			continue;
1528		if (efx->interrupt_mode != EFX_INT_MODE_MSIX ||
1529		    efx->n_channels <= extra_channels) {
1530			efx->extra_channel_type[i]->handle_no_channel(efx);
1531		} else {
1532			--j;
1533			efx_get_channel(efx, j)->type =
1534				efx->extra_channel_type[i];
1535			if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
1536				efx->n_extra_tx_channels++;
1537		}
1538	}
1539
1540	/* RSS might be usable on VFs even if it is disabled on the PF */
1541#ifdef CONFIG_SFC_SRIOV
1542	if (efx->type->sriov_wanted) {
1543		efx->rss_spread = ((efx->n_rx_channels > 1 ||
1544				    !efx->type->sriov_wanted(efx)) ?
1545				   efx->n_rx_channels : efx_vf_size(efx));
1546		return 0;
1547	}
1548#endif
1549	efx->rss_spread = efx->n_rx_channels;
1550
1551	return 0;
1552}
1553
1554static int efx_soft_enable_interrupts(struct efx_nic *efx)
1555{
1556	struct efx_channel *channel, *end_channel;
1557	int rc;
1558
1559	BUG_ON(efx->state == STATE_DISABLED);
1560
1561	efx->irq_soft_enabled = true;
1562	smp_wmb();
1563
1564	efx_for_each_channel(channel, efx) {
1565		if (!channel->type->keep_eventq) {
1566			rc = efx_init_eventq(channel);
1567			if (rc)
1568				goto fail;
1569		}
1570		efx_start_eventq(channel);
1571	}
1572
1573	efx_mcdi_mode_event(efx);
1574
1575	return 0;
1576fail:
1577	end_channel = channel;
1578	efx_for_each_channel(channel, efx) {
1579		if (channel == end_channel)
1580			break;
1581		efx_stop_eventq(channel);
1582		if (!channel->type->keep_eventq)
1583			efx_fini_eventq(channel);
1584	}
1585
1586	return rc;
1587}
1588
1589static void efx_soft_disable_interrupts(struct efx_nic *efx)
1590{
1591	struct efx_channel *channel;
1592
1593	if (efx->state == STATE_DISABLED)
1594		return;
1595
1596	efx_mcdi_mode_poll(efx);
1597
1598	efx->irq_soft_enabled = false;
1599	smp_wmb();
1600
1601	if (efx->legacy_irq)
1602		synchronize_irq(efx->legacy_irq);
1603
1604	efx_for_each_channel(channel, efx) {
1605		if (channel->irq)
1606			synchronize_irq(channel->irq);
1607
1608		efx_stop_eventq(channel);
1609		if (!channel->type->keep_eventq)
1610			efx_fini_eventq(channel);
1611	}
1612
1613	/* Flush the asynchronous MCDI request queue */
1614	efx_mcdi_flush_async(efx);
1615}
1616
1617static int efx_enable_interrupts(struct efx_nic *efx)
1618{
1619	struct efx_channel *channel, *end_channel;
1620	int rc;
1621
1622	BUG_ON(efx->state == STATE_DISABLED);
1623
1624	if (efx->eeh_disabled_legacy_irq) {
1625		enable_irq(efx->legacy_irq);
1626		efx->eeh_disabled_legacy_irq = false;
1627	}
1628
1629	efx->type->irq_enable_master(efx);
1630
1631	efx_for_each_channel(channel, efx) {
1632		if (channel->type->keep_eventq) {
1633			rc = efx_init_eventq(channel);
1634			if (rc)
1635				goto fail;
1636		}
1637	}
1638
1639	rc = efx_soft_enable_interrupts(efx);
1640	if (rc)
1641		goto fail;
1642
1643	return 0;
1644
1645fail:
1646	end_channel = channel;
1647	efx_for_each_channel(channel, efx) {
1648		if (channel == end_channel)
1649			break;
1650		if (channel->type->keep_eventq)
1651			efx_fini_eventq(channel);
1652	}
1653
1654	efx->type->irq_disable_non_ev(efx);
1655
1656	return rc;
1657}
1658
1659static void efx_disable_interrupts(struct efx_nic *efx)
1660{
1661	struct efx_channel *channel;
1662
1663	efx_soft_disable_interrupts(efx);
1664
1665	efx_for_each_channel(channel, efx) {
1666		if (channel->type->keep_eventq)
1667			efx_fini_eventq(channel);
1668	}
1669
1670	efx->type->irq_disable_non_ev(efx);
1671}
1672
1673static void efx_remove_interrupts(struct efx_nic *efx)
1674{
1675	struct efx_channel *channel;
1676
1677	/* Remove MSI/MSI-X interrupts */
1678	efx_for_each_channel(channel, efx)
1679		channel->irq = 0;
1680	pci_disable_msi(efx->pci_dev);
1681	pci_disable_msix(efx->pci_dev);
1682
1683	/* Remove legacy interrupt */
1684	efx->legacy_irq = 0;
1685}
1686
1687static void efx_set_channels(struct efx_nic *efx)
1688{
1689	struct efx_channel *channel;
1690	struct efx_tx_queue *tx_queue;
1691
1692	efx->tx_channel_offset =
1693		efx_separate_tx_channels ?
1694		efx->n_channels - efx->n_tx_channels : 0;
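    	/* Hypothetical example: with efx_separate_tx_channels set, 8
    	 * channels and 4 TX channels, tx_channel_offset is 4, so
    	 * channels 0-3 are RX-only and channels 4-7 carry the
    	 * (renumbered) TX queues.
    	 */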
1695
1696	/* We need to mark which channels really have RX and TX
1697	 * queues, and adjust the TX queue numbers if we have separate
1698	 * RX-only and TX-only channels.
1699	 */
1700	efx_for_each_channel(channel, efx) {
1701		if (channel->channel < efx->n_rx_channels)
1702			channel->rx_queue.core_index = channel->channel;
1703		else
1704			channel->rx_queue.core_index = -1;
1705
1706		efx_for_each_channel_tx_queue(tx_queue, channel)
1707			tx_queue->queue -= (efx->tx_channel_offset *
1708					    EFX_TXQ_TYPES);
1709	}
1710}
1711
1712static int efx_probe_nic(struct efx_nic *efx)
1713{
1714	int rc;
1715
1716	netif_dbg(efx, probe, efx->net_dev, "creating NIC\n");
1717
1718	/* Carry out hardware-type specific initialisation */
1719	rc = efx->type->probe(efx);
1720	if (rc)
1721		return rc;
1722
1723	do {
1724		if (!efx->max_channels || !efx->max_tx_channels) {
1725			netif_err(efx, drv, efx->net_dev,
1726				  "Insufficient resources to allocate"
1727				  " any channels\n");
1728			rc = -ENOSPC;
1729			goto fail1;
1730		}
1731
1732		/* Determine the number of channels and queues by trying
1733		 * to hook in MSI-X interrupts.
1734		 */
1735		rc = efx_probe_interrupts(efx);
1736		if (rc)
1737			goto fail1;
1738
1739		efx_set_channels(efx);
1740
1741		/* dimension_resources can fail with EAGAIN */
1742		rc = efx->type->dimension_resources(efx);
1743		if (rc != 0 && rc != -EAGAIN)
1744			goto fail2;
1745
1746		if (rc == -EAGAIN)
1747			/* try again with new max_channels */
1748			efx_remove_interrupts(efx);
1749
1750	} while (rc == -EAGAIN);
1751
1752	if (efx->n_channels > 1)
1753		netdev_rss_key_fill(efx->rss_context.rx_hash_key,
1754				    sizeof(efx->rss_context.rx_hash_key));
1755	efx_set_default_rx_indir_table(efx, &efx->rss_context);
1756
1757	netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
1758	netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
1759
1760	/* Initialise the interrupt moderation settings */
1761	efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000);
1762	efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true,
1763				true);
1764
1765	return 0;
1766
1767fail2:
1768	efx_remove_interrupts(efx);
1769fail1:
1770	efx->type->remove(efx);
1771	return rc;
1772}
1773
1774static void efx_remove_nic(struct efx_nic *efx)
1775{
1776	netif_dbg(efx, drv, efx->net_dev, "destroying NIC\n");
1777
1778	efx_remove_interrupts(efx);
1779	efx->type->remove(efx);
1780}
1781
1782static int efx_probe_filters(struct efx_nic *efx)
1783{
1784	int rc;
1785
1786	init_rwsem(&efx->filter_sem);
1787	mutex_lock(&efx->mac_lock);
1788	down_write(&efx->filter_sem);
1789	rc = efx->type->filter_table_probe(efx);
1790	if (rc)
1791		goto out_unlock;
1792
1793#ifdef CONFIG_RFS_ACCEL
1794	if (efx->type->offload_features & NETIF_F_NTUPLE) {
1795		struct efx_channel *channel;
1796		int i, success = 1;
1797
1798		efx_for_each_channel(channel, efx) {
1799			channel->rps_flow_id =
1800				kcalloc(efx->type->max_rx_ip_filters,
1801					sizeof(*channel->rps_flow_id),
1802					GFP_KERNEL);
1803			if (!channel->rps_flow_id)
1804				success = 0;
1805			else
1806				for (i = 0;
1807				     i < efx->type->max_rx_ip_filters;
1808				     ++i)
1809					channel->rps_flow_id[i] =
1810						RPS_FLOW_ID_INVALID;
1811		}
1812
1813		if (!success) {
1814			efx_for_each_channel(channel, efx)
1815				kfree(channel->rps_flow_id);
1816			efx->type->filter_table_remove(efx);
1817			rc = -ENOMEM;
1818			goto out_unlock;
1819		}
1820
1821		efx->rps_expire_index = efx->rps_expire_channel = 0;
1822	}
1823#endif
1824out_unlock:
1825	up_write(&efx->filter_sem);
1826	mutex_unlock(&efx->mac_lock);
1827	return rc;
1828}
1829
1830static void efx_remove_filters(struct efx_nic *efx)
1831{
1832#ifdef CONFIG_RFS_ACCEL
1833	struct efx_channel *channel;
1834
1835	efx_for_each_channel(channel, efx)
1836		kfree(channel->rps_flow_id);
1837#endif
1838	down_write(&efx->filter_sem);
1839	efx->type->filter_table_remove(efx);
1840	up_write(&efx->filter_sem);
1841}
1842
1843static void efx_restore_filters(struct efx_nic *efx)
1844{
1845	down_read(&efx->filter_sem);
1846	efx->type->filter_table_restore(efx);
1847	up_read(&efx->filter_sem);
1848}
1849
1850/**************************************************************************
1851 *
1852 * NIC startup/shutdown
1853 *
1854 *************************************************************************/
1855
1856static int efx_probe_all(struct efx_nic *efx)
1857{
1858	int rc;
1859
1860	rc = efx_probe_nic(efx);
1861	if (rc) {
1862		netif_err(efx, probe, efx->net_dev, "failed to create NIC\n");
1863		goto fail1;
1864	}
1865
1866	rc = efx_probe_port(efx);
1867	if (rc) {
1868		netif_err(efx, probe, efx->net_dev, "failed to create port\n");
1869		goto fail2;
1870	}
1871
1872	BUILD_BUG_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_RXQ_MIN_ENT);
1873	if (WARN_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_TXQ_MIN_ENT(efx))) {
1874		rc = -EINVAL;
1875		goto fail3;
1876	}
1877	efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE;
1878
1879#ifdef CONFIG_SFC_SRIOV
1880	rc = efx->type->vswitching_probe(efx);
1881	if (rc) /* not fatal; the PF will still work fine */
1882		netif_warn(efx, probe, efx->net_dev,
1883			   "failed to setup vswitching rc=%d;"
1884			   " VFs may not function\n", rc);
1885#endif
1886
1887	rc = efx_probe_filters(efx);
1888	if (rc) {
1889		netif_err(efx, probe, efx->net_dev,
1890			  "failed to create filter tables\n");
1891		goto fail4;
1892	}
1893
1894	rc = efx_probe_channels(efx);
1895	if (rc)
1896		goto fail5;
1897
1898	return 0;
1899
1900 fail5:
1901	efx_remove_filters(efx);
1902 fail4:
1903#ifdef CONFIG_SFC_SRIOV
1904	efx->type->vswitching_remove(efx);
1905#endif
1906 fail3:
1907	efx_remove_port(efx);
1908 fail2:
1909	efx_remove_nic(efx);
1910 fail1:
1911	return rc;
1912}
1913
1914/* If the interface is supposed to be running but is not, start
1915 * the hardware and software data path, regular activity for the port
1916 * (MAC statistics, link polling, etc.) and schedule the port to be
1917 * reconfigured.  Interrupts must already be enabled.  This function
1918 * is safe to call multiple times, so long as the NIC is not disabled.
1919 * Requires the RTNL lock.
1920 */
1921static void efx_start_all(struct efx_nic *efx)
1922{
1923	EFX_ASSERT_RESET_SERIALISED(efx);
1924	BUG_ON(efx->state == STATE_DISABLED);
1925
1926	/* Check that it is appropriate to restart the interface. All
1927	 * of these flags are safe to read under just the rtnl lock */
1928	if (efx->port_enabled || !netif_running(efx->net_dev) ||
1929	    efx->reset_pending)
1930		return;
1931
1932	efx_start_port(efx);
1933	efx_start_datapath(efx);
1934
1935	/* Start the hardware monitor if there is one */
1936	if (efx->type->monitor != NULL)
1937		queue_delayed_work(efx->workqueue, &efx->monitor_work,
1938				   efx_monitor_interval);
1939
1940	/* Link state detection is normally event-driven; we have
1941	 * to poll now because we could have missed a change
1942	 */
1943	mutex_lock(&efx->mac_lock);
1944	if (efx->phy_op->poll(efx))
1945		efx_link_status_changed(efx);
1946	mutex_unlock(&efx->mac_lock);
1947
1948	efx->type->start_stats(efx);
1949	efx->type->pull_stats(efx);
1950	spin_lock_bh(&efx->stats_lock);
1951	efx->type->update_stats(efx, NULL, NULL);
1952	spin_unlock_bh(&efx->stats_lock);
1953}
1954
1955/* Quiesce the hardware and software data path, and regular activity
1956 * for the port without bringing the link down.  Safe to call multiple
1957 * times with the NIC in almost any state, but interrupts should be
1958 * enabled.  Requires the RTNL lock.
1959 */
1960static void efx_stop_all(struct efx_nic *efx)
1961{
1962	EFX_ASSERT_RESET_SERIALISED(efx);
1963
1964	/* port_enabled can be read safely under the rtnl lock */
1965	if (!efx->port_enabled)
1966		return;
1967
1968	/* update stats before we go down so we can accurately count
1969	 * rx_nodesc_drops
1970	 */
1971	efx->type->pull_stats(efx);
1972	spin_lock_bh(&efx->stats_lock);
1973	efx->type->update_stats(efx, NULL, NULL);
1974	spin_unlock_bh(&efx->stats_lock);
1975	efx->type->stop_stats(efx);
1976	efx_stop_port(efx);
1977
1978	/* Stop the kernel transmit interface.  This is only valid if
1979	 * the device is stopped or detached; otherwise the watchdog
1980	 * may fire immediately.
1981	 */
1982	WARN_ON(netif_running(efx->net_dev) &&
1983		netif_device_present(efx->net_dev));
1984	netif_tx_disable(efx->net_dev);
1985
1986	efx_stop_datapath(efx);
1987}
1988
1989static void efx_remove_all(struct efx_nic *efx)
1990{
1991	efx_remove_channels(efx);
1992	efx_remove_filters(efx);
1993#ifdef CONFIG_SFC_SRIOV
1994	efx->type->vswitching_remove(efx);
1995#endif
1996	efx_remove_port(efx);
1997	efx_remove_nic(efx);
1998}
1999
2000/**************************************************************************
2001 *
2002 * Interrupt moderation
2003 *
2004 **************************************************************************/
2005unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs)
2006{
2007	if (usecs == 0)
2008		return 0;
2009	if (usecs * 1000 < efx->timer_quantum_ns)
2010		return 1; /* never round down to 0 */
2011	return usecs * 1000 / efx->timer_quantum_ns;
2012}
2013
2014unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks)
2015{
2016	/* We must round up when converting ticks to microseconds
2017	 * because we round down when converting the other way.
2018	 */
2019	return DIV_ROUND_UP(ticks * efx->timer_quantum_ns, 1000);
2020}
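
    /* Worked example (hypothetical quantum): with timer_quantum_ns =
     * 6144, efx_usecs_to_ticks(efx, 60) = 60000 / 6144 = 9 ticks, while
     * efx_ticks_to_usecs(efx, 9) = DIV_ROUND_UP(55296, 1000) = 56 usec;
     * the round trip therefore never reports more than was requested.
     */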
2021
2022/* Set interrupt moderation parameters */
2023int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
2024			    unsigned int rx_usecs, bool rx_adaptive,
2025			    bool rx_may_override_tx)
2026{
2027	struct efx_channel *channel;
2028	unsigned int timer_max_us;
2029
2030	EFX_ASSERT_RESET_SERIALISED(efx);
2031
2032	timer_max_us = efx->timer_max_ns / 1000;
2033
2034	if (tx_usecs > timer_max_us || rx_usecs > timer_max_us)
2035		return -EINVAL;
2036
2037	if (tx_usecs != rx_usecs && efx->tx_channel_offset == 0 &&
2038	    !rx_may_override_tx) {
2039		netif_err(efx, drv, efx->net_dev, "Channels are shared. "
2040			  "RX and TX IRQ moderation must be equal\n");
2041		return -EINVAL;
2042	}
2043
2044	efx->irq_rx_adaptive = rx_adaptive;
2045	efx->irq_rx_moderation_us = rx_usecs;
2046	efx_for_each_channel(channel, efx) {
2047		if (efx_channel_has_rx_queue(channel))
2048			channel->irq_moderation_us = rx_usecs;
2049		else if (efx_channel_has_tx_queues(channel))
2050			channel->irq_moderation_us = tx_usecs;
2051	}
2052
2053	return 0;
2054}
2055
2056void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs,
2057			    unsigned int *rx_usecs, bool *rx_adaptive)
2058{
2059	*rx_adaptive = efx->irq_rx_adaptive;
2060	*rx_usecs = efx->irq_rx_moderation_us;
2061
2062	/* If channels are shared between RX and TX, so is IRQ
2063	 * moderation.  Otherwise, IRQ moderation is the same for all
2064	 * TX channels and is not adaptive.
2065	 */
2066	if (efx->tx_channel_offset == 0) {
2067		*tx_usecs = *rx_usecs;
2068	} else {
2069		struct efx_channel *tx_channel;
2070
2071		tx_channel = efx->channel[efx->tx_channel_offset];
2072		*tx_usecs = tx_channel->irq_moderation_us;
2073	}
2074}
2075
2076/**************************************************************************
2077 *
2078 * Hardware monitor
2079 *
2080 **************************************************************************/
2081
2082/* Run periodically off the general workqueue */
2083static void efx_monitor(struct work_struct *data)
2084{
2085	struct efx_nic *efx = container_of(data, struct efx_nic,
2086					   monitor_work.work);
2087
2088	netif_vdbg(efx, timer, efx->net_dev,
2089		   "hardware monitor executing on CPU %d\n",
2090		   raw_smp_processor_id());
2091	BUG_ON(efx->type->monitor == NULL);
2092
2093	/* If the mac_lock is already held then a port reconfiguration is
2094	 * probably already in progress, and it will do most of the work
2095	 * of monitor() anyway. */
2096	if (mutex_trylock(&efx->mac_lock)) {
2097		if (efx->port_enabled)
2098			efx->type->monitor(efx);
2099		mutex_unlock(&efx->mac_lock);
2100	}
2101
2102	queue_delayed_work(efx->workqueue, &efx->monitor_work,
2103			   efx_monitor_interval);
2104}
2105
2106/**************************************************************************
2107 *
2108 * ioctls
2109 *
2110 *************************************************************************/
2111
2112/* Net device ioctl
2113 * Context: process, rtnl_lock() held.
2114 */
2115static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
2116{
2117	struct efx_nic *efx = netdev_priv(net_dev);
2118	struct mii_ioctl_data *data = if_mii(ifr);
2119
2120	if (cmd == SIOCSHWTSTAMP)
2121		return efx_ptp_set_ts_config(efx, ifr);
2122	if (cmd == SIOCGHWTSTAMP)
2123		return efx_ptp_get_ts_config(efx, ifr);
2124
2125	/* Convert phy_id from older PRTAD/DEVAD format */
2126	if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) &&
2127	    (data->phy_id & 0xfc00) == 0x0400)
2128		data->phy_id ^= MDIO_PHY_ID_C45 | 0x0400;
2129
2130	return mdio_mii_ioctl(&efx->mdio, data, cmd);
2131}
2132
2133/**************************************************************************
2134 *
2135 * NAPI interface
2136 *
2137 **************************************************************************/
2138
2139static void efx_init_napi_channel(struct efx_channel *channel)
2140{
2141	struct efx_nic *efx = channel->efx;
2142
2143	channel->napi_dev = efx->net_dev;
2144	netif_napi_add(channel->napi_dev, &channel->napi_str,
2145		       efx_poll, napi_weight);
2146}
2147
2148static void efx_init_napi(struct efx_nic *efx)
2149{
2150	struct efx_channel *channel;
2151
2152	efx_for_each_channel(channel, efx)
2153		efx_init_napi_channel(channel);
2154}
2155
2156static void efx_fini_napi_channel(struct efx_channel *channel)
2157{
2158	if (channel->napi_dev)
2159		netif_napi_del(&channel->napi_str);
2160
2161	channel->napi_dev = NULL;
2162}
2163
2164static void efx_fini_napi(struct efx_nic *efx)
2165{
2166	struct efx_channel *channel;
2167
2168	efx_for_each_channel(channel, efx)
2169		efx_fini_napi_channel(channel);
2170}
2171
2172/**************************************************************************
2173 *
2174 * Kernel netpoll interface
2175 *
2176 *************************************************************************/
2177
2178#ifdef CONFIG_NET_POLL_CONTROLLER
2179
2180/* Although in the common case interrupts will be disabled, this is not
2181 * guaranteed. However, all our work happens inside the NAPI callback,
2182 * so no locking is required.
2183 */
2184static void efx_netpoll(struct net_device *net_dev)
2185{
2186	struct efx_nic *efx = netdev_priv(net_dev);
2187	struct efx_channel *channel;
2188
2189	efx_for_each_channel(channel, efx)
2190		efx_schedule_channel(channel);
2191}
2192
2193#endif
2194
2195/**************************************************************************
2196 *
2197 * Kernel net device interface
2198 *
2199 *************************************************************************/
2200
2201/* Context: process, rtnl_lock() held. */
2202int efx_net_open(struct net_device *net_dev)
2203{
2204	struct efx_nic *efx = netdev_priv(net_dev);
2205	int rc;
2206
2207	netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n",
2208		  raw_smp_processor_id());
2209
2210	rc = efx_check_disabled(efx);
2211	if (rc)
2212		return rc;
2213	if (efx->phy_mode & PHY_MODE_SPECIAL)
2214		return -EBUSY;
2215	if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL))
2216		return -EIO;
2217
2218	/* Notify the kernel of the link state polled during driver load,
2219	 * before the monitor starts running */
2220	efx_link_status_changed(efx);
2221
2222	efx_start_all(efx);
2223	if (efx->state == STATE_DISABLED || efx->reset_pending)
2224		netif_device_detach(efx->net_dev);
2225	efx_selftest_async_start(efx);
2226	return 0;
2227}
2228
2229/* Context: process, rtnl_lock() held.
2230 * Note that the kernel will ignore our return code; this method
2231 * should really be a void.
2232 */
2233int efx_net_stop(struct net_device *net_dev)
2234{
2235	struct efx_nic *efx = netdev_priv(net_dev);
2236
2237	netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n",
2238		  raw_smp_processor_id());
2239
2240	/* Stop the device and flush all the channels */
2241	efx_stop_all(efx);
2242
2243	return 0;
2244}
2245
2246/* Context: process, dev_base_lock or RTNL held, non-blocking. */
2247static void efx_net_stats(struct net_device *net_dev,
2248			  struct rtnl_link_stats64 *stats)
2249{
2250	struct efx_nic *efx = netdev_priv(net_dev);
2251
2252	spin_lock_bh(&efx->stats_lock);
2253	efx->type->update_stats(efx, NULL, stats);
2254	spin_unlock_bh(&efx->stats_lock);
2255}
2256
2257/* Context: netif_tx_lock held, BHs disabled. */
2258static void efx_watchdog(struct net_device *net_dev)
2259{
2260	struct efx_nic *efx = netdev_priv(net_dev);
2261
2262	netif_err(efx, tx_err, efx->net_dev,
2263		  "TX stuck with port_enabled=%d: resetting channels\n",
2264		  efx->port_enabled);
2265
2266	efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
2267}
2268
2269
2270/* Context: process, rtnl_lock() held. */
2271static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
2272{
2273	struct efx_nic *efx = netdev_priv(net_dev);
2274	int rc;
2275
2276	rc = efx_check_disabled(efx);
2277	if (rc)
2278		return rc;
2279
2280	netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
2281
2282	efx_device_detach_sync(efx);
2283	efx_stop_all(efx);
2284
2285	mutex_lock(&efx->mac_lock);
2286	net_dev->mtu = new_mtu;
2287	efx_mac_reconfigure(efx);
2288	mutex_unlock(&efx->mac_lock);
2289
2290	efx_start_all(efx);
2291	efx_device_attach_if_not_resetting(efx);
2292	return 0;
2293}
2294
2295static int efx_set_mac_address(struct net_device *net_dev, void *data)
2296{
2297	struct efx_nic *efx = netdev_priv(net_dev);
2298	struct sockaddr *addr = data;
2299	u8 *new_addr = addr->sa_data;
2300	u8 old_addr[ETH_ALEN];
2301	int rc;
2302
2303	if (!is_valid_ether_addr(new_addr)) {
2304		netif_err(efx, drv, efx->net_dev,
2305			  "invalid ethernet MAC address requested: %pM\n",
2306			  new_addr);
2307		return -EADDRNOTAVAIL;
2308	}
2309
2310	/* save old address */
2311	ether_addr_copy(old_addr, net_dev->dev_addr);
2312	ether_addr_copy(net_dev->dev_addr, new_addr);
2313	if (efx->type->set_mac_address) {
2314		rc = efx->type->set_mac_address(efx);
2315		if (rc) {
2316			ether_addr_copy(net_dev->dev_addr, old_addr);
2317			return rc;
2318		}
2319	}
2320
2321	/* Reconfigure the MAC */
2322	mutex_lock(&efx->mac_lock);
2323	efx_mac_reconfigure(efx);
2324	mutex_unlock(&efx->mac_lock);
2325
2326	return 0;
2327}
2328
2329/* Context: netif_addr_lock held, BHs disabled. */
2330static void efx_set_rx_mode(struct net_device *net_dev)
2331{
2332	struct efx_nic *efx = netdev_priv(net_dev);
2333
2334	if (efx->port_enabled)
2335		queue_work(efx->workqueue, &efx->mac_work);
2336	/* Otherwise efx_start_port() will do this */
2337}
2338
2339static int efx_set_features(struct net_device *net_dev, netdev_features_t data)
2340{
2341	struct efx_nic *efx = netdev_priv(net_dev);
2342	int rc;
2343
2344	/* If disabling RX n-tuple filtering, clear existing filters */
2345	if (net_dev->features & ~data & NETIF_F_NTUPLE) {
2346		rc = efx->type->filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL);
2347		if (rc)
2348			return rc;
2349	}
2350
2351	/* If Rx VLAN filter is changed, update filters via mac_reconfigure.
2352	 * If rx-fcs is changed, mac_reconfigure updates that too.
2353	 */
2354	if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER |
2355					  NETIF_F_RXFCS)) {
2356		/* efx_set_rx_mode() will schedule MAC work to update filters
2357		 * when the new features are finally set in net_dev.
2358		 */
2359		efx_set_rx_mode(net_dev);
2360	}
2361
2362	return 0;
2363}
2364
2365static int efx_get_phys_port_id(struct net_device *net_dev,
2366				struct netdev_phys_item_id *ppid)
2367{
2368	struct efx_nic *efx = netdev_priv(net_dev);
2369
2370	if (efx->type->get_phys_port_id)
2371		return efx->type->get_phys_port_id(efx, ppid);
2372	else
2373		return -EOPNOTSUPP;
2374}
2375
2376static int efx_get_phys_port_name(struct net_device *net_dev,
2377				  char *name, size_t len)
2378{
2379	struct efx_nic *efx = netdev_priv(net_dev);
2380
2381	if (snprintf(name, len, "p%u", efx->port_num) >= len)
2382		return -EINVAL;
2383	return 0;
2384}
2385
2386static int efx_vlan_rx_add_vid(struct net_device *net_dev, __be16 proto, u16 vid)
2387{
2388	struct efx_nic *efx = netdev_priv(net_dev);
2389
2390	if (efx->type->vlan_rx_add_vid)
2391		return efx->type->vlan_rx_add_vid(efx, proto, vid);
2392	else
2393		return -EOPNOTSUPP;
2394}
2395
2396static int efx_vlan_rx_kill_vid(struct net_device *net_dev, __be16 proto, u16 vid)
2397{
2398	struct efx_nic *efx = netdev_priv(net_dev);
2399
2400	if (efx->type->vlan_rx_kill_vid)
2401		return efx->type->vlan_rx_kill_vid(efx, proto, vid);
2402	else
2403		return -EOPNOTSUPP;
2404}
2405
2406static int efx_udp_tunnel_type_map(enum udp_parsable_tunnel_type in)
2407{
2408	switch (in) {
2409	case UDP_TUNNEL_TYPE_VXLAN:
2410		return TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN;
2411	case UDP_TUNNEL_TYPE_GENEVE:
2412		return TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE;
2413	default:
2414		return -1;
2415	}
2416}
2417
2418static void efx_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti)
2419{
2420	struct efx_nic *efx = netdev_priv(dev);
2421	struct efx_udp_tunnel tnl;
2422	int efx_tunnel_type;
2423
2424	efx_tunnel_type = efx_udp_tunnel_type_map(ti->type);
2425	if (efx_tunnel_type < 0)
2426		return;
2427
2428	tnl.type = (u16)efx_tunnel_type;
2429	tnl.port = ti->port;
2430
2431	if (efx->type->udp_tnl_add_port)
2432		(void)efx->type->udp_tnl_add_port(efx, tnl);
2433}
2434
2435static void efx_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *ti)
2436{
2437	struct efx_nic *efx = netdev_priv(dev);
2438	struct efx_udp_tunnel tnl;
2439	int efx_tunnel_type;
2440
2441	efx_tunnel_type = efx_udp_tunnel_type_map(ti->type);
2442	if (efx_tunnel_type < 0)
2443		return;
2444
2445	tnl.type = (u16)efx_tunnel_type;
2446	tnl.port = ti->port;
2447
2448	if (efx->type->udp_tnl_del_port)
2449		(void)efx->type->udp_tnl_del_port(efx, tnl);
2450}
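/* Note that the add and del paths above are deliberately symmetrical: both
 * translate the kernel tunnel type into the MCDI encoding and hand the
 * (type, port) pair to the NIC-type hook.  The return value is cast away
 * because these ndo callbacks are void and offer no way to report failure
 * to the stack.
 */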
2451
2452static const struct net_device_ops efx_netdev_ops = {
2453	.ndo_open		= efx_net_open,
2454	.ndo_stop		= efx_net_stop,
2455	.ndo_get_stats64	= efx_net_stats,
2456	.ndo_tx_timeout		= efx_watchdog,
2457	.ndo_start_xmit		= efx_hard_start_xmit,
2458	.ndo_validate_addr	= eth_validate_addr,
2459	.ndo_do_ioctl		= efx_ioctl,
2460	.ndo_change_mtu		= efx_change_mtu,
2461	.ndo_set_mac_address	= efx_set_mac_address,
2462	.ndo_set_rx_mode	= efx_set_rx_mode,
2463	.ndo_set_features	= efx_set_features,
2464	.ndo_vlan_rx_add_vid	= efx_vlan_rx_add_vid,
2465	.ndo_vlan_rx_kill_vid	= efx_vlan_rx_kill_vid,
2466#ifdef CONFIG_SFC_SRIOV
2467	.ndo_set_vf_mac		= efx_sriov_set_vf_mac,
2468	.ndo_set_vf_vlan	= efx_sriov_set_vf_vlan,
2469	.ndo_set_vf_spoofchk	= efx_sriov_set_vf_spoofchk,
2470	.ndo_get_vf_config	= efx_sriov_get_vf_config,
2471	.ndo_set_vf_link_state  = efx_sriov_set_vf_link_state,
2472#endif
2473	.ndo_get_phys_port_id   = efx_get_phys_port_id,
2474	.ndo_get_phys_port_name	= efx_get_phys_port_name,
2475#ifdef CONFIG_NET_POLL_CONTROLLER
2476	.ndo_poll_controller = efx_netpoll,
2477#endif
2478	.ndo_setup_tc		= efx_setup_tc,
2479#ifdef CONFIG_RFS_ACCEL
2480	.ndo_rx_flow_steer	= efx_filter_rfs,
2481#endif
2482	.ndo_udp_tunnel_add	= efx_udp_tunnel_add,
2483	.ndo_udp_tunnel_del	= efx_udp_tunnel_del,
2484};
2485
2486static void efx_update_name(struct efx_nic *efx)
2487{
2488	strlcpy(efx->name, efx->net_dev->name, sizeof(efx->name));
2489	efx_mtd_rename(efx);
2490	efx_set_channel_names(efx);
2491}
2492
2493static int efx_netdev_event(struct notifier_block *this,
2494			    unsigned long event, void *ptr)
2495{
2496	struct net_device *net_dev = netdev_notifier_info_to_dev(ptr);
2497
2498	if ((net_dev->netdev_ops == &efx_netdev_ops) &&
2499	    event == NETDEV_CHANGENAME)
2500		efx_update_name(netdev_priv(net_dev));
2501
2502	return NOTIFY_DONE;
2503}
2504
2505static struct notifier_block efx_netdev_notifier = {
2506	.notifier_call = efx_netdev_event,
2507};
2508
2509static ssize_t
2510show_phy_type(struct device *dev, struct device_attribute *attr, char *buf)
2511{
2512	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
2513	return scnprintf(buf, PAGE_SIZE, "%d\n", efx->phy_type);
2514}
2515static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL);
2516
2517#ifdef CONFIG_SFC_MCDI_LOGGING
2518static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr,
2519			     char *buf)
2520{
2521	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
2522	struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
2523
2524	return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled);
2525}
2526static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr,
2527			    const char *buf, size_t count)
2528{
2529	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
2530	struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
2531	bool enable = count > 0 && *buf != '0';
2532
2533	mcdi->logging_enabled = enable;
2534	return count;
2535}
2536static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log);
2537#endif
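/* Usage sketch (the sysfs path is assumed for illustration; attributes
 * created with device_create_file() appear under the PCI function's
 * sysfs directory):
 *
 *   # cat /sys/bus/pci/devices/0000:01:00.0/mcdi_logging
 *   # echo 1 > /sys/bus/pci/devices/0000:01:00.0/mcdi_logging
 *
 * Per set_mcdi_log() above, any non-empty string not beginning with '0'
 * enables logging.
 */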
2538
2539static int efx_register_netdev(struct efx_nic *efx)
2540{
2541	struct net_device *net_dev = efx->net_dev;
2542	struct efx_channel *channel;
2543	int rc;
2544
2545	net_dev->watchdog_timeo = 5 * HZ;
2546	net_dev->irq = efx->pci_dev->irq;
2547	net_dev->netdev_ops = &efx_netdev_ops;
2548	if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
2549		net_dev->priv_flags |= IFF_UNICAST_FLT;
2550	net_dev->ethtool_ops = &efx_ethtool_ops;
2551	net_dev->gso_max_segs = EFX_TSO_MAX_SEGS;
2552	net_dev->min_mtu = EFX_MIN_MTU;
2553	net_dev->max_mtu = EFX_MAX_MTU;
2554
2555	rtnl_lock();
2556
2557	/* Enable resets to be scheduled and check whether any were
2558	 * already requested.  If so, the NIC is probably hosed so we
2559	 * abort.
2560	 */
2561	efx->state = STATE_READY;
2562	smp_mb(); /* ensure we change state before checking reset_pending */
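	/* (This pairs with the smp_mb() in efx_schedule_reset(), which
	 * sets reset_pending before re-reading efx->state; between them,
	 * at least one side must observe the other's store.)
	 */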
2563	if (efx->reset_pending) {
2564		netif_err(efx, probe, efx->net_dev,
2565			  "aborting probe due to scheduled reset\n");
2566		rc = -EIO;
2567		goto fail_locked;
2568	}
2569
2570	rc = dev_alloc_name(net_dev, net_dev->name);
2571	if (rc < 0)
2572		goto fail_locked;
2573	efx_update_name(efx);
2574
2575	/* Always start with carrier off; PHY events will detect the link */
2576	netif_carrier_off(net_dev);
2577
2578	rc = register_netdevice(net_dev);
2579	if (rc)
2580		goto fail_locked;
2581
2582	efx_for_each_channel(channel, efx) {
2583		struct efx_tx_queue *tx_queue;
2584		efx_for_each_channel_tx_queue(tx_queue, channel)
2585			efx_init_tx_queue_core_txq(tx_queue);
2586	}
2587
2588	efx_associate(efx);
2589
2590	rtnl_unlock();
2591
2592	rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type);
2593	if (rc) {
2594		netif_err(efx, drv, efx->net_dev,
2595			  "failed to init net dev attributes\n");
2596		goto fail_registered;
2597	}
2598#ifdef CONFIG_SFC_MCDI_LOGGING
2599	rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
2600	if (rc) {
2601		netif_err(efx, drv, efx->net_dev,
2602			  "failed to init net dev attributes\n");
2603		goto fail_attr_mcdi_logging;
2604	}
2605#endif
2606
2607	return 0;
2608
2609#ifdef CONFIG_SFC_MCDI_LOGGING
2610fail_attr_mcdi_logging:
2611	device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
2612#endif
2613fail_registered:
2614	rtnl_lock();
2615	efx_dissociate(efx);
2616	unregister_netdevice(net_dev);
2617fail_locked:
2618	efx->state = STATE_UNINIT;
2619	rtnl_unlock();
2620	netif_err(efx, drv, efx->net_dev, "could not register net dev\n");
2621	return rc;
2622}
2623
2624static void efx_unregister_netdev(struct efx_nic *efx)
2625{
2626	if (!efx->net_dev)
2627		return;
2628
2629	BUG_ON(netdev_priv(efx->net_dev) != efx);
2630
2631	if (efx_dev_registered(efx)) {
2632		strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name));
2633#ifdef CONFIG_SFC_MCDI_LOGGING
2634		device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
2635#endif
2636		device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
2637		unregister_netdev(efx->net_dev);
2638	}
2639}
2640
2641/**************************************************************************
2642 *
2643 * Device reset and suspend
2644 *
2645 **************************************************************************/
2646
2647/* Tears down the entire software state and most of the hardware state
2648 * before reset.  */
2649void efx_reset_down(struct efx_nic *efx, enum reset_type method)
2650{
2651	EFX_ASSERT_RESET_SERIALISED(efx);
2652
2653	if (method == RESET_TYPE_MCDI_TIMEOUT)
2654		efx->type->prepare_flr(efx);
2655
2656	efx_stop_all(efx);
2657	efx_disable_interrupts(efx);
2658
2659	mutex_lock(&efx->mac_lock);
2660	mutex_lock(&efx->rss_lock);
2661	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
2662	    method != RESET_TYPE_DATAPATH)
2663		efx->phy_op->fini(efx);
2664	efx->type->fini(efx);
2665}
2666
2667/* This function will always ensure that the locks acquired in
2668 * efx_reset_down() are released. A failure return code indicates
2669 * that we were unable to reinitialise the hardware, and the
2670 * driver should be disabled. If ok is false, then the rx and tx
2671 * engines are not restarted, pending a RESET_DISABLE. */
2672int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
2673{
2674	int rc;
2675
2676	EFX_ASSERT_RESET_SERIALISED(efx);
2677
2678	if (method == RESET_TYPE_MCDI_TIMEOUT)
2679		efx->type->finish_flr(efx);
2680
2681	/* Ensure that SRAM is initialised even if we're disabling the device */
2682	rc = efx->type->init(efx);
2683	if (rc) {
2684		netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
2685		goto fail;
2686	}
2687
2688	if (!ok)
2689		goto fail;
2690
2691	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
2692	    method != RESET_TYPE_DATAPATH) {
2693		rc = efx->phy_op->init(efx);
2694		if (rc)
2695			goto fail;
2696		rc = efx->phy_op->reconfigure(efx);
2697		if (rc && rc != -EPERM)
2698			netif_err(efx, drv, efx->net_dev,
2699				  "could not restore PHY settings\n");
2700	}
2701
2702	rc = efx_enable_interrupts(efx);
2703	if (rc)
2704		goto fail;
2705
2706#ifdef CONFIG_SFC_SRIOV
2707	rc = efx->type->vswitching_restore(efx);
2708	if (rc) /* not fatal; the PF will still work fine */
2709		netif_warn(efx, probe, efx->net_dev,
2710			   "failed to restore vswitching rc=%d;"
2711			   " VFs may not function\n", rc);
2712#endif
2713
2714	if (efx->type->rx_restore_rss_contexts)
2715		efx->type->rx_restore_rss_contexts(efx);
2716	mutex_unlock(&efx->rss_lock);
2717	down_read(&efx->filter_sem);
2718	efx_restore_filters(efx);
2719	up_read(&efx->filter_sem);
2720	if (efx->type->sriov_reset)
2721		efx->type->sriov_reset(efx);
2722
2723	mutex_unlock(&efx->mac_lock);
2724
2725	efx_start_all(efx);
2726
2727	if (efx->type->udp_tnl_push_ports)
2728		efx->type->udp_tnl_push_ports(efx);
2729
2730	return 0;
2731
2732fail:
2733	efx->port_initialized = false;
2734
2735	mutex_unlock(&efx->rss_lock);
2736	mutex_unlock(&efx->mac_lock);
2737
2738	return rc;
2739}
2740
2741/* Reset the NIC using the specified method.  Note that the reset may
2742 * fail, in which case the card will be left in an unusable state.
2743 *
2744 * Caller must hold the rtnl_lock.
2745 */
2746int efx_reset(struct efx_nic *efx, enum reset_type method)
2747{
2748	int rc, rc2;
2749	bool disabled;
2750
2751	netif_info(efx, drv, efx->net_dev, "resetting (%s)\n",
2752		   RESET_TYPE(method));
2753
2754	efx_device_detach_sync(efx);
2755	efx_reset_down(efx, method);
2756
2757	rc = efx->type->reset(efx, method);
2758	if (rc) {
2759		netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n");
2760		goto out;
2761	}
2762
2763	/* Clear flags for the scopes we covered.  We assume the NIC and
2764	 * driver are now quiescent so that there is no race here.
2765	 */
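	/* Illustrative arithmetic: for method == 2, -(1 << 3) is
	 * ...11111000 in two's complement, so the &= below clears bits
	 * 0..2 -- this reset and every less thorough scope beneath it --
	 * while leaving requests for more thorough resets pending.
	 */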
2766	if (method < RESET_TYPE_MAX_METHOD)
2767		efx->reset_pending &= -(1 << (method + 1));
2768	else /* it doesn't fit into the well-ordered scope hierarchy */
2769		__clear_bit(method, &efx->reset_pending);
2770
2771	/* Reinitialise bus-mastering, which may have been turned off before
2772	 * the reset was scheduled. This is still appropriate, even in the
2773	 * RESET_TYPE_DISABLE case, since this driver generally assumes the
2774	 * hardware can respond to requests. */
2775	pci_set_master(efx->pci_dev);
2776
2777out:
2778	/* Leave device stopped if necessary */
2779	disabled = rc ||
2780		method == RESET_TYPE_DISABLE ||
2781		method == RESET_TYPE_RECOVER_OR_DISABLE;
2782	rc2 = efx_reset_up(efx, method, !disabled);
2783	if (rc2) {
2784		disabled = true;
2785		if (!rc)
2786			rc = rc2;
2787	}
2788
2789	if (disabled) {
2790		dev_close(efx->net_dev);
2791		netif_err(efx, drv, efx->net_dev, "has been disabled\n");
2792		efx->state = STATE_DISABLED;
2793	} else {
2794		netif_dbg(efx, drv, efx->net_dev, "reset complete\n");
2795		efx_device_attach_if_not_resetting(efx);
2796	}
2797	return rc;
2798}
2799
2800/* Try recovery mechanisms.
2801 * For now only EEH is supported.
2802 * Returns 0 if the recovery mechanisms are unsuccessful.
2803 * Returns a non-zero value otherwise.
2804 */
2805int efx_try_recovery(struct efx_nic *efx)
2806{
2807#ifdef CONFIG_EEH
2808	/* A PCI error can occur and not be seen by EEH because nothing
2809	 * happens on the PCI bus. In this case the driver may fail and
2810	 * schedule a 'recover or reset', leading to this recovery handler.
2811	 * Manually call the eeh failure check function.
2812	 */
2813	struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev);
2814	if (eeh_dev_check_failure(eehdev)) {
2815		/* The EEH mechanisms will handle the error and reset the
2816		 * device if necessary.
2817		 */
2818		return 1;
2819	}
2820#endif
2821	return 0;
2822}
2823
2824static void efx_wait_for_bist_end(struct efx_nic *efx)
2825{
2826	int i;
2827
2828	for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) {
2829		if (efx_mcdi_poll_reboot(efx))
2830			goto out;
2831		msleep(BIST_WAIT_DELAY_MS);
2832	}
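	/* In total we poll for up to BIST_WAIT_DELAY_COUNT *
	 * BIST_WAIT_DELAY_MS milliseconds before concluding that the MC
	 * never rebooted.
	 */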
2833
2834	netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n");
2835out:
2836	/* Either way unset the BIST flag. If we found no reboot we probably
2837	 * won't recover, but we should try.
2838	 */
2839	efx->mc_bist_for_other_fn = false;
2840}
2841
2842/* The worker thread exists so that code that cannot sleep can
2843 * schedule a reset for later.
2844 */
2845static void efx_reset_work(struct work_struct *data)
2846{
2847	struct efx_nic *efx = container_of(data, struct efx_nic, reset_work);
2848	unsigned long pending;
2849	enum reset_type method;
2850
2851	pending = READ_ONCE(efx->reset_pending);
2852	method = fls(pending) - 1;
2853
2854	if (method == RESET_TYPE_MC_BIST)
2855		efx_wait_for_bist_end(efx);
2856
2857	if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
2858	     method == RESET_TYPE_RECOVER_OR_ALL) &&
2859	    efx_try_recovery(efx))
2860		return;
2861
2862	if (!pending)
2863		return;
2864
2865	rtnl_lock();
2866
2867	/* We checked the state in efx_schedule_reset() but it may
2868	 * have changed by now.  Now that we have the RTNL lock,
2869	 * it cannot change again.
2870	 */
2871	if (efx->state == STATE_READY)
2872		(void)efx_reset(efx, method);
2873
2874	rtnl_unlock();
2875}
2876
2877void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
2878{
2879	enum reset_type method;
2880
2881	if (efx->state == STATE_RECOVERY) {
2882		netif_dbg(efx, drv, efx->net_dev,
2883			  "recovering: skip scheduling %s reset\n",
2884			  RESET_TYPE(type));
2885		return;
2886	}
2887
2888	switch (type) {
2889	case RESET_TYPE_INVISIBLE:
2890	case RESET_TYPE_ALL:
2891	case RESET_TYPE_RECOVER_OR_ALL:
2892	case RESET_TYPE_WORLD:
2893	case RESET_TYPE_DISABLE:
2894	case RESET_TYPE_RECOVER_OR_DISABLE:
2895	case RESET_TYPE_DATAPATH:
2896	case RESET_TYPE_MC_BIST:
2897	case RESET_TYPE_MCDI_TIMEOUT:
2898		method = type;
2899		netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
2900			  RESET_TYPE(method));
2901		break;
2902	default:
2903		method = efx->type->map_reset_reason(type);
2904		netif_dbg(efx, drv, efx->net_dev,
2905			  "scheduling %s reset for %s\n",
2906			  RESET_TYPE(method), RESET_TYPE(type));
2907		break;
2908	}
2909
2910	set_bit(method, &efx->reset_pending);
2911	smp_mb(); /* ensure we change reset_pending before checking state */
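	/* (Pairs with the smp_mb() in efx_register_netdev(), which
	 * publishes STATE_READY before checking reset_pending.)
	 */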
2912
2913	/* If we're not READY then just leave the flags set as the cue
2914	 * to abort probing or reschedule the reset later.
2915	 */
2916	if (READ_ONCE(efx->state) != STATE_READY)
2917		return;
2918
2919	/* efx_process_channel() will no longer read events once a
2920	 * reset is scheduled. So switch back to polled MCDI completions. */
2921	efx_mcdi_mode_poll(efx);
2922
2923	queue_work(reset_workqueue, &efx->reset_work);
2924}
2925
2926/**************************************************************************
2927 *
2928 * List of NICs we support
2929 *
2930 **************************************************************************/
2931
2932/* PCI device ID table */
2933static const struct pci_device_id efx_pci_table[] = {
2934	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0803),	/* SFC9020 */
2935	 .driver_data = (unsigned long) &siena_a0_nic_type},
2936	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0813),	/* SFL9021 */
2937	 .driver_data = (unsigned long) &siena_a0_nic_type},
2938	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0903),  /* SFC9120 PF */
2939	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
2940	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1903),  /* SFC9120 VF */
2941	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
2942	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923),  /* SFC9140 PF */
2943	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
2944	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1923),  /* SFC9140 VF */
2945	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
2946	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0a03),  /* SFC9220 PF */
2947	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
2948	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1a03),  /* SFC9220 VF */
2949	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
2950	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0b03),  /* SFC9250 PF */
2951	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
2952	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1b03),  /* SFC9250 VF */
2953	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
2954	{0}			/* end of list */
2955};
2956
2957/**************************************************************************
2958 *
2959 * Dummy PHY/MAC operations
2960 *
2961 * Can be used for some unimplemented operations
2962 * Needed so all function pointers are valid and do not have to be tested
2963 * before use
2964 *
2965 **************************************************************************/
2966int efx_port_dummy_op_int(struct efx_nic *efx)
2967{
2968	return 0;
2969}
2970void efx_port_dummy_op_void(struct efx_nic *efx) {}
2971
2972static bool efx_port_dummy_op_poll(struct efx_nic *efx)
2973{
2974	return false;
2975}
2976
2977static const struct efx_phy_operations efx_dummy_phy_operations = {
2978	.init		 = efx_port_dummy_op_int,
2979	.reconfigure	 = efx_port_dummy_op_int,
2980	.poll		 = efx_port_dummy_op_poll,
2981	.fini		 = efx_port_dummy_op_void,
2982};
2983
2984/**************************************************************************
2985 *
2986 * Data housekeeping
2987 *
2988 **************************************************************************/
2989
2990/* This zeroes out and then fills in the invariants in a struct
2991 * efx_nic (including all sub-structures).
2992 */
2993static int efx_init_struct(struct efx_nic *efx,
2994			   struct pci_dev *pci_dev, struct net_device *net_dev)
2995{
2996	int rc = -ENOMEM, i;
2997
2998	/* Initialise common structures */
2999	INIT_LIST_HEAD(&efx->node);
3000	INIT_LIST_HEAD(&efx->secondary_list);
3001	spin_lock_init(&efx->biu_lock);
3002#ifdef CONFIG_SFC_MTD
3003	INIT_LIST_HEAD(&efx->mtd_list);
3004#endif
3005	INIT_WORK(&efx->reset_work, efx_reset_work);
3006	INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor);
3007	INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work);
3008	efx->pci_dev = pci_dev;
3009	efx->msg_enable = debug;
3010	efx->state = STATE_UNINIT;
3011	strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));
3012
3013	efx->net_dev = net_dev;
3014	efx->rx_prefix_size = efx->type->rx_prefix_size;
3015	efx->rx_ip_align =
3016		NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0;
3017	efx->rx_packet_hash_offset =
3018		efx->type->rx_hash_offset - efx->type->rx_prefix_size;
3019	efx->rx_packet_ts_offset =
3020		efx->type->rx_ts_offset - efx->type->rx_prefix_size;
3021	INIT_LIST_HEAD(&efx->rss_context.list);
3022	mutex_init(&efx->rss_lock);
3023	spin_lock_init(&efx->stats_lock);
3024	efx->vi_stride = EFX_DEFAULT_VI_STRIDE;
3025	efx->num_mac_stats = MC_CMD_MAC_NSTATS;
3026	BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END);
3027	mutex_init(&efx->mac_lock);
3028#ifdef CONFIG_RFS_ACCEL
3029	mutex_init(&efx->rps_mutex);
3030	spin_lock_init(&efx->rps_hash_lock);
3031	/* Failure to allocate is not fatal, but may degrade ARFS performance */
3032	efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE,
3033				      sizeof(*efx->rps_hash_table), GFP_KERNEL);
3034#endif
3035	efx->phy_op = &efx_dummy_phy_operations;
3036	efx->mdio.dev = net_dev;
3037	INIT_WORK(&efx->mac_work, efx_mac_work);
3038	init_waitqueue_head(&efx->flush_wq);
3039
3040	for (i = 0; i < EFX_MAX_CHANNELS; i++) {
3041		efx->channel[i] = efx_alloc_channel(efx, i, NULL);
3042		if (!efx->channel[i])
3043			goto fail;
3044		efx->msi_context[i].efx = efx;
3045		efx->msi_context[i].index = i;
3046	}
3047
3048	/* Higher numbered interrupt modes are less capable! */
3049	if (WARN_ON_ONCE(efx->type->max_interrupt_mode >
3050			 efx->type->min_interrupt_mode)) {
3051		rc = -EIO;
3052		goto fail;
3053	}
3054	efx->interrupt_mode = max(efx->type->max_interrupt_mode,
3055				  interrupt_mode);
3056	efx->interrupt_mode = min(efx->type->min_interrupt_mode,
3057				  efx->interrupt_mode);
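	/* Net effect: clamp the requested mode to the range this NIC
	 * type supports.  The ordering looks inverted because interrupt
	 * modes are numbered with 0 as the most capable (MSI-X), so
	 * "max" is the numerically smallest permitted value.
	 */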
3058
3059	/* Would be good to use the net_dev name, but we're too early */
3060	snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s",
3061		 pci_name(pci_dev));
3062	efx->workqueue = create_singlethread_workqueue(efx->workqueue_name);
3063	if (!efx->workqueue)
3064		goto fail;
3065
3066	return 0;
3067
3068fail:
3069	efx_fini_struct(efx);
3070	return rc;
3071}
3072
3073static void efx_fini_struct(struct efx_nic *efx)
3074{
3075	int i;
3076
3077#ifdef CONFIG_RFS_ACCEL
3078	kfree(efx->rps_hash_table);
3079#endif
3080
3081	for (i = 0; i < EFX_MAX_CHANNELS; i++)
3082		kfree(efx->channel[i]);
3083
3084	kfree(efx->vpd_sn);
3085
3086	if (efx->workqueue) {
3087		destroy_workqueue(efx->workqueue);
3088		efx->workqueue = NULL;
3089	}
3090}
3091
3092void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
3093{
3094	u64 n_rx_nodesc_trunc = 0;
3095	struct efx_channel *channel;
3096
3097	efx_for_each_channel(channel, efx)
3098		n_rx_nodesc_trunc += channel->n_rx_nodesc_trunc;
3099	stats[GENERIC_STAT_rx_nodesc_trunc] = n_rx_nodesc_trunc;
3100	stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops);
3101}
3102
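/* The two helpers below treat the region from outer_vid to the end of
 * struct efx_filter_spec as the filter's match data: _equal() memcmp()s
 * it (after checking match_flags and the RX/TX flag bits) and _hash()
 * jhash2()s it as u32 words, relying on outer_vid being 4-byte aligned.
 */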
3103bool efx_filter_spec_equal(const struct efx_filter_spec *left,
3104			   const struct efx_filter_spec *right)
3105{
3106	if ((left->match_flags ^ right->match_flags) |
3107	    ((left->flags ^ right->flags) &
3108	     (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)))
3109		return false;
3110
3111	return memcmp(&left->outer_vid, &right->outer_vid,
3112		      sizeof(struct efx_filter_spec) -
3113		      offsetof(struct efx_filter_spec, outer_vid)) == 0;
3114}
3115
3116u32 efx_filter_spec_hash(const struct efx_filter_spec *spec)
3117{
3118	BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3);
3119	return jhash2((const u32 *)&spec->outer_vid,
3120		      (sizeof(struct efx_filter_spec) -
3121		       offsetof(struct efx_filter_spec, outer_vid)) / 4,
3122		      0);
3123}
3124
3125#ifdef CONFIG_RFS_ACCEL
3126bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx,
3127			bool *force)
3128{
3129	if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) {
3130		/* ARFS is currently updating this entry, leave it */
3131		return false;
3132	}
3133	if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) {
3134		/* ARFS tried and failed to update this, so it's probably out
3135		 * of date.  Remove the filter and the ARFS rule entry.
3136		 */
3137		rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
3138		*force = true;
3139		return true;
3140	} else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */
3141		/* ARFS has moved on, so old filter is not needed.  Since we did
3142		 * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will
3143		 * not be removed by efx_rps_hash_del() subsequently.
3144		 */
3145		*force = true;
3146		return true;
3147	}
3148	/* Remove it iff ARFS wants to. */
3149	return true;
3150}
3151
3152struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx,
3153				       const struct efx_filter_spec *spec)
3154{
3155	u32 hash = efx_filter_spec_hash(spec);
3156
3157	WARN_ON(!spin_is_locked(&efx->rps_hash_lock));
3158	if (!efx->rps_hash_table)
3159		return NULL;
3160	return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE];
3161}
3162
3163struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx,
3164					const struct efx_filter_spec *spec)
3165{
3166	struct efx_arfs_rule *rule;
3167	struct hlist_head *head;
3168	struct hlist_node *node;
3169
3170	head = efx_rps_hash_bucket(efx, spec);
3171	if (!head)
3172		return NULL;
3173	hlist_for_each(node, head) {
3174		rule = container_of(node, struct efx_arfs_rule, node);
3175		if (efx_filter_spec_equal(spec, &rule->spec))
3176			return rule;
3177	}
3178	return NULL;
3179}
3180
3181struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
3182				       const struct efx_filter_spec *spec,
3183				       bool *new)
3184{
3185	struct efx_arfs_rule *rule;
3186	struct hlist_head *head;
3187	struct hlist_node *node;
3188
3189	head = efx_rps_hash_bucket(efx, spec);
3190	if (!head)
3191		return NULL;
3192	hlist_for_each(node, head) {
3193		rule = container_of(node, struct efx_arfs_rule, node);
3194		if (efx_filter_spec_equal(spec, &rule->spec)) {
3195			*new = false;
3196			return rule;
3197		}
3198	}
3199	rule = kmalloc(sizeof(*rule), GFP_ATOMIC);
3200	*new = true;
3201	if (rule) {
3202		memcpy(&rule->spec, spec, sizeof(rule->spec));
3203		hlist_add_head(&rule->node, head);
3204	}
3205	return rule;
3206}
3207
3208void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec)
3209{
3210	struct efx_arfs_rule *rule;
3211	struct hlist_head *head;
3212	struct hlist_node *node;
3213
3214	head = efx_rps_hash_bucket(efx, spec);
3215	if (WARN_ON(!head))
3216		return;
3217	hlist_for_each(node, head) {
3218		rule = container_of(node, struct efx_arfs_rule, node);
3219		if (efx_filter_spec_equal(spec, &rule->spec)) {
3220			/* Someone already reused the entry.  We know that if
3221			 * this check doesn't fire (i.e. filter_id == REMOVING)
3222			 * then the REMOVING mark was put there by our caller,
3223			 * because caller is holding a lock on filter table and
3224			 * only holders of that lock set REMOVING.
3225			 */
3226			if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING)
3227				return;
3228			hlist_del(node);
3229			kfree(rule);
3230			return;
3231		}
3232	}
3233	/* We didn't find it. */
3234	WARN_ON(1);
3235}
3236#endif
3237
3238/* RSS contexts.  We're using linked lists and crappy O(n) algorithms, because
3239 * (a) this is an infrequent control-plane operation and (b) n is small (max 64)
3240 */
3241struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx)
3242{
3243	struct list_head *head = &efx->rss_context.list;
3244	struct efx_rss_context *ctx, *new;
3245	u32 id = 1; /* Don't use zero, that refers to the master RSS context */
3246
3247	WARN_ON(!mutex_is_locked(&efx->rss_lock));
3248
3249	/* Search for first gap in the numbering */
3250	list_for_each_entry(ctx, head, list) {
3251		if (ctx->user_id != id)
3252			break;
3253		id++;
3254		/* Check for wrap.  If this happens, we have nearly 2^32
3255		 * allocated RSS contexts, which seems unlikely.
3256		 */
3257		if (WARN_ON_ONCE(!id))
3258			return NULL;
3259	}
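	/* Illustrative example: with user_ids {1, 2, 4} in the list, the
	 * loop breaks at the entry numbered 4, leaving id == 3;
	 * list_add_tail() below then links the new entry in just before
	 * it, keeping the list sorted by user_id.
	 */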
3260
3261	/* Create the new entry */
3262	new = kmalloc(sizeof(struct efx_rss_context), GFP_KERNEL);
3263	if (!new)
3264		return NULL;
3265	new->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
3266	new->rx_hash_udp_4tuple = false;
3267
3268	/* Insert the new entry into the gap */
3269	new->user_id = id;
3270	list_add_tail(&new->list, &ctx->list);
3271	return new;
3272}
3273
3274struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id)
3275{
3276	struct list_head *head = &efx->rss_context.list;
3277	struct efx_rss_context *ctx;
3278
3279	WARN_ON(!mutex_is_locked(&efx->rss_lock));
3280
3281	list_for_each_entry(ctx, head, list)
3282		if (ctx->user_id == id)
3283			return ctx;
3284	return NULL;
3285}
3286
3287void efx_free_rss_context_entry(struct efx_rss_context *ctx)
3288{
3289	list_del(&ctx->list);
3290	kfree(ctx);
3291}
3292
3293/**************************************************************************
3294 *
3295 * PCI interface
3296 *
3297 **************************************************************************/
3298
3299/* Main body of final NIC shutdown code
3300 * This is called only at module unload (or hotplug removal).
3301 */
3302static void efx_pci_remove_main(struct efx_nic *efx)
3303{
3304	/* Flush reset_work. It can no longer be scheduled since we
3305	 * are not READY.
3306	 */
3307	BUG_ON(efx->state == STATE_READY);
3308	cancel_work_sync(&efx->reset_work);
3309
3310	efx_disable_interrupts(efx);
3311	efx_nic_fini_interrupt(efx);
3312	efx_fini_port(efx);
3313	efx->type->fini(efx);
3314	efx_fini_napi(efx);
3315	efx_remove_all(efx);
3316}
3317
3318/* Final NIC shutdown
3319 * This is called only at module unload (or hotplug removal).  A PF can call
3320 * this on its VFs to ensure they are unbound first.
3321 */
3322static void efx_pci_remove(struct pci_dev *pci_dev)
3323{
3324	struct efx_nic *efx;
3325
3326	efx = pci_get_drvdata(pci_dev);
3327	if (!efx)
3328		return;
3329
3330	/* Mark the NIC as no longer ready, then stop the interface */
3331	rtnl_lock();
3332	efx_dissociate(efx);
3333	dev_close(efx->net_dev);
3334	efx_disable_interrupts(efx);
3335	efx->state = STATE_UNINIT;
3336	rtnl_unlock();
3337
3338	if (efx->type->sriov_fini)
3339		efx->type->sriov_fini(efx);
3340
3341	efx_unregister_netdev(efx);
3342
3343	efx_mtd_remove(efx);
3344
3345	efx_pci_remove_main(efx);
3346
3347	efx_fini_io(efx);
3348	netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n");
3349
3350	efx_fini_struct(efx);
3351	free_netdev(efx->net_dev);
3352
3353	pci_disable_pcie_error_reporting(pci_dev);
3354}
3355
3356/* NIC VPD information
3357 * Called during probe to display the part number of the
3358 * installed NIC.  VPD is potentially very large but this should
3359 * always appear within the first 512 bytes.
3360 */
3361#define SFC_VPD_LEN 512
3362static void efx_probe_vpd_strings(struct efx_nic *efx)
3363{
3364	struct pci_dev *dev = efx->pci_dev;
3365	char vpd_data[SFC_VPD_LEN];
3366	ssize_t vpd_size;
3367	int ro_start, ro_size, i, j;
3368
3369	/* Get the vpd data from the device */
3370	vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data);
3371	if (vpd_size <= 0) {
3372		netif_err(efx, drv, efx->net_dev, "Unable to read VPD\n");
3373		return;
3374	}
3375
3376	/* Get the Read only section */
3377	ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size, PCI_VPD_LRDT_RO_DATA);
3378	if (ro_start < 0) {
3379		netif_err(efx, drv, efx->net_dev, "VPD Read-only not found\n");
3380		return;
3381	}
3382
3383	ro_size = pci_vpd_lrdt_size(&vpd_data[ro_start]);
3384	j = ro_size;
3385	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
3386	if (i + j > vpd_size)
3387		j = vpd_size - i;
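	/* At this point i indexes the first data byte of the read-only
	 * section and j holds its length, clamped so that i + j cannot
	 * run past the bytes actually read into vpd_data.
	 */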
3388
3389	/* Get the Part number */
3390	i = pci_vpd_find_info_keyword(vpd_data, i, j, "PN");
3391	if (i < 0) {
3392		netif_err(efx, drv, efx->net_dev, "Part number not found\n");
3393		return;
3394	}
3395
3396	j = pci_vpd_info_field_size(&vpd_data[i]);
3397	i += PCI_VPD_INFO_FLD_HDR_SIZE;
3398	if (i + j > vpd_size) {
3399		netif_err(efx, drv, efx->net_dev, "Incomplete part number\n");
3400		return;
3401	}
3402
3403	netif_info(efx, drv, efx->net_dev,
3404		   "Part Number : %.*s\n", j, &vpd_data[i]);
3405
3406	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
3407	j = ro_size;
3408	i = pci_vpd_find_info_keyword(vpd_data, i, j, "SN");
3409	if (i < 0) {
3410		netif_err(efx, drv, efx->net_dev, "Serial number not found\n");
3411		return;
3412	}
3413
3414	j = pci_vpd_info_field_size(&vpd_data[i]);
3415	i += PCI_VPD_INFO_FLD_HDR_SIZE;
3416	if (i + j > vpd_size) {
3417		netif_err(efx, drv, efx->net_dev, "Incomplete serial number\n");
3418		return;
3419	}
3420
3421	efx->vpd_sn = kmalloc(j + 1, GFP_KERNEL);
3422	if (!efx->vpd_sn)
3423		return;
3424
3425	snprintf(efx->vpd_sn, j + 1, "%s", &vpd_data[i]);
3426}
3427
3428
3429/* Main body of NIC initialisation
3430 * This is called at module load (or hotplug insertion, theoretically).
3431 */
3432static int efx_pci_probe_main(struct efx_nic *efx)
3433{
3434	int rc;
3435
3436	/* Do start-of-day initialisation */
3437	rc = efx_probe_all(efx);
3438	if (rc)
3439		goto fail1;
3440
3441	efx_init_napi(efx);
3442
3443	rc = efx->type->init(efx);
3444	if (rc) {
3445		netif_err(efx, probe, efx->net_dev,
3446			  "failed to initialise NIC\n");
3447		goto fail3;
3448	}
3449
3450	rc = efx_init_port(efx);
3451	if (rc) {
3452		netif_err(efx, probe, efx->net_dev,
3453			  "failed to initialise port\n");
3454		goto fail4;
3455	}
3456
3457	rc = efx_nic_init_interrupt(efx);
3458	if (rc)
3459		goto fail5;
3460	rc = efx_enable_interrupts(efx);
3461	if (rc)
3462		goto fail6;
3463
3464	return 0;
3465
3466 fail6:
3467	efx_nic_fini_interrupt(efx);
3468 fail5:
3469	efx_fini_port(efx);
3470 fail4:
3471	efx->type->fini(efx);
3472 fail3:
3473	efx_fini_napi(efx);
3474	efx_remove_all(efx);
3475 fail1:
3476	return rc;
3477}
3478
3479static int efx_pci_probe_post_io(struct efx_nic *efx)
3480{
3481	struct net_device *net_dev = efx->net_dev;
3482	int rc = efx_pci_probe_main(efx);
3483
3484	if (rc)
3485		return rc;
3486
3487	if (efx->type->sriov_init) {
3488		rc = efx->type->sriov_init(efx);
3489		if (rc)
3490			netif_err(efx, probe, efx->net_dev,
3491				  "SR-IOV can't be enabled rc %d\n", rc);
3492	}
3493
3494	/* Determine netdevice features */
3495	net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
3496			      NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL);
3497	if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
3498		net_dev->features |= NETIF_F_TSO6;
3499	/* Check whether device supports TSO */
3500	if (!efx->type->tso_versions || !efx->type->tso_versions(efx))
3501		net_dev->features &= ~NETIF_F_ALL_TSO;
3502	/* Mask for features that also apply to VLAN devices */
3503	net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG |
3504				   NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
3505				   NETIF_F_RXCSUM);
3506
3507	net_dev->hw_features |= net_dev->features & ~efx->fixed_features;
3508
3509	/* Disable receiving frames with bad FCS, by default. */
3510	net_dev->features &= ~NETIF_F_RXALL;
3511
3512	/* Disable VLAN filtering by default.  It may be enforced if
3513	 * the feature is fixed (i.e. VLAN filters are required to
3514	 * receive VLAN tagged packets due to vPort restrictions).
3515	 */
3516	net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
3517	net_dev->features |= efx->fixed_features;
3518
3519	rc = efx_register_netdev(efx);
3520	if (!rc)
3521		return 0;
3522
3523	efx_pci_remove_main(efx);
3524	return rc;
3525}
3526
3527/* NIC initialisation
3528 *
3529 * This is called at module load (or hotplug insertion,
3530 * theoretically).  It sets up PCI mappings, resets the NIC,
3531 * sets up and registers the network devices with the kernel and hooks
3532 * the interrupt service routine.  It does not prepare the device for
3533 * transmission; this is left to the first time one of the network
3534 * interfaces is brought up (i.e. efx_net_open).
3535 */
3536static int efx_pci_probe(struct pci_dev *pci_dev,
3537			 const struct pci_device_id *entry)
3538{
3539	struct net_device *net_dev;
3540	struct efx_nic *efx;
3541	int rc;
3542
3543	/* Allocate and initialise a struct net_device and struct efx_nic */
3544	net_dev = alloc_etherdev_mqs(sizeof(*efx), EFX_MAX_CORE_TX_QUEUES,
3545				     EFX_MAX_RX_QUEUES);
3546	if (!net_dev)
3547		return -ENOMEM;
3548	efx = netdev_priv(net_dev);
3549	efx->type = (const struct efx_nic_type *) entry->driver_data;
3550	efx->fixed_features |= NETIF_F_HIGHDMA;
3551
3552	pci_set_drvdata(pci_dev, efx);
3553	SET_NETDEV_DEV(net_dev, &pci_dev->dev);
3554	rc = efx_init_struct(efx, pci_dev, net_dev);
3555	if (rc)
3556		goto fail1;
3557
3558	netif_info(efx, probe, efx->net_dev,
3559		   "Solarflare NIC detected\n");
3560
3561	if (!efx->type->is_vf)
3562		efx_probe_vpd_strings(efx);
3563
3564	/* Set up basic I/O (BAR mappings etc) */
3565	rc = efx_init_io(efx);
3566	if (rc)
3567		goto fail2;
3568
3569	rc = efx_pci_probe_post_io(efx);
3570	if (rc) {
3571		/* On failure, retry once immediately.
3572		 * If we aborted probe due to a scheduled reset, dismiss it.
3573		 */
3574		efx->reset_pending = 0;
3575		rc = efx_pci_probe_post_io(efx);
3576		if (rc) {
3577			/* On another failure, retry once more
3578			 * after a 50-305ms delay.
3579			 */
3580			unsigned char r;
3581
3582			get_random_bytes(&r, 1);
3583			msleep((unsigned int)r + 50);
3584			efx->reset_pending = 0;
3585			rc = efx_pci_probe_post_io(efx);
3586		}
3587	}
3588	if (rc)
3589		goto fail3;
3590
3591	netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");
3592
3593	/* Try to create MTDs, but allow this to fail */
3594	rtnl_lock();
3595	rc = efx_mtd_probe(efx);
3596	rtnl_unlock();
3597	if (rc && rc != -EPERM)
3598		netif_warn(efx, probe, efx->net_dev,
3599			   "failed to create MTDs (%d)\n", rc);
3600
3601	rc = pci_enable_pcie_error_reporting(pci_dev);
3602	if (rc && rc != -EINVAL)
3603		netif_notice(efx, probe, efx->net_dev,
3604			     "PCIE error reporting unavailable (%d).\n",
3605			     rc);
3606
3607	if (efx->type->udp_tnl_push_ports)
3608		efx->type->udp_tnl_push_ports(efx);
3609
3610	return 0;
3611
3612 fail3:
3613	efx_fini_io(efx);
3614 fail2:
3615	efx_fini_struct(efx);
3616 fail1:
3617	WARN_ON(rc > 0);
3618	netif_dbg(efx, drv, efx->net_dev, "initialisation failed. rc=%d\n", rc);
3619	free_netdev(net_dev);
3620	return rc;
3621}
3622
3623/* efx_pci_sriov_configure returns the actual number of Virtual Functions
3624 * enabled on success
3625 */
3626#ifdef CONFIG_SFC_SRIOV
3627static int efx_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
3628{
3629	int rc;
3630	struct efx_nic *efx = pci_get_drvdata(dev);
3631
3632	if (efx->type->sriov_configure) {
3633		rc = efx->type->sriov_configure(efx, num_vfs);
3634		if (rc)
3635			return rc;
3636		else
3637			return num_vfs;
3638	} else
3639		return -EOPNOTSUPP;
3640}
3641#endif
3642
3643static int efx_pm_freeze(struct device *dev)
3644{
3645	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
3646
3647	rtnl_lock();
3648
3649	if (efx->state != STATE_DISABLED) {
3650		efx->state = STATE_UNINIT;
3651
3652		efx_device_detach_sync(efx);
3653
3654		efx_stop_all(efx);
3655		efx_disable_interrupts(efx);
3656	}
3657
3658	rtnl_unlock();
3659
3660	return 0;
3661}
3662
3663static int efx_pm_thaw(struct device *dev)
3664{
3665	int rc;
3666	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
3667
3668	rtnl_lock();
3669
3670	if (efx->state != STATE_DISABLED) {
3671		rc = efx_enable_interrupts(efx);
3672		if (rc)
3673			goto fail;
3674
3675		mutex_lock(&efx->mac_lock);
3676		efx->phy_op->reconfigure(efx);
3677		mutex_unlock(&efx->mac_lock);
3678
3679		efx_start_all(efx);
3680
3681		efx_device_attach_if_not_resetting(efx);
3682
3683		efx->state = STATE_READY;
3684
3685		efx->type->resume_wol(efx);
3686	}
3687
3688	rtnl_unlock();
3689
3690	/* Reschedule any quenched resets scheduled during efx_pm_freeze() */
3691	queue_work(reset_workqueue, &efx->reset_work);
3692
3693	return 0;
3694
3695fail:
3696	rtnl_unlock();
3697
3698	return rc;
3699}
3700
3701static int efx_pm_poweroff(struct device *dev)
3702{
3703	struct pci_dev *pci_dev = to_pci_dev(dev);
3704	struct efx_nic *efx = pci_get_drvdata(pci_dev);
3705
3706	efx->type->fini(efx);
3707
3708	efx->reset_pending = 0;
3709
3710	pci_save_state(pci_dev);
3711	return pci_set_power_state(pci_dev, PCI_D3hot);
3712}
3713
3714/* Used for both resume and restore */
3715static int efx_pm_resume(struct device *dev)
3716{
3717	struct pci_dev *pci_dev = to_pci_dev(dev);
3718	struct efx_nic *efx = pci_get_drvdata(pci_dev);
3719	int rc;
3720
3721	rc = pci_set_power_state(pci_dev, PCI_D0);
3722	if (rc)
3723		return rc;
3724	pci_restore_state(pci_dev);
3725	rc = pci_enable_device(pci_dev);
3726	if (rc)
3727		return rc;
3728	pci_set_master(efx->pci_dev);
3729	rc = efx->type->reset(efx, RESET_TYPE_ALL);
3730	if (rc)
3731		return rc;
3732	rc = efx->type->init(efx);
3733	if (rc)
3734		return rc;
3735	rc = efx_pm_thaw(dev);
3736	return rc;
3737}
3738
3739static int efx_pm_suspend(struct device *dev)
3740{
3741	int rc;
3742
3743	efx_pm_freeze(dev);
3744	rc = efx_pm_poweroff(dev);
3745	if (rc)
3746		efx_pm_resume(dev);
3747	return rc;
3748}
3749
3750static const struct dev_pm_ops efx_pm_ops = {
3751	.suspend	= efx_pm_suspend,
3752	.resume		= efx_pm_resume,
3753	.freeze		= efx_pm_freeze,
3754	.thaw		= efx_pm_thaw,
3755	.poweroff	= efx_pm_poweroff,
3756	.restore	= efx_pm_resume,
3757};
3758
3759/* A PCI error affecting this device was detected.
3760 * At this point MMIO and DMA may be disabled.
3761 * Stop the software path and request a slot reset.
3762 */
3763static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
3764					      enum pci_channel_state state)
3765{
3766	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
3767	struct efx_nic *efx = pci_get_drvdata(pdev);
3768
3769	if (state == pci_channel_io_perm_failure)
3770		return PCI_ERS_RESULT_DISCONNECT;
3771
3772	rtnl_lock();
3773
3774	if (efx->state != STATE_DISABLED) {
3775		efx->state = STATE_RECOVERY;
3776		efx->reset_pending = 0;
3777
3778		efx_device_detach_sync(efx);
3779
3780		efx_stop_all(efx);
3781		efx_disable_interrupts(efx);
3782
3783		status = PCI_ERS_RESULT_NEED_RESET;
3784	} else {
3785		/* If the interface is disabled we don't want to do anything
3786		 * with it.
3787		 */
3788		status = PCI_ERS_RESULT_RECOVERED;
3789	}
3790
3791	rtnl_unlock();
3792
3793	pci_disable_device(pdev);
3794
3795	return status;
3796}
3797
3798/* Fake a successful reset, which will be performed later in efx_io_resume. */
3799static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev)
3800{
3801	struct efx_nic *efx = pci_get_drvdata(pdev);
3802	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
3803	int rc;
3804
3805	if (pci_enable_device(pdev)) {
3806		netif_err(efx, hw, efx->net_dev,
3807			  "Cannot re-enable PCI device after reset.\n");
3808		status = PCI_ERS_RESULT_DISCONNECT;
3809	}
3810
3811	rc = pci_cleanup_aer_uncorrect_error_status(pdev);
3812	if (rc) {
3813		netif_err(efx, hw, efx->net_dev,
3814			  "pci_cleanup_aer_uncorrect_error_status failed (%d)\n", rc);
3815		/* Non-fatal error. Continue. */
3816	}
3817
3818	return status;
3819}
3820
3821/* Perform the actual reset and resume I/O operations. */
3822static void efx_io_resume(struct pci_dev *pdev)
3823{
3824	struct efx_nic *efx = pci_get_drvdata(pdev);
3825	int rc;
3826
3827	rtnl_lock();
3828
3829	if (efx->state == STATE_DISABLED)
3830		goto out;
3831
3832	rc = efx_reset(efx, RESET_TYPE_ALL);
3833	if (rc) {
3834		netif_err(efx, hw, efx->net_dev,
3835			  "efx_reset failed after PCI error (%d)\n", rc);
3836	} else {
3837		efx->state = STATE_READY;
3838		netif_dbg(efx, hw, efx->net_dev,
3839			  "Done resetting and resuming IO after PCI error.\n");
3840	}
3841
3842out:
3843	rtnl_unlock();
3844}
3845
3846/* For simplicity and reliability, we always require a slot reset and try to
3847 * reset the hardware when a pci error affecting the device is detected.
3848 * We leave both the link_reset and mmio_enabled callback unimplemented:
3849 * with our request for slot reset the mmio_enabled callback will never be
3850 * called, and the link_reset callback is not used by AER or EEH mechanisms.
3851 */
3852static const struct pci_error_handlers efx_err_handlers = {
3853	.error_detected = efx_io_error_detected,
3854	.slot_reset	= efx_io_slot_reset,
3855	.resume		= efx_io_resume,
3856};
3857
3858static struct pci_driver efx_pci_driver = {
3859	.name		= KBUILD_MODNAME,
3860	.id_table	= efx_pci_table,
3861	.probe		= efx_pci_probe,
3862	.remove		= efx_pci_remove,
3863	.driver.pm	= &efx_pm_ops,
 
3864	.err_handler	= &efx_err_handlers,
3865#ifdef CONFIG_SFC_SRIOV
3866	.sriov_configure = efx_pci_sriov_configure,
3867#endif
3868};
3869
3870/**************************************************************************
3871 *
3872 * Kernel module interface
3873 *
3874 *************************************************************************/
3875
3876module_param(interrupt_mode, uint, 0444);
3877MODULE_PARM_DESC(interrupt_mode,
3878		 "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)");
3879
3880static int __init efx_init_module(void)
3881{
3882	int rc;
3883
3884	printk(KERN_INFO "Solarflare NET driver v" EFX_DRIVER_VERSION "\n");
3885
3886	rc = register_netdevice_notifier(&efx_netdev_notifier);
3887	if (rc)
3888		goto err_notifier;
3889
3890#ifdef CONFIG_SFC_SRIOV
3891	rc = efx_init_sriov();
3892	if (rc)
3893		goto err_sriov;
3894#endif
3895
3896	reset_workqueue = create_singlethread_workqueue("sfc_reset");
3897	if (!reset_workqueue) {
3898		rc = -ENOMEM;
3899		goto err_reset;
3900	}
3901
3902	rc = pci_register_driver(&efx_pci_driver);
3903	if (rc < 0)
3904		goto err_pci;
3905
3906	return 0;
3907
3908 err_pci:
3909	destroy_workqueue(reset_workqueue);
3910 err_reset:
3911#ifdef CONFIG_SFC_SRIOV
3912	efx_fini_sriov();
3913 err_sriov:
3914#endif
3915	unregister_netdevice_notifier(&efx_netdev_notifier);
3916 err_notifier:
3917	return rc;
3918}
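/* The error labels in efx_init_module() unwind in exact reverse order of
 * the setup steps (the usual kernel goto-cleanup idiom); efx_exit_module()
 * below tears everything down in the same reverse order.
 */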
3919
3920static void __exit efx_exit_module(void)
3921{
3922	printk(KERN_INFO "Solarflare NET driver unloading\n");
3923
3924	pci_unregister_driver(&efx_pci_driver);
3925	destroy_workqueue(reset_workqueue);
3926#ifdef CONFIG_SFC_SRIOV
3927	efx_fini_sriov();
3928#endif
3929	unregister_netdevice_notifier(&efx_netdev_notifier);
3930
3931}
3932
3933module_init(efx_init_module);
3934module_exit(efx_exit_module);
3935
3936MODULE_AUTHOR("Solarflare Communications and "
3937	      "Michael Brown <mbrown@fensystems.co.uk>");
3938MODULE_DESCRIPTION("Solarflare network driver");
3939MODULE_LICENSE("GPL");
3940MODULE_DEVICE_TABLE(pci, efx_pci_table);
3941MODULE_VERSION(EFX_DRIVER_VERSION);