Linux Audio

Check our new training course

Loading...
v6.8
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2005 - 2016 Broadcom
   4 * All rights reserved.
   5 *
   6 * Contact Information:
   7 * linux-drivers@emulex.com
   8 *
   9 * Emulex
  10 * 3333 Susan Street
  11 * Costa Mesa, CA 92626
  12 */
  13
  14#include <linux/prefetch.h>
  15#include <linux/module.h>
  16#include "be.h"
  17#include "be_cmds.h"
  18#include <asm/div64.h>
 
  19#include <linux/if_bridge.h>
  20#include <net/busy_poll.h>
  21#include <net/vxlan.h>
  22
  23MODULE_DESCRIPTION(DRV_DESC);
  24MODULE_AUTHOR("Emulex Corporation");
  25MODULE_LICENSE("GPL");
  26
  27/* num_vfs module param is obsolete.
  28 * Use sysfs method to enable/disable VFs.
  29 */
  30static unsigned int num_vfs;
  31module_param(num_vfs, uint, 0444);
  32MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
  33
  34static ushort rx_frag_size = 2048;
  35module_param(rx_frag_size, ushort, 0444);
  36MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
  37
  38/* Per-module error detection/recovery workq shared across all functions.
  39 * Each function schedules its own work request on this shared workq.
  40 */
  41static struct workqueue_struct *be_err_recovery_workq;
  42
  43static const struct pci_device_id be_dev_ids[] = {
  44#ifdef CONFIG_BE2NET_BE2
  45	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
  46	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
  47#endif /* CONFIG_BE2NET_BE2 */
  48#ifdef CONFIG_BE2NET_BE3
  49	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
  50	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
  51#endif /* CONFIG_BE2NET_BE3 */
  52#ifdef CONFIG_BE2NET_LANCER
  53	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
  54	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
  55#endif /* CONFIG_BE2NET_LANCER */
  56#ifdef CONFIG_BE2NET_SKYHAWK
  57	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
  58	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
  59#endif /* CONFIG_BE2NET_SKYHAWK */
  60	{ 0 }
  61};
  62MODULE_DEVICE_TABLE(pci, be_dev_ids);
  63
  64/* Workqueue used by all functions for defering cmd calls to the adapter */
  65static struct workqueue_struct *be_wq;
  66
  67/* UE Status Low CSR */
  68static const char * const ue_status_low_desc[] = {
  69	"CEV",
  70	"CTX",
  71	"DBUF",
  72	"ERX",
  73	"Host",
  74	"MPU",
  75	"NDMA",
  76	"PTC ",
  77	"RDMA ",
  78	"RXF ",
  79	"RXIPS ",
  80	"RXULP0 ",
  81	"RXULP1 ",
  82	"RXULP2 ",
  83	"TIM ",
  84	"TPOST ",
  85	"TPRE ",
  86	"TXIPS ",
  87	"TXULP0 ",
  88	"TXULP1 ",
  89	"UC ",
  90	"WDMA ",
  91	"TXULP2 ",
  92	"HOST1 ",
  93	"P0_OB_LINK ",
  94	"P1_OB_LINK ",
  95	"HOST_GPIO ",
  96	"MBOX ",
  97	"ERX2 ",
  98	"SPARE ",
  99	"JTAG ",
 100	"MPU_INTPEND "
 101};
 102
 103/* UE Status High CSR */
 104static const char * const ue_status_hi_desc[] = {
 105	"LPCMEMHOST",
 106	"MGMT_MAC",
 107	"PCS0ONLINE",
 108	"MPU_IRAM",
 109	"PCS1ONLINE",
 110	"PCTL0",
 111	"PCTL1",
 112	"PMEM",
 113	"RR",
 114	"TXPB",
 115	"RXPP",
 116	"XAUI",
 117	"TXP",
 118	"ARM",
 119	"IPC",
 120	"HOST2",
 121	"HOST3",
 122	"HOST4",
 123	"HOST5",
 124	"HOST6",
 125	"HOST7",
 126	"ECRC",
 127	"Poison TLP",
 128	"NETC",
 129	"PERIPH",
 130	"LLTXULP",
 131	"D2P",
 132	"RCON",
 133	"LDMA",
 134	"LLTXP",
 135	"LLTXPB",
 136	"Unknown"
 137};
 138
 139#define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
 140				 BE_IF_FLAGS_BROADCAST | \
 141				 BE_IF_FLAGS_MULTICAST | \
 142				 BE_IF_FLAGS_PASS_L3L4_ERRORS)
 143
 144static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
 145{
 146	struct be_dma_mem *mem = &q->dma_mem;
 147
 148	if (mem->va) {
 149		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
 150				  mem->dma);
 151		mem->va = NULL;
 152	}
 153}
 154
 155static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
 156			  u16 len, u16 entry_size)
 157{
 158	struct be_dma_mem *mem = &q->dma_mem;
 159
 160	memset(q, 0, sizeof(*q));
 161	q->len = len;
 162	q->entry_size = entry_size;
 163	mem->size = len * entry_size;
 164	mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
 165				     &mem->dma, GFP_KERNEL);
 166	if (!mem->va)
 167		return -ENOMEM;
 168	return 0;
 169}
 170
 171static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
 172{
 173	u32 reg, enabled;
 174
 175	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
 176			      &reg);
 177	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 178
 179	if (!enabled && enable)
 180		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 181	else if (enabled && !enable)
 182		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 183	else
 184		return;
 185
 186	pci_write_config_dword(adapter->pdev,
 187			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
 188}
 189
 190static void be_intr_set(struct be_adapter *adapter, bool enable)
 191{
 192	int status = 0;
 193
 194	/* On lancer interrupts can't be controlled via this register */
 195	if (lancer_chip(adapter))
 196		return;
 197
 198	if (be_check_error(adapter, BE_ERROR_EEH))
 199		return;
 200
 201	status = be_cmd_intr_set(adapter, enable);
 202	if (status)
 203		be_reg_intr_set(adapter, enable);
 204}
 205
 206static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
 207{
 208	u32 val = 0;
 209
 210	if (be_check_error(adapter, BE_ERROR_HW))
 211		return;
 212
 213	val |= qid & DB_RQ_RING_ID_MASK;
 214	val |= posted << DB_RQ_NUM_POSTED_SHIFT;
 215
 216	wmb();
 217	iowrite32(val, adapter->db + DB_RQ_OFFSET);
 218}
 219
 220static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
 221			  u16 posted)
 222{
 223	u32 val = 0;
 224
 225	if (be_check_error(adapter, BE_ERROR_HW))
 226		return;
 227
 228	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
 229	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
 230
 231	wmb();
 232	iowrite32(val, adapter->db + txo->db_offset);
 233}
 234
 235static void be_eq_notify(struct be_adapter *adapter, u16 qid,
 236			 bool arm, bool clear_int, u16 num_popped,
 237			 u32 eq_delay_mult_enc)
 238{
 239	u32 val = 0;
 240
 241	val |= qid & DB_EQ_RING_ID_MASK;
 242	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
 243
 244	if (be_check_error(adapter, BE_ERROR_HW))
 245		return;
 246
 247	if (arm)
 248		val |= 1 << DB_EQ_REARM_SHIFT;
 249	if (clear_int)
 250		val |= 1 << DB_EQ_CLR_SHIFT;
 251	val |= 1 << DB_EQ_EVNT_SHIFT;
 252	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
 253	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
 254	iowrite32(val, adapter->db + DB_EQ_OFFSET);
 255}
 256
 257void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
 258{
 259	u32 val = 0;
 260
 261	val |= qid & DB_CQ_RING_ID_MASK;
 262	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
 263			DB_CQ_RING_ID_EXT_MASK_SHIFT);
 264
 265	if (be_check_error(adapter, BE_ERROR_HW))
 266		return;
 267
 268	if (arm)
 269		val |= 1 << DB_CQ_REARM_SHIFT;
 270	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
 271	iowrite32(val, adapter->db + DB_CQ_OFFSET);
 272}
 273
 274static int be_dev_mac_add(struct be_adapter *adapter, const u8 *mac)
 275{
 276	int i;
 277
 278	/* Check if mac has already been added as part of uc-list */
 279	for (i = 0; i < adapter->uc_macs; i++) {
 280		if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
 281			/* mac already added, skip addition */
 282			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
 283			return 0;
 284		}
 285	}
 286
 287	return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
 288			       &adapter->pmac_id[0], 0);
 289}
 290
 291static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
 292{
 293	int i;
 294
 295	/* Skip deletion if the programmed mac is
 296	 * being used in uc-list
 297	 */
 298	for (i = 0; i < adapter->uc_macs; i++) {
 299		if (adapter->pmac_id[i + 1] == pmac_id)
 300			return;
 301	}
 302	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
 303}
 304
 305static int be_mac_addr_set(struct net_device *netdev, void *p)
 306{
 307	struct be_adapter *adapter = netdev_priv(netdev);
 308	struct device *dev = &adapter->pdev->dev;
 309	struct sockaddr *addr = p;
 310	int status;
 311	u8 mac[ETH_ALEN];
 312	u32 old_pmac_id = adapter->pmac_id[0];
 313
 314	if (!is_valid_ether_addr(addr->sa_data))
 315		return -EADDRNOTAVAIL;
 316
 317	/* Proceed further only if, User provided MAC is different
 318	 * from active MAC
 319	 */
 320	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
 321		return 0;
 322
 323	/* BE3 VFs without FILTMGMT privilege are not allowed to set its MAC
 324	 * address
 325	 */
 326	if (BEx_chip(adapter) && be_virtfn(adapter) &&
 327	    !check_privilege(adapter, BE_PRIV_FILTMGMT))
 328		return -EPERM;
 329
 330	/* if device is not running, copy MAC to netdev->dev_addr */
 331	if (!netif_running(netdev))
 332		goto done;
 333
 334	/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
 335	 * privilege or if PF did not provision the new MAC address.
 336	 * On BE3, this cmd will always fail if the VF doesn't have the
 337	 * FILTMGMT privilege. This failure is OK, only if the PF programmed
 338	 * the MAC for the VF.
 339	 */
 340	mutex_lock(&adapter->rx_filter_lock);
 341	status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
 342	if (!status) {
 343
 344		/* Delete the old programmed MAC. This call may fail if the
 345		 * old MAC was already deleted by the PF driver.
 346		 */
 347		if (adapter->pmac_id[0] != old_pmac_id)
 348			be_dev_mac_del(adapter, old_pmac_id);
 349	}
 350
 351	mutex_unlock(&adapter->rx_filter_lock);
 352	/* Decide if the new MAC is successfully activated only after
 353	 * querying the FW
 354	 */
 355	status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
 356				       adapter->if_handle, true, 0);
 357	if (status)
 358		goto err;
 359
 360	/* The MAC change did not happen, either due to lack of privilege
 361	 * or PF didn't pre-provision.
 362	 */
 363	if (!ether_addr_equal(addr->sa_data, mac)) {
 364		status = -EPERM;
 365		goto err;
 366	}
 367
 368	/* Remember currently programmed MAC */
 369	ether_addr_copy(adapter->dev_mac, addr->sa_data);
 370done:
 371	eth_hw_addr_set(netdev, addr->sa_data);
 372	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
 373	return 0;
 374err:
 375	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
 376	return status;
 377}
 378
 379/* BE2 supports only v0 cmd */
 380static void *hw_stats_from_cmd(struct be_adapter *adapter)
 381{
 382	if (BE2_chip(adapter)) {
 383		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
 384
 385		return &cmd->hw_stats;
 386	} else if (BE3_chip(adapter)) {
 387		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
 388
 389		return &cmd->hw_stats;
 390	} else {
 391		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
 392
 393		return &cmd->hw_stats;
 394	}
 395}
 396
 397/* BE2 supports only v0 cmd */
 398static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
 399{
 400	if (BE2_chip(adapter)) {
 401		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
 402
 403		return &hw_stats->erx;
 404	} else if (BE3_chip(adapter)) {
 405		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
 406
 407		return &hw_stats->erx;
 408	} else {
 409		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
 410
 411		return &hw_stats->erx;
 412	}
 413}
 414
 415static void populate_be_v0_stats(struct be_adapter *adapter)
 416{
 417	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
 418	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 419	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
 420	struct be_port_rxf_stats_v0 *port_stats =
 421					&rxf_stats->port[adapter->port_num];
 422	struct be_drv_stats *drvs = &adapter->drv_stats;
 423
 424	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 425	drvs->rx_pause_frames = port_stats->rx_pause_frames;
 426	drvs->rx_crc_errors = port_stats->rx_crc_errors;
 427	drvs->rx_control_frames = port_stats->rx_control_frames;
 428	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 429	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 430	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 431	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 432	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 433	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 434	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
 435	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 436	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 437	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 438	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 439	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
 440	drvs->rx_dropped_header_too_small =
 441		port_stats->rx_dropped_header_too_small;
 442	drvs->rx_address_filtered =
 443					port_stats->rx_address_filtered +
 444					port_stats->rx_vlan_filtered;
 445	drvs->rx_alignment_symbol_errors =
 446		port_stats->rx_alignment_symbol_errors;
 447
 448	drvs->tx_pauseframes = port_stats->tx_pauseframes;
 449	drvs->tx_controlframes = port_stats->tx_controlframes;
 450
 451	if (adapter->port_num)
 452		drvs->jabber_events = rxf_stats->port1_jabber_events;
 453	else
 454		drvs->jabber_events = rxf_stats->port0_jabber_events;
 455	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 456	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 457	drvs->forwarded_packets = rxf_stats->forwarded_packets;
 458	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 459	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 460	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 461	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 462}
 463
 464static void populate_be_v1_stats(struct be_adapter *adapter)
 465{
 466	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
 467	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 468	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
 469	struct be_port_rxf_stats_v1 *port_stats =
 470					&rxf_stats->port[adapter->port_num];
 471	struct be_drv_stats *drvs = &adapter->drv_stats;
 472
 473	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 474	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
 475	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
 476	drvs->rx_pause_frames = port_stats->rx_pause_frames;
 477	drvs->rx_crc_errors = port_stats->rx_crc_errors;
 478	drvs->rx_control_frames = port_stats->rx_control_frames;
 479	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 480	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 481	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 482	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 483	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 484	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 485	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 486	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 487	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 488	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 489	drvs->rx_dropped_header_too_small =
 490		port_stats->rx_dropped_header_too_small;
 491	drvs->rx_input_fifo_overflow_drop =
 492		port_stats->rx_input_fifo_overflow_drop;
 493	drvs->rx_address_filtered = port_stats->rx_address_filtered;
 494	drvs->rx_alignment_symbol_errors =
 495		port_stats->rx_alignment_symbol_errors;
 496	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
 497	drvs->tx_pauseframes = port_stats->tx_pauseframes;
 498	drvs->tx_controlframes = port_stats->tx_controlframes;
 499	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
 500	drvs->jabber_events = port_stats->jabber_events;
 501	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 502	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 503	drvs->forwarded_packets = rxf_stats->forwarded_packets;
 504	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 505	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 506	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 507	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 508}
 509
 510static void populate_be_v2_stats(struct be_adapter *adapter)
 511{
 512	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
 513	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 514	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
 515	struct be_port_rxf_stats_v2 *port_stats =
 516					&rxf_stats->port[adapter->port_num];
 517	struct be_drv_stats *drvs = &adapter->drv_stats;
 518
 519	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 520	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
 521	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
 522	drvs->rx_pause_frames = port_stats->rx_pause_frames;
 523	drvs->rx_crc_errors = port_stats->rx_crc_errors;
 524	drvs->rx_control_frames = port_stats->rx_control_frames;
 525	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 526	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 527	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 528	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 529	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 530	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 531	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 532	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 533	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 534	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 535	drvs->rx_dropped_header_too_small =
 536		port_stats->rx_dropped_header_too_small;
 537	drvs->rx_input_fifo_overflow_drop =
 538		port_stats->rx_input_fifo_overflow_drop;
 539	drvs->rx_address_filtered = port_stats->rx_address_filtered;
 540	drvs->rx_alignment_symbol_errors =
 541		port_stats->rx_alignment_symbol_errors;
 542	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
 543	drvs->tx_pauseframes = port_stats->tx_pauseframes;
 544	drvs->tx_controlframes = port_stats->tx_controlframes;
 545	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
 546	drvs->jabber_events = port_stats->jabber_events;
 547	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 548	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 549	drvs->forwarded_packets = rxf_stats->forwarded_packets;
 550	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 551	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 552	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 553	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 554	if (be_roce_supported(adapter)) {
 555		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
 556		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
 557		drvs->rx_roce_frames = port_stats->roce_frames_received;
 558		drvs->roce_drops_crc = port_stats->roce_drops_crc;
 559		drvs->roce_drops_payload_len =
 560			port_stats->roce_drops_payload_len;
 561	}
 562}
 563
 564static void populate_lancer_stats(struct be_adapter *adapter)
 565{
 566	struct be_drv_stats *drvs = &adapter->drv_stats;
 567	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
 568
 569	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
 570	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
 571	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
 572	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
 573	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
 574	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
 575	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
 576	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
 577	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
 578	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
 579	drvs->rx_dropped_tcp_length =
 580				pport_stats->rx_dropped_invalid_tcp_length;
 581	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
 582	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
 583	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
 584	drvs->rx_dropped_header_too_small =
 585				pport_stats->rx_dropped_header_too_small;
 586	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
 587	drvs->rx_address_filtered =
 588					pport_stats->rx_address_filtered +
 589					pport_stats->rx_vlan_filtered;
 590	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
 591	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
 592	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
 593	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
 594	drvs->jabber_events = pport_stats->rx_jabbers;
 595	drvs->forwarded_packets = pport_stats->num_forwards_lo;
 596	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
 597	drvs->rx_drops_too_many_frags =
 598				pport_stats->rx_drops_too_many_frags_lo;
 599}
 600
 601static void accumulate_16bit_val(u32 *acc, u16 val)
 602{
 603#define lo(x)			(x & 0xFFFF)
 604#define hi(x)			(x & 0xFFFF0000)
 605	bool wrapped = val < lo(*acc);
 606	u32 newacc = hi(*acc) + val;
 607
 608	if (wrapped)
 609		newacc += 65536;
 610	WRITE_ONCE(*acc, newacc);
 611}
 612
 613static void populate_erx_stats(struct be_adapter *adapter,
 614			       struct be_rx_obj *rxo, u32 erx_stat)
 615{
 616	if (!BEx_chip(adapter))
 617		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
 618	else
 619		/* below erx HW counter can actually wrap around after
 620		 * 65535. Driver accumulates a 32-bit value
 621		 */
 622		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
 623				     (u16)erx_stat);
 624}
 625
 626void be_parse_stats(struct be_adapter *adapter)
 627{
 628	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
 629	struct be_rx_obj *rxo;
 630	int i;
 631	u32 erx_stat;
 632
 633	if (lancer_chip(adapter)) {
 634		populate_lancer_stats(adapter);
 635	} else {
 636		if (BE2_chip(adapter))
 637			populate_be_v0_stats(adapter);
 638		else if (BE3_chip(adapter))
 639			/* for BE3 */
 640			populate_be_v1_stats(adapter);
 641		else
 642			populate_be_v2_stats(adapter);
 643
 644		/* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
 645		for_all_rx_queues(adapter, rxo, i) {
 646			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
 647			populate_erx_stats(adapter, rxo, erx_stat);
 648		}
 649	}
 650}
 651
 652static void be_get_stats64(struct net_device *netdev,
 653			   struct rtnl_link_stats64 *stats)
 654{
 655	struct be_adapter *adapter = netdev_priv(netdev);
 656	struct be_drv_stats *drvs = &adapter->drv_stats;
 657	struct be_rx_obj *rxo;
 658	struct be_tx_obj *txo;
 659	u64 pkts, bytes;
 660	unsigned int start;
 661	int i;
 662
 663	for_all_rx_queues(adapter, rxo, i) {
 664		const struct be_rx_stats *rx_stats = rx_stats(rxo);
 665
 666		do {
 667			start = u64_stats_fetch_begin(&rx_stats->sync);
 668			pkts = rx_stats(rxo)->rx_pkts;
 669			bytes = rx_stats(rxo)->rx_bytes;
 670		} while (u64_stats_fetch_retry(&rx_stats->sync, start));
 671		stats->rx_packets += pkts;
 672		stats->rx_bytes += bytes;
 673		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
 674		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
 675					rx_stats(rxo)->rx_drops_no_frags;
 676	}
 677
 678	for_all_tx_queues(adapter, txo, i) {
 679		const struct be_tx_stats *tx_stats = tx_stats(txo);
 680
 681		do {
 682			start = u64_stats_fetch_begin(&tx_stats->sync);
 683			pkts = tx_stats(txo)->tx_pkts;
 684			bytes = tx_stats(txo)->tx_bytes;
 685		} while (u64_stats_fetch_retry(&tx_stats->sync, start));
 686		stats->tx_packets += pkts;
 687		stats->tx_bytes += bytes;
 688	}
 689
 690	/* bad pkts received */
 691	stats->rx_errors = drvs->rx_crc_errors +
 692		drvs->rx_alignment_symbol_errors +
 693		drvs->rx_in_range_errors +
 694		drvs->rx_out_range_errors +
 695		drvs->rx_frame_too_long +
 696		drvs->rx_dropped_too_small +
 697		drvs->rx_dropped_too_short +
 698		drvs->rx_dropped_header_too_small +
 699		drvs->rx_dropped_tcp_length +
 700		drvs->rx_dropped_runt;
 701
 702	/* detailed rx errors */
 703	stats->rx_length_errors = drvs->rx_in_range_errors +
 704		drvs->rx_out_range_errors +
 705		drvs->rx_frame_too_long;
 706
 707	stats->rx_crc_errors = drvs->rx_crc_errors;
 708
 709	/* frame alignment errors */
 710	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
 711
 712	/* receiver fifo overrun */
 713	/* drops_no_pbuf is no per i/f, it's per BE card */
 714	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
 715				drvs->rx_input_fifo_overflow_drop +
 716				drvs->rx_drops_no_pbuf;
 717}
 718
 719void be_link_status_update(struct be_adapter *adapter, u8 link_status)
 720{
 721	struct net_device *netdev = adapter->netdev;
 722
 723	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
 724		netif_carrier_off(netdev);
 725		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
 726	}
 727
 728	if (link_status)
 729		netif_carrier_on(netdev);
 730	else
 731		netif_carrier_off(netdev);
 732
 733	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
 734}
 735
 736static int be_gso_hdr_len(struct sk_buff *skb)
 737{
 738	if (skb->encapsulation)
 739		return skb_inner_tcp_all_headers(skb);
 740
 741	return skb_tcp_all_headers(skb);
 742}
 743
 744static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
 745{
 746	struct be_tx_stats *stats = tx_stats(txo);
 747	u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
 748	/* Account for headers which get duplicated in TSO pkt */
 749	u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
 750
 751	u64_stats_update_begin(&stats->sync);
 752	stats->tx_reqs++;
 753	stats->tx_bytes += skb->len + dup_hdr_len;
 754	stats->tx_pkts += tx_pkts;
 755	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
 756		stats->tx_vxlan_offload_pkts += tx_pkts;
 757	u64_stats_update_end(&stats->sync);
 758}
 759
 760/* Returns number of WRBs needed for the skb */
 761static u32 skb_wrb_cnt(struct sk_buff *skb)
 762{
 763	/* +1 for the header wrb */
 764	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
 765}
 766
 767static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
 768{
 769	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
 770	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
 771	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
 772	wrb->rsvd0 = 0;
 773}
 774
 775/* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
 776 * to avoid the swap and shift/mask operations in wrb_fill().
 777 */
 778static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
 779{
 780	wrb->frag_pa_hi = 0;
 781	wrb->frag_pa_lo = 0;
 782	wrb->frag_len = 0;
 783	wrb->rsvd0 = 0;
 784}
 785
 786static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
 787				     struct sk_buff *skb)
 788{
 789	u8 vlan_prio;
 790	u16 vlan_tag;
 791
 792	vlan_tag = skb_vlan_tag_get(skb);
 793	vlan_prio = skb_vlan_tag_get_prio(skb);
 794	/* If vlan priority provided by OS is NOT in available bmap */
 795	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
 796		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
 797				adapter->recommended_prio_bits;
 798
 799	return vlan_tag;
 800}
 801
 802/* Used only for IP tunnel packets */
 803static u16 skb_inner_ip_proto(struct sk_buff *skb)
 804{
 805	return (inner_ip_hdr(skb)->version == 4) ?
 806		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
 807}
 808
 809static u16 skb_ip_proto(struct sk_buff *skb)
 810{
 811	return (ip_hdr(skb)->version == 4) ?
 812		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
 813}
 814
 815static inline bool be_is_txq_full(struct be_tx_obj *txo)
 816{
 817	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
 818}
 819
 820static inline bool be_can_txq_wake(struct be_tx_obj *txo)
 821{
 822	return atomic_read(&txo->q.used) < txo->q.len / 2;
 823}
 824
 825static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
 826{
 827	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
 828}
 829
 830static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
 831				       struct sk_buff *skb,
 832				       struct be_wrb_params *wrb_params)
 833{
 834	u16 proto;
 835
 836	if (skb_is_gso(skb)) {
 837		BE_WRB_F_SET(wrb_params->features, LSO, 1);
 838		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
 839		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
 840			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
 841	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 842		if (skb->encapsulation) {
 843			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
 844			proto = skb_inner_ip_proto(skb);
 845		} else {
 846			proto = skb_ip_proto(skb);
 847		}
 848		if (proto == IPPROTO_TCP)
 849			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
 850		else if (proto == IPPROTO_UDP)
 851			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
 852	}
 853
 854	if (skb_vlan_tag_present(skb)) {
 855		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
 856		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
 857	}
 858
 859	BE_WRB_F_SET(wrb_params->features, CRC, 1);
 860}
 861
 862static void wrb_fill_hdr(struct be_adapter *adapter,
 863			 struct be_eth_hdr_wrb *hdr,
 864			 struct be_wrb_params *wrb_params,
 865			 struct sk_buff *skb)
 866{
 867	memset(hdr, 0, sizeof(*hdr));
 868
 869	SET_TX_WRB_HDR_BITS(crc, hdr,
 870			    BE_WRB_F_GET(wrb_params->features, CRC));
 871	SET_TX_WRB_HDR_BITS(ipcs, hdr,
 872			    BE_WRB_F_GET(wrb_params->features, IPCS));
 873	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
 874			    BE_WRB_F_GET(wrb_params->features, TCPCS));
 875	SET_TX_WRB_HDR_BITS(udpcs, hdr,
 876			    BE_WRB_F_GET(wrb_params->features, UDPCS));
 877
 878	SET_TX_WRB_HDR_BITS(lso, hdr,
 879			    BE_WRB_F_GET(wrb_params->features, LSO));
 880	SET_TX_WRB_HDR_BITS(lso6, hdr,
 881			    BE_WRB_F_GET(wrb_params->features, LSO6));
 882	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
 883
 884	/* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
 885	 * hack is not needed, the evt bit is set while ringing DB.
 886	 */
 887	SET_TX_WRB_HDR_BITS(event, hdr,
 888			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
 889	SET_TX_WRB_HDR_BITS(vlan, hdr,
 890			    BE_WRB_F_GET(wrb_params->features, VLAN));
 891	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
 892
 893	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
 894	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
 895	SET_TX_WRB_HDR_BITS(mgmt, hdr,
 896			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
 897}
 898
 899static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
 900			  bool unmap_single)
 901{
 902	dma_addr_t dma;
 903	u32 frag_len = le32_to_cpu(wrb->frag_len);
 904
 905
 906	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
 907		(u64)le32_to_cpu(wrb->frag_pa_lo);
 908	if (frag_len) {
 909		if (unmap_single)
 910			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
 911		else
 912			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
 913	}
 914}
 915
 916/* Grab a WRB header for xmit */
 917static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
 918{
 919	u32 head = txo->q.head;
 920
 921	queue_head_inc(&txo->q);
 922	return head;
 923}
 924
 925/* Set up the WRB header for xmit */
 926static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
 927				struct be_tx_obj *txo,
 928				struct be_wrb_params *wrb_params,
 929				struct sk_buff *skb, u16 head)
 930{
 931	u32 num_frags = skb_wrb_cnt(skb);
 932	struct be_queue_info *txq = &txo->q;
 933	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
 934
 935	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
 936	be_dws_cpu_to_le(hdr, sizeof(*hdr));
 937
 938	BUG_ON(txo->sent_skb_list[head]);
 939	txo->sent_skb_list[head] = skb;
 940	txo->last_req_hdr = head;
 941	atomic_add(num_frags, &txq->used);
 942	txo->last_req_wrb_cnt = num_frags;
 943	txo->pend_wrb_cnt += num_frags;
 944}
 945
 946/* Setup a WRB fragment (buffer descriptor) for xmit */
 947static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
 948				 int len)
 949{
 950	struct be_eth_wrb *wrb;
 951	struct be_queue_info *txq = &txo->q;
 952
 953	wrb = queue_head_node(txq);
 954	wrb_fill(wrb, busaddr, len);
 955	queue_head_inc(txq);
 956}
 957
 958/* Bring the queue back to the state it was in before be_xmit_enqueue() routine
 959 * was invoked. The producer index is restored to the previous packet and the
 960 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
 961 */
 962static void be_xmit_restore(struct be_adapter *adapter,
 963			    struct be_tx_obj *txo, u32 head, bool map_single,
 964			    u32 copied)
 965{
 966	struct device *dev;
 967	struct be_eth_wrb *wrb;
 968	struct be_queue_info *txq = &txo->q;
 969
 970	dev = &adapter->pdev->dev;
 971	txq->head = head;
 972
 973	/* skip the first wrb (hdr); it's not mapped */
 974	queue_head_inc(txq);
 975	while (copied) {
 976		wrb = queue_head_node(txq);
 977		unmap_tx_frag(dev, wrb, map_single);
 978		map_single = false;
 979		copied -= le32_to_cpu(wrb->frag_len);
 980		queue_head_inc(txq);
 981	}
 982
 983	txq->head = head;
 984}
 985
 986/* Enqueue the given packet for transmit. This routine allocates WRBs for the
 987 * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
 988 * of WRBs used up by the packet.
 989 */
 990static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
 991			   struct sk_buff *skb,
 992			   struct be_wrb_params *wrb_params)
 993{
 994	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
 995	struct device *dev = &adapter->pdev->dev;
 996	bool map_single = false;
 997	u32 head;
 998	dma_addr_t busaddr;
 999	int len;
1000
1001	head = be_tx_get_wrb_hdr(txo);
1002
1003	if (skb->len > skb->data_len) {
1004		len = skb_headlen(skb);
1005
1006		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1007		if (dma_mapping_error(dev, busaddr))
1008			goto dma_err;
1009		map_single = true;
1010		be_tx_setup_wrb_frag(txo, busaddr, len);
1011		copied += len;
1012	}
1013
1014	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1015		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1016		len = skb_frag_size(frag);
1017
1018		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1019		if (dma_mapping_error(dev, busaddr))
1020			goto dma_err;
1021		be_tx_setup_wrb_frag(txo, busaddr, len);
1022		copied += len;
1023	}
1024
1025	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1026
1027	be_tx_stats_update(txo, skb);
1028	return wrb_cnt;
1029
1030dma_err:
1031	adapter->drv_stats.dma_map_errors++;
1032	be_xmit_restore(adapter, txo, head, map_single, copied);
1033	return 0;
1034}
1035
1036static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1037{
1038	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1039}
1040
1041static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1042					     struct sk_buff *skb,
1043					     struct be_wrb_params
1044					     *wrb_params)
1045{
1046	bool insert_vlan = false;
1047	u16 vlan_tag = 0;
1048
1049	skb = skb_share_check(skb, GFP_ATOMIC);
1050	if (unlikely(!skb))
1051		return skb;
1052
1053	if (skb_vlan_tag_present(skb)) {
1054		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1055		insert_vlan = true;
1056	}
1057
1058	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1059		if (!insert_vlan) {
1060			vlan_tag = adapter->pvid;
1061			insert_vlan = true;
1062		}
1063		/* f/w workaround to set skip_hw_vlan = 1, informs the F/W to
1064		 * skip VLAN insertion
1065		 */
1066		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1067	}
1068
1069	if (insert_vlan) {
1070		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1071						vlan_tag);
1072		if (unlikely(!skb))
1073			return skb;
1074		__vlan_hwaccel_clear_tag(skb);
1075	}
1076
1077	/* Insert the outer VLAN, if any */
1078	if (adapter->qnq_vid) {
1079		vlan_tag = adapter->qnq_vid;
1080		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1081						vlan_tag);
1082		if (unlikely(!skb))
1083			return skb;
1084		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1085	}
1086
1087	return skb;
1088}
1089
1090static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1091{
1092	struct ethhdr *eh = (struct ethhdr *)skb->data;
1093	u16 offset = ETH_HLEN;
1094
1095	if (eh->h_proto == htons(ETH_P_IPV6)) {
1096		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1097
1098		offset += sizeof(struct ipv6hdr);
1099		if (ip6h->nexthdr != NEXTHDR_TCP &&
1100		    ip6h->nexthdr != NEXTHDR_UDP) {
1101			struct ipv6_opt_hdr *ehdr =
1102				(struct ipv6_opt_hdr *)(skb->data + offset);
1103
1104			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1105			if (ehdr->hdrlen == 0xff)
1106				return true;
1107		}
1108	}
1109	return false;
1110}
1111
1112static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1113{
1114	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1115}
1116
1117static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1118{
1119	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1120}
1121
1122static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1123						  struct sk_buff *skb,
1124						  struct be_wrb_params
1125						  *wrb_params)
1126{
1127	struct vlan_ethhdr *veh = skb_vlan_eth_hdr(skb);
1128	unsigned int eth_hdr_len;
1129	struct iphdr *ip;
1130
1131	/* For padded packets, BE HW modifies tot_len field in IP header
1132	 * incorrecly when VLAN tag is inserted by HW.
1133	 * For padded packets, Lancer computes incorrect checksum.
1134	 */
1135	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1136						VLAN_ETH_HLEN : ETH_HLEN;
1137	if (skb->len <= 60 &&
1138	    (lancer_chip(adapter) || BE3_chip(adapter) ||
1139	     skb_vlan_tag_present(skb)) && is_ipv4_pkt(skb)) {
1140		ip = (struct iphdr *)ip_hdr(skb);
1141		if (unlikely(pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len))))
1142			goto tx_drop;
1143	}
1144
1145	/* If vlan tag is already inlined in the packet, skip HW VLAN
1146	 * tagging in pvid-tagging mode
1147	 */
1148	if (be_pvid_tagging_enabled(adapter) &&
1149	    veh->h_vlan_proto == htons(ETH_P_8021Q))
1150		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1151
1152	/* HW has a bug wherein it will calculate CSUM for VLAN
1153	 * pkts even though it is disabled.
1154	 * Manually insert VLAN in pkt.
1155	 */
1156	if (skb->ip_summed != CHECKSUM_PARTIAL &&
1157	    skb_vlan_tag_present(skb)) {
1158		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1159		if (unlikely(!skb))
1160			goto err;
1161	}
1162
1163	/* HW may lockup when VLAN HW tagging is requested on
1164	 * certain ipv6 packets. Drop such pkts if the HW workaround to
1165	 * skip HW tagging is not enabled by FW.
1166	 */
1167	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1168		     (adapter->pvid || adapter->qnq_vid) &&
1169		     !qnq_async_evt_rcvd(adapter)))
1170		goto tx_drop;
1171
1172	/* Manual VLAN tag insertion to prevent:
1173	 * ASIC lockup when the ASIC inserts VLAN tag into
1174	 * certain ipv6 packets. Insert VLAN tags in driver,
1175	 * and set event, completion, vlan bits accordingly
1176	 * in the Tx WRB.
1177	 */
1178	if (be_ipv6_tx_stall_chk(adapter, skb) &&
1179	    be_vlan_tag_tx_chk(adapter, skb)) {
1180		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1181		if (unlikely(!skb))
1182			goto err;
1183	}
1184
1185	return skb;
1186tx_drop:
1187	dev_kfree_skb_any(skb);
1188err:
1189	return NULL;
1190}
1191
1192static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1193					   struct sk_buff *skb,
1194					   struct be_wrb_params *wrb_params)
1195{
1196	int err;
1197
1198	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1199	 * packets that are 32b or less may cause a transmit stall
1200	 * on that port. The workaround is to pad such packets
1201	 * (len <= 32 bytes) to a minimum length of 36b.
1202	 */
1203	if (skb->len <= 32) {
1204		if (skb_put_padto(skb, 36))
1205			return NULL;
1206	}
1207
1208	if (BEx_chip(adapter) || lancer_chip(adapter)) {
1209		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1210		if (!skb)
1211			return NULL;
1212	}
1213
1214	/* The stack can send us skbs with length greater than
1215	 * what the HW can handle. Trim the extra bytes.
1216	 */
1217	WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1218	err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1219	WARN_ON(err);
1220
1221	return skb;
1222}
1223
1224static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1225{
1226	struct be_queue_info *txq = &txo->q;
1227	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1228
1229	/* Mark the last request eventable if it hasn't been marked already */
1230	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1231		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1232
1233	/* compose a dummy wrb if there are odd set of wrbs to notify */
1234	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1235		wrb_fill_dummy(queue_head_node(txq));
1236		queue_head_inc(txq);
1237		atomic_inc(&txq->used);
1238		txo->pend_wrb_cnt++;
1239		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1240					   TX_HDR_WRB_NUM_SHIFT);
1241		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1242					  TX_HDR_WRB_NUM_SHIFT);
1243	}
1244	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1245	txo->pend_wrb_cnt = 0;
1246}
1247
1248/* OS2BMC related */
1249
1250#define DHCP_CLIENT_PORT	68
1251#define DHCP_SERVER_PORT	67
1252#define NET_BIOS_PORT1		137
1253#define NET_BIOS_PORT2		138
1254#define DHCPV6_RAS_PORT		547
1255
1256#define is_mc_allowed_on_bmc(adapter, eh)	\
1257	(!is_multicast_filt_enabled(adapter) &&	\
1258	 is_multicast_ether_addr(eh->h_dest) &&	\
1259	 !is_broadcast_ether_addr(eh->h_dest))
1260
1261#define is_bc_allowed_on_bmc(adapter, eh)	\
1262	(!is_broadcast_filt_enabled(adapter) &&	\
1263	 is_broadcast_ether_addr(eh->h_dest))
1264
1265#define is_arp_allowed_on_bmc(adapter, skb)	\
1266	(is_arp(skb) && is_arp_filt_enabled(adapter))
1267
1268#define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))
1269
1270#define is_arp_filt_enabled(adapter)	\
1271		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1272
1273#define is_dhcp_client_filt_enabled(adapter)	\
1274		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1275
1276#define is_dhcp_srvr_filt_enabled(adapter)	\
1277		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1278
1279#define is_nbios_filt_enabled(adapter)	\
1280		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1281
1282#define is_ipv6_na_filt_enabled(adapter)	\
1283		(adapter->bmc_filt_mask &	\
1284			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1285
1286#define is_ipv6_ra_filt_enabled(adapter)	\
1287		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1288
1289#define is_ipv6_ras_filt_enabled(adapter)	\
1290		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1291
1292#define is_broadcast_filt_enabled(adapter)	\
1293		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1294
1295#define is_multicast_filt_enabled(adapter)	\
1296		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1297
1298static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1299			       struct sk_buff **skb)
1300{
1301	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1302	bool os2bmc = false;
1303
1304	if (!be_is_os2bmc_enabled(adapter))
1305		goto done;
1306
1307	if (!is_multicast_ether_addr(eh->h_dest))
1308		goto done;
1309
1310	if (is_mc_allowed_on_bmc(adapter, eh) ||
1311	    is_bc_allowed_on_bmc(adapter, eh) ||
1312	    is_arp_allowed_on_bmc(adapter, (*skb))) {
1313		os2bmc = true;
1314		goto done;
1315	}
1316
1317	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1318		struct ipv6hdr *hdr = ipv6_hdr((*skb));
1319		u8 nexthdr = hdr->nexthdr;
1320
1321		if (nexthdr == IPPROTO_ICMPV6) {
1322			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1323
1324			switch (icmp6->icmp6_type) {
1325			case NDISC_ROUTER_ADVERTISEMENT:
1326				os2bmc = is_ipv6_ra_filt_enabled(adapter);
1327				goto done;
1328			case NDISC_NEIGHBOUR_ADVERTISEMENT:
1329				os2bmc = is_ipv6_na_filt_enabled(adapter);
1330				goto done;
1331			default:
1332				break;
1333			}
1334		}
1335	}
1336
1337	if (is_udp_pkt((*skb))) {
1338		struct udphdr *udp = udp_hdr((*skb));
1339
1340		switch (ntohs(udp->dest)) {
1341		case DHCP_CLIENT_PORT:
1342			os2bmc = is_dhcp_client_filt_enabled(adapter);
1343			goto done;
1344		case DHCP_SERVER_PORT:
1345			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1346			goto done;
1347		case NET_BIOS_PORT1:
1348		case NET_BIOS_PORT2:
1349			os2bmc = is_nbios_filt_enabled(adapter);
1350			goto done;
1351		case DHCPV6_RAS_PORT:
1352			os2bmc = is_ipv6_ras_filt_enabled(adapter);
1353			goto done;
1354		default:
1355			break;
1356		}
1357	}
1358done:
1359	/* For packets over a vlan, which are destined
1360	 * to BMC, asic expects the vlan to be inline in the packet.
1361	 */
1362	if (os2bmc)
1363		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1364
1365	return os2bmc;
1366}
1367
1368static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1369{
1370	struct be_adapter *adapter = netdev_priv(netdev);
1371	u16 q_idx = skb_get_queue_mapping(skb);
1372	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1373	struct be_wrb_params wrb_params = { 0 };
1374	bool flush = !netdev_xmit_more();
1375	u16 wrb_cnt;
1376
1377	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1378	if (unlikely(!skb))
1379		goto drop;
1380
1381	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1382
1383	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1384	if (unlikely(!wrb_cnt)) {
1385		dev_kfree_skb_any(skb);
1386		goto drop;
1387	}
1388
1389	/* if os2bmc is enabled and if the pkt is destined to bmc,
1390	 * enqueue the pkt a 2nd time with mgmt bit set.
1391	 */
1392	if (be_send_pkt_to_bmc(adapter, &skb)) {
1393		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1394		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1395		if (unlikely(!wrb_cnt))
1396			goto drop;
1397		else
1398			skb_get(skb);
1399	}
1400
1401	if (be_is_txq_full(txo)) {
1402		netif_stop_subqueue(netdev, q_idx);
1403		tx_stats(txo)->tx_stops++;
1404	}
1405
1406	if (flush || __netif_subqueue_stopped(netdev, q_idx))
1407		be_xmit_flush(adapter, txo);
1408
1409	return NETDEV_TX_OK;
1410drop:
1411	tx_stats(txo)->tx_drv_drops++;
1412	/* Flush the already enqueued tx requests */
1413	if (flush && txo->pend_wrb_cnt)
1414		be_xmit_flush(adapter, txo);
1415
1416	return NETDEV_TX_OK;
1417}
1418
1419static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue)
1420{
1421	struct be_adapter *adapter = netdev_priv(netdev);
1422	struct device *dev = &adapter->pdev->dev;
1423	struct be_tx_obj *txo;
1424	struct sk_buff *skb;
1425	struct tcphdr *tcphdr;
1426	struct udphdr *udphdr;
1427	u32 *entry;
1428	int status;
1429	int i, j;
1430
1431	for_all_tx_queues(adapter, txo, i) {
1432		dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1433			 i, txo->q.head, txo->q.tail,
1434			 atomic_read(&txo->q.used), txo->q.id);
1435
1436		entry = txo->q.dma_mem.va;
1437		for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1438			if (entry[j] != 0 || entry[j + 1] != 0 ||
1439			    entry[j + 2] != 0 || entry[j + 3] != 0) {
1440				dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1441					 j, entry[j], entry[j + 1],
1442					 entry[j + 2], entry[j + 3]);
1443			}
1444		}
1445
1446		entry = txo->cq.dma_mem.va;
1447		dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1448			 i, txo->cq.head, txo->cq.tail,
1449			 atomic_read(&txo->cq.used));
1450		for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1451			if (entry[j] != 0 || entry[j + 1] != 0 ||
1452			    entry[j + 2] != 0 || entry[j + 3] != 0) {
1453				dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1454					 j, entry[j], entry[j + 1],
1455					 entry[j + 2], entry[j + 3]);
1456			}
1457		}
1458
1459		for (j = 0; j < TX_Q_LEN; j++) {
1460			if (txo->sent_skb_list[j]) {
1461				skb = txo->sent_skb_list[j];
1462				if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1463					tcphdr = tcp_hdr(skb);
1464					dev_info(dev, "TCP source port %d\n",
1465						 ntohs(tcphdr->source));
1466					dev_info(dev, "TCP dest port %d\n",
1467						 ntohs(tcphdr->dest));
1468					dev_info(dev, "TCP sequence num %d\n",
1469						 ntohs(tcphdr->seq));
1470					dev_info(dev, "TCP ack_seq %d\n",
1471						 ntohs(tcphdr->ack_seq));
1472				} else if (ip_hdr(skb)->protocol ==
1473					   IPPROTO_UDP) {
1474					udphdr = udp_hdr(skb);
1475					dev_info(dev, "UDP source port %d\n",
1476						 ntohs(udphdr->source));
1477					dev_info(dev, "UDP dest port %d\n",
1478						 ntohs(udphdr->dest));
1479				}
1480				dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1481					 j, skb, skb->len, skb->protocol);
1482			}
1483		}
1484	}
1485
1486	if (lancer_chip(adapter)) {
1487		dev_info(dev, "Initiating reset due to tx timeout\n");
1488		dev_info(dev, "Resetting adapter\n");
1489		status = lancer_physdev_ctrl(adapter,
1490					     PHYSDEV_CONTROL_FW_RESET_MASK);
1491		if (status)
1492			dev_err(dev, "Reset failed .. Reboot server\n");
1493	}
1494}
1495
1496static inline bool be_in_all_promisc(struct be_adapter *adapter)
1497{
1498	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1499			BE_IF_FLAGS_ALL_PROMISCUOUS;
1500}
1501
1502static int be_set_vlan_promisc(struct be_adapter *adapter)
1503{
1504	struct device *dev = &adapter->pdev->dev;
1505	int status;
1506
1507	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1508		return 0;
1509
1510	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1511	if (!status) {
1512		dev_info(dev, "Enabled VLAN promiscuous mode\n");
1513		adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1514	} else {
1515		dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1516	}
1517	return status;
1518}
1519
1520static int be_clear_vlan_promisc(struct be_adapter *adapter)
1521{
1522	struct device *dev = &adapter->pdev->dev;
1523	int status;
1524
1525	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1526	if (!status) {
1527		dev_info(dev, "Disabling VLAN promiscuous mode\n");
1528		adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1529	}
1530	return status;
1531}
1532
1533/*
1534 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1535 * If the user configures more, place BE in vlan promiscuous mode.
1536 */
1537static int be_vid_config(struct be_adapter *adapter)
1538{
1539	struct device *dev = &adapter->pdev->dev;
1540	u16 vids[BE_NUM_VLANS_SUPPORTED];
1541	u16 num = 0, i = 0;
1542	int status = 0;
1543
1544	/* No need to change the VLAN state if the I/F is in promiscuous */
1545	if (adapter->netdev->flags & IFF_PROMISC)
1546		return 0;
1547
1548	if (adapter->vlans_added > be_max_vlans(adapter))
1549		return be_set_vlan_promisc(adapter);
1550
1551	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1552		status = be_clear_vlan_promisc(adapter);
1553		if (status)
1554			return status;
1555	}
1556	/* Construct VLAN Table to give to HW */
1557	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1558		vids[num++] = cpu_to_le16(i);
1559
1560	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1561	if (status) {
1562		dev_err(dev, "Setting HW VLAN filtering failed\n");
1563		/* Set to VLAN promisc mode as setting VLAN filter failed */
1564		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1565		    addl_status(status) ==
1566				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1567			return be_set_vlan_promisc(adapter);
1568	}
1569	return status;
1570}
1571
1572static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1573{
1574	struct be_adapter *adapter = netdev_priv(netdev);
1575	int status = 0;
1576
1577	mutex_lock(&adapter->rx_filter_lock);
1578
1579	/* Packets with VID 0 are always received by Lancer by default */
1580	if (lancer_chip(adapter) && vid == 0)
1581		goto done;
1582
1583	if (test_bit(vid, adapter->vids))
1584		goto done;
1585
1586	set_bit(vid, adapter->vids);
1587	adapter->vlans_added++;
1588
1589	status = be_vid_config(adapter);
1590done:
1591	mutex_unlock(&adapter->rx_filter_lock);
1592	return status;
1593}
1594
1595static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1596{
1597	struct be_adapter *adapter = netdev_priv(netdev);
1598	int status = 0;
1599
1600	mutex_lock(&adapter->rx_filter_lock);
1601
1602	/* Packets with VID 0 are always received by Lancer by default */
1603	if (lancer_chip(adapter) && vid == 0)
1604		goto done;
1605
1606	if (!test_bit(vid, adapter->vids))
1607		goto done;
1608
1609	clear_bit(vid, adapter->vids);
1610	adapter->vlans_added--;
1611
1612	status = be_vid_config(adapter);
1613done:
1614	mutex_unlock(&adapter->rx_filter_lock);
1615	return status;
1616}
1617
1618static void be_set_all_promisc(struct be_adapter *adapter)
1619{
1620	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1621	adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1622}
1623
1624static void be_set_mc_promisc(struct be_adapter *adapter)
1625{
1626	int status;
1627
1628	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1629		return;
1630
1631	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1632	if (!status)
1633		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1634}
1635
1636static void be_set_uc_promisc(struct be_adapter *adapter)
1637{
1638	int status;
1639
1640	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1641		return;
1642
1643	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1644	if (!status)
1645		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1646}
1647
1648static void be_clear_uc_promisc(struct be_adapter *adapter)
1649{
1650	int status;
1651
1652	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1653		return;
1654
1655	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1656	if (!status)
1657		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1658}
1659
1660/* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1661 * We use a single callback function for both sync and unsync. We really don't
1662 * add/remove addresses through this callback. But, we use it to detect changes
1663 * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1664 */
1665static int be_uc_list_update(struct net_device *netdev,
1666			     const unsigned char *addr)
1667{
1668	struct be_adapter *adapter = netdev_priv(netdev);
1669
1670	adapter->update_uc_list = true;
1671	return 0;
1672}
1673
1674static int be_mc_list_update(struct net_device *netdev,
1675			     const unsigned char *addr)
1676{
1677	struct be_adapter *adapter = netdev_priv(netdev);
1678
1679	adapter->update_mc_list = true;
1680	return 0;
1681}
1682
1683static void be_set_mc_list(struct be_adapter *adapter)
1684{
1685	struct net_device *netdev = adapter->netdev;
1686	struct netdev_hw_addr *ha;
1687	bool mc_promisc = false;
1688	int status;
1689
1690	netif_addr_lock_bh(netdev);
1691	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1692
1693	if (netdev->flags & IFF_PROMISC) {
1694		adapter->update_mc_list = false;
1695	} else if (netdev->flags & IFF_ALLMULTI ||
1696		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1697		/* Enable multicast promisc if num configured exceeds
1698		 * what we support
1699		 */
1700		mc_promisc = true;
1701		adapter->update_mc_list = false;
1702	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1703		/* Update mc-list unconditionally if the iface was previously
1704		 * in mc-promisc mode and now is out of that mode.
1705		 */
1706		adapter->update_mc_list = true;
1707	}
1708
1709	if (adapter->update_mc_list) {
1710		int i = 0;
1711
1712		/* cache the mc-list in adapter */
1713		netdev_for_each_mc_addr(ha, netdev) {
1714			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1715			i++;
1716		}
1717		adapter->mc_count = netdev_mc_count(netdev);
1718	}
1719	netif_addr_unlock_bh(netdev);
1720
1721	if (mc_promisc) {
1722		be_set_mc_promisc(adapter);
1723	} else if (adapter->update_mc_list) {
1724		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1725		if (!status)
1726			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1727		else
1728			be_set_mc_promisc(adapter);
1729
1730		adapter->update_mc_list = false;
1731	}
1732}
1733
1734static void be_clear_mc_list(struct be_adapter *adapter)
1735{
1736	struct net_device *netdev = adapter->netdev;
1737
1738	__dev_mc_unsync(netdev, NULL);
1739	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1740	adapter->mc_count = 0;
1741}
1742
1743static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1744{
1745	if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1746		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1747		return 0;
1748	}
1749
1750	return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1751			       adapter->if_handle,
1752			       &adapter->pmac_id[uc_idx + 1], 0);
1753}
1754
1755static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1756{
1757	if (pmac_id == adapter->pmac_id[0])
1758		return;
1759
1760	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1761}
1762
1763static void be_set_uc_list(struct be_adapter *adapter)
1764{
1765	struct net_device *netdev = adapter->netdev;
1766	struct netdev_hw_addr *ha;
1767	bool uc_promisc = false;
1768	int curr_uc_macs = 0, i;
1769
1770	netif_addr_lock_bh(netdev);
1771	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1772
1773	if (netdev->flags & IFF_PROMISC) {
1774		adapter->update_uc_list = false;
1775	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1776		uc_promisc = true;
1777		adapter->update_uc_list = false;
1778	}  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1779		/* Update uc-list unconditionally if the iface was previously
1780		 * in uc-promisc mode and now is out of that mode.
1781		 */
1782		adapter->update_uc_list = true;
1783	}
1784
1785	if (adapter->update_uc_list) {
1786		/* cache the uc-list in adapter array */
1787		i = 0;
1788		netdev_for_each_uc_addr(ha, netdev) {
1789			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1790			i++;
1791		}
1792		curr_uc_macs = netdev_uc_count(netdev);
1793	}
1794	netif_addr_unlock_bh(netdev);
1795
1796	if (uc_promisc) {
1797		be_set_uc_promisc(adapter);
1798	} else if (adapter->update_uc_list) {
1799		be_clear_uc_promisc(adapter);
1800
1801		for (i = 0; i < adapter->uc_macs; i++)
1802			be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1803
1804		for (i = 0; i < curr_uc_macs; i++)
1805			be_uc_mac_add(adapter, i);
1806		adapter->uc_macs = curr_uc_macs;
1807		adapter->update_uc_list = false;
1808	}
1809}
1810
1811static void be_clear_uc_list(struct be_adapter *adapter)
1812{
1813	struct net_device *netdev = adapter->netdev;
1814	int i;
1815
1816	__dev_uc_unsync(netdev, NULL);
1817	for (i = 0; i < adapter->uc_macs; i++)
1818		be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1819
1820	adapter->uc_macs = 0;
1821}
1822
1823static void __be_set_rx_mode(struct be_adapter *adapter)
1824{
1825	struct net_device *netdev = adapter->netdev;
1826
1827	mutex_lock(&adapter->rx_filter_lock);
1828
1829	if (netdev->flags & IFF_PROMISC) {
1830		if (!be_in_all_promisc(adapter))
1831			be_set_all_promisc(adapter);
1832	} else if (be_in_all_promisc(adapter)) {
1833		/* We need to re-program the vlan-list or clear
1834		 * vlan-promisc mode (if needed) when the interface
1835		 * comes out of promisc mode.
1836		 */
1837		be_vid_config(adapter);
1838	}
1839
1840	be_set_uc_list(adapter);
1841	be_set_mc_list(adapter);
1842
1843	mutex_unlock(&adapter->rx_filter_lock);
1844}
1845
1846static void be_work_set_rx_mode(struct work_struct *work)
1847{
1848	struct be_cmd_work *cmd_work =
1849				container_of(work, struct be_cmd_work, work);
1850
1851	__be_set_rx_mode(cmd_work->adapter);
1852	kfree(cmd_work);
1853}
1854
1855static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1856{
1857	struct be_adapter *adapter = netdev_priv(netdev);
1858	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1859	int status;
1860
1861	if (!sriov_enabled(adapter))
1862		return -EPERM;
1863
1864	if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1865		return -EINVAL;
1866
1867	/* Proceed further only if user provided MAC is different
1868	 * from active MAC
1869	 */
1870	if (ether_addr_equal(mac, vf_cfg->mac_addr))
1871		return 0;
1872
1873	if (BEx_chip(adapter)) {
1874		be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1875				vf + 1);
1876
1877		status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1878					 &vf_cfg->pmac_id, vf + 1);
1879	} else {
1880		status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1881					vf + 1);
1882	}
1883
1884	if (status) {
1885		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1886			mac, vf, status);
1887		return be_cmd_status(status);
1888	}
1889
1890	ether_addr_copy(vf_cfg->mac_addr, mac);
1891
1892	return 0;
1893}
1894
1895static int be_get_vf_config(struct net_device *netdev, int vf,
1896			    struct ifla_vf_info *vi)
1897{
1898	struct be_adapter *adapter = netdev_priv(netdev);
1899	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1900
1901	if (!sriov_enabled(adapter))
1902		return -EPERM;
1903
1904	if (vf >= adapter->num_vfs)
1905		return -EINVAL;
1906
1907	vi->vf = vf;
1908	vi->max_tx_rate = vf_cfg->tx_rate;
1909	vi->min_tx_rate = 0;
1910	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1911	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1912	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1913	vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1914	vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1915
1916	return 0;
1917}
1918
1919static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1920{
1921	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1922	u16 vids[BE_NUM_VLANS_SUPPORTED];
1923	int vf_if_id = vf_cfg->if_handle;
1924	int status;
1925
1926	/* Enable Transparent VLAN Tagging */
1927	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1928	if (status)
1929		return status;
1930
1931	/* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
1932	vids[0] = 0;
1933	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1934	if (!status)
1935		dev_info(&adapter->pdev->dev,
1936			 "Cleared guest VLANs on VF%d", vf);
1937
1938	/* After TVT is enabled, disallow VFs to program VLAN filters */
1939	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1940		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1941						  ~BE_PRIV_FILTMGMT, vf + 1);
1942		if (!status)
1943			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1944	}
1945	return 0;
1946}
1947
1948static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1949{
1950	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1951	struct device *dev = &adapter->pdev->dev;
1952	int status;
1953
1954	/* Reset Transparent VLAN Tagging. */
1955	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1956				       vf_cfg->if_handle, 0, 0);
1957	if (status)
1958		return status;
1959
1960	/* Allow VFs to program VLAN filtering */
1961	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1962		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1963						  BE_PRIV_FILTMGMT, vf + 1);
1964		if (!status) {
1965			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1966			dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1967		}
1968	}
1969
1970	dev_info(dev,
1971		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1972	return 0;
1973}
1974
1975static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1976			  __be16 vlan_proto)
1977{
1978	struct be_adapter *adapter = netdev_priv(netdev);
1979	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1980	int status;
1981
1982	if (!sriov_enabled(adapter))
1983		return -EPERM;
1984
1985	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1986		return -EINVAL;
1987
1988	if (vlan_proto != htons(ETH_P_8021Q))
1989		return -EPROTONOSUPPORT;
1990
1991	if (vlan || qos) {
1992		vlan |= qos << VLAN_PRIO_SHIFT;
1993		status = be_set_vf_tvt(adapter, vf, vlan);
1994	} else {
1995		status = be_clear_vf_tvt(adapter, vf);
1996	}
1997
1998	if (status) {
1999		dev_err(&adapter->pdev->dev,
2000			"VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2001			status);
2002		return be_cmd_status(status);
2003	}
2004
2005	vf_cfg->vlan_tag = vlan;
2006	return 0;
2007}
2008
2009static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2010			     int min_tx_rate, int max_tx_rate)
2011{
2012	struct be_adapter *adapter = netdev_priv(netdev);
2013	struct device *dev = &adapter->pdev->dev;
2014	int percent_rate, status = 0;
2015	u16 link_speed = 0;
2016	u8 link_status;
2017
2018	if (!sriov_enabled(adapter))
2019		return -EPERM;
2020
2021	if (vf >= adapter->num_vfs)
2022		return -EINVAL;
2023
2024	if (min_tx_rate)
2025		return -EINVAL;
2026
2027	if (!max_tx_rate)
2028		goto config_qos;
2029
2030	status = be_cmd_link_status_query(adapter, &link_speed,
2031					  &link_status, 0);
2032	if (status)
2033		goto err;
2034
2035	if (!link_status) {
2036		dev_err(dev, "TX-rate setting not allowed when link is down\n");
2037		status = -ENETDOWN;
2038		goto err;
2039	}
2040
2041	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2042		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2043			link_speed);
2044		status = -EINVAL;
2045		goto err;
2046	}
2047
2048	/* On Skyhawk the QOS setting must be done only as a % value */
2049	percent_rate = link_speed / 100;
2050	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2051		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2052			percent_rate);
2053		status = -EINVAL;
2054		goto err;
2055	}
2056
2057config_qos:
2058	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2059	if (status)
2060		goto err;
2061
2062	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2063	return 0;
2064
2065err:
2066	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2067		max_tx_rate, vf);
2068	return be_cmd_status(status);
2069}
2070
2071static int be_set_vf_link_state(struct net_device *netdev, int vf,
2072				int link_state)
2073{
2074	struct be_adapter *adapter = netdev_priv(netdev);
2075	int status;
2076
2077	if (!sriov_enabled(adapter))
2078		return -EPERM;
2079
2080	if (vf >= adapter->num_vfs)
2081		return -EINVAL;
2082
2083	status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2084	if (status) {
2085		dev_err(&adapter->pdev->dev,
2086			"Link state change on VF %d failed: %#x\n", vf, status);
2087		return be_cmd_status(status);
2088	}
2089
2090	adapter->vf_cfg[vf].plink_tracking = link_state;
2091
2092	return 0;
2093}
2094
2095static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2096{
2097	struct be_adapter *adapter = netdev_priv(netdev);
2098	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2099	u8 spoofchk;
2100	int status;
2101
2102	if (!sriov_enabled(adapter))
2103		return -EPERM;
2104
2105	if (vf >= adapter->num_vfs)
2106		return -EINVAL;
2107
2108	if (BEx_chip(adapter))
2109		return -EOPNOTSUPP;
2110
2111	if (enable == vf_cfg->spoofchk)
2112		return 0;
2113
2114	spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2115
2116	status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2117				       0, spoofchk);
2118	if (status) {
2119		dev_err(&adapter->pdev->dev,
2120			"Spoofchk change on VF %d failed: %#x\n", vf, status);
2121		return be_cmd_status(status);
2122	}
2123
2124	vf_cfg->spoofchk = enable;
2125	return 0;
2126}
2127
2128static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2129			  ulong now)
2130{
2131	aic->rx_pkts_prev = rx_pkts;
2132	aic->tx_reqs_prev = tx_pkts;
2133	aic->jiffies = now;
2134}
2135
2136static int be_get_new_eqd(struct be_eq_obj *eqo)
2137{
2138	struct be_adapter *adapter = eqo->adapter;
2139	int eqd, start;
2140	struct be_aic_obj *aic;
2141	struct be_rx_obj *rxo;
2142	struct be_tx_obj *txo;
2143	u64 rx_pkts = 0, tx_pkts = 0;
2144	ulong now;
2145	u32 pps, delta;
2146	int i;
2147
2148	aic = &adapter->aic_obj[eqo->idx];
2149	if (!adapter->aic_enabled) {
2150		if (aic->jiffies)
2151			aic->jiffies = 0;
2152		eqd = aic->et_eqd;
2153		return eqd;
2154	}
2155
2156	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2157		do {
2158			start = u64_stats_fetch_begin(&rxo->stats.sync);
2159			rx_pkts += rxo->stats.rx_pkts;
2160		} while (u64_stats_fetch_retry(&rxo->stats.sync, start));
2161	}
2162
2163	for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2164		do {
2165			start = u64_stats_fetch_begin(&txo->stats.sync);
2166			tx_pkts += txo->stats.tx_reqs;
2167		} while (u64_stats_fetch_retry(&txo->stats.sync, start));
2168	}
2169
2170	/* Skip, if wrapped around or first calculation */
2171	now = jiffies;
2172	if (!aic->jiffies || time_before(now, aic->jiffies) ||
2173	    rx_pkts < aic->rx_pkts_prev ||
2174	    tx_pkts < aic->tx_reqs_prev) {
2175		be_aic_update(aic, rx_pkts, tx_pkts, now);
2176		return aic->prev_eqd;
2177	}
2178
2179	delta = jiffies_to_msecs(now - aic->jiffies);
2180	if (delta == 0)
2181		return aic->prev_eqd;
2182
2183	pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2184		(((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2185	eqd = (pps / 15000) << 2;
2186
2187	if (eqd < 8)
2188		eqd = 0;
2189	eqd = min_t(u32, eqd, aic->max_eqd);
2190	eqd = max_t(u32, eqd, aic->min_eqd);
2191
2192	be_aic_update(aic, rx_pkts, tx_pkts, now);
2193
2194	return eqd;
2195}
2196
2197/* For Skyhawk-R only */
2198static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2199{
2200	struct be_adapter *adapter = eqo->adapter;
2201	struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2202	ulong now = jiffies;
2203	int eqd;
2204	u32 mult_enc;
2205
2206	if (!adapter->aic_enabled)
2207		return 0;
2208
2209	if (jiffies_to_msecs(now - aic->jiffies) < 1)
2210		eqd = aic->prev_eqd;
2211	else
2212		eqd = be_get_new_eqd(eqo);
2213
2214	if (eqd > 100)
2215		mult_enc = R2I_DLY_ENC_1;
2216	else if (eqd > 60)
2217		mult_enc = R2I_DLY_ENC_2;
2218	else if (eqd > 20)
2219		mult_enc = R2I_DLY_ENC_3;
2220	else
2221		mult_enc = R2I_DLY_ENC_0;
2222
2223	aic->prev_eqd = eqd;
2224
2225	return mult_enc;
2226}
2227
2228void be_eqd_update(struct be_adapter *adapter, bool force_update)
2229{
2230	struct be_set_eqd set_eqd[MAX_EVT_QS];
2231	struct be_aic_obj *aic;
2232	struct be_eq_obj *eqo;
2233	int i, num = 0, eqd;
2234
2235	for_all_evt_queues(adapter, eqo, i) {
2236		aic = &adapter->aic_obj[eqo->idx];
2237		eqd = be_get_new_eqd(eqo);
2238		if (force_update || eqd != aic->prev_eqd) {
2239			set_eqd[num].delay_multiplier = (eqd * 65)/100;
2240			set_eqd[num].eq_id = eqo->q.id;
2241			aic->prev_eqd = eqd;
2242			num++;
2243		}
2244	}
2245
2246	if (num)
2247		be_cmd_modify_eqd(adapter, set_eqd, num);
2248}
2249
2250static void be_rx_stats_update(struct be_rx_obj *rxo,
2251			       struct be_rx_compl_info *rxcp)
2252{
2253	struct be_rx_stats *stats = rx_stats(rxo);
2254
2255	u64_stats_update_begin(&stats->sync);
2256	stats->rx_compl++;
2257	stats->rx_bytes += rxcp->pkt_size;
2258	stats->rx_pkts++;
2259	if (rxcp->tunneled)
2260		stats->rx_vxlan_offload_pkts++;
2261	if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2262		stats->rx_mcast_pkts++;
2263	if (rxcp->err)
2264		stats->rx_compl_err++;
2265	u64_stats_update_end(&stats->sync);
2266}
2267
2268static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2269{
2270	/* L4 checksum is not reliable for non TCP/UDP packets.
2271	 * Also ignore ipcksm for ipv6 pkts
2272	 */
2273	return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2274		(rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2275}
2276
2277static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2278{
2279	struct be_adapter *adapter = rxo->adapter;
2280	struct be_rx_page_info *rx_page_info;
2281	struct be_queue_info *rxq = &rxo->q;
2282	u32 frag_idx = rxq->tail;
2283
2284	rx_page_info = &rxo->page_info_tbl[frag_idx];
2285	BUG_ON(!rx_page_info->page);
2286
2287	if (rx_page_info->last_frag) {
2288		dma_unmap_page(&adapter->pdev->dev,
2289			       dma_unmap_addr(rx_page_info, bus),
2290			       adapter->big_page_size, DMA_FROM_DEVICE);
2291		rx_page_info->last_frag = false;
2292	} else {
2293		dma_sync_single_for_cpu(&adapter->pdev->dev,
2294					dma_unmap_addr(rx_page_info, bus),
2295					rx_frag_size, DMA_FROM_DEVICE);
2296	}
2297
2298	queue_tail_inc(rxq);
2299	atomic_dec(&rxq->used);
2300	return rx_page_info;
2301}
2302
2303/* Throwaway the data in the Rx completion */
2304static void be_rx_compl_discard(struct be_rx_obj *rxo,
2305				struct be_rx_compl_info *rxcp)
2306{
2307	struct be_rx_page_info *page_info;
2308	u16 i, num_rcvd = rxcp->num_rcvd;
2309
2310	for (i = 0; i < num_rcvd; i++) {
2311		page_info = get_rx_page_info(rxo);
2312		put_page(page_info->page);
2313		memset(page_info, 0, sizeof(*page_info));
2314	}
2315}
2316
2317/*
2318 * skb_fill_rx_data forms a complete skb for an ether frame
2319 * indicated by rxcp.
2320 */
2321static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2322			     struct be_rx_compl_info *rxcp)
2323{
2324	struct be_rx_page_info *page_info;
2325	u16 i, j;
2326	u16 hdr_len, curr_frag_len, remaining;
2327	u8 *start;
2328
2329	page_info = get_rx_page_info(rxo);
2330	start = page_address(page_info->page) + page_info->page_offset;
2331	prefetch(start);
2332
2333	/* Copy data in the first descriptor of this completion */
2334	curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2335
2336	skb->len = curr_frag_len;
2337	if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2338		memcpy(skb->data, start, curr_frag_len);
2339		/* Complete packet has now been moved to data */
2340		put_page(page_info->page);
2341		skb->data_len = 0;
2342		skb->tail += curr_frag_len;
2343	} else {
2344		hdr_len = ETH_HLEN;
2345		memcpy(skb->data, start, hdr_len);
2346		skb_shinfo(skb)->nr_frags = 1;
2347		skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[0],
2348					page_info->page,
2349					page_info->page_offset + hdr_len,
2350					curr_frag_len - hdr_len);
 
2351		skb->data_len = curr_frag_len - hdr_len;
2352		skb->truesize += rx_frag_size;
2353		skb->tail += hdr_len;
2354	}
2355	page_info->page = NULL;
2356
2357	if (rxcp->pkt_size <= rx_frag_size) {
2358		BUG_ON(rxcp->num_rcvd != 1);
2359		return;
2360	}
2361
2362	/* More frags present for this completion */
2363	remaining = rxcp->pkt_size - curr_frag_len;
2364	for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2365		page_info = get_rx_page_info(rxo);
2366		curr_frag_len = min(remaining, rx_frag_size);
2367
2368		/* Coalesce all frags from the same physical page in one slot */
2369		if (page_info->page_offset == 0) {
2370			/* Fresh page */
2371			j++;
2372			skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j],
2373						page_info->page,
2374						page_info->page_offset,
2375						curr_frag_len);
2376			skb_shinfo(skb)->nr_frags++;
2377		} else {
2378			put_page(page_info->page);
2379			skb_frag_size_add(&skb_shinfo(skb)->frags[j],
2380					  curr_frag_len);
2381		}
2382
 
2383		skb->len += curr_frag_len;
2384		skb->data_len += curr_frag_len;
2385		skb->truesize += rx_frag_size;
2386		remaining -= curr_frag_len;
2387		page_info->page = NULL;
2388	}
2389	BUG_ON(j > MAX_SKB_FRAGS);
2390}
2391
2392/* Process the RX completion indicated by rxcp when GRO is disabled */
2393static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2394				struct be_rx_compl_info *rxcp)
2395{
2396	struct be_adapter *adapter = rxo->adapter;
2397	struct net_device *netdev = adapter->netdev;
2398	struct sk_buff *skb;
2399
2400	skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2401	if (unlikely(!skb)) {
2402		rx_stats(rxo)->rx_drops_no_skbs++;
2403		be_rx_compl_discard(rxo, rxcp);
2404		return;
2405	}
2406
2407	skb_fill_rx_data(rxo, skb, rxcp);
2408
2409	if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2410		skb->ip_summed = CHECKSUM_UNNECESSARY;
2411	else
2412		skb_checksum_none_assert(skb);
2413
2414	skb->protocol = eth_type_trans(skb, netdev);
2415	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2416	if (netdev->features & NETIF_F_RXHASH)
2417		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2418
2419	skb->csum_level = rxcp->tunneled;
2420	skb_mark_napi_id(skb, napi);
2421
2422	if (rxcp->vlanf)
2423		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2424
2425	netif_receive_skb(skb);
2426}
2427
2428/* Process the RX completion indicated by rxcp when GRO is enabled */
2429static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2430				    struct napi_struct *napi,
2431				    struct be_rx_compl_info *rxcp)
2432{
2433	struct be_adapter *adapter = rxo->adapter;
2434	struct be_rx_page_info *page_info;
2435	struct sk_buff *skb = NULL;
2436	u16 remaining, curr_frag_len;
2437	u16 i, j;
2438
2439	skb = napi_get_frags(napi);
2440	if (!skb) {
2441		be_rx_compl_discard(rxo, rxcp);
2442		return;
2443	}
2444
2445	remaining = rxcp->pkt_size;
2446	for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2447		page_info = get_rx_page_info(rxo);
2448
2449		curr_frag_len = min(remaining, rx_frag_size);
2450
2451		/* Coalesce all frags from the same physical page in one slot */
2452		if (i == 0 || page_info->page_offset == 0) {
2453			/* First frag or Fresh page */
2454			j++;
2455			skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j],
2456						page_info->page,
2457						page_info->page_offset,
2458						curr_frag_len);
2459		} else {
2460			put_page(page_info->page);
2461			skb_frag_size_add(&skb_shinfo(skb)->frags[j],
2462					  curr_frag_len);
2463		}
2464
2465		skb->truesize += rx_frag_size;
2466		remaining -= curr_frag_len;
2467		memset(page_info, 0, sizeof(*page_info));
2468	}
2469	BUG_ON(j > MAX_SKB_FRAGS);
2470
2471	skb_shinfo(skb)->nr_frags = j + 1;
2472	skb->len = rxcp->pkt_size;
2473	skb->data_len = rxcp->pkt_size;
2474	skb->ip_summed = CHECKSUM_UNNECESSARY;
2475	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2476	if (adapter->netdev->features & NETIF_F_RXHASH)
2477		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2478
2479	skb->csum_level = rxcp->tunneled;
2480
2481	if (rxcp->vlanf)
2482		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2483
2484	napi_gro_frags(napi);
2485}
2486
2487static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2488				 struct be_rx_compl_info *rxcp)
2489{
2490	rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2491	rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2492	rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2493	rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2494	rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2495	rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2496	rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2497	rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2498	rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2499	rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2500	rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2501	if (rxcp->vlanf) {
2502		rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2503		rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2504	}
2505	rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2506	rxcp->tunneled =
2507		GET_RX_COMPL_V1_BITS(tunneled, compl);
2508}
2509
2510static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2511				 struct be_rx_compl_info *rxcp)
2512{
2513	rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2514	rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2515	rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2516	rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2517	rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2518	rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2519	rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2520	rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2521	rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2522	rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2523	rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2524	if (rxcp->vlanf) {
2525		rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2526		rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2527	}
2528	rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2529	rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2530}
2531
2532static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2533{
2534	struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2535	struct be_rx_compl_info *rxcp = &rxo->rxcp;
2536	struct be_adapter *adapter = rxo->adapter;
2537
2538	/* For checking the valid bit it is Ok to use either definition as the
2539	 * valid bit is at the same position in both v0 and v1 Rx compl */
2540	if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2541		return NULL;
2542
2543	rmb();
2544	be_dws_le_to_cpu(compl, sizeof(*compl));
2545
2546	if (adapter->be3_native)
2547		be_parse_rx_compl_v1(compl, rxcp);
2548	else
2549		be_parse_rx_compl_v0(compl, rxcp);
2550
2551	if (rxcp->ip_frag)
2552		rxcp->l4_csum = 0;
2553
2554	if (rxcp->vlanf) {
2555		/* In QNQ modes, if qnq bit is not set, then the packet was
2556		 * tagged only with the transparent outer vlan-tag and must
2557		 * not be treated as a vlan packet by host
2558		 */
2559		if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2560			rxcp->vlanf = 0;
2561
2562		if (!lancer_chip(adapter))
2563			rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2564
2565		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2566		    !test_bit(rxcp->vlan_tag, adapter->vids))
2567			rxcp->vlanf = 0;
2568	}
2569
2570	/* As the compl has been parsed, reset it; we wont touch it again */
2571	compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2572
2573	queue_tail_inc(&rxo->cq);
2574	return rxcp;
2575}
2576
2577static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2578{
2579	u32 order = get_order(size);
2580
2581	if (order > 0)
2582		gfp |= __GFP_COMP;
2583	return  alloc_pages(gfp, order);
2584}
2585
2586/*
2587 * Allocate a page, split it to fragments of size rx_frag_size and post as
2588 * receive buffers to BE
2589 */
2590static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2591{
2592	struct be_adapter *adapter = rxo->adapter;
2593	struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2594	struct be_queue_info *rxq = &rxo->q;
2595	struct page *pagep = NULL;
2596	struct device *dev = &adapter->pdev->dev;
2597	struct be_eth_rx_d *rxd;
2598	u64 page_dmaaddr = 0, frag_dmaaddr;
2599	u32 posted, page_offset = 0, notify = 0;
2600
2601	page_info = &rxo->page_info_tbl[rxq->head];
2602	for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2603		if (!pagep) {
2604			pagep = be_alloc_pages(adapter->big_page_size, gfp);
2605			if (unlikely(!pagep)) {
2606				rx_stats(rxo)->rx_post_fail++;
2607				break;
2608			}
2609			page_dmaaddr = dma_map_page(dev, pagep, 0,
2610						    adapter->big_page_size,
2611						    DMA_FROM_DEVICE);
2612			if (dma_mapping_error(dev, page_dmaaddr)) {
2613				put_page(pagep);
2614				pagep = NULL;
2615				adapter->drv_stats.dma_map_errors++;
2616				break;
2617			}
2618			page_offset = 0;
2619		} else {
2620			get_page(pagep);
2621			page_offset += rx_frag_size;
2622		}
2623		page_info->page_offset = page_offset;
2624		page_info->page = pagep;
2625
2626		rxd = queue_head_node(rxq);
2627		frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2628		rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2629		rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2630
2631		/* Any space left in the current big page for another frag? */
2632		if ((page_offset + rx_frag_size + rx_frag_size) >
2633					adapter->big_page_size) {
2634			pagep = NULL;
2635			page_info->last_frag = true;
2636			dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2637		} else {
2638			dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2639		}
2640
2641		prev_page_info = page_info;
2642		queue_head_inc(rxq);
2643		page_info = &rxo->page_info_tbl[rxq->head];
2644	}
2645
2646	/* Mark the last frag of a page when we break out of the above loop
2647	 * with no more slots available in the RXQ
2648	 */
2649	if (pagep) {
2650		prev_page_info->last_frag = true;
2651		dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2652	}
2653
2654	if (posted) {
2655		atomic_add(posted, &rxq->used);
2656		if (rxo->rx_post_starved)
2657			rxo->rx_post_starved = false;
2658		do {
2659			notify = min(MAX_NUM_POST_ERX_DB, posted);
2660			be_rxq_notify(adapter, rxq->id, notify);
2661			posted -= notify;
2662		} while (posted);
2663	} else if (atomic_read(&rxq->used) == 0) {
2664		/* Let be_worker replenish when memory is available */
2665		rxo->rx_post_starved = true;
2666	}
2667}
2668
2669static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2670{
2671	switch (status) {
2672	case BE_TX_COMP_HDR_PARSE_ERR:
2673		tx_stats(txo)->tx_hdr_parse_err++;
2674		break;
2675	case BE_TX_COMP_NDMA_ERR:
2676		tx_stats(txo)->tx_dma_err++;
2677		break;
2678	case BE_TX_COMP_ACL_ERR:
2679		tx_stats(txo)->tx_spoof_check_err++;
2680		break;
2681	}
2682}
2683
2684static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2685{
2686	switch (status) {
2687	case LANCER_TX_COMP_LSO_ERR:
2688		tx_stats(txo)->tx_tso_err++;
2689		break;
2690	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2691	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2692		tx_stats(txo)->tx_spoof_check_err++;
2693		break;
2694	case LANCER_TX_COMP_QINQ_ERR:
2695		tx_stats(txo)->tx_qinq_err++;
2696		break;
2697	case LANCER_TX_COMP_PARITY_ERR:
2698		tx_stats(txo)->tx_internal_parity_err++;
2699		break;
2700	case LANCER_TX_COMP_DMA_ERR:
2701		tx_stats(txo)->tx_dma_err++;
2702		break;
2703	case LANCER_TX_COMP_SGE_ERR:
2704		tx_stats(txo)->tx_sge_err++;
2705		break;
2706	}
2707}
2708
2709static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2710						struct be_tx_obj *txo)
2711{
2712	struct be_queue_info *tx_cq = &txo->cq;
2713	struct be_tx_compl_info *txcp = &txo->txcp;
2714	struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2715
2716	if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2717		return NULL;
2718
2719	/* Ensure load ordering of valid bit dword and other dwords below */
2720	rmb();
2721	be_dws_le_to_cpu(compl, sizeof(*compl));
2722
2723	txcp->status = GET_TX_COMPL_BITS(status, compl);
2724	txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2725
2726	if (txcp->status) {
2727		if (lancer_chip(adapter)) {
2728			lancer_update_tx_err(txo, txcp->status);
2729			/* Reset the adapter incase of TSO,
2730			 * SGE or Parity error
2731			 */
2732			if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2733			    txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2734			    txcp->status == LANCER_TX_COMP_SGE_ERR)
2735				be_set_error(adapter, BE_ERROR_TX);
2736		} else {
2737			be_update_tx_err(txo, txcp->status);
2738		}
2739	}
2740
2741	if (be_check_error(adapter, BE_ERROR_TX))
2742		return NULL;
2743
2744	compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2745	queue_tail_inc(tx_cq);
2746	return txcp;
2747}
2748
2749static u16 be_tx_compl_process(struct be_adapter *adapter,
2750			       struct be_tx_obj *txo, u16 last_index)
2751{
2752	struct sk_buff **sent_skbs = txo->sent_skb_list;
2753	struct be_queue_info *txq = &txo->q;
2754	struct sk_buff *skb = NULL;
2755	bool unmap_skb_hdr = false;
2756	struct be_eth_wrb *wrb;
2757	u16 num_wrbs = 0;
2758	u32 frag_index;
2759
2760	do {
2761		if (sent_skbs[txq->tail]) {
2762			/* Free skb from prev req */
2763			if (skb)
2764				dev_consume_skb_any(skb);
2765			skb = sent_skbs[txq->tail];
2766			sent_skbs[txq->tail] = NULL;
2767			queue_tail_inc(txq);  /* skip hdr wrb */
2768			num_wrbs++;
2769			unmap_skb_hdr = true;
2770		}
2771		wrb = queue_tail_node(txq);
2772		frag_index = txq->tail;
2773		unmap_tx_frag(&adapter->pdev->dev, wrb,
2774			      (unmap_skb_hdr && skb_headlen(skb)));
2775		unmap_skb_hdr = false;
2776		queue_tail_inc(txq);
2777		num_wrbs++;
2778	} while (frag_index != last_index);
2779	dev_consume_skb_any(skb);
2780
2781	return num_wrbs;
2782}
2783
2784/* Return the number of events in the event queue */
2785static inline int events_get(struct be_eq_obj *eqo)
2786{
2787	struct be_eq_entry *eqe;
2788	int num = 0;
2789
2790	do {
2791		eqe = queue_tail_node(&eqo->q);
2792		if (eqe->evt == 0)
2793			break;
2794
2795		rmb();
2796		eqe->evt = 0;
2797		num++;
2798		queue_tail_inc(&eqo->q);
2799	} while (true);
2800
2801	return num;
2802}
2803
2804/* Leaves the EQ is disarmed state */
2805static void be_eq_clean(struct be_eq_obj *eqo)
2806{
2807	int num = events_get(eqo);
2808
2809	be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2810}
2811
2812/* Free posted rx buffers that were not used */
2813static void be_rxq_clean(struct be_rx_obj *rxo)
2814{
2815	struct be_queue_info *rxq = &rxo->q;
2816	struct be_rx_page_info *page_info;
2817
2818	while (atomic_read(&rxq->used) > 0) {
2819		page_info = get_rx_page_info(rxo);
2820		put_page(page_info->page);
2821		memset(page_info, 0, sizeof(*page_info));
2822	}
2823	BUG_ON(atomic_read(&rxq->used));
2824	rxq->tail = 0;
2825	rxq->head = 0;
2826}
2827
2828static void be_rx_cq_clean(struct be_rx_obj *rxo)
2829{
2830	struct be_queue_info *rx_cq = &rxo->cq;
2831	struct be_rx_compl_info *rxcp;
2832	struct be_adapter *adapter = rxo->adapter;
2833	int flush_wait = 0;
2834
2835	/* Consume pending rx completions.
2836	 * Wait for the flush completion (identified by zero num_rcvd)
2837	 * to arrive. Notify CQ even when there are no more CQ entries
2838	 * for HW to flush partially coalesced CQ entries.
2839	 * In Lancer, there is no need to wait for flush compl.
2840	 */
2841	for (;;) {
2842		rxcp = be_rx_compl_get(rxo);
2843		if (!rxcp) {
2844			if (lancer_chip(adapter))
2845				break;
2846
2847			if (flush_wait++ > 50 ||
2848			    be_check_error(adapter,
2849					   BE_ERROR_HW)) {
2850				dev_warn(&adapter->pdev->dev,
2851					 "did not receive flush compl\n");
2852				break;
2853			}
2854			be_cq_notify(adapter, rx_cq->id, true, 0);
2855			mdelay(1);
2856		} else {
2857			be_rx_compl_discard(rxo, rxcp);
2858			be_cq_notify(adapter, rx_cq->id, false, 1);
2859			if (rxcp->num_rcvd == 0)
2860				break;
2861		}
2862	}
2863
2864	/* After cleanup, leave the CQ in unarmed state */
2865	be_cq_notify(adapter, rx_cq->id, false, 0);
2866}
2867
2868static void be_tx_compl_clean(struct be_adapter *adapter)
2869{
2870	struct device *dev = &adapter->pdev->dev;
2871	u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2872	struct be_tx_compl_info *txcp;
2873	struct be_queue_info *txq;
2874	u32 end_idx, notified_idx;
2875	struct be_tx_obj *txo;
2876	int i, pending_txqs;
2877
2878	/* Stop polling for compls when HW has been silent for 10ms */
2879	do {
2880		pending_txqs = adapter->num_tx_qs;
2881
2882		for_all_tx_queues(adapter, txo, i) {
2883			cmpl = 0;
2884			num_wrbs = 0;
2885			txq = &txo->q;
2886			while ((txcp = be_tx_compl_get(adapter, txo))) {
2887				num_wrbs +=
2888					be_tx_compl_process(adapter, txo,
2889							    txcp->end_index);
2890				cmpl++;
2891			}
2892			if (cmpl) {
2893				be_cq_notify(adapter, txo->cq.id, false, cmpl);
2894				atomic_sub(num_wrbs, &txq->used);
2895				timeo = 0;
2896			}
2897			if (!be_is_tx_compl_pending(txo))
2898				pending_txqs--;
2899		}
2900
2901		if (pending_txqs == 0 || ++timeo > 10 ||
2902		    be_check_error(adapter, BE_ERROR_HW))
2903			break;
2904
2905		mdelay(1);
2906	} while (true);
2907
2908	/* Free enqueued TX that was never notified to HW */
2909	for_all_tx_queues(adapter, txo, i) {
2910		txq = &txo->q;
2911
2912		if (atomic_read(&txq->used)) {
2913			dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2914				 i, atomic_read(&txq->used));
2915			notified_idx = txq->tail;
2916			end_idx = txq->tail;
2917			index_adv(&end_idx, atomic_read(&txq->used) - 1,
2918				  txq->len);
2919			/* Use the tx-compl process logic to handle requests
2920			 * that were not sent to the HW.
2921			 */
2922			num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2923			atomic_sub(num_wrbs, &txq->used);
2924			BUG_ON(atomic_read(&txq->used));
2925			txo->pend_wrb_cnt = 0;
2926			/* Since hw was never notified of these requests,
2927			 * reset TXQ indices
2928			 */
2929			txq->head = notified_idx;
2930			txq->tail = notified_idx;
2931		}
2932	}
2933}
2934
2935static void be_evt_queues_destroy(struct be_adapter *adapter)
2936{
2937	struct be_eq_obj *eqo;
2938	int i;
2939
2940	for_all_evt_queues(adapter, eqo, i) {
2941		if (eqo->q.created) {
2942			be_eq_clean(eqo);
2943			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2944			netif_napi_del(&eqo->napi);
2945			free_cpumask_var(eqo->affinity_mask);
2946		}
2947		be_queue_free(adapter, &eqo->q);
2948	}
2949}
2950
2951static int be_evt_queues_create(struct be_adapter *adapter)
2952{
2953	struct be_queue_info *eq;
2954	struct be_eq_obj *eqo;
2955	struct be_aic_obj *aic;
2956	int i, rc;
2957
2958	/* need enough EQs to service both RX and TX queues */
2959	adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2960				    max(adapter->cfg_num_rx_irqs,
2961					adapter->cfg_num_tx_irqs));
2962
2963	adapter->aic_enabled = true;
2964
2965	for_all_evt_queues(adapter, eqo, i) {
2966		int numa_node = dev_to_node(&adapter->pdev->dev);
2967
2968		aic = &adapter->aic_obj[i];
2969		eqo->adapter = adapter;
2970		eqo->idx = i;
2971		aic->max_eqd = BE_MAX_EQD;
2972
2973		eq = &eqo->q;
2974		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2975				    sizeof(struct be_eq_entry));
2976		if (rc)
2977			return rc;
2978
2979		rc = be_cmd_eq_create(adapter, eqo);
2980		if (rc)
2981			return rc;
2982
2983		if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2984			return -ENOMEM;
2985		cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2986				eqo->affinity_mask);
2987		netif_napi_add(adapter->netdev, &eqo->napi, be_poll);
 
2988	}
2989	return 0;
2990}
2991
2992static void be_mcc_queues_destroy(struct be_adapter *adapter)
2993{
2994	struct be_queue_info *q;
2995
2996	q = &adapter->mcc_obj.q;
2997	if (q->created)
2998		be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2999	be_queue_free(adapter, q);
3000
3001	q = &adapter->mcc_obj.cq;
3002	if (q->created)
3003		be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3004	be_queue_free(adapter, q);
3005}
3006
3007/* Must be called only after TX qs are created as MCC shares TX EQ */
3008static int be_mcc_queues_create(struct be_adapter *adapter)
3009{
3010	struct be_queue_info *q, *cq;
3011
3012	cq = &adapter->mcc_obj.cq;
3013	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3014			   sizeof(struct be_mcc_compl)))
3015		goto err;
3016
3017	/* Use the default EQ for MCC completions */
3018	if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3019		goto mcc_cq_free;
3020
3021	q = &adapter->mcc_obj.q;
3022	if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3023		goto mcc_cq_destroy;
3024
3025	if (be_cmd_mccq_create(adapter, q, cq))
3026		goto mcc_q_free;
3027
3028	return 0;
3029
3030mcc_q_free:
3031	be_queue_free(adapter, q);
3032mcc_cq_destroy:
3033	be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3034mcc_cq_free:
3035	be_queue_free(adapter, cq);
3036err:
3037	return -1;
3038}
3039
3040static void be_tx_queues_destroy(struct be_adapter *adapter)
3041{
3042	struct be_queue_info *q;
3043	struct be_tx_obj *txo;
3044	u8 i;
3045
3046	for_all_tx_queues(adapter, txo, i) {
3047		q = &txo->q;
3048		if (q->created)
3049			be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3050		be_queue_free(adapter, q);
3051
3052		q = &txo->cq;
3053		if (q->created)
3054			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3055		be_queue_free(adapter, q);
3056	}
3057}
3058
3059static int be_tx_qs_create(struct be_adapter *adapter)
3060{
3061	struct be_queue_info *cq;
3062	struct be_tx_obj *txo;
3063	struct be_eq_obj *eqo;
3064	int status, i;
3065
3066	adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3067
3068	for_all_tx_queues(adapter, txo, i) {
3069		cq = &txo->cq;
3070		status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3071					sizeof(struct be_eth_tx_compl));
3072		if (status)
3073			return status;
3074
3075		u64_stats_init(&txo->stats.sync);
3076		u64_stats_init(&txo->stats.sync_compl);
3077
3078		/* If num_evt_qs is less than num_tx_qs, then more than
3079		 * one txq share an eq
3080		 */
3081		eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3082		status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3083		if (status)
3084			return status;
3085
3086		status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3087					sizeof(struct be_eth_wrb));
3088		if (status)
3089			return status;
3090
3091		status = be_cmd_txq_create(adapter, txo);
3092		if (status)
3093			return status;
3094
3095		netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3096				    eqo->idx);
3097	}
3098
3099	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3100		 adapter->num_tx_qs);
3101	return 0;
3102}
3103
3104static void be_rx_cqs_destroy(struct be_adapter *adapter)
3105{
3106	struct be_queue_info *q;
3107	struct be_rx_obj *rxo;
3108	int i;
3109
3110	for_all_rx_queues(adapter, rxo, i) {
3111		q = &rxo->cq;
3112		if (q->created)
3113			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3114		be_queue_free(adapter, q);
3115	}
3116}
3117
3118static int be_rx_cqs_create(struct be_adapter *adapter)
3119{
3120	struct be_queue_info *eq, *cq;
3121	struct be_rx_obj *rxo;
3122	int rc, i;
3123
3124	adapter->num_rss_qs =
3125			min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3126
3127	/* We'll use RSS only if atleast 2 RSS rings are supported. */
3128	if (adapter->num_rss_qs < 2)
3129		adapter->num_rss_qs = 0;
3130
3131	adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3132
3133	/* When the interface is not capable of RSS rings (and there is no
3134	 * need to create a default RXQ) we'll still need one RXQ
3135	 */
3136	if (adapter->num_rx_qs == 0)
3137		adapter->num_rx_qs = 1;
3138
3139	adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3140	for_all_rx_queues(adapter, rxo, i) {
3141		rxo->adapter = adapter;
3142		cq = &rxo->cq;
3143		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3144				    sizeof(struct be_eth_rx_compl));
3145		if (rc)
3146			return rc;
3147
3148		u64_stats_init(&rxo->stats.sync);
3149		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3150		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3151		if (rc)
3152			return rc;
3153	}
3154
3155	dev_info(&adapter->pdev->dev,
3156		 "created %d RX queue(s)\n", adapter->num_rx_qs);
3157	return 0;
3158}
3159
3160static irqreturn_t be_intx(int irq, void *dev)
3161{
3162	struct be_eq_obj *eqo = dev;
3163	struct be_adapter *adapter = eqo->adapter;
3164	int num_evts = 0;
3165
3166	/* IRQ is not expected when NAPI is scheduled as the EQ
3167	 * will not be armed.
3168	 * But, this can happen on Lancer INTx where it takes
3169	 * a while to de-assert INTx or in BE2 where occasionaly
3170	 * an interrupt may be raised even when EQ is unarmed.
3171	 * If NAPI is already scheduled, then counting & notifying
3172	 * events will orphan them.
3173	 */
3174	if (napi_schedule_prep(&eqo->napi)) {
3175		num_evts = events_get(eqo);
3176		__napi_schedule(&eqo->napi);
3177		if (num_evts)
3178			eqo->spurious_intr = 0;
3179	}
3180	be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3181
3182	/* Return IRQ_HANDLED only for the first spurious intr
3183	 * after a valid intr to stop the kernel from branding
3184	 * this irq as a bad one!
3185	 */
3186	if (num_evts || eqo->spurious_intr++ == 0)
3187		return IRQ_HANDLED;
3188	else
3189		return IRQ_NONE;
3190}
3191
3192static irqreturn_t be_msix(int irq, void *dev)
3193{
3194	struct be_eq_obj *eqo = dev;
3195
3196	be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3197	napi_schedule(&eqo->napi);
3198	return IRQ_HANDLED;
3199}
3200
3201static inline bool do_gro(struct be_rx_compl_info *rxcp)
3202{
3203	return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3204}
3205
3206static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3207			 int budget)
3208{
3209	struct be_adapter *adapter = rxo->adapter;
3210	struct be_queue_info *rx_cq = &rxo->cq;
3211	struct be_rx_compl_info *rxcp;
3212	u32 work_done;
3213	u32 frags_consumed = 0;
3214
3215	for (work_done = 0; work_done < budget; work_done++) {
3216		rxcp = be_rx_compl_get(rxo);
3217		if (!rxcp)
3218			break;
3219
3220		/* Is it a flush compl that has no data */
3221		if (unlikely(rxcp->num_rcvd == 0))
3222			goto loop_continue;
3223
3224		/* Discard compl with partial DMA Lancer B0 */
3225		if (unlikely(!rxcp->pkt_size)) {
3226			be_rx_compl_discard(rxo, rxcp);
3227			goto loop_continue;
3228		}
3229
3230		/* On BE drop pkts that arrive due to imperfect filtering in
3231		 * promiscuous mode on some skews
3232		 */
3233		if (unlikely(rxcp->port != adapter->port_num &&
3234			     !lancer_chip(adapter))) {
3235			be_rx_compl_discard(rxo, rxcp);
3236			goto loop_continue;
3237		}
3238
3239		if (do_gro(rxcp))
3240			be_rx_compl_process_gro(rxo, napi, rxcp);
3241		else
3242			be_rx_compl_process(rxo, napi, rxcp);
3243
3244loop_continue:
3245		frags_consumed += rxcp->num_rcvd;
3246		be_rx_stats_update(rxo, rxcp);
3247	}
3248
3249	if (work_done) {
3250		be_cq_notify(adapter, rx_cq->id, true, work_done);
3251
3252		/* When an rx-obj gets into post_starved state, just
3253		 * let be_worker do the posting.
3254		 */
3255		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3256		    !rxo->rx_post_starved)
3257			be_post_rx_frags(rxo, GFP_ATOMIC,
3258					 max_t(u32, MAX_RX_POST,
3259					       frags_consumed));
3260	}
3261
3262	return work_done;
3263}
3264
3265
3266static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3267			  int idx)
3268{
3269	int num_wrbs = 0, work_done = 0;
3270	struct be_tx_compl_info *txcp;
3271
3272	while ((txcp = be_tx_compl_get(adapter, txo))) {
3273		num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3274		work_done++;
3275	}
3276
3277	if (work_done) {
3278		be_cq_notify(adapter, txo->cq.id, true, work_done);
3279		atomic_sub(num_wrbs, &txo->q.used);
3280
3281		/* As Tx wrbs have been freed up, wake up netdev queue
3282		 * if it was stopped due to lack of tx wrbs.  */
3283		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3284		    be_can_txq_wake(txo)) {
3285			netif_wake_subqueue(adapter->netdev, idx);
3286		}
3287
3288		u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3289		tx_stats(txo)->tx_compl += work_done;
3290		u64_stats_update_end(&tx_stats(txo)->sync_compl);
3291	}
3292}
3293
3294int be_poll(struct napi_struct *napi, int budget)
3295{
3296	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3297	struct be_adapter *adapter = eqo->adapter;
3298	int max_work = 0, work, i, num_evts;
3299	struct be_rx_obj *rxo;
3300	struct be_tx_obj *txo;
3301	u32 mult_enc = 0;
3302
3303	num_evts = events_get(eqo);
3304
3305	for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3306		be_process_tx(adapter, txo, i);
3307
3308	/* This loop will iterate twice for EQ0 in which
3309	 * completions of the last RXQ (default one) are also processed
3310	 * For other EQs the loop iterates only once
3311	 */
3312	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3313		work = be_process_rx(rxo, napi, budget);
3314		max_work = max(work, max_work);
3315	}
3316
3317	if (is_mcc_eqo(eqo))
3318		be_process_mcc(adapter);
3319
3320	if (max_work < budget) {
3321		napi_complete_done(napi, max_work);
3322
3323		/* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3324		 * delay via a delay multiplier encoding value
3325		 */
3326		if (skyhawk_chip(adapter))
3327			mult_enc = be_get_eq_delay_mult_enc(eqo);
3328
3329		be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3330			     mult_enc);
3331	} else {
3332		/* As we'll continue in polling mode, count and clear events */
3333		be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3334	}
3335	return max_work;
3336}
3337
3338void be_detect_error(struct be_adapter *adapter)
3339{
3340	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3341	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3342	struct device *dev = &adapter->pdev->dev;
3343	u16 val;
3344	u32 i;
3345
3346	if (be_check_error(adapter, BE_ERROR_HW))
3347		return;
3348
3349	if (lancer_chip(adapter)) {
3350		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3351		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3352			be_set_error(adapter, BE_ERROR_UE);
3353			sliport_err1 = ioread32(adapter->db +
3354						SLIPORT_ERROR1_OFFSET);
3355			sliport_err2 = ioread32(adapter->db +
3356						SLIPORT_ERROR2_OFFSET);
3357			/* Do not log error messages if its a FW reset */
3358			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3359			    sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3360				dev_info(dev, "Reset is in progress\n");
3361			} else {
3362				dev_err(dev, "Error detected in the card\n");
3363				dev_err(dev, "ERR: sliport status 0x%x\n",
3364					sliport_status);
3365				dev_err(dev, "ERR: sliport error1 0x%x\n",
3366					sliport_err1);
3367				dev_err(dev, "ERR: sliport error2 0x%x\n",
3368					sliport_err2);
3369			}
3370		}
3371	} else {
3372		ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3373		ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3374		ue_lo_mask = ioread32(adapter->pcicfg +
3375				      PCICFG_UE_STATUS_LOW_MASK);
3376		ue_hi_mask = ioread32(adapter->pcicfg +
3377				      PCICFG_UE_STATUS_HI_MASK);
3378
3379		ue_lo = (ue_lo & ~ue_lo_mask);
3380		ue_hi = (ue_hi & ~ue_hi_mask);
3381
3382		if (ue_lo || ue_hi) {
3383			/* On certain platforms BE3 hardware can indicate
3384			 * spurious UEs. In case of a UE in the chip,
3385			 * the POST register correctly reports either a
3386			 * FAT_LOG_START state (FW is currently dumping
3387			 * FAT log data) or a ARMFW_UE state. Check for the
3388			 * above states to ascertain if the UE is valid or not.
3389			 */
3390			if (BE3_chip(adapter)) {
3391				val = be_POST_stage_get(adapter);
3392				if ((val & POST_STAGE_FAT_LOG_START)
3393				     != POST_STAGE_FAT_LOG_START &&
3394				    (val & POST_STAGE_ARMFW_UE)
3395				     != POST_STAGE_ARMFW_UE &&
3396				    (val & POST_STAGE_RECOVERABLE_ERR)
3397				     != POST_STAGE_RECOVERABLE_ERR)
3398					return;
3399			}
3400
3401			dev_err(dev, "Error detected in the adapter");
3402			be_set_error(adapter, BE_ERROR_UE);
3403
3404			for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3405				if (ue_lo & 1)
3406					dev_err(dev, "UE: %s bit set\n",
3407						ue_status_low_desc[i]);
3408			}
3409			for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3410				if (ue_hi & 1)
3411					dev_err(dev, "UE: %s bit set\n",
3412						ue_status_hi_desc[i]);
3413			}
3414		}
3415	}
3416}
3417
3418static void be_msix_disable(struct be_adapter *adapter)
3419{
3420	if (msix_enabled(adapter)) {
3421		pci_disable_msix(adapter->pdev);
3422		adapter->num_msix_vec = 0;
3423		adapter->num_msix_roce_vec = 0;
3424	}
3425}
3426
3427static int be_msix_enable(struct be_adapter *adapter)
3428{
3429	unsigned int i, max_roce_eqs;
3430	struct device *dev = &adapter->pdev->dev;
3431	int num_vec;
3432
3433	/* If RoCE is supported, program the max number of vectors that
3434	 * could be used for NIC and RoCE, else, just program the number
3435	 * we'll use initially.
3436	 */
3437	if (be_roce_supported(adapter)) {
3438		max_roce_eqs =
3439			be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3440		max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3441		num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3442	} else {
3443		num_vec = max(adapter->cfg_num_rx_irqs,
3444			      adapter->cfg_num_tx_irqs);
3445	}
3446
3447	for (i = 0; i < num_vec; i++)
3448		adapter->msix_entries[i].entry = i;
3449
3450	num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3451					MIN_MSIX_VECTORS, num_vec);
3452	if (num_vec < 0)
3453		goto fail;
3454
3455	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3456		adapter->num_msix_roce_vec = num_vec / 2;
3457		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3458			 adapter->num_msix_roce_vec);
3459	}
3460
3461	adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3462
3463	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3464		 adapter->num_msix_vec);
3465	return 0;
3466
3467fail:
3468	dev_warn(dev, "MSIx enable failed\n");
3469
3470	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
3471	if (be_virtfn(adapter))
3472		return num_vec;
3473	return 0;
3474}
3475
3476static inline int be_msix_vec_get(struct be_adapter *adapter,
3477				  struct be_eq_obj *eqo)
3478{
3479	return adapter->msix_entries[eqo->msix_idx].vector;
3480}
3481
3482static int be_msix_register(struct be_adapter *adapter)
3483{
3484	struct net_device *netdev = adapter->netdev;
3485	struct be_eq_obj *eqo;
3486	int status, i, vec;
3487
3488	for_all_evt_queues(adapter, eqo, i) {
3489		sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3490		vec = be_msix_vec_get(adapter, eqo);
3491		status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3492		if (status)
3493			goto err_msix;
3494
3495		irq_update_affinity_hint(vec, eqo->affinity_mask);
3496	}
3497
3498	return 0;
3499err_msix:
3500	for (i--; i >= 0; i--) {
3501		eqo = &adapter->eq_obj[i];
3502		free_irq(be_msix_vec_get(adapter, eqo), eqo);
3503	}
3504	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3505		 status);
3506	be_msix_disable(adapter);
3507	return status;
3508}
3509
3510static int be_irq_register(struct be_adapter *adapter)
3511{
3512	struct net_device *netdev = adapter->netdev;
3513	int status;
3514
3515	if (msix_enabled(adapter)) {
3516		status = be_msix_register(adapter);
3517		if (status == 0)
3518			goto done;
3519		/* INTx is not supported for VF */
3520		if (be_virtfn(adapter))
3521			return status;
3522	}
3523
3524	/* INTx: only the first EQ is used */
3525	netdev->irq = adapter->pdev->irq;
3526	status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3527			     &adapter->eq_obj[0]);
3528	if (status) {
3529		dev_err(&adapter->pdev->dev,
3530			"INTx request IRQ failed - err %d\n", status);
3531		return status;
3532	}
3533done:
3534	adapter->isr_registered = true;
3535	return 0;
3536}
3537
3538static void be_irq_unregister(struct be_adapter *adapter)
3539{
3540	struct net_device *netdev = adapter->netdev;
3541	struct be_eq_obj *eqo;
3542	int i, vec;
3543
3544	if (!adapter->isr_registered)
3545		return;
3546
3547	/* INTx */
3548	if (!msix_enabled(adapter)) {
3549		free_irq(netdev->irq, &adapter->eq_obj[0]);
3550		goto done;
3551	}
3552
3553	/* MSIx */
3554	for_all_evt_queues(adapter, eqo, i) {
3555		vec = be_msix_vec_get(adapter, eqo);
3556		irq_update_affinity_hint(vec, NULL);
3557		free_irq(vec, eqo);
3558	}
3559
3560done:
3561	adapter->isr_registered = false;
3562}
3563
3564static void be_rx_qs_destroy(struct be_adapter *adapter)
3565{
3566	struct rss_info *rss = &adapter->rss_info;
3567	struct be_queue_info *q;
3568	struct be_rx_obj *rxo;
3569	int i;
3570
3571	for_all_rx_queues(adapter, rxo, i) {
3572		q = &rxo->q;
3573		if (q->created) {
3574			/* If RXQs are destroyed while in an "out of buffer"
3575			 * state, there is a possibility of an HW stall on
3576			 * Lancer. So, post 64 buffers to each queue to relieve
3577			 * the "out of buffer" condition.
3578			 * Make sure there's space in the RXQ before posting.
3579			 */
3580			if (lancer_chip(adapter)) {
3581				be_rx_cq_clean(rxo);
3582				if (atomic_read(&q->used) == 0)
3583					be_post_rx_frags(rxo, GFP_KERNEL,
3584							 MAX_RX_POST);
3585			}
3586
3587			be_cmd_rxq_destroy(adapter, q);
3588			be_rx_cq_clean(rxo);
3589			be_rxq_clean(rxo);
3590		}
3591		be_queue_free(adapter, q);
3592	}
3593
3594	if (rss->rss_flags) {
3595		rss->rss_flags = RSS_ENABLE_NONE;
3596		be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3597				  128, rss->rss_hkey);
3598	}
3599}
3600
3601static void be_disable_if_filters(struct be_adapter *adapter)
3602{
3603	/* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3604	if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3605	    check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3606		be_dev_mac_del(adapter, adapter->pmac_id[0]);
3607		eth_zero_addr(adapter->dev_mac);
3608	}
3609
3610	be_clear_uc_list(adapter);
3611	be_clear_mc_list(adapter);
3612
3613	/* The IFACE flags are enabled in the open path and cleared
3614	 * in the close path. When a VF gets detached from the host and
3615	 * assigned to a VM the following happens:
3616	 *	- VF's IFACE flags get cleared in the detach path
3617	 *	- IFACE create is issued by the VF in the attach path
3618	 * Due to a bug in the BE3/Skyhawk-R FW
3619	 * (Lancer FW doesn't have the bug), the IFACE capability flags
3620	 * specified along with the IFACE create cmd issued by a VF are not
3621	 * honoured by FW.  As a consequence, if a *new* driver
3622	 * (that enables/disables IFACE flags in open/close)
3623	 * is loaded in the host and an *old* driver is * used by a VM/VF,
3624	 * the IFACE gets created *without* the needed flags.
3625	 * To avoid this, disable RX-filter flags only for Lancer.
3626	 */
3627	if (lancer_chip(adapter)) {
3628		be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3629		adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3630	}
3631}
3632
3633static int be_close(struct net_device *netdev)
3634{
3635	struct be_adapter *adapter = netdev_priv(netdev);
3636	struct be_eq_obj *eqo;
3637	int i;
3638
3639	/* This protection is needed as be_close() may be called even when the
3640	 * adapter is in cleared state (after eeh perm failure)
3641	 */
3642	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3643		return 0;
3644
3645	/* Before attempting cleanup ensure all the pending cmds in the
3646	 * config_wq have finished execution
3647	 */
3648	flush_workqueue(be_wq);
3649
3650	be_disable_if_filters(adapter);
3651
3652	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3653		for_all_evt_queues(adapter, eqo, i) {
3654			napi_disable(&eqo->napi);
3655		}
3656		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3657	}
3658
3659	be_async_mcc_disable(adapter);
3660
3661	/* Wait for all pending tx completions to arrive so that
3662	 * all tx skbs are freed.
3663	 */
3664	netif_tx_disable(netdev);
3665	be_tx_compl_clean(adapter);
3666
3667	be_rx_qs_destroy(adapter);
3668
3669	for_all_evt_queues(adapter, eqo, i) {
3670		if (msix_enabled(adapter))
3671			synchronize_irq(be_msix_vec_get(adapter, eqo));
3672		else
3673			synchronize_irq(netdev->irq);
3674		be_eq_clean(eqo);
3675	}
3676
3677	be_irq_unregister(adapter);
3678
3679	return 0;
3680}
3681
3682static int be_rx_qs_create(struct be_adapter *adapter)
3683{
3684	struct rss_info *rss = &adapter->rss_info;
3685	u8 rss_key[RSS_HASH_KEY_LEN];
3686	struct be_rx_obj *rxo;
3687	int rc, i, j;
3688
3689	for_all_rx_queues(adapter, rxo, i) {
3690		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3691				    sizeof(struct be_eth_rx_d));
3692		if (rc)
3693			return rc;
3694	}
3695
3696	if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3697		rxo = default_rxo(adapter);
3698		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3699				       rx_frag_size, adapter->if_handle,
3700				       false, &rxo->rss_id);
3701		if (rc)
3702			return rc;
3703	}
3704
3705	for_all_rss_queues(adapter, rxo, i) {
3706		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3707				       rx_frag_size, adapter->if_handle,
3708				       true, &rxo->rss_id);
3709		if (rc)
3710			return rc;
3711	}
3712
3713	if (be_multi_rxq(adapter)) {
3714		for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3715			for_all_rss_queues(adapter, rxo, i) {
3716				if ((j + i) >= RSS_INDIR_TABLE_LEN)
3717					break;
3718				rss->rsstable[j + i] = rxo->rss_id;
3719				rss->rss_queue[j + i] = i;
3720			}
3721		}
3722		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3723			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3724
3725		if (!BEx_chip(adapter))
3726			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3727				RSS_ENABLE_UDP_IPV6;
3728
3729		netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3730		rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3731				       RSS_INDIR_TABLE_LEN, rss_key);
3732		if (rc) {
3733			rss->rss_flags = RSS_ENABLE_NONE;
3734			return rc;
3735		}
3736
3737		memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3738	} else {
3739		/* Disable RSS, if only default RX Q is created */
3740		rss->rss_flags = RSS_ENABLE_NONE;
3741	}
3742
3743
3744	/* Post 1 less than RXQ-len to avoid head being equal to tail,
3745	 * which is a queue empty condition
3746	 */
3747	for_all_rx_queues(adapter, rxo, i)
3748		be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3749
3750	return 0;
3751}
3752
3753static int be_enable_if_filters(struct be_adapter *adapter)
3754{
3755	int status;
3756
3757	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3758	if (status)
3759		return status;
3760
3761	/* Normally this condition usually true as the ->dev_mac is zeroed.
3762	 * But on BE3 VFs the initial MAC is pre-programmed by PF and
3763	 * subsequent be_dev_mac_add() can fail (after fresh boot)
3764	 */
3765	if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3766		int old_pmac_id = -1;
3767
3768		/* Remember old programmed MAC if any - can happen on BE3 VF */
3769		if (!is_zero_ether_addr(adapter->dev_mac))
3770			old_pmac_id = adapter->pmac_id[0];
3771
3772		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3773		if (status)
3774			return status;
3775
3776		/* Delete the old programmed MAC as we successfully programmed
3777		 * a new MAC
3778		 */
3779		if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3780			be_dev_mac_del(adapter, old_pmac_id);
3781
3782		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3783	}
3784
3785	if (adapter->vlans_added)
3786		be_vid_config(adapter);
3787
3788	__be_set_rx_mode(adapter);
3789
3790	return 0;
3791}
3792
3793static int be_open(struct net_device *netdev)
3794{
3795	struct be_adapter *adapter = netdev_priv(netdev);
3796	struct be_eq_obj *eqo;
3797	struct be_rx_obj *rxo;
3798	struct be_tx_obj *txo;
3799	u8 link_status;
3800	int status, i;
3801
3802	status = be_rx_qs_create(adapter);
3803	if (status)
3804		goto err;
3805
3806	status = be_enable_if_filters(adapter);
3807	if (status)
3808		goto err;
3809
3810	status = be_irq_register(adapter);
3811	if (status)
3812		goto err;
3813
3814	for_all_rx_queues(adapter, rxo, i)
3815		be_cq_notify(adapter, rxo->cq.id, true, 0);
3816
3817	for_all_tx_queues(adapter, txo, i)
3818		be_cq_notify(adapter, txo->cq.id, true, 0);
3819
3820	be_async_mcc_enable(adapter);
3821
3822	for_all_evt_queues(adapter, eqo, i) {
3823		napi_enable(&eqo->napi);
3824		be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3825	}
3826	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3827
3828	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3829	if (!status)
3830		be_link_status_update(adapter, link_status);
3831
3832	netif_tx_start_all_queues(netdev);
3833
3834	udp_tunnel_nic_reset_ntf(netdev);
3835
3836	return 0;
3837err:
3838	be_close(adapter->netdev);
3839	return -EIO;
3840}
3841
3842static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3843{
3844	u32 addr;
3845
3846	addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3847
3848	mac[5] = (u8)(addr & 0xFF);
3849	mac[4] = (u8)((addr >> 8) & 0xFF);
3850	mac[3] = (u8)((addr >> 16) & 0xFF);
3851	/* Use the OUI from the current MAC address */
3852	memcpy(mac, adapter->netdev->dev_addr, 3);
3853}
3854
3855/*
3856 * Generate a seed MAC address from the PF MAC Address using jhash.
3857 * MAC Address for VFs are assigned incrementally starting from the seed.
3858 * These addresses are programmed in the ASIC by the PF and the VF driver
3859 * queries for the MAC address during its probe.
3860 */
3861static int be_vf_eth_addr_config(struct be_adapter *adapter)
3862{
3863	u32 vf;
3864	int status = 0;
3865	u8 mac[ETH_ALEN];
3866	struct be_vf_cfg *vf_cfg;
3867
3868	be_vf_eth_addr_generate(adapter, mac);
3869
3870	for_all_vfs(adapter, vf_cfg, vf) {
3871		if (BEx_chip(adapter))
3872			status = be_cmd_pmac_add(adapter, mac,
3873						 vf_cfg->if_handle,
3874						 &vf_cfg->pmac_id, vf + 1);
3875		else
3876			status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3877						vf + 1);
3878
3879		if (status)
3880			dev_err(&adapter->pdev->dev,
3881				"Mac address assignment failed for VF %d\n",
3882				vf);
3883		else
3884			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3885
3886		mac[5] += 1;
3887	}
3888	return status;
3889}
3890
3891static int be_vfs_mac_query(struct be_adapter *adapter)
3892{
3893	int status, vf;
3894	u8 mac[ETH_ALEN];
3895	struct be_vf_cfg *vf_cfg;
3896
3897	for_all_vfs(adapter, vf_cfg, vf) {
3898		status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3899					       mac, vf_cfg->if_handle,
3900					       false, vf+1);
3901		if (status)
3902			return status;
3903		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3904	}
3905	return 0;
3906}
3907
3908static void be_vf_clear(struct be_adapter *adapter)
3909{
3910	struct be_vf_cfg *vf_cfg;
3911	u32 vf;
3912
3913	if (pci_vfs_assigned(adapter->pdev)) {
3914		dev_warn(&adapter->pdev->dev,
3915			 "VFs are assigned to VMs: not disabling VFs\n");
3916		goto done;
3917	}
3918
3919	pci_disable_sriov(adapter->pdev);
3920
3921	for_all_vfs(adapter, vf_cfg, vf) {
3922		if (BEx_chip(adapter))
3923			be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3924					vf_cfg->pmac_id, vf + 1);
3925		else
3926			be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3927				       vf + 1);
3928
3929		be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3930	}
3931
3932	if (BE3_chip(adapter))
3933		be_cmd_set_hsw_config(adapter, 0, 0,
3934				      adapter->if_handle,
3935				      PORT_FWD_TYPE_PASSTHRU, 0);
3936done:
3937	kfree(adapter->vf_cfg);
3938	adapter->num_vfs = 0;
3939	adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3940}
3941
3942static void be_clear_queues(struct be_adapter *adapter)
3943{
3944	be_mcc_queues_destroy(adapter);
3945	be_rx_cqs_destroy(adapter);
3946	be_tx_queues_destroy(adapter);
3947	be_evt_queues_destroy(adapter);
3948}
3949
3950static void be_cancel_worker(struct be_adapter *adapter)
3951{
3952	if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3953		cancel_delayed_work_sync(&adapter->work);
3954		adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3955	}
3956}
3957
3958static void be_cancel_err_detection(struct be_adapter *adapter)
3959{
3960	struct be_error_recovery *err_rec = &adapter->error_recovery;
3961
3962	if (!be_err_recovery_workq)
3963		return;
3964
3965	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3966		cancel_delayed_work_sync(&err_rec->err_detection_work);
3967		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3968	}
3969}
3970
3971/* VxLAN offload Notes:
3972 *
3973 * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
3974 * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
3975 * is expected to work across all types of IP tunnels once exported. Skyhawk
3976 * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
3977 * offloads in hw_enc_features only when a VxLAN port is added. If other (non
3978 * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
3979 * those other tunnels are unexported on the fly through ndo_features_check().
3980 */
3981static int be_vxlan_set_port(struct net_device *netdev, unsigned int table,
3982			     unsigned int entry, struct udp_tunnel_info *ti)
3983{
3984	struct be_adapter *adapter = netdev_priv(netdev);
3985	struct device *dev = &adapter->pdev->dev;
3986	int status;
3987
3988	status = be_cmd_manage_iface(adapter, adapter->if_handle,
3989				     OP_CONVERT_NORMAL_TO_TUNNEL);
3990	if (status) {
3991		dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3992		return status;
3993	}
3994	adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3995
3996	status = be_cmd_set_vxlan_port(adapter, ti->port);
3997	if (status) {
3998		dev_warn(dev, "Failed to add VxLAN port\n");
3999		return status;
4000	}
4001	adapter->vxlan_port = ti->port;
4002
4003	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4004				   NETIF_F_TSO | NETIF_F_TSO6 |
4005				   NETIF_F_GSO_UDP_TUNNEL;
4006
4007	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4008		 be16_to_cpu(ti->port));
4009	return 0;
4010}
4011
4012static int be_vxlan_unset_port(struct net_device *netdev, unsigned int table,
4013			       unsigned int entry, struct udp_tunnel_info *ti)
4014{
4015	struct be_adapter *adapter = netdev_priv(netdev);
4016
4017	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4018		be_cmd_manage_iface(adapter, adapter->if_handle,
4019				    OP_CONVERT_TUNNEL_TO_NORMAL);
4020
4021	if (adapter->vxlan_port)
4022		be_cmd_set_vxlan_port(adapter, 0);
4023
4024	adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4025	adapter->vxlan_port = 0;
4026
4027	netdev->hw_enc_features = 0;
4028	return 0;
4029}
4030
4031static const struct udp_tunnel_nic_info be_udp_tunnels = {
4032	.set_port	= be_vxlan_set_port,
4033	.unset_port	= be_vxlan_unset_port,
4034	.flags		= UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
4035			  UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
4036	.tables		= {
4037		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
4038	},
4039};
4040
4041static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4042				struct be_resources *vft_res)
4043{
4044	struct be_resources res = adapter->pool_res;
4045	u32 vf_if_cap_flags = res.vf_if_cap_flags;
4046	struct be_resources res_mod = {0};
4047	u16 num_vf_qs = 1;
4048
4049	/* Distribute the queue resources among the PF and it's VFs */
4050	if (num_vfs) {
4051		/* Divide the rx queues evenly among the VFs and the PF, capped
4052		 * at VF-EQ-count. Any remainder queues belong to the PF.
4053		 */
4054		num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4055				res.max_rss_qs / (num_vfs + 1));
4056
4057		/* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4058		 * RSS Tables per port. Provide RSS on VFs, only if number of
4059		 * VFs requested is less than it's PF Pool's RSS Tables limit.
4060		 */
4061		if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4062			num_vf_qs = 1;
4063	}
4064
4065	/* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4066	 * which are modifiable using SET_PROFILE_CONFIG cmd.
4067	 */
4068	be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4069				  RESOURCE_MODIFIABLE, 0);
4070
4071	/* If RSS IFACE capability flags are modifiable for a VF, set the
4072	 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4073	 * more than 1 RSSQ is available for a VF.
4074	 * Otherwise, provision only 1 queue pair for VF.
4075	 */
4076	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4077		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4078		if (num_vf_qs > 1) {
4079			vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4080			if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4081				vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4082		} else {
4083			vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4084					     BE_IF_FLAGS_DEFQ_RSS);
4085		}
4086	} else {
4087		num_vf_qs = 1;
4088	}
4089
4090	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4091		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4092		vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4093	}
4094
4095	vft_res->vf_if_cap_flags = vf_if_cap_flags;
4096	vft_res->max_rx_qs = num_vf_qs;
4097	vft_res->max_rss_qs = num_vf_qs;
4098	vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4099	vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4100
4101	/* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4102	 * among the PF and it's VFs, if the fields are changeable
4103	 */
4104	if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4105		vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4106
4107	if (res_mod.max_vlans == FIELD_MODIFIABLE)
4108		vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4109
4110	if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4111		vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4112
4113	if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4114		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4115}
4116
4117static void be_if_destroy(struct be_adapter *adapter)
4118{
4119	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4120
4121	kfree(adapter->pmac_id);
4122	adapter->pmac_id = NULL;
4123
4124	kfree(adapter->mc_list);
4125	adapter->mc_list = NULL;
4126
4127	kfree(adapter->uc_list);
4128	adapter->uc_list = NULL;
4129}
4130
4131static int be_clear(struct be_adapter *adapter)
4132{
4133	struct pci_dev *pdev = adapter->pdev;
4134	struct  be_resources vft_res = {0};
4135
4136	be_cancel_worker(adapter);
4137
4138	flush_workqueue(be_wq);
4139
4140	if (sriov_enabled(adapter))
4141		be_vf_clear(adapter);
4142
4143	/* Re-configure FW to distribute resources evenly across max-supported
4144	 * number of VFs, only when VFs are not already enabled.
4145	 */
4146	if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4147	    !pci_vfs_assigned(pdev)) {
4148		be_calculate_vf_res(adapter,
4149				    pci_sriov_get_totalvfs(pdev),
4150				    &vft_res);
4151		be_cmd_set_sriov_config(adapter, adapter->pool_res,
4152					pci_sriov_get_totalvfs(pdev),
4153					&vft_res);
4154	}
4155
4156	be_vxlan_unset_port(adapter->netdev, 0, 0, NULL);
4157
4158	be_if_destroy(adapter);
4159
4160	be_clear_queues(adapter);
4161
4162	be_msix_disable(adapter);
4163	adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4164	return 0;
4165}
4166
4167static int be_vfs_if_create(struct be_adapter *adapter)
4168{
4169	struct be_resources res = {0};
4170	u32 cap_flags, en_flags, vf;
4171	struct be_vf_cfg *vf_cfg;
4172	int status;
4173
4174	/* If a FW profile exists, then cap_flags are updated */
4175	cap_flags = BE_VF_IF_EN_FLAGS;
4176
4177	for_all_vfs(adapter, vf_cfg, vf) {
4178		if (!BE3_chip(adapter)) {
4179			status = be_cmd_get_profile_config(adapter, &res, NULL,
4180							   ACTIVE_PROFILE_TYPE,
4181							   RESOURCE_LIMITS,
4182							   vf + 1);
4183			if (!status) {
4184				cap_flags = res.if_cap_flags;
4185				/* Prevent VFs from enabling VLAN promiscuous
4186				 * mode
4187				 */
4188				cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4189			}
4190		}
4191
4192		/* PF should enable IF flags during proxy if_create call */
4193		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4194		status = be_cmd_if_create(adapter, cap_flags, en_flags,
4195					  &vf_cfg->if_handle, vf + 1);
4196		if (status)
4197			return status;
4198	}
4199
4200	return 0;
4201}
4202
4203static int be_vf_setup_init(struct be_adapter *adapter)
4204{
4205	struct be_vf_cfg *vf_cfg;
4206	int vf;
4207
4208	adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4209				  GFP_KERNEL);
4210	if (!adapter->vf_cfg)
4211		return -ENOMEM;
4212
4213	for_all_vfs(adapter, vf_cfg, vf) {
4214		vf_cfg->if_handle = -1;
4215		vf_cfg->pmac_id = -1;
4216	}
4217	return 0;
4218}
4219
4220static int be_vf_setup(struct be_adapter *adapter)
4221{
4222	struct device *dev = &adapter->pdev->dev;
4223	struct be_vf_cfg *vf_cfg;
4224	int status, old_vfs, vf;
4225	bool spoofchk;
4226
4227	old_vfs = pci_num_vf(adapter->pdev);
4228
4229	status = be_vf_setup_init(adapter);
4230	if (status)
4231		goto err;
4232
4233	if (old_vfs) {
4234		for_all_vfs(adapter, vf_cfg, vf) {
4235			status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4236			if (status)
4237				goto err;
4238		}
4239
4240		status = be_vfs_mac_query(adapter);
4241		if (status)
4242			goto err;
4243	} else {
4244		status = be_vfs_if_create(adapter);
4245		if (status)
4246			goto err;
4247
4248		status = be_vf_eth_addr_config(adapter);
4249		if (status)
4250			goto err;
4251	}
4252
4253	for_all_vfs(adapter, vf_cfg, vf) {
4254		/* Allow VFs to programs MAC/VLAN filters */
4255		status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4256						  vf + 1);
4257		if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4258			status = be_cmd_set_fn_privileges(adapter,
4259							  vf_cfg->privileges |
4260							  BE_PRIV_FILTMGMT,
4261							  vf + 1);
4262			if (!status) {
4263				vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4264				dev_info(dev, "VF%d has FILTMGMT privilege\n",
4265					 vf);
4266			}
4267		}
4268
4269		/* Allow full available bandwidth */
4270		if (!old_vfs)
4271			be_cmd_config_qos(adapter, 0, 0, vf + 1);
4272
4273		status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4274					       vf_cfg->if_handle, NULL,
4275					       &spoofchk);
4276		if (!status)
4277			vf_cfg->spoofchk = spoofchk;
4278
4279		if (!old_vfs) {
4280			be_cmd_enable_vf(adapter, vf + 1);
4281			be_cmd_set_logical_link_config(adapter,
4282						       IFLA_VF_LINK_STATE_AUTO,
4283						       vf+1);
4284		}
4285	}
4286
4287	if (!old_vfs) {
4288		status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4289		if (status) {
4290			dev_err(dev, "SRIOV enable failed\n");
4291			adapter->num_vfs = 0;
4292			goto err;
4293		}
4294	}
4295
4296	if (BE3_chip(adapter)) {
4297		/* On BE3, enable VEB only when SRIOV is enabled */
4298		status = be_cmd_set_hsw_config(adapter, 0, 0,
4299					       adapter->if_handle,
4300					       PORT_FWD_TYPE_VEB, 0);
4301		if (status)
4302			goto err;
4303	}
4304
4305	adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4306	return 0;
4307err:
4308	dev_err(dev, "VF setup failed\n");
4309	be_vf_clear(adapter);
4310	return status;
4311}
4312
4313/* Converting function_mode bits on BE3 to SH mc_type enums */
4314
4315static u8 be_convert_mc_type(u32 function_mode)
4316{
4317	if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4318		return vNIC1;
4319	else if (function_mode & QNQ_MODE)
4320		return FLEX10;
4321	else if (function_mode & VNIC_MODE)
4322		return vNIC2;
4323	else if (function_mode & UMC_ENABLED)
4324		return UMC;
4325	else
4326		return MC_NONE;
4327}
4328
4329/* On BE2/BE3 FW does not suggest the supported limits */
4330static void BEx_get_resources(struct be_adapter *adapter,
4331			      struct be_resources *res)
4332{
4333	bool use_sriov = adapter->num_vfs ? 1 : 0;
4334
4335	if (be_physfn(adapter))
4336		res->max_uc_mac = BE_UC_PMAC_COUNT;
4337	else
4338		res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4339
4340	adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4341
4342	if (be_is_mc(adapter)) {
4343		/* Assuming that there are 4 channels per port,
4344		 * when multi-channel is enabled
4345		 */
4346		if (be_is_qnq_mode(adapter))
4347			res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4348		else
4349			/* In a non-qnq multichannel mode, the pvid
4350			 * takes up one vlan entry
4351			 */
4352			res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4353	} else {
4354		res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4355	}
4356
4357	res->max_mcast_mac = BE_MAX_MC;
4358
4359	/* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4360	 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4361	 *    *only* if it is RSS-capable.
4362	 */
4363	if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4364	    be_virtfn(adapter) ||
4365	    (be_is_mc(adapter) &&
4366	     !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4367		res->max_tx_qs = 1;
4368	} else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4369		struct be_resources super_nic_res = {0};
4370
4371		/* On a SuperNIC profile, the driver needs to use the
4372		 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4373		 */
4374		be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4375					  ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4376					  0);
4377		/* Some old versions of BE3 FW don't report max_tx_qs value */
4378		res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4379	} else {
4380		res->max_tx_qs = BE3_MAX_TX_QS;
4381	}
4382
4383	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4384	    !use_sriov && be_physfn(adapter))
4385		res->max_rss_qs = (adapter->be3_native) ?
4386					   BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4387	res->max_rx_qs = res->max_rss_qs + 1;
4388
4389	if (be_physfn(adapter))
4390		res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4391					BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4392	else
4393		res->max_evt_qs = 1;
4394
4395	res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4396	res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4397	if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4398		res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4399}
4400
4401static void be_setup_init(struct be_adapter *adapter)
4402{
4403	adapter->vlan_prio_bmap = 0xff;
4404	adapter->phy.link_speed = -1;
4405	adapter->if_handle = -1;
4406	adapter->be3_native = false;
4407	adapter->if_flags = 0;
4408	adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4409	if (be_physfn(adapter))
4410		adapter->cmd_privileges = MAX_PRIVILEGES;
4411	else
4412		adapter->cmd_privileges = MIN_PRIVILEGES;
4413}
4414
4415/* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4416 * However, this HW limitation is not exposed to the host via any SLI cmd.
4417 * As a result, in the case of SRIOV and in particular multi-partition configs
4418 * the driver needs to calcuate a proportional share of RSS Tables per PF-pool
4419 * for distribution between the VFs. This self-imposed limit will determine the
4420 * no: of VFs for which RSS can be enabled.
4421 */
4422static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4423{
4424	struct be_port_resources port_res = {0};
4425	u8 rss_tables_on_port;
4426	u16 max_vfs = be_max_vfs(adapter);
4427
4428	be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4429				  RESOURCE_LIMITS, 0);
4430
4431	rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4432
4433	/* Each PF Pool's RSS Tables limit =
4434	 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4435	 */
4436	adapter->pool_res.max_rss_tables =
4437		max_vfs * rss_tables_on_port / port_res.max_vfs;
4438}
4439
4440static int be_get_sriov_config(struct be_adapter *adapter)
4441{
4442	struct be_resources res = {0};
4443	int max_vfs, old_vfs;
4444
4445	be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4446				  RESOURCE_LIMITS, 0);
4447
4448	/* Some old versions of BE3 FW don't report max_vfs value */
4449	if (BE3_chip(adapter) && !res.max_vfs) {
4450		max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4451		res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4452	}
4453
4454	adapter->pool_res = res;
4455
4456	/* If during previous unload of the driver, the VFs were not disabled,
4457	 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4458	 * Instead use the TotalVFs value stored in the pci-dev struct.
4459	 */
4460	old_vfs = pci_num_vf(adapter->pdev);
4461	if (old_vfs) {
4462		dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4463			 old_vfs);
4464
4465		adapter->pool_res.max_vfs =
4466			pci_sriov_get_totalvfs(adapter->pdev);
4467		adapter->num_vfs = old_vfs;
4468	}
4469
4470	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4471		be_calculate_pf_pool_rss_tables(adapter);
4472		dev_info(&adapter->pdev->dev,
4473			 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4474			 be_max_pf_pool_rss_tables(adapter));
4475	}
4476	return 0;
4477}
4478
4479static void be_alloc_sriov_res(struct be_adapter *adapter)
4480{
4481	int old_vfs = pci_num_vf(adapter->pdev);
4482	struct  be_resources vft_res = {0};
4483	int status;
4484
4485	be_get_sriov_config(adapter);
4486
4487	if (!old_vfs)
4488		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4489
4490	/* When the HW is in SRIOV capable configuration, the PF-pool
4491	 * resources are given to PF during driver load, if there are no
4492	 * old VFs. This facility is not available in BE3 FW.
4493	 * Also, this is done by FW in Lancer chip.
4494	 */
4495	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4496		be_calculate_vf_res(adapter, 0, &vft_res);
4497		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4498						 &vft_res);
4499		if (status)
4500			dev_err(&adapter->pdev->dev,
4501				"Failed to optimize SRIOV resources\n");
4502	}
4503}
4504
4505static int be_get_resources(struct be_adapter *adapter)
4506{
4507	struct device *dev = &adapter->pdev->dev;
4508	struct be_resources res = {0};
4509	int status;
4510
4511	/* For Lancer, SH etc read per-function resource limits from FW.
4512	 * GET_FUNC_CONFIG returns per function guaranteed limits.
4513	 * GET_PROFILE_CONFIG returns PCI-E related limits PF-pool limits
4514	 */
4515	if (BEx_chip(adapter)) {
4516		BEx_get_resources(adapter, &res);
4517	} else {
4518		status = be_cmd_get_func_config(adapter, &res);
4519		if (status)
4520			return status;
4521
4522		/* If a deafault RXQ must be created, we'll use up one RSSQ*/
4523		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4524		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4525			res.max_rss_qs -= 1;
4526	}
4527
4528	/* If RoCE is supported stash away half the EQs for RoCE */
4529	res.max_nic_evt_qs = be_roce_supported(adapter) ?
4530				res.max_evt_qs / 2 : res.max_evt_qs;
4531	adapter->res = res;
4532
4533	/* If FW supports RSS default queue, then skip creating non-RSS
4534	 * queue for non-IP traffic.
4535	 */
4536	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4537				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4538
4539	dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4540		 be_max_txqs(adapter), be_max_rxqs(adapter),
4541		 be_max_rss(adapter), be_max_nic_eqs(adapter),
4542		 be_max_vfs(adapter));
4543	dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4544		 be_max_uc(adapter), be_max_mc(adapter),
4545		 be_max_vlans(adapter));
4546
4547	/* Ensure RX and TX queues are created in pairs at init time */
4548	adapter->cfg_num_rx_irqs =
4549				min_t(u16, netif_get_num_default_rss_queues(),
4550				      be_max_qp_irqs(adapter));
4551	adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4552	return 0;
4553}
4554
4555static int be_get_config(struct be_adapter *adapter)
4556{
4557	int status, level;
4558	u16 profile_id;
4559
4560	status = be_cmd_get_cntl_attributes(adapter);
4561	if (status)
4562		return status;
4563
4564	status = be_cmd_query_fw_cfg(adapter);
4565	if (status)
4566		return status;
4567
4568	if (!lancer_chip(adapter) && be_physfn(adapter))
4569		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4570
4571	if (BEx_chip(adapter)) {
4572		level = be_cmd_get_fw_log_level(adapter);
4573		adapter->msg_enable =
4574			level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4575	}
4576
4577	be_cmd_get_acpi_wol_cap(adapter);
4578	pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4579	pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4580
4581	be_cmd_query_port_name(adapter);
4582
4583	if (be_physfn(adapter)) {
4584		status = be_cmd_get_active_profile(adapter, &profile_id);
4585		if (!status)
4586			dev_info(&adapter->pdev->dev,
4587				 "Using profile 0x%x\n", profile_id);
4588	}
4589
4590	return 0;
4591}
4592
4593static int be_mac_setup(struct be_adapter *adapter)
4594{
4595	u8 mac[ETH_ALEN];
4596	int status;
4597
4598	if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4599		status = be_cmd_get_perm_mac(adapter, mac);
4600		if (status)
4601			return status;
4602
4603		eth_hw_addr_set(adapter->netdev, mac);
4604		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4605
4606		/* Initial MAC for BE3 VFs is already programmed by PF */
4607		if (BEx_chip(adapter) && be_virtfn(adapter))
4608			memcpy(adapter->dev_mac, mac, ETH_ALEN);
4609	}
4610
4611	return 0;
4612}
4613
4614static void be_schedule_worker(struct be_adapter *adapter)
4615{
4616	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4617	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4618}
4619
4620static void be_destroy_err_recovery_workq(void)
4621{
4622	if (!be_err_recovery_workq)
4623		return;
4624
 
4625	destroy_workqueue(be_err_recovery_workq);
4626	be_err_recovery_workq = NULL;
4627}
4628
4629static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4630{
4631	struct be_error_recovery *err_rec = &adapter->error_recovery;
4632
4633	if (!be_err_recovery_workq)
4634		return;
4635
4636	queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4637			   msecs_to_jiffies(delay));
4638	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4639}
4640
4641static int be_setup_queues(struct be_adapter *adapter)
4642{
4643	struct net_device *netdev = adapter->netdev;
4644	int status;
4645
4646	status = be_evt_queues_create(adapter);
4647	if (status)
4648		goto err;
4649
4650	status = be_tx_qs_create(adapter);
4651	if (status)
4652		goto err;
4653
4654	status = be_rx_cqs_create(adapter);
4655	if (status)
4656		goto err;
4657
4658	status = be_mcc_queues_create(adapter);
4659	if (status)
4660		goto err;
4661
4662	status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4663	if (status)
4664		goto err;
4665
4666	status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4667	if (status)
4668		goto err;
4669
4670	return 0;
4671err:
4672	dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4673	return status;
4674}
4675
4676static int be_if_create(struct be_adapter *adapter)
4677{
4678	u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4679	u32 cap_flags = be_if_cap_flags(adapter);
 
4680
4681	/* alloc required memory for other filtering fields */
4682	adapter->pmac_id = kcalloc(be_max_uc(adapter),
4683				   sizeof(*adapter->pmac_id), GFP_KERNEL);
4684	if (!adapter->pmac_id)
4685		return -ENOMEM;
4686
4687	adapter->mc_list = kcalloc(be_max_mc(adapter),
4688				   sizeof(*adapter->mc_list), GFP_KERNEL);
4689	if (!adapter->mc_list)
4690		return -ENOMEM;
4691
4692	adapter->uc_list = kcalloc(be_max_uc(adapter),
4693				   sizeof(*adapter->uc_list), GFP_KERNEL);
4694	if (!adapter->uc_list)
4695		return -ENOMEM;
4696
4697	if (adapter->cfg_num_rx_irqs == 1)
4698		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4699
4700	en_flags &= cap_flags;
4701	/* will enable all the needed filter flags in be_open() */
4702	return be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4703				  &adapter->if_handle, 0);
 
 
 
 
 
4704}
4705
4706int be_update_queues(struct be_adapter *adapter)
4707{
4708	struct net_device *netdev = adapter->netdev;
4709	int status;
4710
4711	if (netif_running(netdev)) {
4712		/* be_tx_timeout() must not run concurrently with this
4713		 * function, synchronize with an already-running dev_watchdog
4714		 */
4715		netif_tx_lock_bh(netdev);
4716		/* device cannot transmit now, avoid dev_watchdog timeouts */
4717		netif_carrier_off(netdev);
4718		netif_tx_unlock_bh(netdev);
4719
4720		be_close(netdev);
4721	}
4722
4723	be_cancel_worker(adapter);
4724
4725	/* If any vectors have been shared with RoCE we cannot re-program
4726	 * the MSIx table.
4727	 */
4728	if (!adapter->num_msix_roce_vec)
4729		be_msix_disable(adapter);
4730
4731	be_clear_queues(adapter);
4732	status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4733	if (status)
4734		return status;
4735
4736	if (!msix_enabled(adapter)) {
4737		status = be_msix_enable(adapter);
4738		if (status)
4739			return status;
4740	}
4741
4742	status = be_if_create(adapter);
4743	if (status)
4744		return status;
4745
4746	status = be_setup_queues(adapter);
4747	if (status)
4748		return status;
4749
4750	be_schedule_worker(adapter);
4751
4752	/* The IF was destroyed and re-created. We need to clear
4753	 * all promiscuous flags valid for the destroyed IF.
4754	 * Without this promisc mode is not restored during
4755	 * be_open() because the driver thinks that it is
4756	 * already enabled in HW.
4757	 */
4758	adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4759
4760	if (netif_running(netdev))
4761		status = be_open(netdev);
4762
4763	return status;
4764}
4765
4766static inline int fw_major_num(const char *fw_ver)
4767{
4768	int fw_major = 0, i;
4769
4770	i = sscanf(fw_ver, "%d.", &fw_major);
4771	if (i != 1)
4772		return 0;
4773
4774	return fw_major;
4775}
4776
4777/* If it is error recovery, FLR the PF
4778 * Else if any VFs are already enabled don't FLR the PF
4779 */
4780static bool be_reset_required(struct be_adapter *adapter)
4781{
4782	if (be_error_recovering(adapter))
4783		return true;
4784	else
4785		return pci_num_vf(adapter->pdev) == 0;
4786}
4787
4788/* Wait for the FW to be ready and perform the required initialization */
4789static int be_func_init(struct be_adapter *adapter)
4790{
4791	int status;
4792
4793	status = be_fw_wait_ready(adapter);
4794	if (status)
4795		return status;
4796
4797	/* FW is now ready; clear errors to allow cmds/doorbell */
4798	be_clear_error(adapter, BE_CLEAR_ALL);
4799
4800	if (be_reset_required(adapter)) {
4801		status = be_cmd_reset_function(adapter);
4802		if (status)
4803			return status;
4804
4805		/* Wait for interrupts to quiesce after an FLR */
4806		msleep(100);
4807	}
4808
4809	/* Tell FW we're ready to fire cmds */
4810	status = be_cmd_fw_init(adapter);
4811	if (status)
4812		return status;
4813
4814	/* Allow interrupts for other ULPs running on NIC function */
4815	be_intr_set(adapter, true);
4816
4817	return 0;
4818}
4819
4820static int be_setup(struct be_adapter *adapter)
4821{
4822	struct device *dev = &adapter->pdev->dev;
4823	int status;
4824
4825	status = be_func_init(adapter);
4826	if (status)
4827		return status;
4828
4829	be_setup_init(adapter);
4830
4831	if (!lancer_chip(adapter))
4832		be_cmd_req_native_mode(adapter);
4833
4834	/* invoke this cmd first to get pf_num and vf_num which are needed
4835	 * for issuing profile related cmds
4836	 */
4837	if (!BEx_chip(adapter)) {
4838		status = be_cmd_get_func_config(adapter, NULL);
4839		if (status)
4840			return status;
4841	}
4842
4843	status = be_get_config(adapter);
4844	if (status)
4845		goto err;
4846
4847	if (!BE2_chip(adapter) && be_physfn(adapter))
4848		be_alloc_sriov_res(adapter);
4849
4850	status = be_get_resources(adapter);
4851	if (status)
4852		goto err;
4853
4854	status = be_msix_enable(adapter);
4855	if (status)
4856		goto err;
4857
4858	/* will enable all the needed filter flags in be_open() */
4859	status = be_if_create(adapter);
4860	if (status)
4861		goto err;
4862
4863	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4864	rtnl_lock();
4865	status = be_setup_queues(adapter);
4866	rtnl_unlock();
4867	if (status)
4868		goto err;
4869
4870	be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4871
4872	status = be_mac_setup(adapter);
4873	if (status)
4874		goto err;
4875
4876	be_cmd_get_fw_ver(adapter);
4877	dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4878
4879	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4880		dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4881			adapter->fw_ver);
4882		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4883	}
4884
4885	status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4886					 adapter->rx_fc);
4887	if (status)
4888		be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4889					&adapter->rx_fc);
4890
4891	dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4892		 adapter->tx_fc, adapter->rx_fc);
4893
4894	if (be_physfn(adapter))
4895		be_cmd_set_logical_link_config(adapter,
4896					       IFLA_VF_LINK_STATE_AUTO, 0);
4897
4898	/* BE3 EVB echoes broadcast/multicast packets back to PF's vport
4899	 * confusing a linux bridge or OVS that it might be connected to.
4900	 * Set the EVB to PASSTHRU mode which effectively disables the EVB
4901	 * when SRIOV is not enabled.
4902	 */
4903	if (BE3_chip(adapter))
4904		be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4905				      PORT_FWD_TYPE_PASSTHRU, 0);
4906
4907	if (adapter->num_vfs)
4908		be_vf_setup(adapter);
4909
4910	status = be_cmd_get_phy_info(adapter);
4911	if (!status && be_pause_supported(adapter))
4912		adapter->phy.fc_autoneg = 1;
4913
4914	if (be_physfn(adapter) && !lancer_chip(adapter))
4915		be_cmd_set_features(adapter);
4916
4917	be_schedule_worker(adapter);
4918	adapter->flags |= BE_FLAGS_SETUP_DONE;
4919	return 0;
4920err:
4921	be_clear(adapter);
4922	return status;
4923}
4924
4925#ifdef CONFIG_NET_POLL_CONTROLLER
4926static void be_netpoll(struct net_device *netdev)
4927{
4928	struct be_adapter *adapter = netdev_priv(netdev);
4929	struct be_eq_obj *eqo;
4930	int i;
4931
4932	for_all_evt_queues(adapter, eqo, i) {
4933		be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4934		napi_schedule(&eqo->napi);
4935	}
4936}
4937#endif
4938
4939int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4940{
4941	const struct firmware *fw;
4942	int status;
4943
4944	if (!netif_running(adapter->netdev)) {
4945		dev_err(&adapter->pdev->dev,
4946			"Firmware load not allowed (interface is down)\n");
4947		return -ENETDOWN;
4948	}
4949
4950	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4951	if (status)
4952		goto fw_exit;
4953
4954	dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4955
4956	if (lancer_chip(adapter))
4957		status = lancer_fw_download(adapter, fw);
4958	else
4959		status = be_fw_download(adapter, fw);
4960
4961	if (!status)
4962		be_cmd_get_fw_ver(adapter);
4963
4964fw_exit:
4965	release_firmware(fw);
4966	return status;
4967}
4968
4969static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4970				 u16 flags, struct netlink_ext_ack *extack)
4971{
4972	struct be_adapter *adapter = netdev_priv(dev);
4973	struct nlattr *attr, *br_spec;
4974	int rem;
4975	int status = 0;
4976	u16 mode = 0;
4977
4978	if (!sriov_enabled(adapter))
4979		return -EOPNOTSUPP;
4980
4981	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4982	if (!br_spec)
4983		return -EINVAL;
4984
4985	nla_for_each_nested(attr, br_spec, rem) {
4986		if (nla_type(attr) != IFLA_BRIDGE_MODE)
4987			continue;
4988
 
 
 
4989		mode = nla_get_u16(attr);
4990		if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4991			return -EOPNOTSUPP;
4992
4993		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4994			return -EINVAL;
4995
4996		status = be_cmd_set_hsw_config(adapter, 0, 0,
4997					       adapter->if_handle,
4998					       mode == BRIDGE_MODE_VEPA ?
4999					       PORT_FWD_TYPE_VEPA :
5000					       PORT_FWD_TYPE_VEB, 0);
5001		if (status)
5002			goto err;
5003
5004		dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
5005			 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5006
5007		return status;
5008	}
5009err:
5010	dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5011		mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5012
5013	return status;
5014}
5015
5016static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5017				 struct net_device *dev, u32 filter_mask,
5018				 int nlflags)
5019{
5020	struct be_adapter *adapter = netdev_priv(dev);
5021	int status = 0;
5022	u8 hsw_mode;
5023
5024	/* BE and Lancer chips support VEB mode only */
5025	if (BEx_chip(adapter) || lancer_chip(adapter)) {
5026		/* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5027		if (!pci_sriov_get_totalvfs(adapter->pdev))
5028			return 0;
5029		hsw_mode = PORT_FWD_TYPE_VEB;
5030	} else {
5031		status = be_cmd_get_hsw_config(adapter, NULL, 0,
5032					       adapter->if_handle, &hsw_mode,
5033					       NULL);
5034		if (status)
5035			return 0;
5036
5037		if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5038			return 0;
5039	}
5040
5041	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5042				       hsw_mode == PORT_FWD_TYPE_VEPA ?
5043				       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5044				       0, 0, nlflags, filter_mask, NULL);
5045}
5046
5047static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5048					 void (*func)(struct work_struct *))
5049{
5050	struct be_cmd_work *work;
5051
5052	work = kzalloc(sizeof(*work), GFP_ATOMIC);
5053	if (!work) {
5054		dev_err(&adapter->pdev->dev,
5055			"be_work memory allocation failed\n");
5056		return NULL;
5057	}
5058
5059	INIT_WORK(&work->work, func);
5060	work->adapter = adapter;
5061	return work;
5062}
5063
5064static netdev_features_t be_features_check(struct sk_buff *skb,
5065					   struct net_device *dev,
5066					   netdev_features_t features)
5067{
5068	struct be_adapter *adapter = netdev_priv(dev);
5069	u8 l4_hdr = 0;
5070
5071	if (skb_is_gso(skb)) {
5072		/* IPv6 TSO requests with extension hdrs are a problem
5073		 * to Lancer and BE3 HW. Disable TSO6 feature.
5074		 */
5075		if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5076			features &= ~NETIF_F_TSO6;
5077
5078		/* Lancer cannot handle the packet with MSS less than 256.
5079		 * Also it can't handle a TSO packet with a single segment
5080		 * Disable the GSO support in such cases
5081		 */
5082		if (lancer_chip(adapter) &&
5083		    (skb_shinfo(skb)->gso_size < 256 ||
5084		     skb_shinfo(skb)->gso_segs == 1))
5085			features &= ~NETIF_F_GSO_MASK;
5086	}
5087
5088	/* The code below restricts offload features for some tunneled and
5089	 * Q-in-Q packets.
5090	 * Offload features for normal (non tunnel) packets are unchanged.
5091	 */
5092	features = vlan_features_check(skb, features);
5093	if (!skb->encapsulation ||
5094	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5095		return features;
5096
5097	/* It's an encapsulated packet and VxLAN offloads are enabled. We
5098	 * should disable tunnel offload features if it's not a VxLAN packet,
5099	 * as tunnel offloads have been enabled only for VxLAN. This is done to
5100	 * allow other tunneled traffic like GRE work fine while VxLAN
5101	 * offloads are configured in Skyhawk-R.
5102	 */
5103	switch (vlan_get_protocol(skb)) {
5104	case htons(ETH_P_IP):
5105		l4_hdr = ip_hdr(skb)->protocol;
5106		break;
5107	case htons(ETH_P_IPV6):
5108		l4_hdr = ipv6_hdr(skb)->nexthdr;
5109		break;
5110	default:
5111		return features;
5112	}
5113
5114	if (l4_hdr != IPPROTO_UDP ||
5115	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5116	    skb->inner_protocol != htons(ETH_P_TEB) ||
5117	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5118		sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5119	    !adapter->vxlan_port ||
5120	    udp_hdr(skb)->dest != adapter->vxlan_port)
5121		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5122
5123	return features;
5124}
5125
5126static int be_get_phys_port_id(struct net_device *dev,
5127			       struct netdev_phys_item_id *ppid)
5128{
5129	int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5130	struct be_adapter *adapter = netdev_priv(dev);
5131	u8 *id;
5132
5133	if (MAX_PHYS_ITEM_ID_LEN < id_len)
5134		return -ENOSPC;
5135
5136	ppid->id[0] = adapter->hba_port_num + 1;
5137	id = &ppid->id[1];
5138	for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5139	     i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5140		memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5141
5142	ppid->id_len = id_len;
5143
5144	return 0;
5145}
5146
5147static void be_set_rx_mode(struct net_device *dev)
5148{
5149	struct be_adapter *adapter = netdev_priv(dev);
5150	struct be_cmd_work *work;
5151
5152	work = be_alloc_work(adapter, be_work_set_rx_mode);
5153	if (work)
5154		queue_work(be_wq, &work->work);
5155}
5156
5157static const struct net_device_ops be_netdev_ops = {
5158	.ndo_open		= be_open,
5159	.ndo_stop		= be_close,
5160	.ndo_start_xmit		= be_xmit,
5161	.ndo_set_rx_mode	= be_set_rx_mode,
5162	.ndo_set_mac_address	= be_mac_addr_set,
5163	.ndo_get_stats64	= be_get_stats64,
5164	.ndo_validate_addr	= eth_validate_addr,
5165	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
5166	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
5167	.ndo_set_vf_mac		= be_set_vf_mac,
5168	.ndo_set_vf_vlan	= be_set_vf_vlan,
5169	.ndo_set_vf_rate	= be_set_vf_tx_rate,
5170	.ndo_get_vf_config	= be_get_vf_config,
5171	.ndo_set_vf_link_state  = be_set_vf_link_state,
5172	.ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5173	.ndo_tx_timeout		= be_tx_timeout,
5174#ifdef CONFIG_NET_POLL_CONTROLLER
5175	.ndo_poll_controller	= be_netpoll,
5176#endif
5177	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
5178	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
 
 
5179	.ndo_features_check	= be_features_check,
5180	.ndo_get_phys_port_id   = be_get_phys_port_id,
5181};
5182
5183static void be_netdev_init(struct net_device *netdev)
5184{
5185	struct be_adapter *adapter = netdev_priv(netdev);
5186
5187	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5188		NETIF_F_GSO_UDP_TUNNEL |
5189		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5190		NETIF_F_HW_VLAN_CTAG_TX;
5191	if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5192		netdev->hw_features |= NETIF_F_RXHASH;
5193
5194	netdev->features |= netdev->hw_features |
5195		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER |
5196		NETIF_F_HIGHDMA;
5197
5198	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5199		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5200
5201	netdev->priv_flags |= IFF_UNICAST_FLT;
5202
5203	netdev->flags |= IFF_MULTICAST;
5204
5205	netif_set_tso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5206
5207	netdev->netdev_ops = &be_netdev_ops;
5208
5209	netdev->ethtool_ops = &be_ethtool_ops;
5210
5211	if (!lancer_chip(adapter) && !BEx_chip(adapter) && !be_is_mc(adapter))
5212		netdev->udp_tunnel_nic_info = &be_udp_tunnels;
5213
5214	/* MTU range: 256 - 9000 */
5215	netdev->min_mtu = BE_MIN_MTU;
5216	netdev->max_mtu = BE_MAX_MTU;
5217}
5218
5219static void be_cleanup(struct be_adapter *adapter)
5220{
5221	struct net_device *netdev = adapter->netdev;
5222
5223	rtnl_lock();
5224	netif_device_detach(netdev);
5225	if (netif_running(netdev))
5226		be_close(netdev);
5227	rtnl_unlock();
5228
5229	be_clear(adapter);
5230}
5231
5232static int be_resume(struct be_adapter *adapter)
5233{
5234	struct net_device *netdev = adapter->netdev;
5235	int status;
5236
5237	status = be_setup(adapter);
5238	if (status)
5239		return status;
5240
5241	rtnl_lock();
5242	if (netif_running(netdev))
5243		status = be_open(netdev);
5244	rtnl_unlock();
5245
5246	if (status)
5247		return status;
5248
5249	netif_device_attach(netdev);
5250
5251	return 0;
5252}
5253
5254static void be_soft_reset(struct be_adapter *adapter)
5255{
5256	u32 val;
5257
5258	dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5259	val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5260	val |= SLIPORT_SOFTRESET_SR_MASK;
5261	iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5262}
5263
5264static bool be_err_is_recoverable(struct be_adapter *adapter)
5265{
5266	struct be_error_recovery *err_rec = &adapter->error_recovery;
5267	unsigned long initial_idle_time =
5268		msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5269	unsigned long recovery_interval =
5270		msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5271	u16 ue_err_code;
5272	u32 val;
5273
5274	val = be_POST_stage_get(adapter);
5275	if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5276		return false;
5277	ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5278	if (ue_err_code == 0)
5279		return false;
5280
5281	dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5282		ue_err_code);
5283
5284	if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5285		dev_err(&adapter->pdev->dev,
5286			"Cannot recover within %lu sec from driver load\n",
5287			jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5288		return false;
5289	}
5290
5291	if (err_rec->last_recovery_time && time_before_eq(
5292		jiffies - err_rec->last_recovery_time, recovery_interval)) {
5293		dev_err(&adapter->pdev->dev,
5294			"Cannot recover within %lu sec from last recovery\n",
5295			jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5296		return false;
5297	}
5298
5299	if (ue_err_code == err_rec->last_err_code) {
5300		dev_err(&adapter->pdev->dev,
5301			"Cannot recover from a consecutive TPE error\n");
5302		return false;
5303	}
5304
5305	err_rec->last_recovery_time = jiffies;
5306	err_rec->last_err_code = ue_err_code;
5307	return true;
5308}
5309
5310static int be_tpe_recover(struct be_adapter *adapter)
5311{
5312	struct be_error_recovery *err_rec = &adapter->error_recovery;
5313	int status = -EAGAIN;
5314	u32 val;
5315
5316	switch (err_rec->recovery_state) {
5317	case ERR_RECOVERY_ST_NONE:
5318		err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5319		err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5320		break;
5321
5322	case ERR_RECOVERY_ST_DETECT:
5323		val = be_POST_stage_get(adapter);
5324		if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5325		    POST_STAGE_RECOVERABLE_ERR) {
5326			dev_err(&adapter->pdev->dev,
5327				"Unrecoverable HW error detected: 0x%x\n", val);
5328			status = -EINVAL;
5329			err_rec->resched_delay = 0;
5330			break;
5331		}
5332
5333		dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5334
5335		/* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5336		 * milliseconds before it checks for final error status in
5337		 * SLIPORT_SEMAPHORE to determine if recovery criteria is met.
5338		 * If it does, then PF0 initiates a Soft Reset.
5339		 */
5340		if (adapter->pf_num == 0) {
5341			err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5342			err_rec->resched_delay = err_rec->ue_to_reset_time -
5343					ERR_RECOVERY_UE_DETECT_DURATION;
5344			break;
5345		}
5346
5347		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5348		err_rec->resched_delay = err_rec->ue_to_poll_time -
5349					ERR_RECOVERY_UE_DETECT_DURATION;
5350		break;
5351
5352	case ERR_RECOVERY_ST_RESET:
5353		if (!be_err_is_recoverable(adapter)) {
5354			dev_err(&adapter->pdev->dev,
5355				"Failed to meet recovery criteria\n");
5356			status = -EIO;
5357			err_rec->resched_delay = 0;
5358			break;
5359		}
5360		be_soft_reset(adapter);
5361		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5362		err_rec->resched_delay = err_rec->ue_to_poll_time -
5363					err_rec->ue_to_reset_time;
5364		break;
5365
5366	case ERR_RECOVERY_ST_PRE_POLL:
5367		err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5368		err_rec->resched_delay = 0;
5369		status = 0;			/* done */
5370		break;
5371
5372	default:
5373		status = -EINVAL;
5374		err_rec->resched_delay = 0;
5375		break;
5376	}
5377
5378	return status;
5379}
5380
5381static int be_err_recover(struct be_adapter *adapter)
5382{
5383	int status;
5384
5385	if (!lancer_chip(adapter)) {
5386		if (!adapter->error_recovery.recovery_supported ||
5387		    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5388			return -EIO;
5389		status = be_tpe_recover(adapter);
5390		if (status)
5391			goto err;
5392	}
5393
5394	/* Wait for adapter to reach quiescent state before
5395	 * destroying queues
5396	 */
5397	status = be_fw_wait_ready(adapter);
5398	if (status)
5399		goto err;
5400
5401	adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5402
5403	be_cleanup(adapter);
5404
5405	status = be_resume(adapter);
5406	if (status)
5407		goto err;
5408
5409	adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5410
5411err:
5412	return status;
5413}
5414
5415static void be_err_detection_task(struct work_struct *work)
5416{
5417	struct be_error_recovery *err_rec =
5418			container_of(work, struct be_error_recovery,
5419				     err_detection_work.work);
5420	struct be_adapter *adapter =
5421			container_of(err_rec, struct be_adapter,
5422				     error_recovery);
5423	u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5424	struct device *dev = &adapter->pdev->dev;
5425	int recovery_status;
5426
5427	be_detect_error(adapter);
5428	if (!be_check_error(adapter, BE_ERROR_HW))
5429		goto reschedule_task;
5430
5431	recovery_status = be_err_recover(adapter);
5432	if (!recovery_status) {
5433		err_rec->recovery_retries = 0;
5434		err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5435		dev_info(dev, "Adapter recovery successful\n");
5436		goto reschedule_task;
5437	} else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5438		/* BEx/SH recovery state machine */
5439		if (adapter->pf_num == 0 &&
5440		    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5441			dev_err(&adapter->pdev->dev,
5442				"Adapter recovery in progress\n");
5443		resched_delay = err_rec->resched_delay;
5444		goto reschedule_task;
5445	} else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5446		/* For VFs, check if PF have allocated resources
5447		 * every second.
5448		 */
5449		dev_err(dev, "Re-trying adapter recovery\n");
5450		goto reschedule_task;
5451	} else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5452		   ERR_RECOVERY_MAX_RETRY_COUNT) {
5453		/* In case of another error during recovery, it takes 30 sec
5454		 * for adapter to come out of error. Retry error recovery after
5455		 * this time interval.
5456		 */
5457		dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5458		resched_delay = ERR_RECOVERY_RETRY_DELAY;
5459		goto reschedule_task;
5460	} else {
5461		dev_err(dev, "Adapter recovery failed\n");
5462		dev_err(dev, "Please reboot server to recover\n");
5463	}
5464
5465	return;
5466
5467reschedule_task:
5468	be_schedule_err_detection(adapter, resched_delay);
5469}
5470
5471static void be_log_sfp_info(struct be_adapter *adapter)
5472{
5473	int status;
5474
5475	status = be_cmd_query_sfp_info(adapter);
5476	if (!status) {
5477		dev_err(&adapter->pdev->dev,
5478			"Port %c: %s Vendor: %s part no: %s",
5479			adapter->port_name,
5480			be_misconfig_evt_port_state[adapter->phy_state],
5481			adapter->phy.vendor_name,
5482			adapter->phy.vendor_pn);
5483	}
5484	adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5485}
5486
5487static void be_worker(struct work_struct *work)
5488{
5489	struct be_adapter *adapter =
5490		container_of(work, struct be_adapter, work.work);
5491	struct be_rx_obj *rxo;
5492	int i;
5493
5494	if (be_physfn(adapter) &&
5495	    MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5496		be_cmd_get_die_temperature(adapter);
5497
5498	/* when interrupts are not yet enabled, just reap any pending
5499	 * mcc completions
5500	 */
5501	if (!netif_running(adapter->netdev)) {
5502		local_bh_disable();
5503		be_process_mcc(adapter);
5504		local_bh_enable();
5505		goto reschedule;
5506	}
5507
5508	if (!adapter->stats_cmd_sent) {
5509		if (lancer_chip(adapter))
5510			lancer_cmd_get_pport_stats(adapter,
5511						   &adapter->stats_cmd);
5512		else
5513			be_cmd_get_stats(adapter, &adapter->stats_cmd);
5514	}
5515
5516	for_all_rx_queues(adapter, rxo, i) {
5517		/* Replenish RX-queues starved due to memory
5518		 * allocation failures.
5519		 */
5520		if (rxo->rx_post_starved)
5521			be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5522	}
5523
5524	/* EQ-delay update for Skyhawk is done while notifying EQ */
5525	if (!skyhawk_chip(adapter))
5526		be_eqd_update(adapter, false);
5527
5528	if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5529		be_log_sfp_info(adapter);
5530
5531reschedule:
5532	adapter->work_counter++;
5533	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5534}
5535
5536static void be_unmap_pci_bars(struct be_adapter *adapter)
5537{
5538	if (adapter->csr)
5539		pci_iounmap(adapter->pdev, adapter->csr);
5540	if (adapter->db)
5541		pci_iounmap(adapter->pdev, adapter->db);
5542	if (adapter->pcicfg && adapter->pcicfg_mapped)
5543		pci_iounmap(adapter->pdev, adapter->pcicfg);
5544}
5545
5546static int db_bar(struct be_adapter *adapter)
5547{
5548	if (lancer_chip(adapter) || be_virtfn(adapter))
5549		return 0;
5550	else
5551		return 4;
5552}
5553
5554static int be_roce_map_pci_bars(struct be_adapter *adapter)
5555{
5556	if (skyhawk_chip(adapter)) {
5557		adapter->roce_db.size = 4096;
5558		adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5559							      db_bar(adapter));
5560		adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5561							       db_bar(adapter));
5562	}
5563	return 0;
5564}
5565
5566static int be_map_pci_bars(struct be_adapter *adapter)
5567{
5568	struct pci_dev *pdev = adapter->pdev;
5569	u8 __iomem *addr;
5570	u32 sli_intf;
5571
5572	pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5573	adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5574				SLI_INTF_FAMILY_SHIFT;
5575	adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5576
5577	if (BEx_chip(adapter) && be_physfn(adapter)) {
5578		adapter->csr = pci_iomap(pdev, 2, 0);
5579		if (!adapter->csr)
5580			return -ENOMEM;
5581	}
5582
5583	addr = pci_iomap(pdev, db_bar(adapter), 0);
5584	if (!addr)
5585		goto pci_map_err;
5586	adapter->db = addr;
5587
5588	if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5589		if (be_physfn(adapter)) {
5590			/* PCICFG is the 2nd BAR in BE2 */
5591			addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5592			if (!addr)
5593				goto pci_map_err;
5594			adapter->pcicfg = addr;
5595			adapter->pcicfg_mapped = true;
5596		} else {
5597			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5598			adapter->pcicfg_mapped = false;
5599		}
5600	}
5601
5602	be_roce_map_pci_bars(adapter);
5603	return 0;
5604
5605pci_map_err:
5606	dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5607	be_unmap_pci_bars(adapter);
5608	return -ENOMEM;
5609}
5610
5611static void be_drv_cleanup(struct be_adapter *adapter)
5612{
5613	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5614	struct device *dev = &adapter->pdev->dev;
5615
5616	if (mem->va)
5617		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5618
5619	mem = &adapter->rx_filter;
5620	if (mem->va)
5621		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5622
5623	mem = &adapter->stats_cmd;
5624	if (mem->va)
5625		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5626}
5627
5628/* Allocate and initialize various fields in be_adapter struct */
5629static int be_drv_init(struct be_adapter *adapter)
5630{
5631	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5632	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5633	struct be_dma_mem *rx_filter = &adapter->rx_filter;
5634	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5635	struct device *dev = &adapter->pdev->dev;
5636	int status = 0;
5637
5638	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5639	mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
5640						&mbox_mem_alloc->dma,
5641						GFP_KERNEL);
5642	if (!mbox_mem_alloc->va)
5643		return -ENOMEM;
5644
5645	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5646	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5647	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5648
5649	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5650	rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
5651					   &rx_filter->dma, GFP_KERNEL);
5652	if (!rx_filter->va) {
5653		status = -ENOMEM;
5654		goto free_mbox;
5655	}
5656
5657	if (lancer_chip(adapter))
5658		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5659	else if (BE2_chip(adapter))
5660		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5661	else if (BE3_chip(adapter))
5662		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5663	else
5664		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5665	stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
5666					   &stats_cmd->dma, GFP_KERNEL);
5667	if (!stats_cmd->va) {
5668		status = -ENOMEM;
5669		goto free_rx_filter;
5670	}
5671
5672	mutex_init(&adapter->mbox_lock);
5673	mutex_init(&adapter->mcc_lock);
5674	mutex_init(&adapter->rx_filter_lock);
5675	spin_lock_init(&adapter->mcc_cq_lock);
5676	init_completion(&adapter->et_cmd_compl);
5677
5678	pci_save_state(adapter->pdev);
5679
5680	INIT_DELAYED_WORK(&adapter->work, be_worker);
5681
5682	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5683	adapter->error_recovery.resched_delay = 0;
5684	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5685			  be_err_detection_task);
5686
5687	adapter->rx_fc = true;
5688	adapter->tx_fc = true;
5689
5690	/* Must be a power of 2 or else MODULO will BUG_ON */
5691	adapter->be_get_temp_freq = 64;
5692
5693	return 0;
5694
5695free_rx_filter:
5696	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5697free_mbox:
5698	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5699			  mbox_mem_alloc->dma);
5700	return status;
5701}
5702
5703static void be_remove(struct pci_dev *pdev)
5704{
5705	struct be_adapter *adapter = pci_get_drvdata(pdev);
5706
5707	if (!adapter)
5708		return;
5709
5710	be_roce_dev_remove(adapter);
5711	be_intr_set(adapter, false);
5712
5713	be_cancel_err_detection(adapter);
5714
5715	unregister_netdev(adapter->netdev);
5716
5717	be_clear(adapter);
5718
5719	if (!pci_vfs_assigned(adapter->pdev))
5720		be_cmd_reset_function(adapter);
5721
5722	/* tell fw we're done with firing cmds */
5723	be_cmd_fw_clean(adapter);
5724
5725	be_unmap_pci_bars(adapter);
5726	be_drv_cleanup(adapter);
5727
 
 
5728	pci_release_regions(pdev);
5729	pci_disable_device(pdev);
5730
5731	free_netdev(adapter->netdev);
5732}
5733
5734static ssize_t be_hwmon_show_temp(struct device *dev,
5735				  struct device_attribute *dev_attr,
5736				  char *buf)
5737{
5738	struct be_adapter *adapter = dev_get_drvdata(dev);
5739
5740	/* Unit: millidegree Celsius */
5741	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5742		return -EIO;
5743	else
5744		return sprintf(buf, "%u\n",
5745			       adapter->hwmon_info.be_on_die_temp * 1000);
5746}
5747
5748static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5749			  be_hwmon_show_temp, NULL, 1);
5750
5751static struct attribute *be_hwmon_attrs[] = {
5752	&sensor_dev_attr_temp1_input.dev_attr.attr,
5753	NULL
5754};
5755
5756ATTRIBUTE_GROUPS(be_hwmon);
5757
5758static char *mc_name(struct be_adapter *adapter)
5759{
5760	char *str = "";	/* default */
5761
5762	switch (adapter->mc_type) {
5763	case UMC:
5764		str = "UMC";
5765		break;
5766	case FLEX10:
5767		str = "FLEX10";
5768		break;
5769	case vNIC1:
5770		str = "vNIC-1";
5771		break;
5772	case nPAR:
5773		str = "nPAR";
5774		break;
5775	case UFP:
5776		str = "UFP";
5777		break;
5778	case vNIC2:
5779		str = "vNIC-2";
5780		break;
5781	default:
5782		str = "";
5783	}
5784
5785	return str;
5786}
5787
5788static inline char *func_name(struct be_adapter *adapter)
5789{
5790	return be_physfn(adapter) ? "PF" : "VF";
5791}
5792
5793static inline char *nic_name(struct pci_dev *pdev)
5794{
5795	switch (pdev->device) {
5796	case OC_DEVICE_ID1:
5797		return OC_NAME;
5798	case OC_DEVICE_ID2:
5799		return OC_NAME_BE;
5800	case OC_DEVICE_ID3:
5801	case OC_DEVICE_ID4:
5802		return OC_NAME_LANCER;
5803	case BE_DEVICE_ID2:
5804		return BE3_NAME;
5805	case OC_DEVICE_ID5:
5806	case OC_DEVICE_ID6:
5807		return OC_NAME_SH;
5808	default:
5809		return BE_NAME;
5810	}
5811}
5812
5813static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5814{
5815	struct be_adapter *adapter;
5816	struct net_device *netdev;
5817	int status = 0;
5818
5819	status = pci_enable_device(pdev);
5820	if (status)
5821		goto do_none;
5822
5823	status = pci_request_regions(pdev, DRV_NAME);
5824	if (status)
5825		goto disable_dev;
5826	pci_set_master(pdev);
5827
5828	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5829	if (!netdev) {
5830		status = -ENOMEM;
5831		goto rel_reg;
5832	}
5833	adapter = netdev_priv(netdev);
5834	adapter->pdev = pdev;
5835	pci_set_drvdata(pdev, adapter);
5836	adapter->netdev = netdev;
5837	SET_NETDEV_DEV(netdev, &pdev->dev);
5838
5839	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5840	if (status) {
5841		dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5842		goto free_netdev;
 
 
 
 
 
5843	}
 
 
 
 
5844
5845	status = be_map_pci_bars(adapter);
5846	if (status)
5847		goto free_netdev;
5848
5849	status = be_drv_init(adapter);
5850	if (status)
5851		goto unmap_bars;
5852
5853	status = be_setup(adapter);
5854	if (status)
5855		goto drv_cleanup;
5856
5857	be_netdev_init(netdev);
5858	status = register_netdev(netdev);
5859	if (status != 0)
5860		goto unsetup;
5861
5862	be_roce_dev_add(adapter);
5863
5864	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5865	adapter->error_recovery.probe_time = jiffies;
5866
5867	/* On Die temperature not supported for VF. */
5868	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5869		adapter->hwmon_info.hwmon_dev =
5870			devm_hwmon_device_register_with_groups(&pdev->dev,
5871							       DRV_NAME,
5872							       adapter,
5873							       be_hwmon_groups);
5874		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5875	}
5876
5877	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5878		 func_name(adapter), mc_name(adapter), adapter->port_name);
5879
5880	return 0;
5881
5882unsetup:
5883	be_clear(adapter);
5884drv_cleanup:
5885	be_drv_cleanup(adapter);
5886unmap_bars:
5887	be_unmap_pci_bars(adapter);
5888free_netdev:
5889	free_netdev(netdev);
5890rel_reg:
5891	pci_release_regions(pdev);
5892disable_dev:
5893	pci_disable_device(pdev);
5894do_none:
5895	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5896	return status;
5897}
5898
5899static int __maybe_unused be_suspend(struct device *dev_d)
5900{
5901	struct be_adapter *adapter = dev_get_drvdata(dev_d);
5902
5903	be_intr_set(adapter, false);
5904	be_cancel_err_detection(adapter);
5905
5906	be_cleanup(adapter);
5907
5908	return 0;
5909}
5910
5911static int __maybe_unused be_pci_resume(struct device *dev_d)
5912{
5913	struct be_adapter *adapter = dev_get_drvdata(dev_d);
5914	int status = 0;
5915
5916	status = be_resume(adapter);
5917	if (status)
5918		return status;
5919
5920	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5921
5922	return 0;
5923}
5924
5925/*
5926 * An FLR will stop BE from DMAing any data.
5927 */
5928static void be_shutdown(struct pci_dev *pdev)
5929{
5930	struct be_adapter *adapter = pci_get_drvdata(pdev);
5931
5932	if (!adapter)
5933		return;
5934
5935	be_roce_dev_shutdown(adapter);
5936	cancel_delayed_work_sync(&adapter->work);
5937	be_cancel_err_detection(adapter);
5938
5939	netif_device_detach(adapter->netdev);
5940
5941	be_cmd_reset_function(adapter);
5942
5943	pci_disable_device(pdev);
5944}
5945
5946static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5947					    pci_channel_state_t state)
5948{
5949	struct be_adapter *adapter = pci_get_drvdata(pdev);
5950
5951	dev_err(&adapter->pdev->dev, "EEH error detected\n");
5952
5953	be_roce_dev_remove(adapter);
5954
5955	if (!be_check_error(adapter, BE_ERROR_EEH)) {
5956		be_set_error(adapter, BE_ERROR_EEH);
5957
5958		be_cancel_err_detection(adapter);
5959
5960		be_cleanup(adapter);
5961	}
5962
5963	if (state == pci_channel_io_perm_failure)
5964		return PCI_ERS_RESULT_DISCONNECT;
5965
5966	pci_disable_device(pdev);
5967
5968	/* The error could cause the FW to trigger a flash debug dump.
5969	 * Resetting the card while flash dump is in progress
5970	 * can cause it not to recover; wait for it to finish.
5971	 * Wait only for first function as it is needed only once per
5972	 * adapter.
5973	 */
5974	if (pdev->devfn == 0)
5975		ssleep(30);
5976
5977	return PCI_ERS_RESULT_NEED_RESET;
5978}
5979
5980static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5981{
5982	struct be_adapter *adapter = pci_get_drvdata(pdev);
5983	int status;
5984
5985	dev_info(&adapter->pdev->dev, "EEH reset\n");
5986
5987	status = pci_enable_device(pdev);
5988	if (status)
5989		return PCI_ERS_RESULT_DISCONNECT;
5990
5991	pci_set_master(pdev);
5992	pci_restore_state(pdev);
5993
5994	/* Check if card is ok and fw is ready */
5995	dev_info(&adapter->pdev->dev,
5996		 "Waiting for FW to be ready after EEH reset\n");
5997	status = be_fw_wait_ready(adapter);
5998	if (status)
5999		return PCI_ERS_RESULT_DISCONNECT;
6000
6001	be_clear_error(adapter, BE_CLEAR_ALL);
6002	return PCI_ERS_RESULT_RECOVERED;
6003}
6004
6005static void be_eeh_resume(struct pci_dev *pdev)
6006{
6007	int status = 0;
6008	struct be_adapter *adapter = pci_get_drvdata(pdev);
6009
6010	dev_info(&adapter->pdev->dev, "EEH resume\n");
6011
6012	pci_save_state(pdev);
6013
6014	status = be_resume(adapter);
6015	if (status)
6016		goto err;
6017
6018	be_roce_dev_add(adapter);
6019
6020	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6021	return;
6022err:
6023	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6024}
6025
6026static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6027{
6028	struct be_adapter *adapter = pci_get_drvdata(pdev);
6029	struct be_resources vft_res = {0};
6030	int status;
6031
6032	if (!num_vfs)
6033		be_vf_clear(adapter);
6034
6035	adapter->num_vfs = num_vfs;
6036
6037	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6038		dev_warn(&pdev->dev,
6039			 "Cannot disable VFs while they are assigned\n");
6040		return -EBUSY;
6041	}
6042
6043	/* When the HW is in SRIOV capable configuration, the PF-pool resources
6044	 * are equally distributed across the max-number of VFs. The user may
6045	 * request only a subset of the max-vfs to be enabled.
6046	 * Based on num_vfs, redistribute the resources across num_vfs so that
6047	 * each VF will have access to more number of resources.
6048	 * This facility is not available in BE3 FW.
6049	 * Also, this is done by FW in Lancer chip.
6050	 */
6051	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6052		be_calculate_vf_res(adapter, adapter->num_vfs,
6053				    &vft_res);
6054		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6055						 adapter->num_vfs, &vft_res);
6056		if (status)
6057			dev_err(&pdev->dev,
6058				"Failed to optimize SR-IOV resources\n");
6059	}
6060
6061	status = be_get_resources(adapter);
6062	if (status)
6063		return be_cmd_status(status);
6064
6065	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6066	rtnl_lock();
6067	status = be_update_queues(adapter);
6068	rtnl_unlock();
6069	if (status)
6070		return be_cmd_status(status);
6071
6072	if (adapter->num_vfs)
6073		status = be_vf_setup(adapter);
6074
6075	if (!status)
6076		return adapter->num_vfs;
6077
6078	return 0;
6079}
6080
6081static const struct pci_error_handlers be_eeh_handlers = {
6082	.error_detected = be_eeh_err_detected,
6083	.slot_reset = be_eeh_reset,
6084	.resume = be_eeh_resume,
6085};
6086
6087static SIMPLE_DEV_PM_OPS(be_pci_pm_ops, be_suspend, be_pci_resume);
6088
6089static struct pci_driver be_driver = {
6090	.name = DRV_NAME,
6091	.id_table = be_dev_ids,
6092	.probe = be_probe,
6093	.remove = be_remove,
6094	.driver.pm = &be_pci_pm_ops,
6095	.shutdown = be_shutdown,
6096	.sriov_configure = be_pci_sriov_configure,
6097	.err_handler = &be_eeh_handlers
6098};
6099
6100static int __init be_init_module(void)
6101{
6102	int status;
6103
6104	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6105	    rx_frag_size != 2048) {
6106		printk(KERN_WARNING DRV_NAME
6107			" : Module param rx_frag_size must be 2048/4096/8192."
6108			" Using 2048\n");
6109		rx_frag_size = 2048;
6110	}
6111
6112	if (num_vfs > 0) {
6113		pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
6114		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6115	}
6116
6117	be_wq = create_singlethread_workqueue("be_wq");
6118	if (!be_wq) {
6119		pr_warn(DRV_NAME "workqueue creation failed\n");
6120		return -1;
6121	}
6122
6123	be_err_recovery_workq =
6124		create_singlethread_workqueue("be_err_recover");
6125	if (!be_err_recovery_workq)
6126		pr_warn(DRV_NAME "Could not create error recovery workqueue\n");
6127
6128	status = pci_register_driver(&be_driver);
6129	if (status) {
6130		destroy_workqueue(be_wq);
6131		be_destroy_err_recovery_workq();
6132	}
6133	return status;
6134}
6135module_init(be_init_module);
6136
6137static void __exit be_exit_module(void)
6138{
6139	pci_unregister_driver(&be_driver);
6140
6141	be_destroy_err_recovery_workq();
6142
6143	if (be_wq)
6144		destroy_workqueue(be_wq);
6145}
6146module_exit(be_exit_module);
v5.9
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2005 - 2016 Broadcom
   4 * All rights reserved.
   5 *
   6 * Contact Information:
   7 * linux-drivers@emulex.com
   8 *
   9 * Emulex
  10 * 3333 Susan Street
  11 * Costa Mesa, CA 92626
  12 */
  13
  14#include <linux/prefetch.h>
  15#include <linux/module.h>
  16#include "be.h"
  17#include "be_cmds.h"
  18#include <asm/div64.h>
  19#include <linux/aer.h>
  20#include <linux/if_bridge.h>
  21#include <net/busy_poll.h>
  22#include <net/vxlan.h>
  23
  24MODULE_DESCRIPTION(DRV_DESC);
  25MODULE_AUTHOR("Emulex Corporation");
  26MODULE_LICENSE("GPL");
  27
  28/* num_vfs module param is obsolete.
  29 * Use sysfs method to enable/disable VFs.
  30 */
  31static unsigned int num_vfs;
  32module_param(num_vfs, uint, 0444);
  33MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
  34
  35static ushort rx_frag_size = 2048;
  36module_param(rx_frag_size, ushort, 0444);
  37MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
  38
  39/* Per-module error detection/recovery workq shared across all functions.
  40 * Each function schedules its own work request on this shared workq.
  41 */
  42static struct workqueue_struct *be_err_recovery_workq;
  43
  44static const struct pci_device_id be_dev_ids[] = {
  45#ifdef CONFIG_BE2NET_BE2
  46	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
  47	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
  48#endif /* CONFIG_BE2NET_BE2 */
  49#ifdef CONFIG_BE2NET_BE3
  50	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
  51	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
  52#endif /* CONFIG_BE2NET_BE3 */
  53#ifdef CONFIG_BE2NET_LANCER
  54	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
  55	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
  56#endif /* CONFIG_BE2NET_LANCER */
  57#ifdef CONFIG_BE2NET_SKYHAWK
  58	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
  59	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
  60#endif /* CONFIG_BE2NET_SKYHAWK */
  61	{ 0 }
  62};
  63MODULE_DEVICE_TABLE(pci, be_dev_ids);
  64
  65/* Workqueue used by all functions for defering cmd calls to the adapter */
  66static struct workqueue_struct *be_wq;
  67
  68/* UE Status Low CSR */
  69static const char * const ue_status_low_desc[] = {
  70	"CEV",
  71	"CTX",
  72	"DBUF",
  73	"ERX",
  74	"Host",
  75	"MPU",
  76	"NDMA",
  77	"PTC ",
  78	"RDMA ",
  79	"RXF ",
  80	"RXIPS ",
  81	"RXULP0 ",
  82	"RXULP1 ",
  83	"RXULP2 ",
  84	"TIM ",
  85	"TPOST ",
  86	"TPRE ",
  87	"TXIPS ",
  88	"TXULP0 ",
  89	"TXULP1 ",
  90	"UC ",
  91	"WDMA ",
  92	"TXULP2 ",
  93	"HOST1 ",
  94	"P0_OB_LINK ",
  95	"P1_OB_LINK ",
  96	"HOST_GPIO ",
  97	"MBOX ",
  98	"ERX2 ",
  99	"SPARE ",
 100	"JTAG ",
 101	"MPU_INTPEND "
 102};
 103
 104/* UE Status High CSR */
 105static const char * const ue_status_hi_desc[] = {
 106	"LPCMEMHOST",
 107	"MGMT_MAC",
 108	"PCS0ONLINE",
 109	"MPU_IRAM",
 110	"PCS1ONLINE",
 111	"PCTL0",
 112	"PCTL1",
 113	"PMEM",
 114	"RR",
 115	"TXPB",
 116	"RXPP",
 117	"XAUI",
 118	"TXP",
 119	"ARM",
 120	"IPC",
 121	"HOST2",
 122	"HOST3",
 123	"HOST4",
 124	"HOST5",
 125	"HOST6",
 126	"HOST7",
 127	"ECRC",
 128	"Poison TLP",
 129	"NETC",
 130	"PERIPH",
 131	"LLTXULP",
 132	"D2P",
 133	"RCON",
 134	"LDMA",
 135	"LLTXP",
 136	"LLTXPB",
 137	"Unknown"
 138};
 139
 140#define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
 141				 BE_IF_FLAGS_BROADCAST | \
 142				 BE_IF_FLAGS_MULTICAST | \
 143				 BE_IF_FLAGS_PASS_L3L4_ERRORS)
 144
 145static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
 146{
 147	struct be_dma_mem *mem = &q->dma_mem;
 148
 149	if (mem->va) {
 150		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
 151				  mem->dma);
 152		mem->va = NULL;
 153	}
 154}
 155
 156static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
 157			  u16 len, u16 entry_size)
 158{
 159	struct be_dma_mem *mem = &q->dma_mem;
 160
 161	memset(q, 0, sizeof(*q));
 162	q->len = len;
 163	q->entry_size = entry_size;
 164	mem->size = len * entry_size;
 165	mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
 166				     &mem->dma, GFP_KERNEL);
 167	if (!mem->va)
 168		return -ENOMEM;
 169	return 0;
 170}
 171
 172static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
 173{
 174	u32 reg, enabled;
 175
 176	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
 177			      &reg);
 178	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 179
 180	if (!enabled && enable)
 181		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 182	else if (enabled && !enable)
 183		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 184	else
 185		return;
 186
 187	pci_write_config_dword(adapter->pdev,
 188			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
 189}
 190
 191static void be_intr_set(struct be_adapter *adapter, bool enable)
 192{
 193	int status = 0;
 194
 195	/* On lancer interrupts can't be controlled via this register */
 196	if (lancer_chip(adapter))
 197		return;
 198
 199	if (be_check_error(adapter, BE_ERROR_EEH))
 200		return;
 201
 202	status = be_cmd_intr_set(adapter, enable);
 203	if (status)
 204		be_reg_intr_set(adapter, enable);
 205}
 206
 207static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
 208{
 209	u32 val = 0;
 210
 211	if (be_check_error(adapter, BE_ERROR_HW))
 212		return;
 213
 214	val |= qid & DB_RQ_RING_ID_MASK;
 215	val |= posted << DB_RQ_NUM_POSTED_SHIFT;
 216
 217	wmb();
 218	iowrite32(val, adapter->db + DB_RQ_OFFSET);
 219}
 220
 221static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
 222			  u16 posted)
 223{
 224	u32 val = 0;
 225
 226	if (be_check_error(adapter, BE_ERROR_HW))
 227		return;
 228
 229	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
 230	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
 231
 232	wmb();
 233	iowrite32(val, adapter->db + txo->db_offset);
 234}
 235
 236static void be_eq_notify(struct be_adapter *adapter, u16 qid,
 237			 bool arm, bool clear_int, u16 num_popped,
 238			 u32 eq_delay_mult_enc)
 239{
 240	u32 val = 0;
 241
 242	val |= qid & DB_EQ_RING_ID_MASK;
 243	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
 244
 245	if (be_check_error(adapter, BE_ERROR_HW))
 246		return;
 247
 248	if (arm)
 249		val |= 1 << DB_EQ_REARM_SHIFT;
 250	if (clear_int)
 251		val |= 1 << DB_EQ_CLR_SHIFT;
 252	val |= 1 << DB_EQ_EVNT_SHIFT;
 253	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
 254	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
 255	iowrite32(val, adapter->db + DB_EQ_OFFSET);
 256}
 257
 258void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
 259{
 260	u32 val = 0;
 261
 262	val |= qid & DB_CQ_RING_ID_MASK;
 263	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
 264			DB_CQ_RING_ID_EXT_MASK_SHIFT);
 265
 266	if (be_check_error(adapter, BE_ERROR_HW))
 267		return;
 268
 269	if (arm)
 270		val |= 1 << DB_CQ_REARM_SHIFT;
 271	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
 272	iowrite32(val, adapter->db + DB_CQ_OFFSET);
 273}
 274
 275static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
 276{
 277	int i;
 278
 279	/* Check if mac has already been added as part of uc-list */
 280	for (i = 0; i < adapter->uc_macs; i++) {
 281		if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
 282			/* mac already added, skip addition */
 283			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
 284			return 0;
 285		}
 286	}
 287
 288	return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
 289			       &adapter->pmac_id[0], 0);
 290}
 291
 292static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
 293{
 294	int i;
 295
 296	/* Skip deletion if the programmed mac is
 297	 * being used in uc-list
 298	 */
 299	for (i = 0; i < adapter->uc_macs; i++) {
 300		if (adapter->pmac_id[i + 1] == pmac_id)
 301			return;
 302	}
 303	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
 304}
 305
 306static int be_mac_addr_set(struct net_device *netdev, void *p)
 307{
 308	struct be_adapter *adapter = netdev_priv(netdev);
 309	struct device *dev = &adapter->pdev->dev;
 310	struct sockaddr *addr = p;
 311	int status;
 312	u8 mac[ETH_ALEN];
 313	u32 old_pmac_id = adapter->pmac_id[0];
 314
 315	if (!is_valid_ether_addr(addr->sa_data))
 316		return -EADDRNOTAVAIL;
 317
 318	/* Proceed further only if, User provided MAC is different
 319	 * from active MAC
 320	 */
 321	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
 322		return 0;
 323
 324	/* BE3 VFs without FILTMGMT privilege are not allowed to set its MAC
 325	 * address
 326	 */
 327	if (BEx_chip(adapter) && be_virtfn(adapter) &&
 328	    !check_privilege(adapter, BE_PRIV_FILTMGMT))
 329		return -EPERM;
 330
 331	/* if device is not running, copy MAC to netdev->dev_addr */
 332	if (!netif_running(netdev))
 333		goto done;
 334
 335	/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
 336	 * privilege or if PF did not provision the new MAC address.
 337	 * On BE3, this cmd will always fail if the VF doesn't have the
 338	 * FILTMGMT privilege. This failure is OK, only if the PF programmed
 339	 * the MAC for the VF.
 340	 */
 341	mutex_lock(&adapter->rx_filter_lock);
 342	status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
 343	if (!status) {
 344
 345		/* Delete the old programmed MAC. This call may fail if the
 346		 * old MAC was already deleted by the PF driver.
 347		 */
 348		if (adapter->pmac_id[0] != old_pmac_id)
 349			be_dev_mac_del(adapter, old_pmac_id);
 350	}
 351
 352	mutex_unlock(&adapter->rx_filter_lock);
 353	/* Decide if the new MAC is successfully activated only after
 354	 * querying the FW
 355	 */
 356	status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
 357				       adapter->if_handle, true, 0);
 358	if (status)
 359		goto err;
 360
 361	/* The MAC change did not happen, either due to lack of privilege
 362	 * or PF didn't pre-provision.
 363	 */
 364	if (!ether_addr_equal(addr->sa_data, mac)) {
 365		status = -EPERM;
 366		goto err;
 367	}
 368
 369	/* Remember currently programmed MAC */
 370	ether_addr_copy(adapter->dev_mac, addr->sa_data);
 371done:
 372	ether_addr_copy(netdev->dev_addr, addr->sa_data);
 373	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
 374	return 0;
 375err:
 376	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
 377	return status;
 378}
 379
 380/* BE2 supports only v0 cmd */
 381static void *hw_stats_from_cmd(struct be_adapter *adapter)
 382{
 383	if (BE2_chip(adapter)) {
 384		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
 385
 386		return &cmd->hw_stats;
 387	} else if (BE3_chip(adapter)) {
 388		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
 389
 390		return &cmd->hw_stats;
 391	} else {
 392		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
 393
 394		return &cmd->hw_stats;
 395	}
 396}
 397
 398/* BE2 supports only v0 cmd */
 399static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
 400{
 401	if (BE2_chip(adapter)) {
 402		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
 403
 404		return &hw_stats->erx;
 405	} else if (BE3_chip(adapter)) {
 406		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
 407
 408		return &hw_stats->erx;
 409	} else {
 410		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
 411
 412		return &hw_stats->erx;
 413	}
 414}
 415
 416static void populate_be_v0_stats(struct be_adapter *adapter)
 417{
 418	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
 419	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 420	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
 421	struct be_port_rxf_stats_v0 *port_stats =
 422					&rxf_stats->port[adapter->port_num];
 423	struct be_drv_stats *drvs = &adapter->drv_stats;
 424
 425	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 426	drvs->rx_pause_frames = port_stats->rx_pause_frames;
 427	drvs->rx_crc_errors = port_stats->rx_crc_errors;
 428	drvs->rx_control_frames = port_stats->rx_control_frames;
 429	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 430	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 431	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 432	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 433	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 434	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 435	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
 436	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 437	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 438	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 439	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 440	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
 441	drvs->rx_dropped_header_too_small =
 442		port_stats->rx_dropped_header_too_small;
 443	drvs->rx_address_filtered =
 444					port_stats->rx_address_filtered +
 445					port_stats->rx_vlan_filtered;
 446	drvs->rx_alignment_symbol_errors =
 447		port_stats->rx_alignment_symbol_errors;
 448
 449	drvs->tx_pauseframes = port_stats->tx_pauseframes;
 450	drvs->tx_controlframes = port_stats->tx_controlframes;
 451
 452	if (adapter->port_num)
 453		drvs->jabber_events = rxf_stats->port1_jabber_events;
 454	else
 455		drvs->jabber_events = rxf_stats->port0_jabber_events;
 456	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 457	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 458	drvs->forwarded_packets = rxf_stats->forwarded_packets;
 459	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 460	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 461	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 462	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 463}
 464
 465static void populate_be_v1_stats(struct be_adapter *adapter)
 466{
 467	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
 468	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 469	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
 470	struct be_port_rxf_stats_v1 *port_stats =
 471					&rxf_stats->port[adapter->port_num];
 472	struct be_drv_stats *drvs = &adapter->drv_stats;
 473
 474	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 475	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
 476	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
 477	drvs->rx_pause_frames = port_stats->rx_pause_frames;
 478	drvs->rx_crc_errors = port_stats->rx_crc_errors;
 479	drvs->rx_control_frames = port_stats->rx_control_frames;
 480	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 481	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 482	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 483	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 484	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 485	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 486	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 487	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 488	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 489	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 490	drvs->rx_dropped_header_too_small =
 491		port_stats->rx_dropped_header_too_small;
 492	drvs->rx_input_fifo_overflow_drop =
 493		port_stats->rx_input_fifo_overflow_drop;
 494	drvs->rx_address_filtered = port_stats->rx_address_filtered;
 495	drvs->rx_alignment_symbol_errors =
 496		port_stats->rx_alignment_symbol_errors;
 497	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
 498	drvs->tx_pauseframes = port_stats->tx_pauseframes;
 499	drvs->tx_controlframes = port_stats->tx_controlframes;
 500	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
 501	drvs->jabber_events = port_stats->jabber_events;
 502	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 503	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 504	drvs->forwarded_packets = rxf_stats->forwarded_packets;
 505	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 506	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 507	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 508	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 509}
 510
 511static void populate_be_v2_stats(struct be_adapter *adapter)
 512{
 513	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
 514	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 515	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
 516	struct be_port_rxf_stats_v2 *port_stats =
 517					&rxf_stats->port[adapter->port_num];
 518	struct be_drv_stats *drvs = &adapter->drv_stats;
 519
 520	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 521	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
 522	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
 523	drvs->rx_pause_frames = port_stats->rx_pause_frames;
 524	drvs->rx_crc_errors = port_stats->rx_crc_errors;
 525	drvs->rx_control_frames = port_stats->rx_control_frames;
 526	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 527	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 528	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 529	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 530	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 531	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 532	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 533	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 534	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 535	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 536	drvs->rx_dropped_header_too_small =
 537		port_stats->rx_dropped_header_too_small;
 538	drvs->rx_input_fifo_overflow_drop =
 539		port_stats->rx_input_fifo_overflow_drop;
 540	drvs->rx_address_filtered = port_stats->rx_address_filtered;
 541	drvs->rx_alignment_symbol_errors =
 542		port_stats->rx_alignment_symbol_errors;
 543	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
 544	drvs->tx_pauseframes = port_stats->tx_pauseframes;
 545	drvs->tx_controlframes = port_stats->tx_controlframes;
 546	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
 547	drvs->jabber_events = port_stats->jabber_events;
 548	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 549	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 550	drvs->forwarded_packets = rxf_stats->forwarded_packets;
 551	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 552	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 553	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 554	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 555	if (be_roce_supported(adapter)) {
 556		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
 557		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
 558		drvs->rx_roce_frames = port_stats->roce_frames_received;
 559		drvs->roce_drops_crc = port_stats->roce_drops_crc;
 560		drvs->roce_drops_payload_len =
 561			port_stats->roce_drops_payload_len;
 562	}
 563}
 564
 565static void populate_lancer_stats(struct be_adapter *adapter)
 566{
 567	struct be_drv_stats *drvs = &adapter->drv_stats;
 568	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
 569
 570	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
 571	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
 572	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
 573	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
 574	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
 575	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
 576	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
 577	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
 578	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
 579	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
 580	drvs->rx_dropped_tcp_length =
 581				pport_stats->rx_dropped_invalid_tcp_length;
 582	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
 583	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
 584	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
 585	drvs->rx_dropped_header_too_small =
 586				pport_stats->rx_dropped_header_too_small;
 587	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
 588	drvs->rx_address_filtered =
 589					pport_stats->rx_address_filtered +
 590					pport_stats->rx_vlan_filtered;
 591	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
 592	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
 593	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
 594	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
 595	drvs->jabber_events = pport_stats->rx_jabbers;
 596	drvs->forwarded_packets = pport_stats->num_forwards_lo;
 597	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
 598	drvs->rx_drops_too_many_frags =
 599				pport_stats->rx_drops_too_many_frags_lo;
 600}
 601
 602static void accumulate_16bit_val(u32 *acc, u16 val)
 603{
 604#define lo(x)			(x & 0xFFFF)
 605#define hi(x)			(x & 0xFFFF0000)
 606	bool wrapped = val < lo(*acc);
 607	u32 newacc = hi(*acc) + val;
 608
 609	if (wrapped)
 610		newacc += 65536;
 611	WRITE_ONCE(*acc, newacc);
 612}
 613
 614static void populate_erx_stats(struct be_adapter *adapter,
 615			       struct be_rx_obj *rxo, u32 erx_stat)
 616{
 617	if (!BEx_chip(adapter))
 618		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
 619	else
 620		/* below erx HW counter can actually wrap around after
 621		 * 65535. Driver accumulates a 32-bit value
 622		 */
 623		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
 624				     (u16)erx_stat);
 625}
 626
 627void be_parse_stats(struct be_adapter *adapter)
 628{
 629	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
 630	struct be_rx_obj *rxo;
 631	int i;
 632	u32 erx_stat;
 633
 634	if (lancer_chip(adapter)) {
 635		populate_lancer_stats(adapter);
 636	} else {
 637		if (BE2_chip(adapter))
 638			populate_be_v0_stats(adapter);
 639		else if (BE3_chip(adapter))
 640			/* for BE3 */
 641			populate_be_v1_stats(adapter);
 642		else
 643			populate_be_v2_stats(adapter);
 644
 645		/* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
 646		for_all_rx_queues(adapter, rxo, i) {
 647			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
 648			populate_erx_stats(adapter, rxo, erx_stat);
 649		}
 650	}
 651}
 652
 653static void be_get_stats64(struct net_device *netdev,
 654			   struct rtnl_link_stats64 *stats)
 655{
 656	struct be_adapter *adapter = netdev_priv(netdev);
 657	struct be_drv_stats *drvs = &adapter->drv_stats;
 658	struct be_rx_obj *rxo;
 659	struct be_tx_obj *txo;
 660	u64 pkts, bytes;
 661	unsigned int start;
 662	int i;
 663
 664	for_all_rx_queues(adapter, rxo, i) {
 665		const struct be_rx_stats *rx_stats = rx_stats(rxo);
 666
 667		do {
 668			start = u64_stats_fetch_begin_irq(&rx_stats->sync);
 669			pkts = rx_stats(rxo)->rx_pkts;
 670			bytes = rx_stats(rxo)->rx_bytes;
 671		} while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
 672		stats->rx_packets += pkts;
 673		stats->rx_bytes += bytes;
 674		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
 675		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
 676					rx_stats(rxo)->rx_drops_no_frags;
 677	}
 678
 679	for_all_tx_queues(adapter, txo, i) {
 680		const struct be_tx_stats *tx_stats = tx_stats(txo);
 681
 682		do {
 683			start = u64_stats_fetch_begin_irq(&tx_stats->sync);
 684			pkts = tx_stats(txo)->tx_pkts;
 685			bytes = tx_stats(txo)->tx_bytes;
 686		} while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
 687		stats->tx_packets += pkts;
 688		stats->tx_bytes += bytes;
 689	}
 690
 691	/* bad pkts received */
 692	stats->rx_errors = drvs->rx_crc_errors +
 693		drvs->rx_alignment_symbol_errors +
 694		drvs->rx_in_range_errors +
 695		drvs->rx_out_range_errors +
 696		drvs->rx_frame_too_long +
 697		drvs->rx_dropped_too_small +
 698		drvs->rx_dropped_too_short +
 699		drvs->rx_dropped_header_too_small +
 700		drvs->rx_dropped_tcp_length +
 701		drvs->rx_dropped_runt;
 702
 703	/* detailed rx errors */
 704	stats->rx_length_errors = drvs->rx_in_range_errors +
 705		drvs->rx_out_range_errors +
 706		drvs->rx_frame_too_long;
 707
 708	stats->rx_crc_errors = drvs->rx_crc_errors;
 709
 710	/* frame alignment errors */
 711	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
 712
 713	/* receiver fifo overrun */
 714	/* drops_no_pbuf is no per i/f, it's per BE card */
 715	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
 716				drvs->rx_input_fifo_overflow_drop +
 717				drvs->rx_drops_no_pbuf;
 718}
 719
 720void be_link_status_update(struct be_adapter *adapter, u8 link_status)
 721{
 722	struct net_device *netdev = adapter->netdev;
 723
 724	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
 725		netif_carrier_off(netdev);
 726		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
 727	}
 728
 729	if (link_status)
 730		netif_carrier_on(netdev);
 731	else
 732		netif_carrier_off(netdev);
 733
 734	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
 735}
 736
 737static int be_gso_hdr_len(struct sk_buff *skb)
 738{
 739	if (skb->encapsulation)
 740		return skb_inner_transport_offset(skb) +
 741		       inner_tcp_hdrlen(skb);
 742	return skb_transport_offset(skb) + tcp_hdrlen(skb);
 743}
 744
 745static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
 746{
 747	struct be_tx_stats *stats = tx_stats(txo);
 748	u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
 749	/* Account for headers which get duplicated in TSO pkt */
 750	u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
 751
 752	u64_stats_update_begin(&stats->sync);
 753	stats->tx_reqs++;
 754	stats->tx_bytes += skb->len + dup_hdr_len;
 755	stats->tx_pkts += tx_pkts;
 756	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
 757		stats->tx_vxlan_offload_pkts += tx_pkts;
 758	u64_stats_update_end(&stats->sync);
 759}
 760
 761/* Returns number of WRBs needed for the skb */
 762static u32 skb_wrb_cnt(struct sk_buff *skb)
 763{
 764	/* +1 for the header wrb */
 765	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
 766}
 767
 768static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
 769{
 770	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
 771	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
 772	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
 773	wrb->rsvd0 = 0;
 774}
 775
 776/* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
 777 * to avoid the swap and shift/mask operations in wrb_fill().
 778 */
 779static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
 780{
 781	wrb->frag_pa_hi = 0;
 782	wrb->frag_pa_lo = 0;
 783	wrb->frag_len = 0;
 784	wrb->rsvd0 = 0;
 785}
 786
 787static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
 788				     struct sk_buff *skb)
 789{
 790	u8 vlan_prio;
 791	u16 vlan_tag;
 792
 793	vlan_tag = skb_vlan_tag_get(skb);
 794	vlan_prio = skb_vlan_tag_get_prio(skb);
 795	/* If vlan priority provided by OS is NOT in available bmap */
 796	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
 797		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
 798				adapter->recommended_prio_bits;
 799
 800	return vlan_tag;
 801}
 802
 803/* Used only for IP tunnel packets */
 804static u16 skb_inner_ip_proto(struct sk_buff *skb)
 805{
 806	return (inner_ip_hdr(skb)->version == 4) ?
 807		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
 808}
 809
 810static u16 skb_ip_proto(struct sk_buff *skb)
 811{
 812	return (ip_hdr(skb)->version == 4) ?
 813		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
 814}
 815
 816static inline bool be_is_txq_full(struct be_tx_obj *txo)
 817{
 818	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
 819}
 820
 821static inline bool be_can_txq_wake(struct be_tx_obj *txo)
 822{
 823	return atomic_read(&txo->q.used) < txo->q.len / 2;
 824}
 825
 826static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
 827{
 828	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
 829}
 830
 831static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
 832				       struct sk_buff *skb,
 833				       struct be_wrb_params *wrb_params)
 834{
 835	u16 proto;
 836
 837	if (skb_is_gso(skb)) {
 838		BE_WRB_F_SET(wrb_params->features, LSO, 1);
 839		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
 840		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
 841			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
 842	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 843		if (skb->encapsulation) {
 844			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
 845			proto = skb_inner_ip_proto(skb);
 846		} else {
 847			proto = skb_ip_proto(skb);
 848		}
 849		if (proto == IPPROTO_TCP)
 850			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
 851		else if (proto == IPPROTO_UDP)
 852			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
 853	}
 854
 855	if (skb_vlan_tag_present(skb)) {
 856		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
 857		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
 858	}
 859
 860	BE_WRB_F_SET(wrb_params->features, CRC, 1);
 861}
 862
 863static void wrb_fill_hdr(struct be_adapter *adapter,
 864			 struct be_eth_hdr_wrb *hdr,
 865			 struct be_wrb_params *wrb_params,
 866			 struct sk_buff *skb)
 867{
 868	memset(hdr, 0, sizeof(*hdr));
 869
 870	SET_TX_WRB_HDR_BITS(crc, hdr,
 871			    BE_WRB_F_GET(wrb_params->features, CRC));
 872	SET_TX_WRB_HDR_BITS(ipcs, hdr,
 873			    BE_WRB_F_GET(wrb_params->features, IPCS));
 874	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
 875			    BE_WRB_F_GET(wrb_params->features, TCPCS));
 876	SET_TX_WRB_HDR_BITS(udpcs, hdr,
 877			    BE_WRB_F_GET(wrb_params->features, UDPCS));
 878
 879	SET_TX_WRB_HDR_BITS(lso, hdr,
 880			    BE_WRB_F_GET(wrb_params->features, LSO));
 881	SET_TX_WRB_HDR_BITS(lso6, hdr,
 882			    BE_WRB_F_GET(wrb_params->features, LSO6));
 883	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
 884
 885	/* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
 886	 * hack is not needed, the evt bit is set while ringing DB.
 887	 */
 888	SET_TX_WRB_HDR_BITS(event, hdr,
 889			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
 890	SET_TX_WRB_HDR_BITS(vlan, hdr,
 891			    BE_WRB_F_GET(wrb_params->features, VLAN));
 892	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
 893
 894	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
 895	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
 896	SET_TX_WRB_HDR_BITS(mgmt, hdr,
 897			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
 898}
 899
 900static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
 901			  bool unmap_single)
 902{
 903	dma_addr_t dma;
 904	u32 frag_len = le32_to_cpu(wrb->frag_len);
 905
 906
 907	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
 908		(u64)le32_to_cpu(wrb->frag_pa_lo);
 909	if (frag_len) {
 910		if (unmap_single)
 911			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
 912		else
 913			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
 914	}
 915}
 916
 917/* Grab a WRB header for xmit */
 918static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
 919{
 920	u32 head = txo->q.head;
 921
 922	queue_head_inc(&txo->q);
 923	return head;
 924}
 925
 926/* Set up the WRB header for xmit */
 927static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
 928				struct be_tx_obj *txo,
 929				struct be_wrb_params *wrb_params,
 930				struct sk_buff *skb, u16 head)
 931{
 932	u32 num_frags = skb_wrb_cnt(skb);
 933	struct be_queue_info *txq = &txo->q;
 934	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
 935
 936	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
 937	be_dws_cpu_to_le(hdr, sizeof(*hdr));
 938
 939	BUG_ON(txo->sent_skb_list[head]);
 940	txo->sent_skb_list[head] = skb;
 941	txo->last_req_hdr = head;
 942	atomic_add(num_frags, &txq->used);
 943	txo->last_req_wrb_cnt = num_frags;
 944	txo->pend_wrb_cnt += num_frags;
 945}
 946
 947/* Setup a WRB fragment (buffer descriptor) for xmit */
 948static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
 949				 int len)
 950{
 951	struct be_eth_wrb *wrb;
 952	struct be_queue_info *txq = &txo->q;
 953
 954	wrb = queue_head_node(txq);
 955	wrb_fill(wrb, busaddr, len);
 956	queue_head_inc(txq);
 957}
 958
 959/* Bring the queue back to the state it was in before be_xmit_enqueue() routine
 960 * was invoked. The producer index is restored to the previous packet and the
 961 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
 962 */
 963static void be_xmit_restore(struct be_adapter *adapter,
 964			    struct be_tx_obj *txo, u32 head, bool map_single,
 965			    u32 copied)
 966{
 967	struct device *dev;
 968	struct be_eth_wrb *wrb;
 969	struct be_queue_info *txq = &txo->q;
 970
 971	dev = &adapter->pdev->dev;
 972	txq->head = head;
 973
 974	/* skip the first wrb (hdr); it's not mapped */
 975	queue_head_inc(txq);
 976	while (copied) {
 977		wrb = queue_head_node(txq);
 978		unmap_tx_frag(dev, wrb, map_single);
 979		map_single = false;
 980		copied -= le32_to_cpu(wrb->frag_len);
 981		queue_head_inc(txq);
 982	}
 983
 984	txq->head = head;
 985}
 986
 987/* Enqueue the given packet for transmit. This routine allocates WRBs for the
 988 * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
 989 * of WRBs used up by the packet.
 990 */
 991static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
 992			   struct sk_buff *skb,
 993			   struct be_wrb_params *wrb_params)
 994{
 995	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
 996	struct device *dev = &adapter->pdev->dev;
 997	bool map_single = false;
 998	u32 head;
 999	dma_addr_t busaddr;
1000	int len;
1001
1002	head = be_tx_get_wrb_hdr(txo);
1003
1004	if (skb->len > skb->data_len) {
1005		len = skb_headlen(skb);
1006
1007		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1008		if (dma_mapping_error(dev, busaddr))
1009			goto dma_err;
1010		map_single = true;
1011		be_tx_setup_wrb_frag(txo, busaddr, len);
1012		copied += len;
1013	}
1014
1015	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1016		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1017		len = skb_frag_size(frag);
1018
1019		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1020		if (dma_mapping_error(dev, busaddr))
1021			goto dma_err;
1022		be_tx_setup_wrb_frag(txo, busaddr, len);
1023		copied += len;
1024	}
1025
1026	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1027
1028	be_tx_stats_update(txo, skb);
1029	return wrb_cnt;
1030
1031dma_err:
1032	adapter->drv_stats.dma_map_errors++;
1033	be_xmit_restore(adapter, txo, head, map_single, copied);
1034	return 0;
1035}
1036
1037static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1038{
1039	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1040}
1041
1042static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1043					     struct sk_buff *skb,
1044					     struct be_wrb_params
1045					     *wrb_params)
1046{
1047	bool insert_vlan = false;
1048	u16 vlan_tag = 0;
1049
1050	skb = skb_share_check(skb, GFP_ATOMIC);
1051	if (unlikely(!skb))
1052		return skb;
1053
1054	if (skb_vlan_tag_present(skb)) {
1055		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1056		insert_vlan = true;
1057	}
1058
1059	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1060		if (!insert_vlan) {
1061			vlan_tag = adapter->pvid;
1062			insert_vlan = true;
1063		}
1064		/* f/w workaround to set skip_hw_vlan = 1, informs the F/W to
1065		 * skip VLAN insertion
1066		 */
1067		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1068	}
1069
1070	if (insert_vlan) {
1071		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1072						vlan_tag);
1073		if (unlikely(!skb))
1074			return skb;
1075		__vlan_hwaccel_clear_tag(skb);
1076	}
1077
1078	/* Insert the outer VLAN, if any */
1079	if (adapter->qnq_vid) {
1080		vlan_tag = adapter->qnq_vid;
1081		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1082						vlan_tag);
1083		if (unlikely(!skb))
1084			return skb;
1085		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1086	}
1087
1088	return skb;
1089}
1090
1091static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1092{
1093	struct ethhdr *eh = (struct ethhdr *)skb->data;
1094	u16 offset = ETH_HLEN;
1095
1096	if (eh->h_proto == htons(ETH_P_IPV6)) {
1097		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1098
1099		offset += sizeof(struct ipv6hdr);
1100		if (ip6h->nexthdr != NEXTHDR_TCP &&
1101		    ip6h->nexthdr != NEXTHDR_UDP) {
1102			struct ipv6_opt_hdr *ehdr =
1103				(struct ipv6_opt_hdr *)(skb->data + offset);
1104
1105			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1106			if (ehdr->hdrlen == 0xff)
1107				return true;
1108		}
1109	}
1110	return false;
1111}
1112
1113static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1114{
1115	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1116}
1117
1118static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1119{
1120	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1121}
1122
1123static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1124						  struct sk_buff *skb,
1125						  struct be_wrb_params
1126						  *wrb_params)
1127{
1128	struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1129	unsigned int eth_hdr_len;
1130	struct iphdr *ip;
1131
1132	/* For padded packets, BE HW modifies tot_len field in IP header
1133	 * incorrecly when VLAN tag is inserted by HW.
1134	 * For padded packets, Lancer computes incorrect checksum.
1135	 */
1136	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1137						VLAN_ETH_HLEN : ETH_HLEN;
1138	if (skb->len <= 60 &&
1139	    (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1140	    is_ipv4_pkt(skb)) {
1141		ip = (struct iphdr *)ip_hdr(skb);
1142		pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
 
1143	}
1144
1145	/* If vlan tag is already inlined in the packet, skip HW VLAN
1146	 * tagging in pvid-tagging mode
1147	 */
1148	if (be_pvid_tagging_enabled(adapter) &&
1149	    veh->h_vlan_proto == htons(ETH_P_8021Q))
1150		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1151
1152	/* HW has a bug wherein it will calculate CSUM for VLAN
1153	 * pkts even though it is disabled.
1154	 * Manually insert VLAN in pkt.
1155	 */
1156	if (skb->ip_summed != CHECKSUM_PARTIAL &&
1157	    skb_vlan_tag_present(skb)) {
1158		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1159		if (unlikely(!skb))
1160			goto err;
1161	}
1162
1163	/* HW may lockup when VLAN HW tagging is requested on
1164	 * certain ipv6 packets. Drop such pkts if the HW workaround to
1165	 * skip HW tagging is not enabled by FW.
1166	 */
1167	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1168		     (adapter->pvid || adapter->qnq_vid) &&
1169		     !qnq_async_evt_rcvd(adapter)))
1170		goto tx_drop;
1171
1172	/* Manual VLAN tag insertion to prevent:
1173	 * ASIC lockup when the ASIC inserts VLAN tag into
1174	 * certain ipv6 packets. Insert VLAN tags in driver,
1175	 * and set event, completion, vlan bits accordingly
1176	 * in the Tx WRB.
1177	 */
1178	if (be_ipv6_tx_stall_chk(adapter, skb) &&
1179	    be_vlan_tag_tx_chk(adapter, skb)) {
1180		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1181		if (unlikely(!skb))
1182			goto err;
1183	}
1184
1185	return skb;
1186tx_drop:
1187	dev_kfree_skb_any(skb);
1188err:
1189	return NULL;
1190}
1191
1192static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1193					   struct sk_buff *skb,
1194					   struct be_wrb_params *wrb_params)
1195{
1196	int err;
1197
1198	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1199	 * packets that are 32b or less may cause a transmit stall
1200	 * on that port. The workaround is to pad such packets
1201	 * (len <= 32 bytes) to a minimum length of 36b.
1202	 */
1203	if (skb->len <= 32) {
1204		if (skb_put_padto(skb, 36))
1205			return NULL;
1206	}
1207
1208	if (BEx_chip(adapter) || lancer_chip(adapter)) {
1209		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1210		if (!skb)
1211			return NULL;
1212	}
1213
1214	/* The stack can send us skbs with length greater than
1215	 * what the HW can handle. Trim the extra bytes.
1216	 */
1217	WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1218	err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1219	WARN_ON(err);
1220
1221	return skb;
1222}
1223
1224static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1225{
1226	struct be_queue_info *txq = &txo->q;
1227	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1228
1229	/* Mark the last request eventable if it hasn't been marked already */
1230	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1231		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1232
1233	/* compose a dummy wrb if there are odd set of wrbs to notify */
1234	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1235		wrb_fill_dummy(queue_head_node(txq));
1236		queue_head_inc(txq);
1237		atomic_inc(&txq->used);
1238		txo->pend_wrb_cnt++;
1239		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1240					   TX_HDR_WRB_NUM_SHIFT);
1241		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1242					  TX_HDR_WRB_NUM_SHIFT);
1243	}
1244	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1245	txo->pend_wrb_cnt = 0;
1246}
1247
1248/* OS2BMC related */
1249
1250#define DHCP_CLIENT_PORT	68
1251#define DHCP_SERVER_PORT	67
1252#define NET_BIOS_PORT1		137
1253#define NET_BIOS_PORT2		138
1254#define DHCPV6_RAS_PORT		547
1255
1256#define is_mc_allowed_on_bmc(adapter, eh)	\
1257	(!is_multicast_filt_enabled(adapter) &&	\
1258	 is_multicast_ether_addr(eh->h_dest) &&	\
1259	 !is_broadcast_ether_addr(eh->h_dest))
1260
1261#define is_bc_allowed_on_bmc(adapter, eh)	\
1262	(!is_broadcast_filt_enabled(adapter) &&	\
1263	 is_broadcast_ether_addr(eh->h_dest))
1264
1265#define is_arp_allowed_on_bmc(adapter, skb)	\
1266	(is_arp(skb) && is_arp_filt_enabled(adapter))
1267
1268#define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))
1269
1270#define is_arp_filt_enabled(adapter)	\
1271		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1272
1273#define is_dhcp_client_filt_enabled(adapter)	\
1274		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1275
1276#define is_dhcp_srvr_filt_enabled(adapter)	\
1277		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1278
1279#define is_nbios_filt_enabled(adapter)	\
1280		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1281
1282#define is_ipv6_na_filt_enabled(adapter)	\
1283		(adapter->bmc_filt_mask &	\
1284			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1285
1286#define is_ipv6_ra_filt_enabled(adapter)	\
1287		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1288
1289#define is_ipv6_ras_filt_enabled(adapter)	\
1290		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1291
1292#define is_broadcast_filt_enabled(adapter)	\
1293		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1294
1295#define is_multicast_filt_enabled(adapter)	\
1296		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1297
1298static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1299			       struct sk_buff **skb)
1300{
1301	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1302	bool os2bmc = false;
1303
1304	if (!be_is_os2bmc_enabled(adapter))
1305		goto done;
1306
1307	if (!is_multicast_ether_addr(eh->h_dest))
1308		goto done;
1309
1310	if (is_mc_allowed_on_bmc(adapter, eh) ||
1311	    is_bc_allowed_on_bmc(adapter, eh) ||
1312	    is_arp_allowed_on_bmc(adapter, (*skb))) {
1313		os2bmc = true;
1314		goto done;
1315	}
1316
1317	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1318		struct ipv6hdr *hdr = ipv6_hdr((*skb));
1319		u8 nexthdr = hdr->nexthdr;
1320
1321		if (nexthdr == IPPROTO_ICMPV6) {
1322			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1323
1324			switch (icmp6->icmp6_type) {
1325			case NDISC_ROUTER_ADVERTISEMENT:
1326				os2bmc = is_ipv6_ra_filt_enabled(adapter);
1327				goto done;
1328			case NDISC_NEIGHBOUR_ADVERTISEMENT:
1329				os2bmc = is_ipv6_na_filt_enabled(adapter);
1330				goto done;
1331			default:
1332				break;
1333			}
1334		}
1335	}
1336
1337	if (is_udp_pkt((*skb))) {
1338		struct udphdr *udp = udp_hdr((*skb));
1339
1340		switch (ntohs(udp->dest)) {
1341		case DHCP_CLIENT_PORT:
1342			os2bmc = is_dhcp_client_filt_enabled(adapter);
1343			goto done;
1344		case DHCP_SERVER_PORT:
1345			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1346			goto done;
1347		case NET_BIOS_PORT1:
1348		case NET_BIOS_PORT2:
1349			os2bmc = is_nbios_filt_enabled(adapter);
1350			goto done;
1351		case DHCPV6_RAS_PORT:
1352			os2bmc = is_ipv6_ras_filt_enabled(adapter);
1353			goto done;
1354		default:
1355			break;
1356		}
1357	}
1358done:
1359	/* For packets over a vlan, which are destined
1360	 * to BMC, asic expects the vlan to be inline in the packet.
1361	 */
1362	if (os2bmc)
1363		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1364
1365	return os2bmc;
1366}
1367
1368static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1369{
1370	struct be_adapter *adapter = netdev_priv(netdev);
1371	u16 q_idx = skb_get_queue_mapping(skb);
1372	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1373	struct be_wrb_params wrb_params = { 0 };
1374	bool flush = !netdev_xmit_more();
1375	u16 wrb_cnt;
1376
1377	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1378	if (unlikely(!skb))
1379		goto drop;
1380
1381	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1382
1383	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1384	if (unlikely(!wrb_cnt)) {
1385		dev_kfree_skb_any(skb);
1386		goto drop;
1387	}
1388
1389	/* if os2bmc is enabled and if the pkt is destined to bmc,
1390	 * enqueue the pkt a 2nd time with mgmt bit set.
1391	 */
1392	if (be_send_pkt_to_bmc(adapter, &skb)) {
1393		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1394		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1395		if (unlikely(!wrb_cnt))
1396			goto drop;
1397		else
1398			skb_get(skb);
1399	}
1400
1401	if (be_is_txq_full(txo)) {
1402		netif_stop_subqueue(netdev, q_idx);
1403		tx_stats(txo)->tx_stops++;
1404	}
1405
1406	if (flush || __netif_subqueue_stopped(netdev, q_idx))
1407		be_xmit_flush(adapter, txo);
1408
1409	return NETDEV_TX_OK;
1410drop:
1411	tx_stats(txo)->tx_drv_drops++;
1412	/* Flush the already enqueued tx requests */
1413	if (flush && txo->pend_wrb_cnt)
1414		be_xmit_flush(adapter, txo);
1415
1416	return NETDEV_TX_OK;
1417}
1418
1419static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue)
1420{
1421	struct be_adapter *adapter = netdev_priv(netdev);
1422	struct device *dev = &adapter->pdev->dev;
1423	struct be_tx_obj *txo;
1424	struct sk_buff *skb;
1425	struct tcphdr *tcphdr;
1426	struct udphdr *udphdr;
1427	u32 *entry;
1428	int status;
1429	int i, j;
1430
1431	for_all_tx_queues(adapter, txo, i) {
1432		dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1433			 i, txo->q.head, txo->q.tail,
1434			 atomic_read(&txo->q.used), txo->q.id);
1435
1436		entry = txo->q.dma_mem.va;
1437		for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1438			if (entry[j] != 0 || entry[j + 1] != 0 ||
1439			    entry[j + 2] != 0 || entry[j + 3] != 0) {
1440				dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1441					 j, entry[j], entry[j + 1],
1442					 entry[j + 2], entry[j + 3]);
1443			}
1444		}
1445
1446		entry = txo->cq.dma_mem.va;
1447		dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1448			 i, txo->cq.head, txo->cq.tail,
1449			 atomic_read(&txo->cq.used));
1450		for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1451			if (entry[j] != 0 || entry[j + 1] != 0 ||
1452			    entry[j + 2] != 0 || entry[j + 3] != 0) {
1453				dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1454					 j, entry[j], entry[j + 1],
1455					 entry[j + 2], entry[j + 3]);
1456			}
1457		}
1458
1459		for (j = 0; j < TX_Q_LEN; j++) {
1460			if (txo->sent_skb_list[j]) {
1461				skb = txo->sent_skb_list[j];
1462				if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1463					tcphdr = tcp_hdr(skb);
1464					dev_info(dev, "TCP source port %d\n",
1465						 ntohs(tcphdr->source));
1466					dev_info(dev, "TCP dest port %d\n",
1467						 ntohs(tcphdr->dest));
1468					dev_info(dev, "TCP sequence num %d\n",
1469						 ntohs(tcphdr->seq));
1470					dev_info(dev, "TCP ack_seq %d\n",
1471						 ntohs(tcphdr->ack_seq));
1472				} else if (ip_hdr(skb)->protocol ==
1473					   IPPROTO_UDP) {
1474					udphdr = udp_hdr(skb);
1475					dev_info(dev, "UDP source port %d\n",
1476						 ntohs(udphdr->source));
1477					dev_info(dev, "UDP dest port %d\n",
1478						 ntohs(udphdr->dest));
1479				}
1480				dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1481					 j, skb, skb->len, skb->protocol);
1482			}
1483		}
1484	}
1485
1486	if (lancer_chip(adapter)) {
1487		dev_info(dev, "Initiating reset due to tx timeout\n");
1488		dev_info(dev, "Resetting adapter\n");
1489		status = lancer_physdev_ctrl(adapter,
1490					     PHYSDEV_CONTROL_FW_RESET_MASK);
1491		if (status)
1492			dev_err(dev, "Reset failed .. Reboot server\n");
1493	}
1494}
1495
1496static inline bool be_in_all_promisc(struct be_adapter *adapter)
1497{
1498	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1499			BE_IF_FLAGS_ALL_PROMISCUOUS;
1500}
1501
1502static int be_set_vlan_promisc(struct be_adapter *adapter)
1503{
1504	struct device *dev = &adapter->pdev->dev;
1505	int status;
1506
1507	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1508		return 0;
1509
1510	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1511	if (!status) {
1512		dev_info(dev, "Enabled VLAN promiscuous mode\n");
1513		adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1514	} else {
1515		dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1516	}
1517	return status;
1518}
1519
1520static int be_clear_vlan_promisc(struct be_adapter *adapter)
1521{
1522	struct device *dev = &adapter->pdev->dev;
1523	int status;
1524
1525	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1526	if (!status) {
1527		dev_info(dev, "Disabling VLAN promiscuous mode\n");
1528		adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1529	}
1530	return status;
1531}
1532
1533/*
1534 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1535 * If the user configures more, place BE in vlan promiscuous mode.
1536 */
1537static int be_vid_config(struct be_adapter *adapter)
1538{
1539	struct device *dev = &adapter->pdev->dev;
1540	u16 vids[BE_NUM_VLANS_SUPPORTED];
1541	u16 num = 0, i = 0;
1542	int status = 0;
1543
1544	/* No need to change the VLAN state if the I/F is in promiscuous */
1545	if (adapter->netdev->flags & IFF_PROMISC)
1546		return 0;
1547
1548	if (adapter->vlans_added > be_max_vlans(adapter))
1549		return be_set_vlan_promisc(adapter);
1550
1551	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1552		status = be_clear_vlan_promisc(adapter);
1553		if (status)
1554			return status;
1555	}
1556	/* Construct VLAN Table to give to HW */
1557	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1558		vids[num++] = cpu_to_le16(i);
1559
1560	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1561	if (status) {
1562		dev_err(dev, "Setting HW VLAN filtering failed\n");
1563		/* Set to VLAN promisc mode as setting VLAN filter failed */
1564		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1565		    addl_status(status) ==
1566				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1567			return be_set_vlan_promisc(adapter);
1568	}
1569	return status;
1570}
1571
1572static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1573{
1574	struct be_adapter *adapter = netdev_priv(netdev);
1575	int status = 0;
1576
1577	mutex_lock(&adapter->rx_filter_lock);
1578
1579	/* Packets with VID 0 are always received by Lancer by default */
1580	if (lancer_chip(adapter) && vid == 0)
1581		goto done;
1582
1583	if (test_bit(vid, adapter->vids))
1584		goto done;
1585
1586	set_bit(vid, adapter->vids);
1587	adapter->vlans_added++;
1588
1589	status = be_vid_config(adapter);
1590done:
1591	mutex_unlock(&adapter->rx_filter_lock);
1592	return status;
1593}
1594
1595static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1596{
1597	struct be_adapter *adapter = netdev_priv(netdev);
1598	int status = 0;
1599
1600	mutex_lock(&adapter->rx_filter_lock);
1601
1602	/* Packets with VID 0 are always received by Lancer by default */
1603	if (lancer_chip(adapter) && vid == 0)
1604		goto done;
1605
1606	if (!test_bit(vid, adapter->vids))
1607		goto done;
1608
1609	clear_bit(vid, adapter->vids);
1610	adapter->vlans_added--;
1611
1612	status = be_vid_config(adapter);
1613done:
1614	mutex_unlock(&adapter->rx_filter_lock);
1615	return status;
1616}
1617
1618static void be_set_all_promisc(struct be_adapter *adapter)
1619{
1620	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1621	adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1622}
1623
1624static void be_set_mc_promisc(struct be_adapter *adapter)
1625{
1626	int status;
1627
1628	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1629		return;
1630
1631	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1632	if (!status)
1633		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1634}
1635
1636static void be_set_uc_promisc(struct be_adapter *adapter)
1637{
1638	int status;
1639
1640	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1641		return;
1642
1643	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1644	if (!status)
1645		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1646}
1647
1648static void be_clear_uc_promisc(struct be_adapter *adapter)
1649{
1650	int status;
1651
1652	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1653		return;
1654
1655	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1656	if (!status)
1657		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1658}
1659
1660/* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1661 * We use a single callback function for both sync and unsync. We really don't
1662 * add/remove addresses through this callback. But, we use it to detect changes
1663 * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1664 */
1665static int be_uc_list_update(struct net_device *netdev,
1666			     const unsigned char *addr)
1667{
1668	struct be_adapter *adapter = netdev_priv(netdev);
1669
1670	adapter->update_uc_list = true;
1671	return 0;
1672}
1673
1674static int be_mc_list_update(struct net_device *netdev,
1675			     const unsigned char *addr)
1676{
1677	struct be_adapter *adapter = netdev_priv(netdev);
1678
1679	adapter->update_mc_list = true;
1680	return 0;
1681}
1682
1683static void be_set_mc_list(struct be_adapter *adapter)
1684{
1685	struct net_device *netdev = adapter->netdev;
1686	struct netdev_hw_addr *ha;
1687	bool mc_promisc = false;
1688	int status;
1689
1690	netif_addr_lock_bh(netdev);
1691	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1692
1693	if (netdev->flags & IFF_PROMISC) {
1694		adapter->update_mc_list = false;
1695	} else if (netdev->flags & IFF_ALLMULTI ||
1696		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1697		/* Enable multicast promisc if num configured exceeds
1698		 * what we support
1699		 */
1700		mc_promisc = true;
1701		adapter->update_mc_list = false;
1702	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1703		/* Update mc-list unconditionally if the iface was previously
1704		 * in mc-promisc mode and now is out of that mode.
1705		 */
1706		adapter->update_mc_list = true;
1707	}
1708
1709	if (adapter->update_mc_list) {
1710		int i = 0;
1711
1712		/* cache the mc-list in adapter */
1713		netdev_for_each_mc_addr(ha, netdev) {
1714			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1715			i++;
1716		}
1717		adapter->mc_count = netdev_mc_count(netdev);
1718	}
1719	netif_addr_unlock_bh(netdev);
1720
1721	if (mc_promisc) {
1722		be_set_mc_promisc(adapter);
1723	} else if (adapter->update_mc_list) {
1724		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1725		if (!status)
1726			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1727		else
1728			be_set_mc_promisc(adapter);
1729
1730		adapter->update_mc_list = false;
1731	}
1732}
1733
1734static void be_clear_mc_list(struct be_adapter *adapter)
1735{
1736	struct net_device *netdev = adapter->netdev;
1737
1738	__dev_mc_unsync(netdev, NULL);
1739	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1740	adapter->mc_count = 0;
1741}
1742
1743static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1744{
1745	if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1746		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1747		return 0;
1748	}
1749
1750	return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1751			       adapter->if_handle,
1752			       &adapter->pmac_id[uc_idx + 1], 0);
1753}
1754
1755static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1756{
1757	if (pmac_id == adapter->pmac_id[0])
1758		return;
1759
1760	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1761}
1762
1763static void be_set_uc_list(struct be_adapter *adapter)
1764{
1765	struct net_device *netdev = adapter->netdev;
1766	struct netdev_hw_addr *ha;
1767	bool uc_promisc = false;
1768	int curr_uc_macs = 0, i;
1769
1770	netif_addr_lock_bh(netdev);
1771	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1772
1773	if (netdev->flags & IFF_PROMISC) {
1774		adapter->update_uc_list = false;
1775	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1776		uc_promisc = true;
1777		adapter->update_uc_list = false;
1778	}  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1779		/* Update uc-list unconditionally if the iface was previously
1780		 * in uc-promisc mode and now is out of that mode.
1781		 */
1782		adapter->update_uc_list = true;
1783	}
1784
1785	if (adapter->update_uc_list) {
1786		/* cache the uc-list in adapter array */
1787		i = 0;
1788		netdev_for_each_uc_addr(ha, netdev) {
1789			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1790			i++;
1791		}
1792		curr_uc_macs = netdev_uc_count(netdev);
1793	}
1794	netif_addr_unlock_bh(netdev);
1795
1796	if (uc_promisc) {
1797		be_set_uc_promisc(adapter);
1798	} else if (adapter->update_uc_list) {
1799		be_clear_uc_promisc(adapter);
1800
1801		for (i = 0; i < adapter->uc_macs; i++)
1802			be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1803
1804		for (i = 0; i < curr_uc_macs; i++)
1805			be_uc_mac_add(adapter, i);
1806		adapter->uc_macs = curr_uc_macs;
1807		adapter->update_uc_list = false;
1808	}
1809}
1810
1811static void be_clear_uc_list(struct be_adapter *adapter)
1812{
1813	struct net_device *netdev = adapter->netdev;
1814	int i;
1815
1816	__dev_uc_unsync(netdev, NULL);
1817	for (i = 0; i < adapter->uc_macs; i++)
1818		be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1819
1820	adapter->uc_macs = 0;
1821}
1822
1823static void __be_set_rx_mode(struct be_adapter *adapter)
1824{
1825	struct net_device *netdev = adapter->netdev;
1826
1827	mutex_lock(&adapter->rx_filter_lock);
1828
1829	if (netdev->flags & IFF_PROMISC) {
1830		if (!be_in_all_promisc(adapter))
1831			be_set_all_promisc(adapter);
1832	} else if (be_in_all_promisc(adapter)) {
1833		/* We need to re-program the vlan-list or clear
1834		 * vlan-promisc mode (if needed) when the interface
1835		 * comes out of promisc mode.
1836		 */
1837		be_vid_config(adapter);
1838	}
1839
1840	be_set_uc_list(adapter);
1841	be_set_mc_list(adapter);
1842
1843	mutex_unlock(&adapter->rx_filter_lock);
1844}
1845
1846static void be_work_set_rx_mode(struct work_struct *work)
1847{
1848	struct be_cmd_work *cmd_work =
1849				container_of(work, struct be_cmd_work, work);
1850
1851	__be_set_rx_mode(cmd_work->adapter);
1852	kfree(cmd_work);
1853}
1854
1855static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1856{
1857	struct be_adapter *adapter = netdev_priv(netdev);
1858	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1859	int status;
1860
1861	if (!sriov_enabled(adapter))
1862		return -EPERM;
1863
1864	if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1865		return -EINVAL;
1866
1867	/* Proceed further only if user provided MAC is different
1868	 * from active MAC
1869	 */
1870	if (ether_addr_equal(mac, vf_cfg->mac_addr))
1871		return 0;
1872
1873	if (BEx_chip(adapter)) {
1874		be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1875				vf + 1);
1876
1877		status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1878					 &vf_cfg->pmac_id, vf + 1);
1879	} else {
1880		status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1881					vf + 1);
1882	}
1883
1884	if (status) {
1885		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1886			mac, vf, status);
1887		return be_cmd_status(status);
1888	}
1889
1890	ether_addr_copy(vf_cfg->mac_addr, mac);
1891
1892	return 0;
1893}
1894
1895static int be_get_vf_config(struct net_device *netdev, int vf,
1896			    struct ifla_vf_info *vi)
1897{
1898	struct be_adapter *adapter = netdev_priv(netdev);
1899	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1900
1901	if (!sriov_enabled(adapter))
1902		return -EPERM;
1903
1904	if (vf >= adapter->num_vfs)
1905		return -EINVAL;
1906
1907	vi->vf = vf;
1908	vi->max_tx_rate = vf_cfg->tx_rate;
1909	vi->min_tx_rate = 0;
1910	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1911	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1912	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1913	vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1914	vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1915
1916	return 0;
1917}
1918
1919static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1920{
1921	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1922	u16 vids[BE_NUM_VLANS_SUPPORTED];
1923	int vf_if_id = vf_cfg->if_handle;
1924	int status;
1925
1926	/* Enable Transparent VLAN Tagging */
1927	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1928	if (status)
1929		return status;
1930
1931	/* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
1932	vids[0] = 0;
1933	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1934	if (!status)
1935		dev_info(&adapter->pdev->dev,
1936			 "Cleared guest VLANs on VF%d", vf);
1937
1938	/* After TVT is enabled, disallow VFs to program VLAN filters */
1939	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1940		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1941						  ~BE_PRIV_FILTMGMT, vf + 1);
1942		if (!status)
1943			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1944	}
1945	return 0;
1946}
1947
1948static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1949{
1950	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1951	struct device *dev = &adapter->pdev->dev;
1952	int status;
1953
1954	/* Reset Transparent VLAN Tagging. */
1955	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1956				       vf_cfg->if_handle, 0, 0);
1957	if (status)
1958		return status;
1959
1960	/* Allow VFs to program VLAN filtering */
1961	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1962		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1963						  BE_PRIV_FILTMGMT, vf + 1);
1964		if (!status) {
1965			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1966			dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1967		}
1968	}
1969
1970	dev_info(dev,
1971		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1972	return 0;
1973}
1974
1975static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1976			  __be16 vlan_proto)
1977{
1978	struct be_adapter *adapter = netdev_priv(netdev);
1979	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1980	int status;
1981
1982	if (!sriov_enabled(adapter))
1983		return -EPERM;
1984
1985	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1986		return -EINVAL;
1987
1988	if (vlan_proto != htons(ETH_P_8021Q))
1989		return -EPROTONOSUPPORT;
1990
1991	if (vlan || qos) {
1992		vlan |= qos << VLAN_PRIO_SHIFT;
1993		status = be_set_vf_tvt(adapter, vf, vlan);
1994	} else {
1995		status = be_clear_vf_tvt(adapter, vf);
1996	}
1997
1998	if (status) {
1999		dev_err(&adapter->pdev->dev,
2000			"VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2001			status);
2002		return be_cmd_status(status);
2003	}
2004
2005	vf_cfg->vlan_tag = vlan;
2006	return 0;
2007}
2008
2009static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2010			     int min_tx_rate, int max_tx_rate)
2011{
2012	struct be_adapter *adapter = netdev_priv(netdev);
2013	struct device *dev = &adapter->pdev->dev;
2014	int percent_rate, status = 0;
2015	u16 link_speed = 0;
2016	u8 link_status;
2017
2018	if (!sriov_enabled(adapter))
2019		return -EPERM;
2020
2021	if (vf >= adapter->num_vfs)
2022		return -EINVAL;
2023
2024	if (min_tx_rate)
2025		return -EINVAL;
2026
2027	if (!max_tx_rate)
2028		goto config_qos;
2029
2030	status = be_cmd_link_status_query(adapter, &link_speed,
2031					  &link_status, 0);
2032	if (status)
2033		goto err;
2034
2035	if (!link_status) {
2036		dev_err(dev, "TX-rate setting not allowed when link is down\n");
2037		status = -ENETDOWN;
2038		goto err;
2039	}
2040
2041	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2042		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2043			link_speed);
2044		status = -EINVAL;
2045		goto err;
2046	}
2047
2048	/* On Skyhawk the QOS setting must be done only as a % value */
2049	percent_rate = link_speed / 100;
2050	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2051		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2052			percent_rate);
2053		status = -EINVAL;
2054		goto err;
2055	}
2056
2057config_qos:
2058	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2059	if (status)
2060		goto err;
2061
2062	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2063	return 0;
2064
2065err:
2066	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2067		max_tx_rate, vf);
2068	return be_cmd_status(status);
2069}
2070
2071static int be_set_vf_link_state(struct net_device *netdev, int vf,
2072				int link_state)
2073{
2074	struct be_adapter *adapter = netdev_priv(netdev);
2075	int status;
2076
2077	if (!sriov_enabled(adapter))
2078		return -EPERM;
2079
2080	if (vf >= adapter->num_vfs)
2081		return -EINVAL;
2082
2083	status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2084	if (status) {
2085		dev_err(&adapter->pdev->dev,
2086			"Link state change on VF %d failed: %#x\n", vf, status);
2087		return be_cmd_status(status);
2088	}
2089
2090	adapter->vf_cfg[vf].plink_tracking = link_state;
2091
2092	return 0;
2093}
2094
2095static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2096{
2097	struct be_adapter *adapter = netdev_priv(netdev);
2098	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2099	u8 spoofchk;
2100	int status;
2101
2102	if (!sriov_enabled(adapter))
2103		return -EPERM;
2104
2105	if (vf >= adapter->num_vfs)
2106		return -EINVAL;
2107
2108	if (BEx_chip(adapter))
2109		return -EOPNOTSUPP;
2110
2111	if (enable == vf_cfg->spoofchk)
2112		return 0;
2113
2114	spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2115
2116	status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2117				       0, spoofchk);
2118	if (status) {
2119		dev_err(&adapter->pdev->dev,
2120			"Spoofchk change on VF %d failed: %#x\n", vf, status);
2121		return be_cmd_status(status);
2122	}
2123
2124	vf_cfg->spoofchk = enable;
2125	return 0;
2126}
2127
2128static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2129			  ulong now)
2130{
2131	aic->rx_pkts_prev = rx_pkts;
2132	aic->tx_reqs_prev = tx_pkts;
2133	aic->jiffies = now;
2134}
2135
2136static int be_get_new_eqd(struct be_eq_obj *eqo)
2137{
2138	struct be_adapter *adapter = eqo->adapter;
2139	int eqd, start;
2140	struct be_aic_obj *aic;
2141	struct be_rx_obj *rxo;
2142	struct be_tx_obj *txo;
2143	u64 rx_pkts = 0, tx_pkts = 0;
2144	ulong now;
2145	u32 pps, delta;
2146	int i;
2147
2148	aic = &adapter->aic_obj[eqo->idx];
2149	if (!adapter->aic_enabled) {
2150		if (aic->jiffies)
2151			aic->jiffies = 0;
2152		eqd = aic->et_eqd;
2153		return eqd;
2154	}
2155
2156	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2157		do {
2158			start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2159			rx_pkts += rxo->stats.rx_pkts;
2160		} while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2161	}
2162
2163	for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2164		do {
2165			start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2166			tx_pkts += txo->stats.tx_reqs;
2167		} while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2168	}
2169
2170	/* Skip, if wrapped around or first calculation */
2171	now = jiffies;
2172	if (!aic->jiffies || time_before(now, aic->jiffies) ||
2173	    rx_pkts < aic->rx_pkts_prev ||
2174	    tx_pkts < aic->tx_reqs_prev) {
2175		be_aic_update(aic, rx_pkts, tx_pkts, now);
2176		return aic->prev_eqd;
2177	}
2178
2179	delta = jiffies_to_msecs(now - aic->jiffies);
2180	if (delta == 0)
2181		return aic->prev_eqd;
2182
2183	pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2184		(((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2185	eqd = (pps / 15000) << 2;
2186
2187	if (eqd < 8)
2188		eqd = 0;
2189	eqd = min_t(u32, eqd, aic->max_eqd);
2190	eqd = max_t(u32, eqd, aic->min_eqd);
2191
2192	be_aic_update(aic, rx_pkts, tx_pkts, now);
2193
2194	return eqd;
2195}
2196
2197/* For Skyhawk-R only */
2198static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2199{
2200	struct be_adapter *adapter = eqo->adapter;
2201	struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2202	ulong now = jiffies;
2203	int eqd;
2204	u32 mult_enc;
2205
2206	if (!adapter->aic_enabled)
2207		return 0;
2208
2209	if (jiffies_to_msecs(now - aic->jiffies) < 1)
2210		eqd = aic->prev_eqd;
2211	else
2212		eqd = be_get_new_eqd(eqo);
2213
2214	if (eqd > 100)
2215		mult_enc = R2I_DLY_ENC_1;
2216	else if (eqd > 60)
2217		mult_enc = R2I_DLY_ENC_2;
2218	else if (eqd > 20)
2219		mult_enc = R2I_DLY_ENC_3;
2220	else
2221		mult_enc = R2I_DLY_ENC_0;
2222
2223	aic->prev_eqd = eqd;
2224
2225	return mult_enc;
2226}
2227
2228void be_eqd_update(struct be_adapter *adapter, bool force_update)
2229{
2230	struct be_set_eqd set_eqd[MAX_EVT_QS];
2231	struct be_aic_obj *aic;
2232	struct be_eq_obj *eqo;
2233	int i, num = 0, eqd;
2234
2235	for_all_evt_queues(adapter, eqo, i) {
2236		aic = &adapter->aic_obj[eqo->idx];
2237		eqd = be_get_new_eqd(eqo);
2238		if (force_update || eqd != aic->prev_eqd) {
2239			set_eqd[num].delay_multiplier = (eqd * 65)/100;
2240			set_eqd[num].eq_id = eqo->q.id;
2241			aic->prev_eqd = eqd;
2242			num++;
2243		}
2244	}
2245
2246	if (num)
2247		be_cmd_modify_eqd(adapter, set_eqd, num);
2248}
2249
2250static void be_rx_stats_update(struct be_rx_obj *rxo,
2251			       struct be_rx_compl_info *rxcp)
2252{
2253	struct be_rx_stats *stats = rx_stats(rxo);
2254
2255	u64_stats_update_begin(&stats->sync);
2256	stats->rx_compl++;
2257	stats->rx_bytes += rxcp->pkt_size;
2258	stats->rx_pkts++;
2259	if (rxcp->tunneled)
2260		stats->rx_vxlan_offload_pkts++;
2261	if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2262		stats->rx_mcast_pkts++;
2263	if (rxcp->err)
2264		stats->rx_compl_err++;
2265	u64_stats_update_end(&stats->sync);
2266}
2267
2268static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2269{
2270	/* L4 checksum is not reliable for non TCP/UDP packets.
2271	 * Also ignore ipcksm for ipv6 pkts
2272	 */
2273	return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2274		(rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2275}
2276
2277static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2278{
2279	struct be_adapter *adapter = rxo->adapter;
2280	struct be_rx_page_info *rx_page_info;
2281	struct be_queue_info *rxq = &rxo->q;
2282	u32 frag_idx = rxq->tail;
2283
2284	rx_page_info = &rxo->page_info_tbl[frag_idx];
2285	BUG_ON(!rx_page_info->page);
2286
2287	if (rx_page_info->last_frag) {
2288		dma_unmap_page(&adapter->pdev->dev,
2289			       dma_unmap_addr(rx_page_info, bus),
2290			       adapter->big_page_size, DMA_FROM_DEVICE);
2291		rx_page_info->last_frag = false;
2292	} else {
2293		dma_sync_single_for_cpu(&adapter->pdev->dev,
2294					dma_unmap_addr(rx_page_info, bus),
2295					rx_frag_size, DMA_FROM_DEVICE);
2296	}
2297
2298	queue_tail_inc(rxq);
2299	atomic_dec(&rxq->used);
2300	return rx_page_info;
2301}
2302
2303/* Throwaway the data in the Rx completion */
2304static void be_rx_compl_discard(struct be_rx_obj *rxo,
2305				struct be_rx_compl_info *rxcp)
2306{
2307	struct be_rx_page_info *page_info;
2308	u16 i, num_rcvd = rxcp->num_rcvd;
2309
2310	for (i = 0; i < num_rcvd; i++) {
2311		page_info = get_rx_page_info(rxo);
2312		put_page(page_info->page);
2313		memset(page_info, 0, sizeof(*page_info));
2314	}
2315}
2316
2317/*
2318 * skb_fill_rx_data forms a complete skb for an ether frame
2319 * indicated by rxcp.
2320 */
2321static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2322			     struct be_rx_compl_info *rxcp)
2323{
2324	struct be_rx_page_info *page_info;
2325	u16 i, j;
2326	u16 hdr_len, curr_frag_len, remaining;
2327	u8 *start;
2328
2329	page_info = get_rx_page_info(rxo);
2330	start = page_address(page_info->page) + page_info->page_offset;
2331	prefetch(start);
2332
2333	/* Copy data in the first descriptor of this completion */
2334	curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2335
2336	skb->len = curr_frag_len;
2337	if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2338		memcpy(skb->data, start, curr_frag_len);
2339		/* Complete packet has now been moved to data */
2340		put_page(page_info->page);
2341		skb->data_len = 0;
2342		skb->tail += curr_frag_len;
2343	} else {
2344		hdr_len = ETH_HLEN;
2345		memcpy(skb->data, start, hdr_len);
2346		skb_shinfo(skb)->nr_frags = 1;
2347		skb_frag_set_page(skb, 0, page_info->page);
2348		skb_frag_off_set(&skb_shinfo(skb)->frags[0],
2349				 page_info->page_offset + hdr_len);
2350		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2351				  curr_frag_len - hdr_len);
2352		skb->data_len = curr_frag_len - hdr_len;
2353		skb->truesize += rx_frag_size;
2354		skb->tail += hdr_len;
2355	}
2356	page_info->page = NULL;
2357
2358	if (rxcp->pkt_size <= rx_frag_size) {
2359		BUG_ON(rxcp->num_rcvd != 1);
2360		return;
2361	}
2362
2363	/* More frags present for this completion */
2364	remaining = rxcp->pkt_size - curr_frag_len;
2365	for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2366		page_info = get_rx_page_info(rxo);
2367		curr_frag_len = min(remaining, rx_frag_size);
2368
2369		/* Coalesce all frags from the same physical page in one slot */
2370		if (page_info->page_offset == 0) {
2371			/* Fresh page */
2372			j++;
2373			skb_frag_set_page(skb, j, page_info->page);
2374			skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2375					 page_info->page_offset);
2376			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2377			skb_shinfo(skb)->nr_frags++;
2378		} else {
2379			put_page(page_info->page);
 
 
2380		}
2381
2382		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2383		skb->len += curr_frag_len;
2384		skb->data_len += curr_frag_len;
2385		skb->truesize += rx_frag_size;
2386		remaining -= curr_frag_len;
2387		page_info->page = NULL;
2388	}
2389	BUG_ON(j > MAX_SKB_FRAGS);
2390}
2391
2392/* Process the RX completion indicated by rxcp when GRO is disabled */
2393static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2394				struct be_rx_compl_info *rxcp)
2395{
2396	struct be_adapter *adapter = rxo->adapter;
2397	struct net_device *netdev = adapter->netdev;
2398	struct sk_buff *skb;
2399
2400	skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2401	if (unlikely(!skb)) {
2402		rx_stats(rxo)->rx_drops_no_skbs++;
2403		be_rx_compl_discard(rxo, rxcp);
2404		return;
2405	}
2406
2407	skb_fill_rx_data(rxo, skb, rxcp);
2408
2409	if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2410		skb->ip_summed = CHECKSUM_UNNECESSARY;
2411	else
2412		skb_checksum_none_assert(skb);
2413
2414	skb->protocol = eth_type_trans(skb, netdev);
2415	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2416	if (netdev->features & NETIF_F_RXHASH)
2417		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2418
2419	skb->csum_level = rxcp->tunneled;
2420	skb_mark_napi_id(skb, napi);
2421
2422	if (rxcp->vlanf)
2423		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2424
2425	netif_receive_skb(skb);
2426}
2427
2428/* Process the RX completion indicated by rxcp when GRO is enabled */
2429static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2430				    struct napi_struct *napi,
2431				    struct be_rx_compl_info *rxcp)
2432{
2433	struct be_adapter *adapter = rxo->adapter;
2434	struct be_rx_page_info *page_info;
2435	struct sk_buff *skb = NULL;
2436	u16 remaining, curr_frag_len;
2437	u16 i, j;
2438
2439	skb = napi_get_frags(napi);
2440	if (!skb) {
2441		be_rx_compl_discard(rxo, rxcp);
2442		return;
2443	}
2444
2445	remaining = rxcp->pkt_size;
2446	for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2447		page_info = get_rx_page_info(rxo);
2448
2449		curr_frag_len = min(remaining, rx_frag_size);
2450
2451		/* Coalesce all frags from the same physical page in one slot */
2452		if (i == 0 || page_info->page_offset == 0) {
2453			/* First frag or Fresh page */
2454			j++;
2455			skb_frag_set_page(skb, j, page_info->page);
2456			skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2457					 page_info->page_offset);
2458			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2459		} else {
2460			put_page(page_info->page);
 
 
2461		}
2462		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2463		skb->truesize += rx_frag_size;
2464		remaining -= curr_frag_len;
2465		memset(page_info, 0, sizeof(*page_info));
2466	}
2467	BUG_ON(j > MAX_SKB_FRAGS);
2468
2469	skb_shinfo(skb)->nr_frags = j + 1;
2470	skb->len = rxcp->pkt_size;
2471	skb->data_len = rxcp->pkt_size;
2472	skb->ip_summed = CHECKSUM_UNNECESSARY;
2473	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2474	if (adapter->netdev->features & NETIF_F_RXHASH)
2475		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2476
2477	skb->csum_level = rxcp->tunneled;
2478
2479	if (rxcp->vlanf)
2480		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2481
2482	napi_gro_frags(napi);
2483}
2484
2485static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2486				 struct be_rx_compl_info *rxcp)
2487{
2488	rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2489	rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2490	rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2491	rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2492	rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2493	rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2494	rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2495	rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2496	rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2497	rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2498	rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2499	if (rxcp->vlanf) {
2500		rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2501		rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2502	}
2503	rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2504	rxcp->tunneled =
2505		GET_RX_COMPL_V1_BITS(tunneled, compl);
2506}
2507
2508static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2509				 struct be_rx_compl_info *rxcp)
2510{
2511	rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2512	rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2513	rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2514	rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2515	rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2516	rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2517	rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2518	rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2519	rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2520	rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2521	rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2522	if (rxcp->vlanf) {
2523		rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2524		rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2525	}
2526	rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2527	rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2528}
2529
2530static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2531{
2532	struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2533	struct be_rx_compl_info *rxcp = &rxo->rxcp;
2534	struct be_adapter *adapter = rxo->adapter;
2535
2536	/* For checking the valid bit it is Ok to use either definition as the
2537	 * valid bit is at the same position in both v0 and v1 Rx compl */
2538	if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2539		return NULL;
2540
2541	rmb();
2542	be_dws_le_to_cpu(compl, sizeof(*compl));
2543
2544	if (adapter->be3_native)
2545		be_parse_rx_compl_v1(compl, rxcp);
2546	else
2547		be_parse_rx_compl_v0(compl, rxcp);
2548
2549	if (rxcp->ip_frag)
2550		rxcp->l4_csum = 0;
2551
2552	if (rxcp->vlanf) {
2553		/* In QNQ modes, if qnq bit is not set, then the packet was
2554		 * tagged only with the transparent outer vlan-tag and must
2555		 * not be treated as a vlan packet by host
2556		 */
2557		if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2558			rxcp->vlanf = 0;
2559
2560		if (!lancer_chip(adapter))
2561			rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2562
2563		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2564		    !test_bit(rxcp->vlan_tag, adapter->vids))
2565			rxcp->vlanf = 0;
2566	}
2567
2568	/* As the compl has been parsed, reset it; we wont touch it again */
2569	compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2570
2571	queue_tail_inc(&rxo->cq);
2572	return rxcp;
2573}
2574
2575static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2576{
2577	u32 order = get_order(size);
2578
2579	if (order > 0)
2580		gfp |= __GFP_COMP;
2581	return  alloc_pages(gfp, order);
2582}
2583
2584/*
2585 * Allocate a page, split it to fragments of size rx_frag_size and post as
2586 * receive buffers to BE
2587 */
2588static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2589{
2590	struct be_adapter *adapter = rxo->adapter;
2591	struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2592	struct be_queue_info *rxq = &rxo->q;
2593	struct page *pagep = NULL;
2594	struct device *dev = &adapter->pdev->dev;
2595	struct be_eth_rx_d *rxd;
2596	u64 page_dmaaddr = 0, frag_dmaaddr;
2597	u32 posted, page_offset = 0, notify = 0;
2598
2599	page_info = &rxo->page_info_tbl[rxq->head];
2600	for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2601		if (!pagep) {
2602			pagep = be_alloc_pages(adapter->big_page_size, gfp);
2603			if (unlikely(!pagep)) {
2604				rx_stats(rxo)->rx_post_fail++;
2605				break;
2606			}
2607			page_dmaaddr = dma_map_page(dev, pagep, 0,
2608						    adapter->big_page_size,
2609						    DMA_FROM_DEVICE);
2610			if (dma_mapping_error(dev, page_dmaaddr)) {
2611				put_page(pagep);
2612				pagep = NULL;
2613				adapter->drv_stats.dma_map_errors++;
2614				break;
2615			}
2616			page_offset = 0;
2617		} else {
2618			get_page(pagep);
2619			page_offset += rx_frag_size;
2620		}
2621		page_info->page_offset = page_offset;
2622		page_info->page = pagep;
2623
2624		rxd = queue_head_node(rxq);
2625		frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2626		rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2627		rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2628
2629		/* Any space left in the current big page for another frag? */
2630		if ((page_offset + rx_frag_size + rx_frag_size) >
2631					adapter->big_page_size) {
2632			pagep = NULL;
2633			page_info->last_frag = true;
2634			dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2635		} else {
2636			dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2637		}
2638
2639		prev_page_info = page_info;
2640		queue_head_inc(rxq);
2641		page_info = &rxo->page_info_tbl[rxq->head];
2642	}
2643
2644	/* Mark the last frag of a page when we break out of the above loop
2645	 * with no more slots available in the RXQ
2646	 */
2647	if (pagep) {
2648		prev_page_info->last_frag = true;
2649		dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2650	}
2651
2652	if (posted) {
2653		atomic_add(posted, &rxq->used);
2654		if (rxo->rx_post_starved)
2655			rxo->rx_post_starved = false;
2656		do {
2657			notify = min(MAX_NUM_POST_ERX_DB, posted);
2658			be_rxq_notify(adapter, rxq->id, notify);
2659			posted -= notify;
2660		} while (posted);
2661	} else if (atomic_read(&rxq->used) == 0) {
2662		/* Let be_worker replenish when memory is available */
2663		rxo->rx_post_starved = true;
2664	}
2665}
2666
2667static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2668{
2669	switch (status) {
2670	case BE_TX_COMP_HDR_PARSE_ERR:
2671		tx_stats(txo)->tx_hdr_parse_err++;
2672		break;
2673	case BE_TX_COMP_NDMA_ERR:
2674		tx_stats(txo)->tx_dma_err++;
2675		break;
2676	case BE_TX_COMP_ACL_ERR:
2677		tx_stats(txo)->tx_spoof_check_err++;
2678		break;
2679	}
2680}
2681
2682static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2683{
2684	switch (status) {
2685	case LANCER_TX_COMP_LSO_ERR:
2686		tx_stats(txo)->tx_tso_err++;
2687		break;
2688	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2689	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2690		tx_stats(txo)->tx_spoof_check_err++;
2691		break;
2692	case LANCER_TX_COMP_QINQ_ERR:
2693		tx_stats(txo)->tx_qinq_err++;
2694		break;
2695	case LANCER_TX_COMP_PARITY_ERR:
2696		tx_stats(txo)->tx_internal_parity_err++;
2697		break;
2698	case LANCER_TX_COMP_DMA_ERR:
2699		tx_stats(txo)->tx_dma_err++;
2700		break;
2701	case LANCER_TX_COMP_SGE_ERR:
2702		tx_stats(txo)->tx_sge_err++;
2703		break;
2704	}
2705}
2706
2707static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2708						struct be_tx_obj *txo)
2709{
2710	struct be_queue_info *tx_cq = &txo->cq;
2711	struct be_tx_compl_info *txcp = &txo->txcp;
2712	struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2713
2714	if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2715		return NULL;
2716
2717	/* Ensure load ordering of valid bit dword and other dwords below */
2718	rmb();
2719	be_dws_le_to_cpu(compl, sizeof(*compl));
2720
2721	txcp->status = GET_TX_COMPL_BITS(status, compl);
2722	txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2723
2724	if (txcp->status) {
2725		if (lancer_chip(adapter)) {
2726			lancer_update_tx_err(txo, txcp->status);
2727			/* Reset the adapter incase of TSO,
2728			 * SGE or Parity error
2729			 */
2730			if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2731			    txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2732			    txcp->status == LANCER_TX_COMP_SGE_ERR)
2733				be_set_error(adapter, BE_ERROR_TX);
2734		} else {
2735			be_update_tx_err(txo, txcp->status);
2736		}
2737	}
2738
2739	if (be_check_error(adapter, BE_ERROR_TX))
2740		return NULL;
2741
2742	compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2743	queue_tail_inc(tx_cq);
2744	return txcp;
2745}
2746
2747static u16 be_tx_compl_process(struct be_adapter *adapter,
2748			       struct be_tx_obj *txo, u16 last_index)
2749{
2750	struct sk_buff **sent_skbs = txo->sent_skb_list;
2751	struct be_queue_info *txq = &txo->q;
2752	struct sk_buff *skb = NULL;
2753	bool unmap_skb_hdr = false;
2754	struct be_eth_wrb *wrb;
2755	u16 num_wrbs = 0;
2756	u32 frag_index;
2757
2758	do {
2759		if (sent_skbs[txq->tail]) {
2760			/* Free skb from prev req */
2761			if (skb)
2762				dev_consume_skb_any(skb);
2763			skb = sent_skbs[txq->tail];
2764			sent_skbs[txq->tail] = NULL;
2765			queue_tail_inc(txq);  /* skip hdr wrb */
2766			num_wrbs++;
2767			unmap_skb_hdr = true;
2768		}
2769		wrb = queue_tail_node(txq);
2770		frag_index = txq->tail;
2771		unmap_tx_frag(&adapter->pdev->dev, wrb,
2772			      (unmap_skb_hdr && skb_headlen(skb)));
2773		unmap_skb_hdr = false;
2774		queue_tail_inc(txq);
2775		num_wrbs++;
2776	} while (frag_index != last_index);
2777	dev_consume_skb_any(skb);
2778
2779	return num_wrbs;
2780}
2781
2782/* Return the number of events in the event queue */
2783static inline int events_get(struct be_eq_obj *eqo)
2784{
2785	struct be_eq_entry *eqe;
2786	int num = 0;
2787
2788	do {
2789		eqe = queue_tail_node(&eqo->q);
2790		if (eqe->evt == 0)
2791			break;
2792
2793		rmb();
2794		eqe->evt = 0;
2795		num++;
2796		queue_tail_inc(&eqo->q);
2797	} while (true);
2798
2799	return num;
2800}
2801
2802/* Leaves the EQ is disarmed state */
2803static void be_eq_clean(struct be_eq_obj *eqo)
2804{
2805	int num = events_get(eqo);
2806
2807	be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2808}
2809
2810/* Free posted rx buffers that were not used */
2811static void be_rxq_clean(struct be_rx_obj *rxo)
2812{
2813	struct be_queue_info *rxq = &rxo->q;
2814	struct be_rx_page_info *page_info;
2815
2816	while (atomic_read(&rxq->used) > 0) {
2817		page_info = get_rx_page_info(rxo);
2818		put_page(page_info->page);
2819		memset(page_info, 0, sizeof(*page_info));
2820	}
2821	BUG_ON(atomic_read(&rxq->used));
2822	rxq->tail = 0;
2823	rxq->head = 0;
2824}
2825
2826static void be_rx_cq_clean(struct be_rx_obj *rxo)
2827{
2828	struct be_queue_info *rx_cq = &rxo->cq;
2829	struct be_rx_compl_info *rxcp;
2830	struct be_adapter *adapter = rxo->adapter;
2831	int flush_wait = 0;
2832
2833	/* Consume pending rx completions.
2834	 * Wait for the flush completion (identified by zero num_rcvd)
2835	 * to arrive. Notify CQ even when there are no more CQ entries
2836	 * for HW to flush partially coalesced CQ entries.
2837	 * In Lancer, there is no need to wait for flush compl.
2838	 */
2839	for (;;) {
2840		rxcp = be_rx_compl_get(rxo);
2841		if (!rxcp) {
2842			if (lancer_chip(adapter))
2843				break;
2844
2845			if (flush_wait++ > 50 ||
2846			    be_check_error(adapter,
2847					   BE_ERROR_HW)) {
2848				dev_warn(&adapter->pdev->dev,
2849					 "did not receive flush compl\n");
2850				break;
2851			}
2852			be_cq_notify(adapter, rx_cq->id, true, 0);
2853			mdelay(1);
2854		} else {
2855			be_rx_compl_discard(rxo, rxcp);
2856			be_cq_notify(adapter, rx_cq->id, false, 1);
2857			if (rxcp->num_rcvd == 0)
2858				break;
2859		}
2860	}
2861
2862	/* After cleanup, leave the CQ in unarmed state */
2863	be_cq_notify(adapter, rx_cq->id, false, 0);
2864}
2865
2866static void be_tx_compl_clean(struct be_adapter *adapter)
2867{
2868	struct device *dev = &adapter->pdev->dev;
2869	u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2870	struct be_tx_compl_info *txcp;
2871	struct be_queue_info *txq;
2872	u32 end_idx, notified_idx;
2873	struct be_tx_obj *txo;
2874	int i, pending_txqs;
2875
2876	/* Stop polling for compls when HW has been silent for 10ms */
2877	do {
2878		pending_txqs = adapter->num_tx_qs;
2879
2880		for_all_tx_queues(adapter, txo, i) {
2881			cmpl = 0;
2882			num_wrbs = 0;
2883			txq = &txo->q;
2884			while ((txcp = be_tx_compl_get(adapter, txo))) {
2885				num_wrbs +=
2886					be_tx_compl_process(adapter, txo,
2887							    txcp->end_index);
2888				cmpl++;
2889			}
2890			if (cmpl) {
2891				be_cq_notify(adapter, txo->cq.id, false, cmpl);
2892				atomic_sub(num_wrbs, &txq->used);
2893				timeo = 0;
2894			}
2895			if (!be_is_tx_compl_pending(txo))
2896				pending_txqs--;
2897		}
2898
2899		if (pending_txqs == 0 || ++timeo > 10 ||
2900		    be_check_error(adapter, BE_ERROR_HW))
2901			break;
2902
2903		mdelay(1);
2904	} while (true);
2905
2906	/* Free enqueued TX that was never notified to HW */
2907	for_all_tx_queues(adapter, txo, i) {
2908		txq = &txo->q;
2909
2910		if (atomic_read(&txq->used)) {
2911			dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2912				 i, atomic_read(&txq->used));
2913			notified_idx = txq->tail;
2914			end_idx = txq->tail;
2915			index_adv(&end_idx, atomic_read(&txq->used) - 1,
2916				  txq->len);
2917			/* Use the tx-compl process logic to handle requests
2918			 * that were not sent to the HW.
2919			 */
2920			num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2921			atomic_sub(num_wrbs, &txq->used);
2922			BUG_ON(atomic_read(&txq->used));
2923			txo->pend_wrb_cnt = 0;
2924			/* Since hw was never notified of these requests,
2925			 * reset TXQ indices
2926			 */
2927			txq->head = notified_idx;
2928			txq->tail = notified_idx;
2929		}
2930	}
2931}
2932
2933static void be_evt_queues_destroy(struct be_adapter *adapter)
2934{
2935	struct be_eq_obj *eqo;
2936	int i;
2937
2938	for_all_evt_queues(adapter, eqo, i) {
2939		if (eqo->q.created) {
2940			be_eq_clean(eqo);
2941			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2942			netif_napi_del(&eqo->napi);
2943			free_cpumask_var(eqo->affinity_mask);
2944		}
2945		be_queue_free(adapter, &eqo->q);
2946	}
2947}
2948
2949static int be_evt_queues_create(struct be_adapter *adapter)
2950{
2951	struct be_queue_info *eq;
2952	struct be_eq_obj *eqo;
2953	struct be_aic_obj *aic;
2954	int i, rc;
2955
2956	/* need enough EQs to service both RX and TX queues */
2957	adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2958				    max(adapter->cfg_num_rx_irqs,
2959					adapter->cfg_num_tx_irqs));
2960
2961	adapter->aic_enabled = true;
2962
2963	for_all_evt_queues(adapter, eqo, i) {
2964		int numa_node = dev_to_node(&adapter->pdev->dev);
2965
2966		aic = &adapter->aic_obj[i];
2967		eqo->adapter = adapter;
2968		eqo->idx = i;
2969		aic->max_eqd = BE_MAX_EQD;
2970
2971		eq = &eqo->q;
2972		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2973				    sizeof(struct be_eq_entry));
2974		if (rc)
2975			return rc;
2976
2977		rc = be_cmd_eq_create(adapter, eqo);
2978		if (rc)
2979			return rc;
2980
2981		if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2982			return -ENOMEM;
2983		cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2984				eqo->affinity_mask);
2985		netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2986			       BE_NAPI_WEIGHT);
2987	}
2988	return 0;
2989}
2990
2991static void be_mcc_queues_destroy(struct be_adapter *adapter)
2992{
2993	struct be_queue_info *q;
2994
2995	q = &adapter->mcc_obj.q;
2996	if (q->created)
2997		be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2998	be_queue_free(adapter, q);
2999
3000	q = &adapter->mcc_obj.cq;
3001	if (q->created)
3002		be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3003	be_queue_free(adapter, q);
3004}
3005
3006/* Must be called only after TX qs are created as MCC shares TX EQ */
3007static int be_mcc_queues_create(struct be_adapter *adapter)
3008{
3009	struct be_queue_info *q, *cq;
3010
3011	cq = &adapter->mcc_obj.cq;
3012	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3013			   sizeof(struct be_mcc_compl)))
3014		goto err;
3015
3016	/* Use the default EQ for MCC completions */
3017	if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3018		goto mcc_cq_free;
3019
3020	q = &adapter->mcc_obj.q;
3021	if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3022		goto mcc_cq_destroy;
3023
3024	if (be_cmd_mccq_create(adapter, q, cq))
3025		goto mcc_q_free;
3026
3027	return 0;
3028
3029mcc_q_free:
3030	be_queue_free(adapter, q);
3031mcc_cq_destroy:
3032	be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3033mcc_cq_free:
3034	be_queue_free(adapter, cq);
3035err:
3036	return -1;
3037}
3038
3039static void be_tx_queues_destroy(struct be_adapter *adapter)
3040{
3041	struct be_queue_info *q;
3042	struct be_tx_obj *txo;
3043	u8 i;
3044
3045	for_all_tx_queues(adapter, txo, i) {
3046		q = &txo->q;
3047		if (q->created)
3048			be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3049		be_queue_free(adapter, q);
3050
3051		q = &txo->cq;
3052		if (q->created)
3053			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3054		be_queue_free(adapter, q);
3055	}
3056}
3057
3058static int be_tx_qs_create(struct be_adapter *adapter)
3059{
3060	struct be_queue_info *cq;
3061	struct be_tx_obj *txo;
3062	struct be_eq_obj *eqo;
3063	int status, i;
3064
3065	adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3066
3067	for_all_tx_queues(adapter, txo, i) {
3068		cq = &txo->cq;
3069		status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3070					sizeof(struct be_eth_tx_compl));
3071		if (status)
3072			return status;
3073
3074		u64_stats_init(&txo->stats.sync);
3075		u64_stats_init(&txo->stats.sync_compl);
3076
3077		/* If num_evt_qs is less than num_tx_qs, then more than
3078		 * one txq share an eq
3079		 */
3080		eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3081		status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3082		if (status)
3083			return status;
3084
3085		status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3086					sizeof(struct be_eth_wrb));
3087		if (status)
3088			return status;
3089
3090		status = be_cmd_txq_create(adapter, txo);
3091		if (status)
3092			return status;
3093
3094		netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3095				    eqo->idx);
3096	}
3097
3098	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3099		 adapter->num_tx_qs);
3100	return 0;
3101}
3102
3103static void be_rx_cqs_destroy(struct be_adapter *adapter)
3104{
3105	struct be_queue_info *q;
3106	struct be_rx_obj *rxo;
3107	int i;
3108
3109	for_all_rx_queues(adapter, rxo, i) {
3110		q = &rxo->cq;
3111		if (q->created)
3112			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3113		be_queue_free(adapter, q);
3114	}
3115}
3116
3117static int be_rx_cqs_create(struct be_adapter *adapter)
3118{
3119	struct be_queue_info *eq, *cq;
3120	struct be_rx_obj *rxo;
3121	int rc, i;
3122
3123	adapter->num_rss_qs =
3124			min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3125
3126	/* We'll use RSS only if atleast 2 RSS rings are supported. */
3127	if (adapter->num_rss_qs < 2)
3128		adapter->num_rss_qs = 0;
3129
3130	adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3131
3132	/* When the interface is not capable of RSS rings (and there is no
3133	 * need to create a default RXQ) we'll still need one RXQ
3134	 */
3135	if (adapter->num_rx_qs == 0)
3136		adapter->num_rx_qs = 1;
3137
3138	adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3139	for_all_rx_queues(adapter, rxo, i) {
3140		rxo->adapter = adapter;
3141		cq = &rxo->cq;
3142		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3143				    sizeof(struct be_eth_rx_compl));
3144		if (rc)
3145			return rc;
3146
3147		u64_stats_init(&rxo->stats.sync);
3148		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3149		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3150		if (rc)
3151			return rc;
3152	}
3153
3154	dev_info(&adapter->pdev->dev,
3155		 "created %d RX queue(s)\n", adapter->num_rx_qs);
3156	return 0;
3157}
3158
3159static irqreturn_t be_intx(int irq, void *dev)
3160{
3161	struct be_eq_obj *eqo = dev;
3162	struct be_adapter *adapter = eqo->adapter;
3163	int num_evts = 0;
3164
3165	/* IRQ is not expected when NAPI is scheduled as the EQ
3166	 * will not be armed.
3167	 * But, this can happen on Lancer INTx where it takes
3168	 * a while to de-assert INTx or in BE2 where occasionaly
3169	 * an interrupt may be raised even when EQ is unarmed.
3170	 * If NAPI is already scheduled, then counting & notifying
3171	 * events will orphan them.
3172	 */
3173	if (napi_schedule_prep(&eqo->napi)) {
3174		num_evts = events_get(eqo);
3175		__napi_schedule(&eqo->napi);
3176		if (num_evts)
3177			eqo->spurious_intr = 0;
3178	}
3179	be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3180
3181	/* Return IRQ_HANDLED only for the the first spurious intr
3182	 * after a valid intr to stop the kernel from branding
3183	 * this irq as a bad one!
3184	 */
3185	if (num_evts || eqo->spurious_intr++ == 0)
3186		return IRQ_HANDLED;
3187	else
3188		return IRQ_NONE;
3189}
3190
3191static irqreturn_t be_msix(int irq, void *dev)
3192{
3193	struct be_eq_obj *eqo = dev;
3194
3195	be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3196	napi_schedule(&eqo->napi);
3197	return IRQ_HANDLED;
3198}
3199
3200static inline bool do_gro(struct be_rx_compl_info *rxcp)
3201{
3202	return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3203}
3204
3205static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3206			 int budget)
3207{
3208	struct be_adapter *adapter = rxo->adapter;
3209	struct be_queue_info *rx_cq = &rxo->cq;
3210	struct be_rx_compl_info *rxcp;
3211	u32 work_done;
3212	u32 frags_consumed = 0;
3213
3214	for (work_done = 0; work_done < budget; work_done++) {
3215		rxcp = be_rx_compl_get(rxo);
3216		if (!rxcp)
3217			break;
3218
3219		/* Is it a flush compl that has no data */
3220		if (unlikely(rxcp->num_rcvd == 0))
3221			goto loop_continue;
3222
3223		/* Discard compl with partial DMA Lancer B0 */
3224		if (unlikely(!rxcp->pkt_size)) {
3225			be_rx_compl_discard(rxo, rxcp);
3226			goto loop_continue;
3227		}
3228
3229		/* On BE drop pkts that arrive due to imperfect filtering in
3230		 * promiscuous mode on some skews
3231		 */
3232		if (unlikely(rxcp->port != adapter->port_num &&
3233			     !lancer_chip(adapter))) {
3234			be_rx_compl_discard(rxo, rxcp);
3235			goto loop_continue;
3236		}
3237
3238		if (do_gro(rxcp))
3239			be_rx_compl_process_gro(rxo, napi, rxcp);
3240		else
3241			be_rx_compl_process(rxo, napi, rxcp);
3242
3243loop_continue:
3244		frags_consumed += rxcp->num_rcvd;
3245		be_rx_stats_update(rxo, rxcp);
3246	}
3247
3248	if (work_done) {
3249		be_cq_notify(adapter, rx_cq->id, true, work_done);
3250
3251		/* When an rx-obj gets into post_starved state, just
3252		 * let be_worker do the posting.
3253		 */
3254		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3255		    !rxo->rx_post_starved)
3256			be_post_rx_frags(rxo, GFP_ATOMIC,
3257					 max_t(u32, MAX_RX_POST,
3258					       frags_consumed));
3259	}
3260
3261	return work_done;
3262}
3263
3264
3265static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3266			  int idx)
3267{
3268	int num_wrbs = 0, work_done = 0;
3269	struct be_tx_compl_info *txcp;
3270
3271	while ((txcp = be_tx_compl_get(adapter, txo))) {
3272		num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3273		work_done++;
3274	}
3275
3276	if (work_done) {
3277		be_cq_notify(adapter, txo->cq.id, true, work_done);
3278		atomic_sub(num_wrbs, &txo->q.used);
3279
3280		/* As Tx wrbs have been freed up, wake up netdev queue
3281		 * if it was stopped due to lack of tx wrbs.  */
3282		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3283		    be_can_txq_wake(txo)) {
3284			netif_wake_subqueue(adapter->netdev, idx);
3285		}
3286
3287		u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3288		tx_stats(txo)->tx_compl += work_done;
3289		u64_stats_update_end(&tx_stats(txo)->sync_compl);
3290	}
3291}
3292
3293int be_poll(struct napi_struct *napi, int budget)
3294{
3295	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3296	struct be_adapter *adapter = eqo->adapter;
3297	int max_work = 0, work, i, num_evts;
3298	struct be_rx_obj *rxo;
3299	struct be_tx_obj *txo;
3300	u32 mult_enc = 0;
3301
3302	num_evts = events_get(eqo);
3303
3304	for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3305		be_process_tx(adapter, txo, i);
3306
3307	/* This loop will iterate twice for EQ0 in which
3308	 * completions of the last RXQ (default one) are also processed
3309	 * For other EQs the loop iterates only once
3310	 */
3311	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3312		work = be_process_rx(rxo, napi, budget);
3313		max_work = max(work, max_work);
3314	}
3315
3316	if (is_mcc_eqo(eqo))
3317		be_process_mcc(adapter);
3318
3319	if (max_work < budget) {
3320		napi_complete_done(napi, max_work);
3321
3322		/* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3323		 * delay via a delay multiplier encoding value
3324		 */
3325		if (skyhawk_chip(adapter))
3326			mult_enc = be_get_eq_delay_mult_enc(eqo);
3327
3328		be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3329			     mult_enc);
3330	} else {
3331		/* As we'll continue in polling mode, count and clear events */
3332		be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3333	}
3334	return max_work;
3335}
3336
3337void be_detect_error(struct be_adapter *adapter)
3338{
3339	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3340	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3341	struct device *dev = &adapter->pdev->dev;
3342	u16 val;
3343	u32 i;
3344
3345	if (be_check_error(adapter, BE_ERROR_HW))
3346		return;
3347
3348	if (lancer_chip(adapter)) {
3349		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3350		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3351			be_set_error(adapter, BE_ERROR_UE);
3352			sliport_err1 = ioread32(adapter->db +
3353						SLIPORT_ERROR1_OFFSET);
3354			sliport_err2 = ioread32(adapter->db +
3355						SLIPORT_ERROR2_OFFSET);
3356			/* Do not log error messages if its a FW reset */
3357			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3358			    sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3359				dev_info(dev, "Reset is in progress\n");
3360			} else {
3361				dev_err(dev, "Error detected in the card\n");
3362				dev_err(dev, "ERR: sliport status 0x%x\n",
3363					sliport_status);
3364				dev_err(dev, "ERR: sliport error1 0x%x\n",
3365					sliport_err1);
3366				dev_err(dev, "ERR: sliport error2 0x%x\n",
3367					sliport_err2);
3368			}
3369		}
3370	} else {
3371		ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3372		ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3373		ue_lo_mask = ioread32(adapter->pcicfg +
3374				      PCICFG_UE_STATUS_LOW_MASK);
3375		ue_hi_mask = ioread32(adapter->pcicfg +
3376				      PCICFG_UE_STATUS_HI_MASK);
3377
3378		ue_lo = (ue_lo & ~ue_lo_mask);
3379		ue_hi = (ue_hi & ~ue_hi_mask);
3380
3381		if (ue_lo || ue_hi) {
3382			/* On certain platforms BE3 hardware can indicate
3383			 * spurious UEs. In case of a UE in the chip,
3384			 * the POST register correctly reports either a
3385			 * FAT_LOG_START state (FW is currently dumping
3386			 * FAT log data) or a ARMFW_UE state. Check for the
3387			 * above states to ascertain if the UE is valid or not.
3388			 */
3389			if (BE3_chip(adapter)) {
3390				val = be_POST_stage_get(adapter);
3391				if ((val & POST_STAGE_FAT_LOG_START)
3392				     != POST_STAGE_FAT_LOG_START &&
3393				    (val & POST_STAGE_ARMFW_UE)
3394				     != POST_STAGE_ARMFW_UE &&
3395				    (val & POST_STAGE_RECOVERABLE_ERR)
3396				     != POST_STAGE_RECOVERABLE_ERR)
3397					return;
3398			}
3399
3400			dev_err(dev, "Error detected in the adapter");
3401			be_set_error(adapter, BE_ERROR_UE);
3402
3403			for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3404				if (ue_lo & 1)
3405					dev_err(dev, "UE: %s bit set\n",
3406						ue_status_low_desc[i]);
3407			}
3408			for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3409				if (ue_hi & 1)
3410					dev_err(dev, "UE: %s bit set\n",
3411						ue_status_hi_desc[i]);
3412			}
3413		}
3414	}
3415}
3416
3417static void be_msix_disable(struct be_adapter *adapter)
3418{
3419	if (msix_enabled(adapter)) {
3420		pci_disable_msix(adapter->pdev);
3421		adapter->num_msix_vec = 0;
3422		adapter->num_msix_roce_vec = 0;
3423	}
3424}
3425
3426static int be_msix_enable(struct be_adapter *adapter)
3427{
3428	unsigned int i, max_roce_eqs;
3429	struct device *dev = &adapter->pdev->dev;
3430	int num_vec;
3431
3432	/* If RoCE is supported, program the max number of vectors that
3433	 * could be used for NIC and RoCE, else, just program the number
3434	 * we'll use initially.
3435	 */
3436	if (be_roce_supported(adapter)) {
3437		max_roce_eqs =
3438			be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3439		max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3440		num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3441	} else {
3442		num_vec = max(adapter->cfg_num_rx_irqs,
3443			      adapter->cfg_num_tx_irqs);
3444	}
3445
3446	for (i = 0; i < num_vec; i++)
3447		adapter->msix_entries[i].entry = i;
3448
3449	num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3450					MIN_MSIX_VECTORS, num_vec);
3451	if (num_vec < 0)
3452		goto fail;
3453
3454	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3455		adapter->num_msix_roce_vec = num_vec / 2;
3456		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3457			 adapter->num_msix_roce_vec);
3458	}
3459
3460	adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3461
3462	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3463		 adapter->num_msix_vec);
3464	return 0;
3465
3466fail:
3467	dev_warn(dev, "MSIx enable failed\n");
3468
3469	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
3470	if (be_virtfn(adapter))
3471		return num_vec;
3472	return 0;
3473}
3474
3475static inline int be_msix_vec_get(struct be_adapter *adapter,
3476				  struct be_eq_obj *eqo)
3477{
3478	return adapter->msix_entries[eqo->msix_idx].vector;
3479}
3480
3481static int be_msix_register(struct be_adapter *adapter)
3482{
3483	struct net_device *netdev = adapter->netdev;
3484	struct be_eq_obj *eqo;
3485	int status, i, vec;
3486
3487	for_all_evt_queues(adapter, eqo, i) {
3488		sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3489		vec = be_msix_vec_get(adapter, eqo);
3490		status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3491		if (status)
3492			goto err_msix;
3493
3494		irq_set_affinity_hint(vec, eqo->affinity_mask);
3495	}
3496
3497	return 0;
3498err_msix:
3499	for (i--; i >= 0; i--) {
3500		eqo = &adapter->eq_obj[i];
3501		free_irq(be_msix_vec_get(adapter, eqo), eqo);
3502	}
3503	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3504		 status);
3505	be_msix_disable(adapter);
3506	return status;
3507}
3508
3509static int be_irq_register(struct be_adapter *adapter)
3510{
3511	struct net_device *netdev = adapter->netdev;
3512	int status;
3513
3514	if (msix_enabled(adapter)) {
3515		status = be_msix_register(adapter);
3516		if (status == 0)
3517			goto done;
3518		/* INTx is not supported for VF */
3519		if (be_virtfn(adapter))
3520			return status;
3521	}
3522
3523	/* INTx: only the first EQ is used */
3524	netdev->irq = adapter->pdev->irq;
3525	status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3526			     &adapter->eq_obj[0]);
3527	if (status) {
3528		dev_err(&adapter->pdev->dev,
3529			"INTx request IRQ failed - err %d\n", status);
3530		return status;
3531	}
3532done:
3533	adapter->isr_registered = true;
3534	return 0;
3535}
3536
3537static void be_irq_unregister(struct be_adapter *adapter)
3538{
3539	struct net_device *netdev = adapter->netdev;
3540	struct be_eq_obj *eqo;
3541	int i, vec;
3542
3543	if (!adapter->isr_registered)
3544		return;
3545
3546	/* INTx */
3547	if (!msix_enabled(adapter)) {
3548		free_irq(netdev->irq, &adapter->eq_obj[0]);
3549		goto done;
3550	}
3551
3552	/* MSIx */
3553	for_all_evt_queues(adapter, eqo, i) {
3554		vec = be_msix_vec_get(adapter, eqo);
3555		irq_set_affinity_hint(vec, NULL);
3556		free_irq(vec, eqo);
3557	}
3558
3559done:
3560	adapter->isr_registered = false;
3561}
3562
3563static void be_rx_qs_destroy(struct be_adapter *adapter)
3564{
3565	struct rss_info *rss = &adapter->rss_info;
3566	struct be_queue_info *q;
3567	struct be_rx_obj *rxo;
3568	int i;
3569
3570	for_all_rx_queues(adapter, rxo, i) {
3571		q = &rxo->q;
3572		if (q->created) {
3573			/* If RXQs are destroyed while in an "out of buffer"
3574			 * state, there is a possibility of an HW stall on
3575			 * Lancer. So, post 64 buffers to each queue to relieve
3576			 * the "out of buffer" condition.
3577			 * Make sure there's space in the RXQ before posting.
3578			 */
3579			if (lancer_chip(adapter)) {
3580				be_rx_cq_clean(rxo);
3581				if (atomic_read(&q->used) == 0)
3582					be_post_rx_frags(rxo, GFP_KERNEL,
3583							 MAX_RX_POST);
3584			}
3585
3586			be_cmd_rxq_destroy(adapter, q);
3587			be_rx_cq_clean(rxo);
3588			be_rxq_clean(rxo);
3589		}
3590		be_queue_free(adapter, q);
3591	}
3592
3593	if (rss->rss_flags) {
3594		rss->rss_flags = RSS_ENABLE_NONE;
3595		be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3596				  128, rss->rss_hkey);
3597	}
3598}
3599
3600static void be_disable_if_filters(struct be_adapter *adapter)
3601{
3602	/* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3603	if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3604	    check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3605		be_dev_mac_del(adapter, adapter->pmac_id[0]);
3606		eth_zero_addr(adapter->dev_mac);
3607	}
3608
3609	be_clear_uc_list(adapter);
3610	be_clear_mc_list(adapter);
3611
3612	/* The IFACE flags are enabled in the open path and cleared
3613	 * in the close path. When a VF gets detached from the host and
3614	 * assigned to a VM the following happens:
3615	 *	- VF's IFACE flags get cleared in the detach path
3616	 *	- IFACE create is issued by the VF in the attach path
3617	 * Due to a bug in the BE3/Skyhawk-R FW
3618	 * (Lancer FW doesn't have the bug), the IFACE capability flags
3619	 * specified along with the IFACE create cmd issued by a VF are not
3620	 * honoured by FW.  As a consequence, if a *new* driver
3621	 * (that enables/disables IFACE flags in open/close)
3622	 * is loaded in the host and an *old* driver is * used by a VM/VF,
3623	 * the IFACE gets created *without* the needed flags.
3624	 * To avoid this, disable RX-filter flags only for Lancer.
3625	 */
3626	if (lancer_chip(adapter)) {
3627		be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3628		adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3629	}
3630}
3631
3632static int be_close(struct net_device *netdev)
3633{
3634	struct be_adapter *adapter = netdev_priv(netdev);
3635	struct be_eq_obj *eqo;
3636	int i;
3637
3638	/* This protection is needed as be_close() may be called even when the
3639	 * adapter is in cleared state (after eeh perm failure)
3640	 */
3641	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3642		return 0;
3643
3644	/* Before attempting cleanup ensure all the pending cmds in the
3645	 * config_wq have finished execution
3646	 */
3647	flush_workqueue(be_wq);
3648
3649	be_disable_if_filters(adapter);
3650
3651	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3652		for_all_evt_queues(adapter, eqo, i) {
3653			napi_disable(&eqo->napi);
3654		}
3655		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3656	}
3657
3658	be_async_mcc_disable(adapter);
3659
3660	/* Wait for all pending tx completions to arrive so that
3661	 * all tx skbs are freed.
3662	 */
3663	netif_tx_disable(netdev);
3664	be_tx_compl_clean(adapter);
3665
3666	be_rx_qs_destroy(adapter);
3667
3668	for_all_evt_queues(adapter, eqo, i) {
3669		if (msix_enabled(adapter))
3670			synchronize_irq(be_msix_vec_get(adapter, eqo));
3671		else
3672			synchronize_irq(netdev->irq);
3673		be_eq_clean(eqo);
3674	}
3675
3676	be_irq_unregister(adapter);
3677
3678	return 0;
3679}
3680
3681static int be_rx_qs_create(struct be_adapter *adapter)
3682{
3683	struct rss_info *rss = &adapter->rss_info;
3684	u8 rss_key[RSS_HASH_KEY_LEN];
3685	struct be_rx_obj *rxo;
3686	int rc, i, j;
3687
3688	for_all_rx_queues(adapter, rxo, i) {
3689		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3690				    sizeof(struct be_eth_rx_d));
3691		if (rc)
3692			return rc;
3693	}
3694
3695	if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3696		rxo = default_rxo(adapter);
3697		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3698				       rx_frag_size, adapter->if_handle,
3699				       false, &rxo->rss_id);
3700		if (rc)
3701			return rc;
3702	}
3703
3704	for_all_rss_queues(adapter, rxo, i) {
3705		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3706				       rx_frag_size, adapter->if_handle,
3707				       true, &rxo->rss_id);
3708		if (rc)
3709			return rc;
3710	}
3711
3712	if (be_multi_rxq(adapter)) {
3713		for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3714			for_all_rss_queues(adapter, rxo, i) {
3715				if ((j + i) >= RSS_INDIR_TABLE_LEN)
3716					break;
3717				rss->rsstable[j + i] = rxo->rss_id;
3718				rss->rss_queue[j + i] = i;
3719			}
3720		}
3721		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3722			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3723
3724		if (!BEx_chip(adapter))
3725			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3726				RSS_ENABLE_UDP_IPV6;
3727
3728		netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3729		rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3730				       RSS_INDIR_TABLE_LEN, rss_key);
3731		if (rc) {
3732			rss->rss_flags = RSS_ENABLE_NONE;
3733			return rc;
3734		}
3735
3736		memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3737	} else {
3738		/* Disable RSS, if only default RX Q is created */
3739		rss->rss_flags = RSS_ENABLE_NONE;
3740	}
3741
3742
3743	/* Post 1 less than RXQ-len to avoid head being equal to tail,
3744	 * which is a queue empty condition
3745	 */
3746	for_all_rx_queues(adapter, rxo, i)
3747		be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3748
3749	return 0;
3750}
3751
3752static int be_enable_if_filters(struct be_adapter *adapter)
3753{
3754	int status;
3755
3756	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3757	if (status)
3758		return status;
3759
3760	/* Normally this condition usually true as the ->dev_mac is zeroed.
3761	 * But on BE3 VFs the initial MAC is pre-programmed by PF and
3762	 * subsequent be_dev_mac_add() can fail (after fresh boot)
3763	 */
3764	if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3765		int old_pmac_id = -1;
3766
3767		/* Remember old programmed MAC if any - can happen on BE3 VF */
3768		if (!is_zero_ether_addr(adapter->dev_mac))
3769			old_pmac_id = adapter->pmac_id[0];
3770
3771		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3772		if (status)
3773			return status;
3774
3775		/* Delete the old programmed MAC as we successfully programmed
3776		 * a new MAC
3777		 */
3778		if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3779			be_dev_mac_del(adapter, old_pmac_id);
3780
3781		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3782	}
3783
3784	if (adapter->vlans_added)
3785		be_vid_config(adapter);
3786
3787	__be_set_rx_mode(adapter);
3788
3789	return 0;
3790}
3791
3792static int be_open(struct net_device *netdev)
3793{
3794	struct be_adapter *adapter = netdev_priv(netdev);
3795	struct be_eq_obj *eqo;
3796	struct be_rx_obj *rxo;
3797	struct be_tx_obj *txo;
3798	u8 link_status;
3799	int status, i;
3800
3801	status = be_rx_qs_create(adapter);
3802	if (status)
3803		goto err;
3804
3805	status = be_enable_if_filters(adapter);
3806	if (status)
3807		goto err;
3808
3809	status = be_irq_register(adapter);
3810	if (status)
3811		goto err;
3812
3813	for_all_rx_queues(adapter, rxo, i)
3814		be_cq_notify(adapter, rxo->cq.id, true, 0);
3815
3816	for_all_tx_queues(adapter, txo, i)
3817		be_cq_notify(adapter, txo->cq.id, true, 0);
3818
3819	be_async_mcc_enable(adapter);
3820
3821	for_all_evt_queues(adapter, eqo, i) {
3822		napi_enable(&eqo->napi);
3823		be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3824	}
3825	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3826
3827	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3828	if (!status)
3829		be_link_status_update(adapter, link_status);
3830
3831	netif_tx_start_all_queues(netdev);
3832
3833	udp_tunnel_nic_reset_ntf(netdev);
3834
3835	return 0;
3836err:
3837	be_close(adapter->netdev);
3838	return -EIO;
3839}
3840
3841static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3842{
3843	u32 addr;
3844
3845	addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3846
3847	mac[5] = (u8)(addr & 0xFF);
3848	mac[4] = (u8)((addr >> 8) & 0xFF);
3849	mac[3] = (u8)((addr >> 16) & 0xFF);
3850	/* Use the OUI from the current MAC address */
3851	memcpy(mac, adapter->netdev->dev_addr, 3);
3852}
3853
3854/*
3855 * Generate a seed MAC address from the PF MAC Address using jhash.
3856 * MAC Address for VFs are assigned incrementally starting from the seed.
3857 * These addresses are programmed in the ASIC by the PF and the VF driver
3858 * queries for the MAC address during its probe.
3859 */
3860static int be_vf_eth_addr_config(struct be_adapter *adapter)
3861{
3862	u32 vf;
3863	int status = 0;
3864	u8 mac[ETH_ALEN];
3865	struct be_vf_cfg *vf_cfg;
3866
3867	be_vf_eth_addr_generate(adapter, mac);
3868
3869	for_all_vfs(adapter, vf_cfg, vf) {
3870		if (BEx_chip(adapter))
3871			status = be_cmd_pmac_add(adapter, mac,
3872						 vf_cfg->if_handle,
3873						 &vf_cfg->pmac_id, vf + 1);
3874		else
3875			status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3876						vf + 1);
3877
3878		if (status)
3879			dev_err(&adapter->pdev->dev,
3880				"Mac address assignment failed for VF %d\n",
3881				vf);
3882		else
3883			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3884
3885		mac[5] += 1;
3886	}
3887	return status;
3888}
3889
3890static int be_vfs_mac_query(struct be_adapter *adapter)
3891{
3892	int status, vf;
3893	u8 mac[ETH_ALEN];
3894	struct be_vf_cfg *vf_cfg;
3895
3896	for_all_vfs(adapter, vf_cfg, vf) {
3897		status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3898					       mac, vf_cfg->if_handle,
3899					       false, vf+1);
3900		if (status)
3901			return status;
3902		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3903	}
3904	return 0;
3905}
3906
3907static void be_vf_clear(struct be_adapter *adapter)
3908{
3909	struct be_vf_cfg *vf_cfg;
3910	u32 vf;
3911
3912	if (pci_vfs_assigned(adapter->pdev)) {
3913		dev_warn(&adapter->pdev->dev,
3914			 "VFs are assigned to VMs: not disabling VFs\n");
3915		goto done;
3916	}
3917
3918	pci_disable_sriov(adapter->pdev);
3919
3920	for_all_vfs(adapter, vf_cfg, vf) {
3921		if (BEx_chip(adapter))
3922			be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3923					vf_cfg->pmac_id, vf + 1);
3924		else
3925			be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3926				       vf + 1);
3927
3928		be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3929	}
3930
3931	if (BE3_chip(adapter))
3932		be_cmd_set_hsw_config(adapter, 0, 0,
3933				      adapter->if_handle,
3934				      PORT_FWD_TYPE_PASSTHRU, 0);
3935done:
3936	kfree(adapter->vf_cfg);
3937	adapter->num_vfs = 0;
3938	adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3939}
3940
3941static void be_clear_queues(struct be_adapter *adapter)
3942{
3943	be_mcc_queues_destroy(adapter);
3944	be_rx_cqs_destroy(adapter);
3945	be_tx_queues_destroy(adapter);
3946	be_evt_queues_destroy(adapter);
3947}
3948
3949static void be_cancel_worker(struct be_adapter *adapter)
3950{
3951	if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3952		cancel_delayed_work_sync(&adapter->work);
3953		adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3954	}
3955}
3956
3957static void be_cancel_err_detection(struct be_adapter *adapter)
3958{
3959	struct be_error_recovery *err_rec = &adapter->error_recovery;
3960
3961	if (!be_err_recovery_workq)
3962		return;
3963
3964	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3965		cancel_delayed_work_sync(&err_rec->err_detection_work);
3966		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3967	}
3968}
3969
3970/* VxLAN offload Notes:
3971 *
3972 * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
3973 * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
3974 * is expected to work across all types of IP tunnels once exported. Skyhawk
3975 * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
3976 * offloads in hw_enc_features only when a VxLAN port is added. If other (non
3977 * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
3978 * those other tunnels are unexported on the fly through ndo_features_check().
3979 */
3980static int be_vxlan_set_port(struct net_device *netdev, unsigned int table,
3981			     unsigned int entry, struct udp_tunnel_info *ti)
3982{
3983	struct be_adapter *adapter = netdev_priv(netdev);
3984	struct device *dev = &adapter->pdev->dev;
3985	int status;
3986
3987	status = be_cmd_manage_iface(adapter, adapter->if_handle,
3988				     OP_CONVERT_NORMAL_TO_TUNNEL);
3989	if (status) {
3990		dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3991		return status;
3992	}
3993	adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3994
3995	status = be_cmd_set_vxlan_port(adapter, ti->port);
3996	if (status) {
3997		dev_warn(dev, "Failed to add VxLAN port\n");
3998		return status;
3999	}
4000	adapter->vxlan_port = ti->port;
4001
4002	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4003				   NETIF_F_TSO | NETIF_F_TSO6 |
4004				   NETIF_F_GSO_UDP_TUNNEL;
4005
4006	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4007		 be16_to_cpu(ti->port));
4008	return 0;
4009}
4010
4011static int be_vxlan_unset_port(struct net_device *netdev, unsigned int table,
4012			       unsigned int entry, struct udp_tunnel_info *ti)
4013{
4014	struct be_adapter *adapter = netdev_priv(netdev);
4015
4016	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4017		be_cmd_manage_iface(adapter, adapter->if_handle,
4018				    OP_CONVERT_TUNNEL_TO_NORMAL);
4019
4020	if (adapter->vxlan_port)
4021		be_cmd_set_vxlan_port(adapter, 0);
4022
4023	adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4024	adapter->vxlan_port = 0;
4025
4026	netdev->hw_enc_features = 0;
4027	return 0;
4028}
4029
4030static const struct udp_tunnel_nic_info be_udp_tunnels = {
4031	.set_port	= be_vxlan_set_port,
4032	.unset_port	= be_vxlan_unset_port,
4033	.flags		= UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
4034			  UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
4035	.tables		= {
4036		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
4037	},
4038};
4039
4040static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4041				struct be_resources *vft_res)
4042{
4043	struct be_resources res = adapter->pool_res;
4044	u32 vf_if_cap_flags = res.vf_if_cap_flags;
4045	struct be_resources res_mod = {0};
4046	u16 num_vf_qs = 1;
4047
4048	/* Distribute the queue resources among the PF and it's VFs */
4049	if (num_vfs) {
4050		/* Divide the rx queues evenly among the VFs and the PF, capped
4051		 * at VF-EQ-count. Any remainder queues belong to the PF.
4052		 */
4053		num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4054				res.max_rss_qs / (num_vfs + 1));
4055
4056		/* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4057		 * RSS Tables per port. Provide RSS on VFs, only if number of
4058		 * VFs requested is less than it's PF Pool's RSS Tables limit.
4059		 */
4060		if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4061			num_vf_qs = 1;
4062	}
4063
4064	/* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4065	 * which are modifiable using SET_PROFILE_CONFIG cmd.
4066	 */
4067	be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4068				  RESOURCE_MODIFIABLE, 0);
4069
4070	/* If RSS IFACE capability flags are modifiable for a VF, set the
4071	 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4072	 * more than 1 RSSQ is available for a VF.
4073	 * Otherwise, provision only 1 queue pair for VF.
4074	 */
4075	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4076		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4077		if (num_vf_qs > 1) {
4078			vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4079			if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4080				vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4081		} else {
4082			vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4083					     BE_IF_FLAGS_DEFQ_RSS);
4084		}
4085	} else {
4086		num_vf_qs = 1;
4087	}
4088
4089	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4090		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4091		vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4092	}
4093
4094	vft_res->vf_if_cap_flags = vf_if_cap_flags;
4095	vft_res->max_rx_qs = num_vf_qs;
4096	vft_res->max_rss_qs = num_vf_qs;
4097	vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4098	vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4099
4100	/* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4101	 * among the PF and it's VFs, if the fields are changeable
4102	 */
4103	if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4104		vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4105
4106	if (res_mod.max_vlans == FIELD_MODIFIABLE)
4107		vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4108
4109	if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4110		vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4111
4112	if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4113		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4114}
4115
4116static void be_if_destroy(struct be_adapter *adapter)
4117{
4118	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4119
4120	kfree(adapter->pmac_id);
4121	adapter->pmac_id = NULL;
4122
4123	kfree(adapter->mc_list);
4124	adapter->mc_list = NULL;
4125
4126	kfree(adapter->uc_list);
4127	adapter->uc_list = NULL;
4128}
4129
4130static int be_clear(struct be_adapter *adapter)
4131{
4132	struct pci_dev *pdev = adapter->pdev;
4133	struct  be_resources vft_res = {0};
4134
4135	be_cancel_worker(adapter);
4136
4137	flush_workqueue(be_wq);
4138
4139	if (sriov_enabled(adapter))
4140		be_vf_clear(adapter);
4141
4142	/* Re-configure FW to distribute resources evenly across max-supported
4143	 * number of VFs, only when VFs are not already enabled.
4144	 */
4145	if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4146	    !pci_vfs_assigned(pdev)) {
4147		be_calculate_vf_res(adapter,
4148				    pci_sriov_get_totalvfs(pdev),
4149				    &vft_res);
4150		be_cmd_set_sriov_config(adapter, adapter->pool_res,
4151					pci_sriov_get_totalvfs(pdev),
4152					&vft_res);
4153	}
4154
4155	be_vxlan_unset_port(adapter->netdev, 0, 0, NULL);
4156
4157	be_if_destroy(adapter);
4158
4159	be_clear_queues(adapter);
4160
4161	be_msix_disable(adapter);
4162	adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4163	return 0;
4164}
4165
4166static int be_vfs_if_create(struct be_adapter *adapter)
4167{
4168	struct be_resources res = {0};
4169	u32 cap_flags, en_flags, vf;
4170	struct be_vf_cfg *vf_cfg;
4171	int status;
4172
4173	/* If a FW profile exists, then cap_flags are updated */
4174	cap_flags = BE_VF_IF_EN_FLAGS;
4175
4176	for_all_vfs(adapter, vf_cfg, vf) {
4177		if (!BE3_chip(adapter)) {
4178			status = be_cmd_get_profile_config(adapter, &res, NULL,
4179							   ACTIVE_PROFILE_TYPE,
4180							   RESOURCE_LIMITS,
4181							   vf + 1);
4182			if (!status) {
4183				cap_flags = res.if_cap_flags;
4184				/* Prevent VFs from enabling VLAN promiscuous
4185				 * mode
4186				 */
4187				cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4188			}
4189		}
4190
4191		/* PF should enable IF flags during proxy if_create call */
4192		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4193		status = be_cmd_if_create(adapter, cap_flags, en_flags,
4194					  &vf_cfg->if_handle, vf + 1);
4195		if (status)
4196			return status;
4197	}
4198
4199	return 0;
4200}
4201
4202static int be_vf_setup_init(struct be_adapter *adapter)
4203{
4204	struct be_vf_cfg *vf_cfg;
4205	int vf;
4206
4207	adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4208				  GFP_KERNEL);
4209	if (!adapter->vf_cfg)
4210		return -ENOMEM;
4211
4212	for_all_vfs(adapter, vf_cfg, vf) {
4213		vf_cfg->if_handle = -1;
4214		vf_cfg->pmac_id = -1;
4215	}
4216	return 0;
4217}
4218
4219static int be_vf_setup(struct be_adapter *adapter)
4220{
4221	struct device *dev = &adapter->pdev->dev;
4222	struct be_vf_cfg *vf_cfg;
4223	int status, old_vfs, vf;
4224	bool spoofchk;
4225
4226	old_vfs = pci_num_vf(adapter->pdev);
4227
4228	status = be_vf_setup_init(adapter);
4229	if (status)
4230		goto err;
4231
4232	if (old_vfs) {
4233		for_all_vfs(adapter, vf_cfg, vf) {
4234			status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4235			if (status)
4236				goto err;
4237		}
4238
4239		status = be_vfs_mac_query(adapter);
4240		if (status)
4241			goto err;
4242	} else {
4243		status = be_vfs_if_create(adapter);
4244		if (status)
4245			goto err;
4246
4247		status = be_vf_eth_addr_config(adapter);
4248		if (status)
4249			goto err;
4250	}
4251
4252	for_all_vfs(adapter, vf_cfg, vf) {
4253		/* Allow VFs to programs MAC/VLAN filters */
4254		status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4255						  vf + 1);
4256		if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4257			status = be_cmd_set_fn_privileges(adapter,
4258							  vf_cfg->privileges |
4259							  BE_PRIV_FILTMGMT,
4260							  vf + 1);
4261			if (!status) {
4262				vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4263				dev_info(dev, "VF%d has FILTMGMT privilege\n",
4264					 vf);
4265			}
4266		}
4267
4268		/* Allow full available bandwidth */
4269		if (!old_vfs)
4270			be_cmd_config_qos(adapter, 0, 0, vf + 1);
4271
4272		status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4273					       vf_cfg->if_handle, NULL,
4274					       &spoofchk);
4275		if (!status)
4276			vf_cfg->spoofchk = spoofchk;
4277
4278		if (!old_vfs) {
4279			be_cmd_enable_vf(adapter, vf + 1);
4280			be_cmd_set_logical_link_config(adapter,
4281						       IFLA_VF_LINK_STATE_AUTO,
4282						       vf+1);
4283		}
4284	}
4285
4286	if (!old_vfs) {
4287		status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4288		if (status) {
4289			dev_err(dev, "SRIOV enable failed\n");
4290			adapter->num_vfs = 0;
4291			goto err;
4292		}
4293	}
4294
4295	if (BE3_chip(adapter)) {
4296		/* On BE3, enable VEB only when SRIOV is enabled */
4297		status = be_cmd_set_hsw_config(adapter, 0, 0,
4298					       adapter->if_handle,
4299					       PORT_FWD_TYPE_VEB, 0);
4300		if (status)
4301			goto err;
4302	}
4303
4304	adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4305	return 0;
4306err:
4307	dev_err(dev, "VF setup failed\n");
4308	be_vf_clear(adapter);
4309	return status;
4310}
4311
4312/* Converting function_mode bits on BE3 to SH mc_type enums */
4313
4314static u8 be_convert_mc_type(u32 function_mode)
4315{
4316	if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4317		return vNIC1;
4318	else if (function_mode & QNQ_MODE)
4319		return FLEX10;
4320	else if (function_mode & VNIC_MODE)
4321		return vNIC2;
4322	else if (function_mode & UMC_ENABLED)
4323		return UMC;
4324	else
4325		return MC_NONE;
4326}
4327
4328/* On BE2/BE3 FW does not suggest the supported limits */
4329static void BEx_get_resources(struct be_adapter *adapter,
4330			      struct be_resources *res)
4331{
4332	bool use_sriov = adapter->num_vfs ? 1 : 0;
4333
4334	if (be_physfn(adapter))
4335		res->max_uc_mac = BE_UC_PMAC_COUNT;
4336	else
4337		res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4338
4339	adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4340
4341	if (be_is_mc(adapter)) {
4342		/* Assuming that there are 4 channels per port,
4343		 * when multi-channel is enabled
4344		 */
4345		if (be_is_qnq_mode(adapter))
4346			res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4347		else
4348			/* In a non-qnq multichannel mode, the pvid
4349			 * takes up one vlan entry
4350			 */
4351			res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4352	} else {
4353		res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4354	}
4355
4356	res->max_mcast_mac = BE_MAX_MC;
4357
4358	/* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4359	 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4360	 *    *only* if it is RSS-capable.
4361	 */
4362	if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4363	    be_virtfn(adapter) ||
4364	    (be_is_mc(adapter) &&
4365	     !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4366		res->max_tx_qs = 1;
4367	} else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4368		struct be_resources super_nic_res = {0};
4369
4370		/* On a SuperNIC profile, the driver needs to use the
4371		 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4372		 */
4373		be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4374					  ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4375					  0);
4376		/* Some old versions of BE3 FW don't report max_tx_qs value */
4377		res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4378	} else {
4379		res->max_tx_qs = BE3_MAX_TX_QS;
4380	}
4381
4382	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4383	    !use_sriov && be_physfn(adapter))
4384		res->max_rss_qs = (adapter->be3_native) ?
4385					   BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4386	res->max_rx_qs = res->max_rss_qs + 1;
4387
4388	if (be_physfn(adapter))
4389		res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4390					BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4391	else
4392		res->max_evt_qs = 1;
4393
4394	res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4395	res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4396	if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4397		res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4398}
4399
4400static void be_setup_init(struct be_adapter *adapter)
4401{
4402	adapter->vlan_prio_bmap = 0xff;
4403	adapter->phy.link_speed = -1;
4404	adapter->if_handle = -1;
4405	adapter->be3_native = false;
4406	adapter->if_flags = 0;
4407	adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4408	if (be_physfn(adapter))
4409		adapter->cmd_privileges = MAX_PRIVILEGES;
4410	else
4411		adapter->cmd_privileges = MIN_PRIVILEGES;
4412}
4413
4414/* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4415 * However, this HW limitation is not exposed to the host via any SLI cmd.
4416 * As a result, in the case of SRIOV and in particular multi-partition configs
4417 * the driver needs to calcuate a proportional share of RSS Tables per PF-pool
4418 * for distribution between the VFs. This self-imposed limit will determine the
4419 * no: of VFs for which RSS can be enabled.
4420 */
4421static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4422{
4423	struct be_port_resources port_res = {0};
4424	u8 rss_tables_on_port;
4425	u16 max_vfs = be_max_vfs(adapter);
4426
4427	be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4428				  RESOURCE_LIMITS, 0);
4429
4430	rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4431
4432	/* Each PF Pool's RSS Tables limit =
4433	 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4434	 */
4435	adapter->pool_res.max_rss_tables =
4436		max_vfs * rss_tables_on_port / port_res.max_vfs;
4437}
4438
4439static int be_get_sriov_config(struct be_adapter *adapter)
4440{
4441	struct be_resources res = {0};
4442	int max_vfs, old_vfs;
4443
4444	be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4445				  RESOURCE_LIMITS, 0);
4446
4447	/* Some old versions of BE3 FW don't report max_vfs value */
4448	if (BE3_chip(adapter) && !res.max_vfs) {
4449		max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4450		res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4451	}
4452
4453	adapter->pool_res = res;
4454
4455	/* If during previous unload of the driver, the VFs were not disabled,
4456	 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4457	 * Instead use the TotalVFs value stored in the pci-dev struct.
4458	 */
4459	old_vfs = pci_num_vf(adapter->pdev);
4460	if (old_vfs) {
4461		dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4462			 old_vfs);
4463
4464		adapter->pool_res.max_vfs =
4465			pci_sriov_get_totalvfs(adapter->pdev);
4466		adapter->num_vfs = old_vfs;
4467	}
4468
4469	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4470		be_calculate_pf_pool_rss_tables(adapter);
4471		dev_info(&adapter->pdev->dev,
4472			 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4473			 be_max_pf_pool_rss_tables(adapter));
4474	}
4475	return 0;
4476}
4477
4478static void be_alloc_sriov_res(struct be_adapter *adapter)
4479{
4480	int old_vfs = pci_num_vf(adapter->pdev);
4481	struct  be_resources vft_res = {0};
4482	int status;
4483
4484	be_get_sriov_config(adapter);
4485
4486	if (!old_vfs)
4487		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4488
4489	/* When the HW is in SRIOV capable configuration, the PF-pool
4490	 * resources are given to PF during driver load, if there are no
4491	 * old VFs. This facility is not available in BE3 FW.
4492	 * Also, this is done by FW in Lancer chip.
4493	 */
4494	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4495		be_calculate_vf_res(adapter, 0, &vft_res);
4496		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4497						 &vft_res);
4498		if (status)
4499			dev_err(&adapter->pdev->dev,
4500				"Failed to optimize SRIOV resources\n");
4501	}
4502}
4503
4504static int be_get_resources(struct be_adapter *adapter)
4505{
4506	struct device *dev = &adapter->pdev->dev;
4507	struct be_resources res = {0};
4508	int status;
4509
4510	/* For Lancer, SH etc read per-function resource limits from FW.
4511	 * GET_FUNC_CONFIG returns per function guaranteed limits.
4512	 * GET_PROFILE_CONFIG returns PCI-E related limits PF-pool limits
4513	 */
4514	if (BEx_chip(adapter)) {
4515		BEx_get_resources(adapter, &res);
4516	} else {
4517		status = be_cmd_get_func_config(adapter, &res);
4518		if (status)
4519			return status;
4520
4521		/* If a deafault RXQ must be created, we'll use up one RSSQ*/
4522		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4523		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4524			res.max_rss_qs -= 1;
4525	}
4526
4527	/* If RoCE is supported stash away half the EQs for RoCE */
4528	res.max_nic_evt_qs = be_roce_supported(adapter) ?
4529				res.max_evt_qs / 2 : res.max_evt_qs;
4530	adapter->res = res;
4531
4532	/* If FW supports RSS default queue, then skip creating non-RSS
4533	 * queue for non-IP traffic.
4534	 */
4535	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4536				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4537
4538	dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4539		 be_max_txqs(adapter), be_max_rxqs(adapter),
4540		 be_max_rss(adapter), be_max_nic_eqs(adapter),
4541		 be_max_vfs(adapter));
4542	dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4543		 be_max_uc(adapter), be_max_mc(adapter),
4544		 be_max_vlans(adapter));
4545
4546	/* Ensure RX and TX queues are created in pairs at init time */
4547	adapter->cfg_num_rx_irqs =
4548				min_t(u16, netif_get_num_default_rss_queues(),
4549				      be_max_qp_irqs(adapter));
4550	adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4551	return 0;
4552}
4553
4554static int be_get_config(struct be_adapter *adapter)
4555{
4556	int status, level;
4557	u16 profile_id;
4558
4559	status = be_cmd_get_cntl_attributes(adapter);
4560	if (status)
4561		return status;
4562
4563	status = be_cmd_query_fw_cfg(adapter);
4564	if (status)
4565		return status;
4566
4567	if (!lancer_chip(adapter) && be_physfn(adapter))
4568		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4569
4570	if (BEx_chip(adapter)) {
4571		level = be_cmd_get_fw_log_level(adapter);
4572		adapter->msg_enable =
4573			level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4574	}
4575
4576	be_cmd_get_acpi_wol_cap(adapter);
4577	pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4578	pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4579
4580	be_cmd_query_port_name(adapter);
4581
4582	if (be_physfn(adapter)) {
4583		status = be_cmd_get_active_profile(adapter, &profile_id);
4584		if (!status)
4585			dev_info(&adapter->pdev->dev,
4586				 "Using profile 0x%x\n", profile_id);
4587	}
4588
4589	return 0;
4590}
4591
4592static int be_mac_setup(struct be_adapter *adapter)
4593{
4594	u8 mac[ETH_ALEN];
4595	int status;
4596
4597	if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4598		status = be_cmd_get_perm_mac(adapter, mac);
4599		if (status)
4600			return status;
4601
4602		memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4603		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4604
4605		/* Initial MAC for BE3 VFs is already programmed by PF */
4606		if (BEx_chip(adapter) && be_virtfn(adapter))
4607			memcpy(adapter->dev_mac, mac, ETH_ALEN);
4608	}
4609
4610	return 0;
4611}
4612
4613static void be_schedule_worker(struct be_adapter *adapter)
4614{
4615	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4616	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4617}
4618
4619static void be_destroy_err_recovery_workq(void)
4620{
4621	if (!be_err_recovery_workq)
4622		return;
4623
4624	flush_workqueue(be_err_recovery_workq);
4625	destroy_workqueue(be_err_recovery_workq);
4626	be_err_recovery_workq = NULL;
4627}
4628
4629static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4630{
4631	struct be_error_recovery *err_rec = &adapter->error_recovery;
4632
4633	if (!be_err_recovery_workq)
4634		return;
4635
4636	queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4637			   msecs_to_jiffies(delay));
4638	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4639}
4640
4641static int be_setup_queues(struct be_adapter *adapter)
4642{
4643	struct net_device *netdev = adapter->netdev;
4644	int status;
4645
4646	status = be_evt_queues_create(adapter);
4647	if (status)
4648		goto err;
4649
4650	status = be_tx_qs_create(adapter);
4651	if (status)
4652		goto err;
4653
4654	status = be_rx_cqs_create(adapter);
4655	if (status)
4656		goto err;
4657
4658	status = be_mcc_queues_create(adapter);
4659	if (status)
4660		goto err;
4661
4662	status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4663	if (status)
4664		goto err;
4665
4666	status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4667	if (status)
4668		goto err;
4669
4670	return 0;
4671err:
4672	dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4673	return status;
4674}
4675
4676static int be_if_create(struct be_adapter *adapter)
4677{
4678	u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4679	u32 cap_flags = be_if_cap_flags(adapter);
4680	int status;
4681
4682	/* alloc required memory for other filtering fields */
4683	adapter->pmac_id = kcalloc(be_max_uc(adapter),
4684				   sizeof(*adapter->pmac_id), GFP_KERNEL);
4685	if (!adapter->pmac_id)
4686		return -ENOMEM;
4687
4688	adapter->mc_list = kcalloc(be_max_mc(adapter),
4689				   sizeof(*adapter->mc_list), GFP_KERNEL);
4690	if (!adapter->mc_list)
4691		return -ENOMEM;
4692
4693	adapter->uc_list = kcalloc(be_max_uc(adapter),
4694				   sizeof(*adapter->uc_list), GFP_KERNEL);
4695	if (!adapter->uc_list)
4696		return -ENOMEM;
4697
4698	if (adapter->cfg_num_rx_irqs == 1)
4699		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4700
4701	en_flags &= cap_flags;
4702	/* will enable all the needed filter flags in be_open() */
4703	status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4704				  &adapter->if_handle, 0);
4705
4706	if (status)
4707		return status;
4708
4709	return 0;
4710}
4711
4712int be_update_queues(struct be_adapter *adapter)
4713{
4714	struct net_device *netdev = adapter->netdev;
4715	int status;
4716
4717	if (netif_running(netdev)) {
4718		/* be_tx_timeout() must not run concurrently with this
4719		 * function, synchronize with an already-running dev_watchdog
4720		 */
4721		netif_tx_lock_bh(netdev);
4722		/* device cannot transmit now, avoid dev_watchdog timeouts */
4723		netif_carrier_off(netdev);
4724		netif_tx_unlock_bh(netdev);
4725
4726		be_close(netdev);
4727	}
4728
4729	be_cancel_worker(adapter);
4730
4731	/* If any vectors have been shared with RoCE we cannot re-program
4732	 * the MSIx table.
4733	 */
4734	if (!adapter->num_msix_roce_vec)
4735		be_msix_disable(adapter);
4736
4737	be_clear_queues(adapter);
4738	status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4739	if (status)
4740		return status;
4741
4742	if (!msix_enabled(adapter)) {
4743		status = be_msix_enable(adapter);
4744		if (status)
4745			return status;
4746	}
4747
4748	status = be_if_create(adapter);
4749	if (status)
4750		return status;
4751
4752	status = be_setup_queues(adapter);
4753	if (status)
4754		return status;
4755
4756	be_schedule_worker(adapter);
4757
4758	/* The IF was destroyed and re-created. We need to clear
4759	 * all promiscuous flags valid for the destroyed IF.
4760	 * Without this promisc mode is not restored during
4761	 * be_open() because the driver thinks that it is
4762	 * already enabled in HW.
4763	 */
4764	adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4765
4766	if (netif_running(netdev))
4767		status = be_open(netdev);
4768
4769	return status;
4770}
4771
4772static inline int fw_major_num(const char *fw_ver)
4773{
4774	int fw_major = 0, i;
4775
4776	i = sscanf(fw_ver, "%d.", &fw_major);
4777	if (i != 1)
4778		return 0;
4779
4780	return fw_major;
4781}
4782
4783/* If it is error recovery, FLR the PF
4784 * Else if any VFs are already enabled don't FLR the PF
4785 */
4786static bool be_reset_required(struct be_adapter *adapter)
4787{
4788	if (be_error_recovering(adapter))
4789		return true;
4790	else
4791		return pci_num_vf(adapter->pdev) == 0;
4792}
4793
4794/* Wait for the FW to be ready and perform the required initialization */
4795static int be_func_init(struct be_adapter *adapter)
4796{
4797	int status;
4798
4799	status = be_fw_wait_ready(adapter);
4800	if (status)
4801		return status;
4802
4803	/* FW is now ready; clear errors to allow cmds/doorbell */
4804	be_clear_error(adapter, BE_CLEAR_ALL);
4805
4806	if (be_reset_required(adapter)) {
4807		status = be_cmd_reset_function(adapter);
4808		if (status)
4809			return status;
4810
4811		/* Wait for interrupts to quiesce after an FLR */
4812		msleep(100);
4813	}
4814
4815	/* Tell FW we're ready to fire cmds */
4816	status = be_cmd_fw_init(adapter);
4817	if (status)
4818		return status;
4819
4820	/* Allow interrupts for other ULPs running on NIC function */
4821	be_intr_set(adapter, true);
4822
4823	return 0;
4824}
4825
4826static int be_setup(struct be_adapter *adapter)
4827{
4828	struct device *dev = &adapter->pdev->dev;
4829	int status;
4830
4831	status = be_func_init(adapter);
4832	if (status)
4833		return status;
4834
4835	be_setup_init(adapter);
4836
4837	if (!lancer_chip(adapter))
4838		be_cmd_req_native_mode(adapter);
4839
4840	/* invoke this cmd first to get pf_num and vf_num which are needed
4841	 * for issuing profile related cmds
4842	 */
4843	if (!BEx_chip(adapter)) {
4844		status = be_cmd_get_func_config(adapter, NULL);
4845		if (status)
4846			return status;
4847	}
4848
4849	status = be_get_config(adapter);
4850	if (status)
4851		goto err;
4852
4853	if (!BE2_chip(adapter) && be_physfn(adapter))
4854		be_alloc_sriov_res(adapter);
4855
4856	status = be_get_resources(adapter);
4857	if (status)
4858		goto err;
4859
4860	status = be_msix_enable(adapter);
4861	if (status)
4862		goto err;
4863
4864	/* will enable all the needed filter flags in be_open() */
4865	status = be_if_create(adapter);
4866	if (status)
4867		goto err;
4868
4869	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4870	rtnl_lock();
4871	status = be_setup_queues(adapter);
4872	rtnl_unlock();
4873	if (status)
4874		goto err;
4875
4876	be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4877
4878	status = be_mac_setup(adapter);
4879	if (status)
4880		goto err;
4881
4882	be_cmd_get_fw_ver(adapter);
4883	dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4884
4885	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4886		dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4887			adapter->fw_ver);
4888		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4889	}
4890
4891	status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4892					 adapter->rx_fc);
4893	if (status)
4894		be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4895					&adapter->rx_fc);
4896
4897	dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4898		 adapter->tx_fc, adapter->rx_fc);
4899
4900	if (be_physfn(adapter))
4901		be_cmd_set_logical_link_config(adapter,
4902					       IFLA_VF_LINK_STATE_AUTO, 0);
4903
4904	/* BE3 EVB echoes broadcast/multicast packets back to PF's vport
4905	 * confusing a linux bridge or OVS that it might be connected to.
4906	 * Set the EVB to PASSTHRU mode which effectively disables the EVB
4907	 * when SRIOV is not enabled.
4908	 */
4909	if (BE3_chip(adapter))
4910		be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4911				      PORT_FWD_TYPE_PASSTHRU, 0);
4912
4913	if (adapter->num_vfs)
4914		be_vf_setup(adapter);
4915
4916	status = be_cmd_get_phy_info(adapter);
4917	if (!status && be_pause_supported(adapter))
4918		adapter->phy.fc_autoneg = 1;
4919
4920	if (be_physfn(adapter) && !lancer_chip(adapter))
4921		be_cmd_set_features(adapter);
4922
4923	be_schedule_worker(adapter);
4924	adapter->flags |= BE_FLAGS_SETUP_DONE;
4925	return 0;
4926err:
4927	be_clear(adapter);
4928	return status;
4929}
4930
4931#ifdef CONFIG_NET_POLL_CONTROLLER
4932static void be_netpoll(struct net_device *netdev)
4933{
4934	struct be_adapter *adapter = netdev_priv(netdev);
4935	struct be_eq_obj *eqo;
4936	int i;
4937
4938	for_all_evt_queues(adapter, eqo, i) {
4939		be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4940		napi_schedule(&eqo->napi);
4941	}
4942}
4943#endif
4944
4945int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4946{
4947	const struct firmware *fw;
4948	int status;
4949
4950	if (!netif_running(adapter->netdev)) {
4951		dev_err(&adapter->pdev->dev,
4952			"Firmware load not allowed (interface is down)\n");
4953		return -ENETDOWN;
4954	}
4955
4956	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4957	if (status)
4958		goto fw_exit;
4959
4960	dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4961
4962	if (lancer_chip(adapter))
4963		status = lancer_fw_download(adapter, fw);
4964	else
4965		status = be_fw_download(adapter, fw);
4966
4967	if (!status)
4968		be_cmd_get_fw_ver(adapter);
4969
4970fw_exit:
4971	release_firmware(fw);
4972	return status;
4973}
4974
4975static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4976				 u16 flags, struct netlink_ext_ack *extack)
4977{
4978	struct be_adapter *adapter = netdev_priv(dev);
4979	struct nlattr *attr, *br_spec;
4980	int rem;
4981	int status = 0;
4982	u16 mode = 0;
4983
4984	if (!sriov_enabled(adapter))
4985		return -EOPNOTSUPP;
4986
4987	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4988	if (!br_spec)
4989		return -EINVAL;
4990
4991	nla_for_each_nested(attr, br_spec, rem) {
4992		if (nla_type(attr) != IFLA_BRIDGE_MODE)
4993			continue;
4994
4995		if (nla_len(attr) < sizeof(mode))
4996			return -EINVAL;
4997
4998		mode = nla_get_u16(attr);
4999		if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
5000			return -EOPNOTSUPP;
5001
5002		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
5003			return -EINVAL;
5004
5005		status = be_cmd_set_hsw_config(adapter, 0, 0,
5006					       adapter->if_handle,
5007					       mode == BRIDGE_MODE_VEPA ?
5008					       PORT_FWD_TYPE_VEPA :
5009					       PORT_FWD_TYPE_VEB, 0);
5010		if (status)
5011			goto err;
5012
5013		dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
5014			 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5015
5016		return status;
5017	}
5018err:
5019	dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5020		mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5021
5022	return status;
5023}
5024
5025static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5026				 struct net_device *dev, u32 filter_mask,
5027				 int nlflags)
5028{
5029	struct be_adapter *adapter = netdev_priv(dev);
5030	int status = 0;
5031	u8 hsw_mode;
5032
5033	/* BE and Lancer chips support VEB mode only */
5034	if (BEx_chip(adapter) || lancer_chip(adapter)) {
5035		/* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5036		if (!pci_sriov_get_totalvfs(adapter->pdev))
5037			return 0;
5038		hsw_mode = PORT_FWD_TYPE_VEB;
5039	} else {
5040		status = be_cmd_get_hsw_config(adapter, NULL, 0,
5041					       adapter->if_handle, &hsw_mode,
5042					       NULL);
5043		if (status)
5044			return 0;
5045
5046		if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5047			return 0;
5048	}
5049
5050	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5051				       hsw_mode == PORT_FWD_TYPE_VEPA ?
5052				       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5053				       0, 0, nlflags, filter_mask, NULL);
5054}
5055
5056static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5057					 void (*func)(struct work_struct *))
5058{
5059	struct be_cmd_work *work;
5060
5061	work = kzalloc(sizeof(*work), GFP_ATOMIC);
5062	if (!work) {
5063		dev_err(&adapter->pdev->dev,
5064			"be_work memory allocation failed\n");
5065		return NULL;
5066	}
5067
5068	INIT_WORK(&work->work, func);
5069	work->adapter = adapter;
5070	return work;
5071}
5072
5073static netdev_features_t be_features_check(struct sk_buff *skb,
5074					   struct net_device *dev,
5075					   netdev_features_t features)
5076{
5077	struct be_adapter *adapter = netdev_priv(dev);
5078	u8 l4_hdr = 0;
5079
5080	if (skb_is_gso(skb)) {
5081		/* IPv6 TSO requests with extension hdrs are a problem
5082		 * to Lancer and BE3 HW. Disable TSO6 feature.
5083		 */
5084		if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5085			features &= ~NETIF_F_TSO6;
5086
5087		/* Lancer cannot handle the packet with MSS less than 256.
5088		 * Also it can't handle a TSO packet with a single segment
5089		 * Disable the GSO support in such cases
5090		 */
5091		if (lancer_chip(adapter) &&
5092		    (skb_shinfo(skb)->gso_size < 256 ||
5093		     skb_shinfo(skb)->gso_segs == 1))
5094			features &= ~NETIF_F_GSO_MASK;
5095	}
5096
5097	/* The code below restricts offload features for some tunneled and
5098	 * Q-in-Q packets.
5099	 * Offload features for normal (non tunnel) packets are unchanged.
5100	 */
5101	features = vlan_features_check(skb, features);
5102	if (!skb->encapsulation ||
5103	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5104		return features;
5105
5106	/* It's an encapsulated packet and VxLAN offloads are enabled. We
5107	 * should disable tunnel offload features if it's not a VxLAN packet,
5108	 * as tunnel offloads have been enabled only for VxLAN. This is done to
5109	 * allow other tunneled traffic like GRE work fine while VxLAN
5110	 * offloads are configured in Skyhawk-R.
5111	 */
5112	switch (vlan_get_protocol(skb)) {
5113	case htons(ETH_P_IP):
5114		l4_hdr = ip_hdr(skb)->protocol;
5115		break;
5116	case htons(ETH_P_IPV6):
5117		l4_hdr = ipv6_hdr(skb)->nexthdr;
5118		break;
5119	default:
5120		return features;
5121	}
5122
5123	if (l4_hdr != IPPROTO_UDP ||
5124	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5125	    skb->inner_protocol != htons(ETH_P_TEB) ||
5126	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5127		sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5128	    !adapter->vxlan_port ||
5129	    udp_hdr(skb)->dest != adapter->vxlan_port)
5130		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5131
5132	return features;
5133}
5134
5135static int be_get_phys_port_id(struct net_device *dev,
5136			       struct netdev_phys_item_id *ppid)
5137{
5138	int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5139	struct be_adapter *adapter = netdev_priv(dev);
5140	u8 *id;
5141
5142	if (MAX_PHYS_ITEM_ID_LEN < id_len)
5143		return -ENOSPC;
5144
5145	ppid->id[0] = adapter->hba_port_num + 1;
5146	id = &ppid->id[1];
5147	for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5148	     i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5149		memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5150
5151	ppid->id_len = id_len;
5152
5153	return 0;
5154}
5155
5156static void be_set_rx_mode(struct net_device *dev)
5157{
5158	struct be_adapter *adapter = netdev_priv(dev);
5159	struct be_cmd_work *work;
5160
5161	work = be_alloc_work(adapter, be_work_set_rx_mode);
5162	if (work)
5163		queue_work(be_wq, &work->work);
5164}
5165
5166static const struct net_device_ops be_netdev_ops = {
5167	.ndo_open		= be_open,
5168	.ndo_stop		= be_close,
5169	.ndo_start_xmit		= be_xmit,
5170	.ndo_set_rx_mode	= be_set_rx_mode,
5171	.ndo_set_mac_address	= be_mac_addr_set,
5172	.ndo_get_stats64	= be_get_stats64,
5173	.ndo_validate_addr	= eth_validate_addr,
5174	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
5175	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
5176	.ndo_set_vf_mac		= be_set_vf_mac,
5177	.ndo_set_vf_vlan	= be_set_vf_vlan,
5178	.ndo_set_vf_rate	= be_set_vf_tx_rate,
5179	.ndo_get_vf_config	= be_get_vf_config,
5180	.ndo_set_vf_link_state  = be_set_vf_link_state,
5181	.ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5182	.ndo_tx_timeout		= be_tx_timeout,
5183#ifdef CONFIG_NET_POLL_CONTROLLER
5184	.ndo_poll_controller	= be_netpoll,
5185#endif
5186	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
5187	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
5188	.ndo_udp_tunnel_add	= udp_tunnel_nic_add_port,
5189	.ndo_udp_tunnel_del	= udp_tunnel_nic_del_port,
5190	.ndo_features_check	= be_features_check,
5191	.ndo_get_phys_port_id   = be_get_phys_port_id,
5192};
5193
5194static void be_netdev_init(struct net_device *netdev)
5195{
5196	struct be_adapter *adapter = netdev_priv(netdev);
5197
5198	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5199		NETIF_F_GSO_UDP_TUNNEL |
5200		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5201		NETIF_F_HW_VLAN_CTAG_TX;
5202	if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5203		netdev->hw_features |= NETIF_F_RXHASH;
5204
5205	netdev->features |= netdev->hw_features |
5206		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
 
5207
5208	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5209		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5210
5211	netdev->priv_flags |= IFF_UNICAST_FLT;
5212
5213	netdev->flags |= IFF_MULTICAST;
5214
5215	netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5216
5217	netdev->netdev_ops = &be_netdev_ops;
5218
5219	netdev->ethtool_ops = &be_ethtool_ops;
5220
5221	if (!lancer_chip(adapter) && !BEx_chip(adapter) && !be_is_mc(adapter))
5222		netdev->udp_tunnel_nic_info = &be_udp_tunnels;
5223
5224	/* MTU range: 256 - 9000 */
5225	netdev->min_mtu = BE_MIN_MTU;
5226	netdev->max_mtu = BE_MAX_MTU;
5227}
5228
5229static void be_cleanup(struct be_adapter *adapter)
5230{
5231	struct net_device *netdev = adapter->netdev;
5232
5233	rtnl_lock();
5234	netif_device_detach(netdev);
5235	if (netif_running(netdev))
5236		be_close(netdev);
5237	rtnl_unlock();
5238
5239	be_clear(adapter);
5240}
5241
5242static int be_resume(struct be_adapter *adapter)
5243{
5244	struct net_device *netdev = adapter->netdev;
5245	int status;
5246
5247	status = be_setup(adapter);
5248	if (status)
5249		return status;
5250
5251	rtnl_lock();
5252	if (netif_running(netdev))
5253		status = be_open(netdev);
5254	rtnl_unlock();
5255
5256	if (status)
5257		return status;
5258
5259	netif_device_attach(netdev);
5260
5261	return 0;
5262}
5263
5264static void be_soft_reset(struct be_adapter *adapter)
5265{
5266	u32 val;
5267
5268	dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5269	val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5270	val |= SLIPORT_SOFTRESET_SR_MASK;
5271	iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5272}
5273
5274static bool be_err_is_recoverable(struct be_adapter *adapter)
5275{
5276	struct be_error_recovery *err_rec = &adapter->error_recovery;
5277	unsigned long initial_idle_time =
5278		msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5279	unsigned long recovery_interval =
5280		msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5281	u16 ue_err_code;
5282	u32 val;
5283
5284	val = be_POST_stage_get(adapter);
5285	if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5286		return false;
5287	ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5288	if (ue_err_code == 0)
5289		return false;
5290
5291	dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5292		ue_err_code);
5293
5294	if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5295		dev_err(&adapter->pdev->dev,
5296			"Cannot recover within %lu sec from driver load\n",
5297			jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5298		return false;
5299	}
5300
5301	if (err_rec->last_recovery_time && time_before_eq(
5302		jiffies - err_rec->last_recovery_time, recovery_interval)) {
5303		dev_err(&adapter->pdev->dev,
5304			"Cannot recover within %lu sec from last recovery\n",
5305			jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5306		return false;
5307	}
5308
5309	if (ue_err_code == err_rec->last_err_code) {
5310		dev_err(&adapter->pdev->dev,
5311			"Cannot recover from a consecutive TPE error\n");
5312		return false;
5313	}
5314
5315	err_rec->last_recovery_time = jiffies;
5316	err_rec->last_err_code = ue_err_code;
5317	return true;
5318}
5319
5320static int be_tpe_recover(struct be_adapter *adapter)
5321{
5322	struct be_error_recovery *err_rec = &adapter->error_recovery;
5323	int status = -EAGAIN;
5324	u32 val;
5325
5326	switch (err_rec->recovery_state) {
5327	case ERR_RECOVERY_ST_NONE:
5328		err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5329		err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5330		break;
5331
5332	case ERR_RECOVERY_ST_DETECT:
5333		val = be_POST_stage_get(adapter);
5334		if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5335		    POST_STAGE_RECOVERABLE_ERR) {
5336			dev_err(&adapter->pdev->dev,
5337				"Unrecoverable HW error detected: 0x%x\n", val);
5338			status = -EINVAL;
5339			err_rec->resched_delay = 0;
5340			break;
5341		}
5342
5343		dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5344
5345		/* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5346		 * milliseconds before it checks for final error status in
5347		 * SLIPORT_SEMAPHORE to determine if recovery criteria is met.
5348		 * If it does, then PF0 initiates a Soft Reset.
5349		 */
5350		if (adapter->pf_num == 0) {
5351			err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5352			err_rec->resched_delay = err_rec->ue_to_reset_time -
5353					ERR_RECOVERY_UE_DETECT_DURATION;
5354			break;
5355		}
5356
5357		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5358		err_rec->resched_delay = err_rec->ue_to_poll_time -
5359					ERR_RECOVERY_UE_DETECT_DURATION;
5360		break;
5361
5362	case ERR_RECOVERY_ST_RESET:
5363		if (!be_err_is_recoverable(adapter)) {
5364			dev_err(&adapter->pdev->dev,
5365				"Failed to meet recovery criteria\n");
5366			status = -EIO;
5367			err_rec->resched_delay = 0;
5368			break;
5369		}
5370		be_soft_reset(adapter);
5371		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5372		err_rec->resched_delay = err_rec->ue_to_poll_time -
5373					err_rec->ue_to_reset_time;
5374		break;
5375
5376	case ERR_RECOVERY_ST_PRE_POLL:
5377		err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5378		err_rec->resched_delay = 0;
5379		status = 0;			/* done */
5380		break;
5381
5382	default:
5383		status = -EINVAL;
5384		err_rec->resched_delay = 0;
5385		break;
5386	}
5387
5388	return status;
5389}
5390
5391static int be_err_recover(struct be_adapter *adapter)
5392{
5393	int status;
5394
5395	if (!lancer_chip(adapter)) {
5396		if (!adapter->error_recovery.recovery_supported ||
5397		    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5398			return -EIO;
5399		status = be_tpe_recover(adapter);
5400		if (status)
5401			goto err;
5402	}
5403
5404	/* Wait for adapter to reach quiescent state before
5405	 * destroying queues
5406	 */
5407	status = be_fw_wait_ready(adapter);
5408	if (status)
5409		goto err;
5410
5411	adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5412
5413	be_cleanup(adapter);
5414
5415	status = be_resume(adapter);
5416	if (status)
5417		goto err;
5418
5419	adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5420
5421err:
5422	return status;
5423}
5424
5425static void be_err_detection_task(struct work_struct *work)
5426{
5427	struct be_error_recovery *err_rec =
5428			container_of(work, struct be_error_recovery,
5429				     err_detection_work.work);
5430	struct be_adapter *adapter =
5431			container_of(err_rec, struct be_adapter,
5432				     error_recovery);
5433	u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5434	struct device *dev = &adapter->pdev->dev;
5435	int recovery_status;
5436
5437	be_detect_error(adapter);
5438	if (!be_check_error(adapter, BE_ERROR_HW))
5439		goto reschedule_task;
5440
5441	recovery_status = be_err_recover(adapter);
5442	if (!recovery_status) {
5443		err_rec->recovery_retries = 0;
5444		err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5445		dev_info(dev, "Adapter recovery successful\n");
5446		goto reschedule_task;
5447	} else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5448		/* BEx/SH recovery state machine */
5449		if (adapter->pf_num == 0 &&
5450		    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5451			dev_err(&adapter->pdev->dev,
5452				"Adapter recovery in progress\n");
5453		resched_delay = err_rec->resched_delay;
5454		goto reschedule_task;
5455	} else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5456		/* For VFs, check if PF have allocated resources
5457		 * every second.
5458		 */
5459		dev_err(dev, "Re-trying adapter recovery\n");
5460		goto reschedule_task;
5461	} else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5462		   ERR_RECOVERY_MAX_RETRY_COUNT) {
5463		/* In case of another error during recovery, it takes 30 sec
5464		 * for adapter to come out of error. Retry error recovery after
5465		 * this time interval.
5466		 */
5467		dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5468		resched_delay = ERR_RECOVERY_RETRY_DELAY;
5469		goto reschedule_task;
5470	} else {
5471		dev_err(dev, "Adapter recovery failed\n");
5472		dev_err(dev, "Please reboot server to recover\n");
5473	}
5474
5475	return;
5476
5477reschedule_task:
5478	be_schedule_err_detection(adapter, resched_delay);
5479}
5480
5481static void be_log_sfp_info(struct be_adapter *adapter)
5482{
5483	int status;
5484
5485	status = be_cmd_query_sfp_info(adapter);
5486	if (!status) {
5487		dev_err(&adapter->pdev->dev,
5488			"Port %c: %s Vendor: %s part no: %s",
5489			adapter->port_name,
5490			be_misconfig_evt_port_state[adapter->phy_state],
5491			adapter->phy.vendor_name,
5492			adapter->phy.vendor_pn);
5493	}
5494	adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5495}
5496
5497static void be_worker(struct work_struct *work)
5498{
5499	struct be_adapter *adapter =
5500		container_of(work, struct be_adapter, work.work);
5501	struct be_rx_obj *rxo;
5502	int i;
5503
5504	if (be_physfn(adapter) &&
5505	    MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5506		be_cmd_get_die_temperature(adapter);
5507
5508	/* when interrupts are not yet enabled, just reap any pending
5509	 * mcc completions
5510	 */
5511	if (!netif_running(adapter->netdev)) {
 
5512		be_process_mcc(adapter);
 
5513		goto reschedule;
5514	}
5515
5516	if (!adapter->stats_cmd_sent) {
5517		if (lancer_chip(adapter))
5518			lancer_cmd_get_pport_stats(adapter,
5519						   &adapter->stats_cmd);
5520		else
5521			be_cmd_get_stats(adapter, &adapter->stats_cmd);
5522	}
5523
5524	for_all_rx_queues(adapter, rxo, i) {
5525		/* Replenish RX-queues starved due to memory
5526		 * allocation failures.
5527		 */
5528		if (rxo->rx_post_starved)
5529			be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5530	}
5531
5532	/* EQ-delay update for Skyhawk is done while notifying EQ */
5533	if (!skyhawk_chip(adapter))
5534		be_eqd_update(adapter, false);
5535
5536	if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5537		be_log_sfp_info(adapter);
5538
5539reschedule:
5540	adapter->work_counter++;
5541	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5542}
5543
5544static void be_unmap_pci_bars(struct be_adapter *adapter)
5545{
5546	if (adapter->csr)
5547		pci_iounmap(adapter->pdev, adapter->csr);
5548	if (adapter->db)
5549		pci_iounmap(adapter->pdev, adapter->db);
5550	if (adapter->pcicfg && adapter->pcicfg_mapped)
5551		pci_iounmap(adapter->pdev, adapter->pcicfg);
5552}
5553
5554static int db_bar(struct be_adapter *adapter)
5555{
5556	if (lancer_chip(adapter) || be_virtfn(adapter))
5557		return 0;
5558	else
5559		return 4;
5560}
5561
5562static int be_roce_map_pci_bars(struct be_adapter *adapter)
5563{
5564	if (skyhawk_chip(adapter)) {
5565		adapter->roce_db.size = 4096;
5566		adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5567							      db_bar(adapter));
5568		adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5569							       db_bar(adapter));
5570	}
5571	return 0;
5572}
5573
5574static int be_map_pci_bars(struct be_adapter *adapter)
5575{
5576	struct pci_dev *pdev = adapter->pdev;
5577	u8 __iomem *addr;
5578	u32 sli_intf;
5579
5580	pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5581	adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5582				SLI_INTF_FAMILY_SHIFT;
5583	adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5584
5585	if (BEx_chip(adapter) && be_physfn(adapter)) {
5586		adapter->csr = pci_iomap(pdev, 2, 0);
5587		if (!adapter->csr)
5588			return -ENOMEM;
5589	}
5590
5591	addr = pci_iomap(pdev, db_bar(adapter), 0);
5592	if (!addr)
5593		goto pci_map_err;
5594	adapter->db = addr;
5595
5596	if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5597		if (be_physfn(adapter)) {
5598			/* PCICFG is the 2nd BAR in BE2 */
5599			addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5600			if (!addr)
5601				goto pci_map_err;
5602			adapter->pcicfg = addr;
5603			adapter->pcicfg_mapped = true;
5604		} else {
5605			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5606			adapter->pcicfg_mapped = false;
5607		}
5608	}
5609
5610	be_roce_map_pci_bars(adapter);
5611	return 0;
5612
5613pci_map_err:
5614	dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5615	be_unmap_pci_bars(adapter);
5616	return -ENOMEM;
5617}
5618
5619static void be_drv_cleanup(struct be_adapter *adapter)
5620{
5621	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5622	struct device *dev = &adapter->pdev->dev;
5623
5624	if (mem->va)
5625		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5626
5627	mem = &adapter->rx_filter;
5628	if (mem->va)
5629		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5630
5631	mem = &adapter->stats_cmd;
5632	if (mem->va)
5633		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5634}
5635
5636/* Allocate and initialize various fields in be_adapter struct */
5637static int be_drv_init(struct be_adapter *adapter)
5638{
5639	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5640	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5641	struct be_dma_mem *rx_filter = &adapter->rx_filter;
5642	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5643	struct device *dev = &adapter->pdev->dev;
5644	int status = 0;
5645
5646	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5647	mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
5648						&mbox_mem_alloc->dma,
5649						GFP_KERNEL);
5650	if (!mbox_mem_alloc->va)
5651		return -ENOMEM;
5652
5653	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5654	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5655	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5656
5657	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5658	rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
5659					   &rx_filter->dma, GFP_KERNEL);
5660	if (!rx_filter->va) {
5661		status = -ENOMEM;
5662		goto free_mbox;
5663	}
5664
5665	if (lancer_chip(adapter))
5666		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5667	else if (BE2_chip(adapter))
5668		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5669	else if (BE3_chip(adapter))
5670		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5671	else
5672		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5673	stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
5674					   &stats_cmd->dma, GFP_KERNEL);
5675	if (!stats_cmd->va) {
5676		status = -ENOMEM;
5677		goto free_rx_filter;
5678	}
5679
5680	mutex_init(&adapter->mbox_lock);
5681	mutex_init(&adapter->mcc_lock);
5682	mutex_init(&adapter->rx_filter_lock);
5683	spin_lock_init(&adapter->mcc_cq_lock);
5684	init_completion(&adapter->et_cmd_compl);
5685
5686	pci_save_state(adapter->pdev);
5687
5688	INIT_DELAYED_WORK(&adapter->work, be_worker);
5689
5690	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5691	adapter->error_recovery.resched_delay = 0;
5692	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5693			  be_err_detection_task);
5694
5695	adapter->rx_fc = true;
5696	adapter->tx_fc = true;
5697
5698	/* Must be a power of 2 or else MODULO will BUG_ON */
5699	adapter->be_get_temp_freq = 64;
5700
5701	return 0;
5702
5703free_rx_filter:
5704	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5705free_mbox:
5706	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5707			  mbox_mem_alloc->dma);
5708	return status;
5709}
5710
5711static void be_remove(struct pci_dev *pdev)
5712{
5713	struct be_adapter *adapter = pci_get_drvdata(pdev);
5714
5715	if (!adapter)
5716		return;
5717
5718	be_roce_dev_remove(adapter);
5719	be_intr_set(adapter, false);
5720
5721	be_cancel_err_detection(adapter);
5722
5723	unregister_netdev(adapter->netdev);
5724
5725	be_clear(adapter);
5726
5727	if (!pci_vfs_assigned(adapter->pdev))
5728		be_cmd_reset_function(adapter);
5729
5730	/* tell fw we're done with firing cmds */
5731	be_cmd_fw_clean(adapter);
5732
5733	be_unmap_pci_bars(adapter);
5734	be_drv_cleanup(adapter);
5735
5736	pci_disable_pcie_error_reporting(pdev);
5737
5738	pci_release_regions(pdev);
5739	pci_disable_device(pdev);
5740
5741	free_netdev(adapter->netdev);
5742}
5743
5744static ssize_t be_hwmon_show_temp(struct device *dev,
5745				  struct device_attribute *dev_attr,
5746				  char *buf)
5747{
5748	struct be_adapter *adapter = dev_get_drvdata(dev);
5749
5750	/* Unit: millidegree Celsius */
5751	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5752		return -EIO;
5753	else
5754		return sprintf(buf, "%u\n",
5755			       adapter->hwmon_info.be_on_die_temp * 1000);
5756}
5757
5758static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5759			  be_hwmon_show_temp, NULL, 1);
5760
5761static struct attribute *be_hwmon_attrs[] = {
5762	&sensor_dev_attr_temp1_input.dev_attr.attr,
5763	NULL
5764};
5765
5766ATTRIBUTE_GROUPS(be_hwmon);
5767
5768static char *mc_name(struct be_adapter *adapter)
5769{
5770	char *str = "";	/* default */
5771
5772	switch (adapter->mc_type) {
5773	case UMC:
5774		str = "UMC";
5775		break;
5776	case FLEX10:
5777		str = "FLEX10";
5778		break;
5779	case vNIC1:
5780		str = "vNIC-1";
5781		break;
5782	case nPAR:
5783		str = "nPAR";
5784		break;
5785	case UFP:
5786		str = "UFP";
5787		break;
5788	case vNIC2:
5789		str = "vNIC-2";
5790		break;
5791	default:
5792		str = "";
5793	}
5794
5795	return str;
5796}
5797
5798static inline char *func_name(struct be_adapter *adapter)
5799{
5800	return be_physfn(adapter) ? "PF" : "VF";
5801}
5802
5803static inline char *nic_name(struct pci_dev *pdev)
5804{
5805	switch (pdev->device) {
5806	case OC_DEVICE_ID1:
5807		return OC_NAME;
5808	case OC_DEVICE_ID2:
5809		return OC_NAME_BE;
5810	case OC_DEVICE_ID3:
5811	case OC_DEVICE_ID4:
5812		return OC_NAME_LANCER;
5813	case BE_DEVICE_ID2:
5814		return BE3_NAME;
5815	case OC_DEVICE_ID5:
5816	case OC_DEVICE_ID6:
5817		return OC_NAME_SH;
5818	default:
5819		return BE_NAME;
5820	}
5821}
5822
5823static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5824{
5825	struct be_adapter *adapter;
5826	struct net_device *netdev;
5827	int status = 0;
5828
5829	status = pci_enable_device(pdev);
5830	if (status)
5831		goto do_none;
5832
5833	status = pci_request_regions(pdev, DRV_NAME);
5834	if (status)
5835		goto disable_dev;
5836	pci_set_master(pdev);
5837
5838	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5839	if (!netdev) {
5840		status = -ENOMEM;
5841		goto rel_reg;
5842	}
5843	adapter = netdev_priv(netdev);
5844	adapter->pdev = pdev;
5845	pci_set_drvdata(pdev, adapter);
5846	adapter->netdev = netdev;
5847	SET_NETDEV_DEV(netdev, &pdev->dev);
5848
5849	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5850	if (!status) {
5851		netdev->features |= NETIF_F_HIGHDMA;
5852	} else {
5853		status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5854		if (status) {
5855			dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5856			goto free_netdev;
5857		}
5858	}
5859
5860	status = pci_enable_pcie_error_reporting(pdev);
5861	if (!status)
5862		dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5863
5864	status = be_map_pci_bars(adapter);
5865	if (status)
5866		goto free_netdev;
5867
5868	status = be_drv_init(adapter);
5869	if (status)
5870		goto unmap_bars;
5871
5872	status = be_setup(adapter);
5873	if (status)
5874		goto drv_cleanup;
5875
5876	be_netdev_init(netdev);
5877	status = register_netdev(netdev);
5878	if (status != 0)
5879		goto unsetup;
5880
5881	be_roce_dev_add(adapter);
5882
5883	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5884	adapter->error_recovery.probe_time = jiffies;
5885
5886	/* On Die temperature not supported for VF. */
5887	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5888		adapter->hwmon_info.hwmon_dev =
5889			devm_hwmon_device_register_with_groups(&pdev->dev,
5890							       DRV_NAME,
5891							       adapter,
5892							       be_hwmon_groups);
5893		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5894	}
5895
5896	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5897		 func_name(adapter), mc_name(adapter), adapter->port_name);
5898
5899	return 0;
5900
5901unsetup:
5902	be_clear(adapter);
5903drv_cleanup:
5904	be_drv_cleanup(adapter);
5905unmap_bars:
5906	be_unmap_pci_bars(adapter);
5907free_netdev:
5908	free_netdev(netdev);
5909rel_reg:
5910	pci_release_regions(pdev);
5911disable_dev:
5912	pci_disable_device(pdev);
5913do_none:
5914	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5915	return status;
5916}
5917
5918static int __maybe_unused be_suspend(struct device *dev_d)
5919{
5920	struct be_adapter *adapter = dev_get_drvdata(dev_d);
5921
5922	be_intr_set(adapter, false);
5923	be_cancel_err_detection(adapter);
5924
5925	be_cleanup(adapter);
5926
5927	return 0;
5928}
5929
5930static int __maybe_unused be_pci_resume(struct device *dev_d)
5931{
5932	struct be_adapter *adapter = dev_get_drvdata(dev_d);
5933	int status = 0;
5934
5935	status = be_resume(adapter);
5936	if (status)
5937		return status;
5938
5939	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5940
5941	return 0;
5942}
5943
5944/*
5945 * An FLR will stop BE from DMAing any data.
5946 */
5947static void be_shutdown(struct pci_dev *pdev)
5948{
5949	struct be_adapter *adapter = pci_get_drvdata(pdev);
5950
5951	if (!adapter)
5952		return;
5953
5954	be_roce_dev_shutdown(adapter);
5955	cancel_delayed_work_sync(&adapter->work);
5956	be_cancel_err_detection(adapter);
5957
5958	netif_device_detach(adapter->netdev);
5959
5960	be_cmd_reset_function(adapter);
5961
5962	pci_disable_device(pdev);
5963}
5964
5965static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5966					    pci_channel_state_t state)
5967{
5968	struct be_adapter *adapter = pci_get_drvdata(pdev);
5969
5970	dev_err(&adapter->pdev->dev, "EEH error detected\n");
5971
5972	be_roce_dev_remove(adapter);
5973
5974	if (!be_check_error(adapter, BE_ERROR_EEH)) {
5975		be_set_error(adapter, BE_ERROR_EEH);
5976
5977		be_cancel_err_detection(adapter);
5978
5979		be_cleanup(adapter);
5980	}
5981
5982	if (state == pci_channel_io_perm_failure)
5983		return PCI_ERS_RESULT_DISCONNECT;
5984
5985	pci_disable_device(pdev);
5986
5987	/* The error could cause the FW to trigger a flash debug dump.
5988	 * Resetting the card while flash dump is in progress
5989	 * can cause it not to recover; wait for it to finish.
5990	 * Wait only for first function as it is needed only once per
5991	 * adapter.
5992	 */
5993	if (pdev->devfn == 0)
5994		ssleep(30);
5995
5996	return PCI_ERS_RESULT_NEED_RESET;
5997}
5998
5999static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6000{
6001	struct be_adapter *adapter = pci_get_drvdata(pdev);
6002	int status;
6003
6004	dev_info(&adapter->pdev->dev, "EEH reset\n");
6005
6006	status = pci_enable_device(pdev);
6007	if (status)
6008		return PCI_ERS_RESULT_DISCONNECT;
6009
6010	pci_set_master(pdev);
6011	pci_restore_state(pdev);
6012
6013	/* Check if card is ok and fw is ready */
6014	dev_info(&adapter->pdev->dev,
6015		 "Waiting for FW to be ready after EEH reset\n");
6016	status = be_fw_wait_ready(adapter);
6017	if (status)
6018		return PCI_ERS_RESULT_DISCONNECT;
6019
6020	be_clear_error(adapter, BE_CLEAR_ALL);
6021	return PCI_ERS_RESULT_RECOVERED;
6022}
6023
6024static void be_eeh_resume(struct pci_dev *pdev)
6025{
6026	int status = 0;
6027	struct be_adapter *adapter = pci_get_drvdata(pdev);
6028
6029	dev_info(&adapter->pdev->dev, "EEH resume\n");
6030
6031	pci_save_state(pdev);
6032
6033	status = be_resume(adapter);
6034	if (status)
6035		goto err;
6036
6037	be_roce_dev_add(adapter);
6038
6039	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6040	return;
6041err:
6042	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6043}
6044
6045static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6046{
6047	struct be_adapter *adapter = pci_get_drvdata(pdev);
6048	struct be_resources vft_res = {0};
6049	int status;
6050
6051	if (!num_vfs)
6052		be_vf_clear(adapter);
6053
6054	adapter->num_vfs = num_vfs;
6055
6056	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6057		dev_warn(&pdev->dev,
6058			 "Cannot disable VFs while they are assigned\n");
6059		return -EBUSY;
6060	}
6061
6062	/* When the HW is in SRIOV capable configuration, the PF-pool resources
6063	 * are equally distributed across the max-number of VFs. The user may
6064	 * request only a subset of the max-vfs to be enabled.
6065	 * Based on num_vfs, redistribute the resources across num_vfs so that
6066	 * each VF will have access to more number of resources.
6067	 * This facility is not available in BE3 FW.
6068	 * Also, this is done by FW in Lancer chip.
6069	 */
6070	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6071		be_calculate_vf_res(adapter, adapter->num_vfs,
6072				    &vft_res);
6073		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6074						 adapter->num_vfs, &vft_res);
6075		if (status)
6076			dev_err(&pdev->dev,
6077				"Failed to optimize SR-IOV resources\n");
6078	}
6079
6080	status = be_get_resources(adapter);
6081	if (status)
6082		return be_cmd_status(status);
6083
6084	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6085	rtnl_lock();
6086	status = be_update_queues(adapter);
6087	rtnl_unlock();
6088	if (status)
6089		return be_cmd_status(status);
6090
6091	if (adapter->num_vfs)
6092		status = be_vf_setup(adapter);
6093
6094	if (!status)
6095		return adapter->num_vfs;
6096
6097	return 0;
6098}
6099
6100static const struct pci_error_handlers be_eeh_handlers = {
6101	.error_detected = be_eeh_err_detected,
6102	.slot_reset = be_eeh_reset,
6103	.resume = be_eeh_resume,
6104};
6105
6106static SIMPLE_DEV_PM_OPS(be_pci_pm_ops, be_suspend, be_pci_resume);
6107
6108static struct pci_driver be_driver = {
6109	.name = DRV_NAME,
6110	.id_table = be_dev_ids,
6111	.probe = be_probe,
6112	.remove = be_remove,
6113	.driver.pm = &be_pci_pm_ops,
6114	.shutdown = be_shutdown,
6115	.sriov_configure = be_pci_sriov_configure,
6116	.err_handler = &be_eeh_handlers
6117};
6118
6119static int __init be_init_module(void)
6120{
6121	int status;
6122
6123	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6124	    rx_frag_size != 2048) {
6125		printk(KERN_WARNING DRV_NAME
6126			" : Module param rx_frag_size must be 2048/4096/8192."
6127			" Using 2048\n");
6128		rx_frag_size = 2048;
6129	}
6130
6131	if (num_vfs > 0) {
6132		pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
6133		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6134	}
6135
6136	be_wq = create_singlethread_workqueue("be_wq");
6137	if (!be_wq) {
6138		pr_warn(DRV_NAME "workqueue creation failed\n");
6139		return -1;
6140	}
6141
6142	be_err_recovery_workq =
6143		create_singlethread_workqueue("be_err_recover");
6144	if (!be_err_recovery_workq)
6145		pr_warn(DRV_NAME "Could not create error recovery workqueue\n");
6146
6147	status = pci_register_driver(&be_driver);
6148	if (status) {
6149		destroy_workqueue(be_wq);
6150		be_destroy_err_recovery_workq();
6151	}
6152	return status;
6153}
6154module_init(be_init_module);
6155
6156static void __exit be_exit_module(void)
6157{
6158	pci_unregister_driver(&be_driver);
6159
6160	be_destroy_err_recovery_workq();
6161
6162	if (be_wq)
6163		destroy_workqueue(be_wq);
6164}
6165module_exit(be_exit_module);