   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/*
   3 * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
   4 */
   5
   6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   7
   8#ifdef CONFIG_RFS_ACCEL
   9#include <linux/cpu_rmap.h>
  10#endif /* CONFIG_RFS_ACCEL */
  11#include <linux/ethtool.h>
  12#include <linux/kernel.h>
  13#include <linux/module.h>
  14#include <linux/numa.h>
  15#include <linux/pci.h>
  16#include <linux/utsname.h>
  17#include <linux/version.h>
  18#include <linux/vmalloc.h>
  19#include <net/ip.h>
  20
  21#include "ena_netdev.h"
  22#include "ena_pci_id_tbl.h"
  23#include "ena_xdp.h"
  24
  25MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
  26MODULE_DESCRIPTION(DEVICE_NAME);
  27MODULE_LICENSE("GPL");
  28
  29/* Time in jiffies before concluding the transmitter is hung. */
  30#define TX_TIMEOUT  (5 * HZ)
  31
  32#define ENA_MAX_RINGS min_t(unsigned int, ENA_MAX_NUM_IO_QUEUES, num_possible_cpus())
  33
  34#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
  35		NETIF_MSG_IFDOWN | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
  36
  37static struct ena_aenq_handlers aenq_handlers;
  38
  39static struct workqueue_struct *ena_wq;
  40
  41MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
  42
  43static int ena_rss_init_default(struct ena_adapter *adapter);
  44static void check_for_admin_com_state(struct ena_adapter *adapter);
  45static int ena_destroy_device(struct ena_adapter *adapter, bool graceful);
  46static int ena_restore_device(struct ena_adapter *adapter);
  47
  48static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
  49{
  50	enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_OS_NETDEV_WD;
  51	struct ena_adapter *adapter = netdev_priv(dev);
  52	unsigned int time_since_last_napi, threshold;
  53	struct ena_ring *tx_ring;
  54	int napi_scheduled;
  55
  56	if (txqueue >= adapter->num_io_queues) {
  57		netdev_err(dev, "TX timeout on invalid queue %u\n", txqueue);
  58		goto schedule_reset;
  59	}
  60
  61	threshold = jiffies_to_usecs(dev->watchdog_timeo);
  62	tx_ring = &adapter->tx_ring[txqueue];
  63
  64	time_since_last_napi = jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies);
  65	napi_scheduled = !!(tx_ring->napi->state & NAPIF_STATE_SCHED);
  66
  67	netdev_err(dev,
  68		   "TX q %d is paused for too long (threshold %u). Time since last napi %u usec. napi scheduled: %d\n",
  69		   txqueue,
  70		   threshold,
  71		   time_since_last_napi,
  72		   napi_scheduled);
  73
  74	if (threshold < time_since_last_napi && napi_scheduled) {
  75		netdev_err(dev,
  76			   "napi handler hasn't been called for a long time but is scheduled\n");
   77		reset_reason = ENA_REGS_RESET_SUSPECTED_POLL_STARVATION;
  78	}
  79schedule_reset:
  80	/* Change the state of the device to trigger reset
  81	 * Check that we are not in the middle or a trigger already
  82	 */
  83	if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
  84		return;
  85
  86	ena_reset_device(adapter, reset_reason);
  87	ena_increase_stat(&adapter->dev_stats.tx_timeout, 1, &adapter->syncp);
  88}
  89
  90static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
  91{
  92	int i;
  93
  94	for (i = 0; i < adapter->num_io_queues; i++)
  95		adapter->rx_ring[i].mtu = mtu;
  96}
  97
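/* ena_change_mtu - ndo_change_mtu handler. Pushes the new MTU to the device
 * through the admin queue and, only on success, propagates it to every RX
 * ring and to dev->mtu.
 */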
  98static int ena_change_mtu(struct net_device *dev, int new_mtu)
  99{
 100	struct ena_adapter *adapter = netdev_priv(dev);
 101	int ret;
 102
 103	ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
 104	if (!ret) {
 105		netif_dbg(adapter, drv, dev, "Set MTU to %d\n", new_mtu);
 106		update_rx_ring_mtu(adapter, new_mtu);
 107		WRITE_ONCE(dev->mtu, new_mtu);
 108	} else {
 109		netif_err(adapter, drv, dev, "Failed to set MTU to %d\n",
 110			  new_mtu);
 111	}
 112
 113	return ret;
 114}
 115
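/* ena_xmit_common - common TX submission path shared by skb and XDP xmit.
 * Rings the doorbell first if the LLQ burst limit was reached, hands the
 * packet's descriptors to the device via ena_com_prepare_tx(), updates TX
 * statistics and advances next_to_use. Any failure other than -ENOMEM
 * triggers a device reset.
 */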
 116int ena_xmit_common(struct ena_adapter *adapter,
 117		    struct ena_ring *ring,
 118		    struct ena_tx_buffer *tx_info,
 119		    struct ena_com_tx_ctx *ena_tx_ctx,
 120		    u16 next_to_use,
 121		    u32 bytes)
 122{
 123	int rc, nb_hw_desc;
 124
 125	if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq,
 126						ena_tx_ctx))) {
 127		netif_dbg(adapter, tx_queued, adapter->netdev,
 128			  "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
 129			  ring->qid);
 130		ena_ring_tx_doorbell(ring);
 131	}
 132
 133	/* prepare the packet's descriptors to dma engine */
 134	rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx,
 135				&nb_hw_desc);
 136
 137	/* In case there isn't enough space in the queue for the packet,
 138	 * we simply drop it. All other failure reasons of
 139	 * ena_com_prepare_tx() are fatal and therefore require a device reset.
 140	 */
 141	if (unlikely(rc)) {
 142		netif_err(adapter, tx_queued, adapter->netdev,
 143			  "Failed to prepare tx bufs\n");
 144		ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1, &ring->syncp);
 145		if (rc != -ENOMEM)
 146			ena_reset_device(adapter, ENA_REGS_RESET_DRIVER_INVALID_STATE);
 147		return rc;
 148	}
 149
 150	u64_stats_update_begin(&ring->syncp);
 151	ring->tx_stats.cnt++;
 152	ring->tx_stats.bytes += bytes;
 153	u64_stats_update_end(&ring->syncp);
 154
 155	tx_info->tx_descs = nb_hw_desc;
 156	tx_info->total_tx_size = bytes;
 157	tx_info->last_jiffies = jiffies;
 158	tx_info->print_once = 0;
 159
 160	ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
 161						 ring->ring_size);
 162	return 0;
 163}
 164
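/* ena_init_rx_cpu_rmap - set up the aRFS CPU reverse map (CONFIG_RFS_ACCEL
 * only). Each IO queue's IRQ vector is registered so the stack can steer RX
 * flows to the CPU that consumes them.
 */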
 165static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
 166{
 167#ifdef CONFIG_RFS_ACCEL
 168	u32 i;
 169	int rc;
 170
 171	adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues);
 172	if (!adapter->netdev->rx_cpu_rmap)
 173		return -ENOMEM;
 174	for (i = 0; i < adapter->num_io_queues; i++) {
 175		int irq_idx = ENA_IO_IRQ_IDX(i);
 176
 177		rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
 178				      pci_irq_vector(adapter->pdev, irq_idx));
 179		if (rc) {
 180			free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
 181			adapter->netdev->rx_cpu_rmap = NULL;
 182			return rc;
 183		}
 184	}
 185#endif /* CONFIG_RFS_ACCEL */
 186	return 0;
 187}
 188
 189static void ena_init_io_rings_common(struct ena_adapter *adapter,
 190				     struct ena_ring *ring, u16 qid)
 191{
 192	ring->qid = qid;
 193	ring->pdev = adapter->pdev;
 194	ring->dev = &adapter->pdev->dev;
 195	ring->netdev = adapter->netdev;
 196	ring->napi = &adapter->ena_napi[qid].napi;
 197	ring->adapter = adapter;
 198	ring->ena_dev = adapter->ena_dev;
 199	ring->per_napi_packets = 0;
 200	ring->cpu = 0;
 201	ring->numa_node = 0;
 202	ring->no_interrupt_event_cnt = 0;
 203	u64_stats_init(&ring->syncp);
 204}
 205
 206void ena_init_io_rings(struct ena_adapter *adapter,
 207		       int first_index, int count)
 208{
 209	struct ena_com_dev *ena_dev;
 210	struct ena_ring *txr, *rxr;
 211	int i;
 212
 213	ena_dev = adapter->ena_dev;
 214
 215	for (i = first_index; i < first_index + count; i++) {
 216		txr = &adapter->tx_ring[i];
 217		rxr = &adapter->rx_ring[i];
 218
 219		/* TX common ring state */
 220		ena_init_io_rings_common(adapter, txr, i);
 221
 222		/* TX specific ring state */
 223		txr->ring_size = adapter->requested_tx_ring_size;
 224		txr->tx_max_header_size = ena_dev->tx_max_header_size;
 225		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
 226		txr->sgl_size = adapter->max_tx_sgl_size;
 227		txr->smoothed_interval =
 228			ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
 229		txr->disable_meta_caching = adapter->disable_meta_caching;
 230		spin_lock_init(&txr->xdp_tx_lock);
 231
 232		/* Don't init RX queues for xdp queues */
 233		if (!ENA_IS_XDP_INDEX(adapter, i)) {
 234			/* RX common ring state */
 235			ena_init_io_rings_common(adapter, rxr, i);
 236
 237			/* RX specific ring state */
 238			rxr->ring_size = adapter->requested_rx_ring_size;
 239			rxr->rx_copybreak = adapter->rx_copybreak;
 240			rxr->sgl_size = adapter->max_rx_sgl_size;
 241			rxr->smoothed_interval =
 242				ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
 243			rxr->empty_rx_queue = 0;
 244			rxr->rx_headroom = NET_SKB_PAD;
 245			adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 246			rxr->xdp_ring = &adapter->tx_ring[i + adapter->num_io_queues];
 247		}
 248	}
 249}
 250
 251/* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors)
 252 * @adapter: network interface device structure
 253 * @qid: queue index
 254 *
 255 * Return 0 on success, negative on failure
 256 */
 257static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
 258{
 259	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
 260	struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
 261	int size, i, node;
 262
 263	if (tx_ring->tx_buffer_info) {
 264		netif_err(adapter, ifup,
  265			  adapter->netdev, "tx_buffer_info is not NULL");
 266		return -EEXIST;
 267	}
 268
 269	size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
 270	node = cpu_to_node(ena_irq->cpu);
 271
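	/* Prefer the IRQ's NUMA node; fall back to an allocation on any node */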
 272	tx_ring->tx_buffer_info = vzalloc_node(size, node);
 273	if (!tx_ring->tx_buffer_info) {
 274		tx_ring->tx_buffer_info = vzalloc(size);
 275		if (!tx_ring->tx_buffer_info)
 276			goto err_tx_buffer_info;
 277	}
 278
 279	size = sizeof(u16) * tx_ring->ring_size;
 280	tx_ring->free_ids = vzalloc_node(size, node);
 281	if (!tx_ring->free_ids) {
 282		tx_ring->free_ids = vzalloc(size);
 283		if (!tx_ring->free_ids)
 284			goto err_tx_free_ids;
 285	}
 286
 287	size = tx_ring->tx_max_header_size;
 288	tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node);
 289	if (!tx_ring->push_buf_intermediate_buf) {
 290		tx_ring->push_buf_intermediate_buf = vzalloc(size);
 291		if (!tx_ring->push_buf_intermediate_buf)
 292			goto err_push_buf_intermediate_buf;
 293	}
 294
 295	/* Req id ring for TX out of order completions */
 296	for (i = 0; i < tx_ring->ring_size; i++)
 297		tx_ring->free_ids[i] = i;
 298
 299	/* Reset tx statistics */
 300	memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats));
 301
 302	tx_ring->next_to_use = 0;
 303	tx_ring->next_to_clean = 0;
 304	tx_ring->cpu = ena_irq->cpu;
 305	tx_ring->numa_node = node;
 306	return 0;
 307
 308err_push_buf_intermediate_buf:
 309	vfree(tx_ring->free_ids);
 310	tx_ring->free_ids = NULL;
 311err_tx_free_ids:
 312	vfree(tx_ring->tx_buffer_info);
 313	tx_ring->tx_buffer_info = NULL;
 314err_tx_buffer_info:
 315	return -ENOMEM;
 316}
 317
 318/* ena_free_tx_resources - Free I/O Tx Resources per Queue
 319 * @adapter: network interface device structure
 320 * @qid: queue index
 321 *
 322 * Free all transmit software resources
 323 */
 324static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
 325{
 326	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
 327
 328	vfree(tx_ring->tx_buffer_info);
 329	tx_ring->tx_buffer_info = NULL;
 330
 331	vfree(tx_ring->free_ids);
 332	tx_ring->free_ids = NULL;
 333
 334	vfree(tx_ring->push_buf_intermediate_buf);
 335	tx_ring->push_buf_intermediate_buf = NULL;
 336}
 337
 338int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
 339				    int first_index, int count)
 340{
 341	int i, rc = 0;
 342
 343	for (i = first_index; i < first_index + count; i++) {
 344		rc = ena_setup_tx_resources(adapter, i);
 345		if (rc)
 346			goto err_setup_tx;
 347	}
 348
 349	return 0;
 350
 351err_setup_tx:
 352
 353	netif_err(adapter, ifup, adapter->netdev,
 354		  "Tx queue %d: allocation failed\n", i);
 355
 356	/* rewind the index freeing the rings as we go */
 357	while (first_index < i--)
 358		ena_free_tx_resources(adapter, i);
 359	return rc;
 360}
 361
 362void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
 363					   int first_index, int count)
 364{
 365	int i;
 366
 367	for (i = first_index; i < first_index + count; i++)
 368		ena_free_tx_resources(adapter, i);
 369}
 370
 371/* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
 372 * @adapter: board private structure
 373 *
 374 * Free all transmit software resources
 375 */
 376void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
 377{
 378	ena_free_all_io_tx_resources_in_range(adapter,
 379					      0,
 380					      adapter->xdp_num_queues +
 381					      adapter->num_io_queues);
 382}
 383
 384/* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors)
 385 * @adapter: network interface device structure
 386 * @qid: queue index
 387 *
 388 * Returns 0 on success, negative on failure
 389 */
 390static int ena_setup_rx_resources(struct ena_adapter *adapter,
 391				  u32 qid)
 392{
 393	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
 394	struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
 395	int size, node, i;
 396
 397	if (rx_ring->rx_buffer_info) {
 398		netif_err(adapter, ifup, adapter->netdev,
 399			  "rx_buffer_info is not NULL");
 400		return -EEXIST;
 401	}
 402
 403	/* alloc extra element so in rx path
 404	 * we can always prefetch rx_info + 1
 405	 */
 406	size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1);
 407	node = cpu_to_node(ena_irq->cpu);
 408
 409	rx_ring->rx_buffer_info = vzalloc_node(size, node);
 410	if (!rx_ring->rx_buffer_info) {
 411		rx_ring->rx_buffer_info = vzalloc(size);
 412		if (!rx_ring->rx_buffer_info)
 413			return -ENOMEM;
 414	}
 415
 416	size = sizeof(u16) * rx_ring->ring_size;
 417	rx_ring->free_ids = vzalloc_node(size, node);
 418	if (!rx_ring->free_ids) {
 419		rx_ring->free_ids = vzalloc(size);
 420		if (!rx_ring->free_ids) {
 421			vfree(rx_ring->rx_buffer_info);
 422			rx_ring->rx_buffer_info = NULL;
 423			return -ENOMEM;
 424		}
 425	}
 426
 427	/* Req id ring for receiving RX pkts out of order */
 428	for (i = 0; i < rx_ring->ring_size; i++)
 429		rx_ring->free_ids[i] = i;
 430
 431	/* Reset rx statistics */
 432	memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats));
 433
 434	rx_ring->next_to_clean = 0;
 435	rx_ring->next_to_use = 0;
 436	rx_ring->cpu = ena_irq->cpu;
 437	rx_ring->numa_node = node;
 438
 439	return 0;
 440}
 441
 442/* ena_free_rx_resources - Free I/O Rx Resources
 443 * @adapter: network interface device structure
 444 * @qid: queue index
 445 *
 446 * Free all receive software resources
 447 */
 448static void ena_free_rx_resources(struct ena_adapter *adapter,
 449				  u32 qid)
 450{
 451	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
 452
 453	vfree(rx_ring->rx_buffer_info);
 454	rx_ring->rx_buffer_info = NULL;
 455
 456	vfree(rx_ring->free_ids);
 457	rx_ring->free_ids = NULL;
 458}
 459
 460/* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues
 461 * @adapter: board private structure
 462 *
 463 * Return 0 on success, negative on failure
 464 */
 465static int ena_setup_all_rx_resources(struct ena_adapter *adapter)
 466{
 467	int i, rc = 0;
 468
 469	for (i = 0; i < adapter->num_io_queues; i++) {
 470		rc = ena_setup_rx_resources(adapter, i);
 471		if (rc)
 472			goto err_setup_rx;
 473	}
 474
 475	return 0;
 476
 477err_setup_rx:
 478
 479	netif_err(adapter, ifup, adapter->netdev,
 480		  "Rx queue %d: allocation failed\n", i);
 481
 482	/* rewind the index freeing the rings as we go */
 483	while (i--)
 484		ena_free_rx_resources(adapter, i);
 485	return rc;
 486}
 487
 488/* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues
 489 * @adapter: board private structure
 490 *
 491 * Free all receive software resources
 492 */
 493static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
 494{
 495	int i;
 496
 497	for (i = 0; i < adapter->num_io_queues; i++)
 498		ena_free_rx_resources(adapter, i);
 499}
 500
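/* ena_alloc_map_page - allocate a full RX page and DMA-map it bidirectionally
 * (bidirectional so the NIC can also read it, e.g. for SPAN port mirroring).
 * Returns the page on success or an ERR_PTR() encoded error.
 */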
 501static struct page *ena_alloc_map_page(struct ena_ring *rx_ring,
 502				       dma_addr_t *dma)
 503{
 504	struct page *page;
 505
  506	/* This allocates the page on the same NUMA node the executing code
 507	 * is running on.
 508	 */
 509	page = dev_alloc_page();
 510	if (!page) {
 511		ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1, &rx_ring->syncp);
 512		return ERR_PTR(-ENOSPC);
 513	}
 514
 515	/* To enable NIC-side port-mirroring, AKA SPAN port,
 516	 * we make the buffer readable from the nic as well
 517	 */
 518	*dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE,
 519			    DMA_BIDIRECTIONAL);
 520	if (unlikely(dma_mapping_error(rx_ring->dev, *dma))) {
 521		ena_increase_stat(&rx_ring->rx_stats.dma_mapping_err, 1,
 522				  &rx_ring->syncp);
 523		__free_page(page);
 524		return ERR_PTR(-EIO);
 525	}
 526
 527	return page;
 528}
 529
 530static int ena_alloc_rx_buffer(struct ena_ring *rx_ring,
 531			       struct ena_rx_buffer *rx_info)
 532{
 533	int headroom = rx_ring->rx_headroom;
 534	struct ena_com_buf *ena_buf;
 535	struct page *page;
 536	dma_addr_t dma;
 537	int tailroom;
 538
 539	/* restore page offset value in case it has been changed by device */
 540	rx_info->buf_offset = headroom;
 541
  542	/* if the previously allocated page is not used */
 543	if (unlikely(rx_info->page))
 544		return 0;
 545
 546	/* We handle DMA here */
 547	page = ena_alloc_map_page(rx_ring, &dma);
 548	if (IS_ERR(page))
 549		return PTR_ERR(page);
 550
 551	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 552		  "Allocate page %p, rx_info %p\n", page, rx_info);
 553
 554	tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 555
 556	rx_info->page = page;
 557	rx_info->dma_addr = dma;
 558	rx_info->page_offset = 0;
 559	ena_buf = &rx_info->ena_buf;
 560	ena_buf->paddr = dma + headroom;
 561	ena_buf->len = ENA_PAGE_SIZE - headroom - tailroom;
 562
 563	return 0;
 564}
 565
 566static void ena_unmap_rx_buff_attrs(struct ena_ring *rx_ring,
 567				    struct ena_rx_buffer *rx_info,
 568				    unsigned long attrs)
 569{
 570	dma_unmap_page_attrs(rx_ring->dev, rx_info->dma_addr, ENA_PAGE_SIZE, DMA_BIDIRECTIONAL,
 571			     attrs);
 572}
 573
 574static void ena_free_rx_page(struct ena_ring *rx_ring,
 575			     struct ena_rx_buffer *rx_info)
 576{
 577	struct page *page = rx_info->page;
 578
 579	if (unlikely(!page)) {
 580		netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
 581			   "Trying to free unallocated buffer\n");
 582		return;
 583	}
 584
 585	ena_unmap_rx_buff_attrs(rx_ring, rx_info, 0);
 586
 587	__free_page(page);
 588	rx_info->page = NULL;
 589}
 590
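/* ena_refill_rx_bufs - allocate and post up to @num RX buffers to the device.
 * Stops early on allocation or submission failure, rings the SQ doorbell for
 * whatever was posted and returns the number of buffers actually added.
 */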
 591static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
 592{
 593	u16 next_to_use, req_id;
 594	u32 i;
 595	int rc;
 596
 597	next_to_use = rx_ring->next_to_use;
 598
 599	for (i = 0; i < num; i++) {
 600		struct ena_rx_buffer *rx_info;
 601
 602		req_id = rx_ring->free_ids[next_to_use];
 603
 604		rx_info = &rx_ring->rx_buffer_info[req_id];
 605
 606		rc = ena_alloc_rx_buffer(rx_ring, rx_info);
 607		if (unlikely(rc < 0)) {
 608			netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
 609				   "Failed to allocate buffer for rx queue %d\n",
 610				   rx_ring->qid);
 611			break;
 612		}
 613		rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
 614						&rx_info->ena_buf,
 615						req_id);
 616		if (unlikely(rc)) {
 617			netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
 618				   "Failed to add buffer for rx queue %d\n",
 619				   rx_ring->qid);
 620			break;
 621		}
 622		next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
 623						   rx_ring->ring_size);
 624	}
 625
 626	if (unlikely(i < num)) {
 627		ena_increase_stat(&rx_ring->rx_stats.refil_partial, 1,
 628				  &rx_ring->syncp);
 629		netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
 630			   "Refilled rx qid %d with only %d buffers (from %d)\n",
 631			   rx_ring->qid, i, num);
 632	}
 633
 634	/* ena_com_write_sq_doorbell issues a wmb() */
 635	if (likely(i))
 636		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
 637
 638	rx_ring->next_to_use = next_to_use;
 639
 640	return i;
 641}
 642
 643static void ena_free_rx_bufs(struct ena_adapter *adapter,
 644			     u32 qid)
 645{
 646	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
 647	u32 i;
 648
 649	for (i = 0; i < rx_ring->ring_size; i++) {
 650		struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
 651
 652		if (rx_info->page)
 653			ena_free_rx_page(rx_ring, rx_info);
 654	}
 655}
 656
 657/* ena_refill_all_rx_bufs - allocate all queues Rx buffers
 658 * @adapter: board private structure
 659 */
 660static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
 661{
 662	struct ena_ring *rx_ring;
 663	int i, rc, bufs_num;
 664
 665	for (i = 0; i < adapter->num_io_queues; i++) {
 666		rx_ring = &adapter->rx_ring[i];
 667		bufs_num = rx_ring->ring_size - 1;
 668		rc = ena_refill_rx_bufs(rx_ring, bufs_num);
 669
 670		if (unlikely(rc != bufs_num))
 671			netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
 672				   "Refilling Queue %d failed. allocated %d buffers from: %d\n",
 673				   i, rc, bufs_num);
 674	}
 675}
 676
 677static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
 678{
 679	int i;
 680
 681	for (i = 0; i < adapter->num_io_queues; i++)
 682		ena_free_rx_bufs(adapter, i);
 683}
 684
 685void ena_unmap_tx_buff(struct ena_ring *tx_ring,
 686		       struct ena_tx_buffer *tx_info)
 687{
 688	struct ena_com_buf *ena_buf;
 689	u32 cnt;
 690	int i;
 691
 692	ena_buf = tx_info->bufs;
 693	cnt = tx_info->num_of_bufs;
 694
 695	if (unlikely(!cnt))
 696		return;
 697
 698	if (tx_info->map_linear_data) {
 699		dma_unmap_single(tx_ring->dev,
 700				 dma_unmap_addr(ena_buf, paddr),
 701				 dma_unmap_len(ena_buf, len),
 702				 DMA_TO_DEVICE);
 703		ena_buf++;
 704		cnt--;
 705	}
 706
 707	/* unmap remaining mapped pages */
 708	for (i = 0; i < cnt; i++) {
 709		dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
 710			       dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
 711		ena_buf++;
 712	}
 713}
 714
 715/* ena_free_tx_bufs - Free Tx Buffers per Queue
  716 * @tx_ring: TX ring for which buffers are to be freed
 717 */
 718static void ena_free_tx_bufs(struct ena_ring *tx_ring)
 719{
 720	bool print_once = true;
 721	bool is_xdp_ring;
 722	u32 i;
 723
 724	is_xdp_ring = ENA_IS_XDP_INDEX(tx_ring->adapter, tx_ring->qid);
 725
 726	for (i = 0; i < tx_ring->ring_size; i++) {
 727		struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
 728
 729		if (!tx_info->skb)
 730			continue;
 731
 732		if (print_once) {
 733			netif_notice(tx_ring->adapter, ifdown, tx_ring->netdev,
 734				     "Free uncompleted tx skb qid %d idx 0x%x\n",
 735				     tx_ring->qid, i);
 736			print_once = false;
 737		} else {
 738			netif_dbg(tx_ring->adapter, ifdown, tx_ring->netdev,
 739				  "Free uncompleted tx skb qid %d idx 0x%x\n",
 740				  tx_ring->qid, i);
 741		}
 742
 743		ena_unmap_tx_buff(tx_ring, tx_info);
 744
 745		if (is_xdp_ring)
 746			xdp_return_frame(tx_info->xdpf);
 747		else
 748			dev_kfree_skb_any(tx_info->skb);
 749	}
 750
 751	if (!is_xdp_ring)
 752		netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
 753							  tx_ring->qid));
 754}
 755
 756static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
 757{
 758	struct ena_ring *tx_ring;
 759	int i;
 760
 761	for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
 762		tx_ring = &adapter->tx_ring[i];
 763		ena_free_tx_bufs(tx_ring);
 764	}
 765}
 766
 767static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
 768{
 769	u16 ena_qid;
 770	int i;
 771
 772	for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
 773		ena_qid = ENA_IO_TXQ_IDX(i);
 774		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
 775	}
 776}
 777
 778static void ena_destroy_all_rx_queues(struct ena_adapter *adapter)
 779{
 780	u16 ena_qid;
 781	int i;
 782
 783	for (i = 0; i < adapter->num_io_queues; i++) {
 784		ena_qid = ENA_IO_RXQ_IDX(i);
 785		cancel_work_sync(&adapter->ena_napi[i].dim.work);
 786		ena_xdp_unregister_rxq_info(&adapter->rx_ring[i]);
 787		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
 788	}
 789}
 790
 791static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
 792{
 793	ena_destroy_all_tx_queues(adapter);
 794	ena_destroy_all_rx_queues(adapter);
 795}
 796
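/* handle_invalid_req_id - log a completion carrying a bad request id, bump
 * the bad_req_id statistic and schedule a device reset. Returns -EFAULT.
 */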
 797int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
 798			  struct ena_tx_buffer *tx_info, bool is_xdp)
 799{
 800	if (tx_info)
 801		netif_err(ring->adapter,
 802			  tx_done,
 803			  ring->netdev,
 804			  "tx_info doesn't have valid %s. qid %u req_id %u",
 805			   is_xdp ? "xdp frame" : "skb", ring->qid, req_id);
 806	else
 807		netif_err(ring->adapter,
 808			  tx_done,
 809			  ring->netdev,
 810			  "Invalid req_id %u in qid %u\n",
 811			  req_id, ring->qid);
 812
 813	ena_increase_stat(&ring->tx_stats.bad_req_id, 1, &ring->syncp);
 814	ena_reset_device(ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
 815
 816	return -EFAULT;
 817}
 818
 819static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
 820{
 821	struct ena_tx_buffer *tx_info;
 822
 823	tx_info = &tx_ring->tx_buffer_info[req_id];
 824	if (likely(tx_info->skb))
 825		return 0;
 826
 827	return handle_invalid_req_id(tx_ring, req_id, tx_info, false);
 828}
 829
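/* ena_clean_tx_irq - reclaim up to @budget completed TX packets: unmap their
 * buffers, free the skbs, return the req_ids to free_ids and wake the netdev
 * queue if enough SQ space became available. Returns the number of packets
 * cleaned.
 */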
 830static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
 831{
 832	struct netdev_queue *txq;
 833	bool above_thresh;
 834	u32 tx_bytes = 0;
 835	u32 total_done = 0;
 836	u16 next_to_clean;
 837	u16 req_id;
 838	int tx_pkts = 0;
 839	int rc;
 840
 841	next_to_clean = tx_ring->next_to_clean;
 842	txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid);
 843
 844	while (tx_pkts < budget) {
 845		struct ena_tx_buffer *tx_info;
 846		struct sk_buff *skb;
 847
 848		rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
 849						&req_id);
 850		if (rc) {
 851			if (unlikely(rc == -EINVAL))
 852				handle_invalid_req_id(tx_ring, req_id, NULL, false);
 853			break;
 854		}
 855
 856		/* validate that the request id points to a valid skb */
 857		rc = validate_tx_req_id(tx_ring, req_id);
 858		if (rc)
 859			break;
 860
 861		tx_info = &tx_ring->tx_buffer_info[req_id];
 862		skb = tx_info->skb;
 863
  864		/* prefetch skb_end_pointer() to speed up skb_shinfo(skb) */
 865		prefetch(&skb->end);
 866
 867		tx_info->skb = NULL;
 868		tx_info->last_jiffies = 0;
 869
 870		ena_unmap_tx_buff(tx_ring, tx_info);
 871
 872		netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
 873			  "tx_poll: q %d skb %p completed\n", tx_ring->qid,
 874			  skb);
 875
 876		tx_bytes += tx_info->total_tx_size;
 877		dev_kfree_skb(skb);
 878		tx_pkts++;
 879		total_done += tx_info->tx_descs;
 880
 881		tx_ring->free_ids[next_to_clean] = req_id;
 882		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
 883						     tx_ring->ring_size);
 884	}
 885
 886	tx_ring->next_to_clean = next_to_clean;
 887	ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
 888
 889	netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
 890
 891	netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
 892		  "tx_poll: q %d done. total pkts: %d\n",
 893		  tx_ring->qid, tx_pkts);
 894
  895	/* need to make the ring's circular update visible to
 896	 * ena_start_xmit() before checking for netif_queue_stopped().
 897	 */
 898	smp_mb();
 899
 900	above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
 901						    ENA_TX_WAKEUP_THRESH);
 902	if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
 903		__netif_tx_lock(txq, smp_processor_id());
 904		above_thresh =
 905			ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
 906						     ENA_TX_WAKEUP_THRESH);
 907		if (netif_tx_queue_stopped(txq) && above_thresh &&
 908		    test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
 909			netif_tx_wake_queue(txq);
 910			ena_increase_stat(&tx_ring->tx_stats.queue_wakeup, 1,
 911					  &tx_ring->syncp);
 912		}
 913		__netif_tx_unlock(txq);
 914	}
 915
 916	return tx_pkts;
 917}
 918
 919static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag, u16 len)
 920{
 921	struct sk_buff *skb;
 922
 923	if (!first_frag)
 924		skb = napi_alloc_skb(rx_ring->napi, len);
 925	else
 926		skb = napi_build_skb(first_frag, len);
 927
 928	if (unlikely(!skb)) {
 929		ena_increase_stat(&rx_ring->rx_stats.skb_alloc_fail, 1,
 930				  &rx_ring->syncp);
 931
 932		netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
 933			  "Failed to allocate skb. first_frag %s\n",
 934			  first_frag ? "provided" : "not provided");
 935	}
 936
 937	return skb;
 938}
 939
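/* ena_try_rx_buf_page_reuse - if enough of the current RX page is left for
 * another buffer (data + headroom + tailroom), take an extra page reference
 * and advance the page/DMA offsets so the remainder can be reposted.
 */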
 940static bool ena_try_rx_buf_page_reuse(struct ena_rx_buffer *rx_info, u16 buf_len,
 941				      u16 len, int pkt_offset)
 942{
 943	struct ena_com_buf *ena_buf = &rx_info->ena_buf;
 944
 945	/* More than ENA_MIN_RX_BUF_SIZE left in the reused buffer
 946	 * for data + headroom + tailroom.
 947	 */
 948	if (SKB_DATA_ALIGN(len + pkt_offset) + ENA_MIN_RX_BUF_SIZE <= ena_buf->len) {
 949		page_ref_inc(rx_info->page);
 950		rx_info->page_offset += buf_len;
 951		ena_buf->paddr += buf_len;
 952		ena_buf->len -= buf_len;
 953		return true;
 954	}
 955
 956	return false;
 957}
 958
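/* ena_rx_skb - build an skb for a received packet. Small packets (up to
 * rx_copybreak) are copied into a freshly allocated skb so the page can be
 * reposted as-is; larger packets get an skb built around the first buffer,
 * with any remaining descriptors attached as page frags.
 */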
 959static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 960				  struct ena_com_rx_buf_info *ena_bufs,
 961				  u32 descs,
 962				  u16 *next_to_clean)
 963{
 964	int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 965	bool is_xdp_loaded = ena_xdp_present_ring(rx_ring);
 966	struct ena_rx_buffer *rx_info;
 967	struct ena_adapter *adapter;
 968	int page_offset, pkt_offset;
 969	dma_addr_t pre_reuse_paddr;
 970	u16 len, req_id, buf = 0;
 971	bool reuse_rx_buf_page;
 972	struct sk_buff *skb;
 973	void *buf_addr;
 974	int buf_offset;
 975	u16 buf_len;
 976
 977	len = ena_bufs[buf].len;
 978	req_id = ena_bufs[buf].req_id;
 979
 980	rx_info = &rx_ring->rx_buffer_info[req_id];
 981
 982	if (unlikely(!rx_info->page)) {
 983		adapter = rx_ring->adapter;
 984		netif_err(adapter, rx_err, rx_ring->netdev,
 985			  "Page is NULL. qid %u req_id %u\n", rx_ring->qid, req_id);
 986		ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1, &rx_ring->syncp);
 987		ena_reset_device(adapter, ENA_REGS_RESET_INV_RX_REQ_ID);
 988		return NULL;
 989	}
 990
 991	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 992		  "rx_info %p page %p\n",
 993		  rx_info, rx_info->page);
 994
 995	buf_offset = rx_info->buf_offset;
 996	pkt_offset = buf_offset - rx_ring->rx_headroom;
 997	page_offset = rx_info->page_offset;
 998	buf_addr = page_address(rx_info->page) + page_offset;
 999
1000	if (len <= rx_ring->rx_copybreak) {
1001		skb = ena_alloc_skb(rx_ring, NULL, len);
1002		if (unlikely(!skb))
1003			return NULL;
1004
1005		skb_copy_to_linear_data(skb, buf_addr + buf_offset, len);
1006		dma_sync_single_for_device(rx_ring->dev,
1007					   dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset,
1008					   len,
1009					   DMA_FROM_DEVICE);
1010
1011		skb_put(skb, len);
1012		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1013			  "RX allocated small packet. len %d.\n", skb->len);
1014		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1015		rx_ring->free_ids[*next_to_clean] = req_id;
1016		*next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs,
1017						     rx_ring->ring_size);
1018		return skb;
1019	}
1020
1021	buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom);
1022
1023	/* If XDP isn't loaded try to reuse part of the RX buffer */
1024	reuse_rx_buf_page = !is_xdp_loaded &&
1025			    ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset);
1026
1027	if (!reuse_rx_buf_page)
1028		ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC);
1029
1030	skb = ena_alloc_skb(rx_ring, buf_addr, buf_len);
1031	if (unlikely(!skb))
1032		return NULL;
1033
1034	/* Populate skb's linear part */
1035	skb_reserve(skb, buf_offset);
1036	skb_put(skb, len);
1037	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1038
1039	do {
1040		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1041			  "RX skb updated. len %d. data_len %d\n",
1042			  skb->len, skb->data_len);
1043
1044		if (!reuse_rx_buf_page)
1045			rx_info->page = NULL;
1046
1047		rx_ring->free_ids[*next_to_clean] = req_id;
1048		*next_to_clean =
1049			ENA_RX_RING_IDX_NEXT(*next_to_clean,
1050					     rx_ring->ring_size);
1051		if (likely(--descs == 0))
1052			break;
1053
1054		buf++;
1055		len = ena_bufs[buf].len;
1056		req_id = ena_bufs[buf].req_id;
1057
1058		rx_info = &rx_ring->rx_buffer_info[req_id];
1059
1060		/* rx_info->buf_offset includes rx_ring->rx_headroom */
1061		buf_offset = rx_info->buf_offset;
1062		pkt_offset = buf_offset - rx_ring->rx_headroom;
1063		buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom);
1064		page_offset = rx_info->page_offset;
1065
1066		pre_reuse_paddr = dma_unmap_addr(&rx_info->ena_buf, paddr);
1067
1068		reuse_rx_buf_page = !is_xdp_loaded &&
1069				    ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset);
1070
1071		dma_sync_single_for_cpu(rx_ring->dev,
1072					pre_reuse_paddr + pkt_offset,
1073					len,
1074					DMA_FROM_DEVICE);
1075
1076		if (!reuse_rx_buf_page)
1077			ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC);
1078
1079		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
1080				page_offset + buf_offset, len, buf_len);
1081
1082	} while (1);
1083
1084	return skb;
1085}
1086
1087/* ena_rx_checksum - indicate in skb if hw indicated a good cksum
1088 * @adapter: structure containing adapter specific data
1089 * @ena_rx_ctx: received packet context/metadata
1090 * @skb: skb currently being received and modified
1091 */
1092static void ena_rx_checksum(struct ena_ring *rx_ring,
1093				   struct ena_com_rx_ctx *ena_rx_ctx,
1094				   struct sk_buff *skb)
1095{
1096	/* Rx csum disabled */
1097	if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) {
1098		skb->ip_summed = CHECKSUM_NONE;
1099		return;
1100	}
1101
1102	/* For fragmented packets the checksum isn't valid */
1103	if (ena_rx_ctx->frag) {
1104		skb->ip_summed = CHECKSUM_NONE;
1105		return;
1106	}
1107
1108	/* if IP and error */
1109	if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
1110		     (ena_rx_ctx->l3_csum_err))) {
1111		/* ipv4 checksum error */
1112		skb->ip_summed = CHECKSUM_NONE;
1113		ena_increase_stat(&rx_ring->rx_stats.csum_bad, 1,
1114				  &rx_ring->syncp);
1115		netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
1116			  "RX IPv4 header checksum error\n");
1117		return;
1118	}
1119
1120	/* if TCP/UDP */
1121	if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1122		   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) {
1123		if (unlikely(ena_rx_ctx->l4_csum_err)) {
1124			/* TCP/UDP checksum error */
1125			ena_increase_stat(&rx_ring->rx_stats.csum_bad, 1,
1126					  &rx_ring->syncp);
1127			netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
1128				  "RX L4 checksum error\n");
1129			skb->ip_summed = CHECKSUM_NONE;
1130			return;
1131		}
1132
1133		if (likely(ena_rx_ctx->l4_csum_checked)) {
1134			skb->ip_summed = CHECKSUM_UNNECESSARY;
1135			ena_increase_stat(&rx_ring->rx_stats.csum_good, 1,
1136					  &rx_ring->syncp);
1137		} else {
1138			ena_increase_stat(&rx_ring->rx_stats.csum_unchecked, 1,
1139					  &rx_ring->syncp);
1140			skb->ip_summed = CHECKSUM_NONE;
1141		}
1142	} else {
1143		skb->ip_summed = CHECKSUM_NONE;
1144		return;
1145	}
1146
1147}
1148
1149static void ena_set_rx_hash(struct ena_ring *rx_ring,
1150			    struct ena_com_rx_ctx *ena_rx_ctx,
1151			    struct sk_buff *skb)
1152{
1153	enum pkt_hash_types hash_type;
1154
1155	if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) {
1156		if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1157			   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)))
1158
1159			hash_type = PKT_HASH_TYPE_L4;
1160		else
1161			hash_type = PKT_HASH_TYPE_NONE;
1162
1163		/* Override hash type if the packet is fragmented */
1164		if (ena_rx_ctx->frag)
1165			hash_type = PKT_HASH_TYPE_NONE;
1166
1167		skb_set_hash(skb, ena_rx_ctx->hash, hash_type);
1168	}
1169}
1170
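/* ena_xdp_handle_buff - run the attached XDP program on a single-descriptor
 * packet (multi-buffer packets are dropped) and return the ENA_XDP_* verdict.
 * On XDP_PASS the buffer offset and length are updated in case the program
 * moved the packet boundaries.
 */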
1171static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp, u16 num_descs)
1172{
1173	struct ena_rx_buffer *rx_info;
1174	int ret;
1175
1176	/* XDP multi-buffer packets not supported */
1177	if (unlikely(num_descs > 1)) {
1178		netdev_err_once(rx_ring->adapter->netdev,
1179				"xdp: dropped unsupported multi-buffer packets\n");
1180		ena_increase_stat(&rx_ring->rx_stats.xdp_drop, 1, &rx_ring->syncp);
1181		return ENA_XDP_DROP;
1182	}
1183
1184	rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
1185	xdp_prepare_buff(xdp, page_address(rx_info->page),
1186			 rx_info->buf_offset,
1187			 rx_ring->ena_bufs[0].len, false);
1188
1189	ret = ena_xdp_execute(rx_ring, xdp);
1190
1191	/* The xdp program might expand the headers */
1192	if (ret == ENA_XDP_PASS) {
1193		rx_info->buf_offset = xdp->data - xdp->data_hard_start;
1194		rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data;
1195	}
1196
1197	return ret;
1198}
1199
1200/* ena_clean_rx_irq - Cleanup RX irq
1201 * @rx_ring: RX ring to clean
1202 * @napi: napi handler
1203 * @budget: how many packets driver is allowed to clean
1204 *
1205 * Returns the number of cleaned buffers.
1206 */
1207static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
1208			    u32 budget)
1209{
1210	u16 next_to_clean = rx_ring->next_to_clean;
1211	struct ena_com_rx_ctx ena_rx_ctx;
1212	struct ena_rx_buffer *rx_info;
1213	struct ena_adapter *adapter;
1214	u32 res_budget, work_done;
1215	int rx_copybreak_pkt = 0;
1216	int refill_threshold;
1217	struct sk_buff *skb;
1218	int refill_required;
1219	struct xdp_buff xdp;
1220	int xdp_flags = 0;
1221	int total_len = 0;
1222	int xdp_verdict;
1223	u8 pkt_offset;
1224	int rc = 0;
1225	int i;
1226
1227	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1228		  "%s qid %d\n", __func__, rx_ring->qid);
1229	res_budget = budget;
1230	xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq);
1231
1232	do {
1233		xdp_verdict = ENA_XDP_PASS;
1234		skb = NULL;
1235		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
1236		ena_rx_ctx.max_bufs = rx_ring->sgl_size;
1237		ena_rx_ctx.descs = 0;
1238		ena_rx_ctx.pkt_offset = 0;
1239		rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
1240				    rx_ring->ena_com_io_sq,
1241				    &ena_rx_ctx);
1242		if (unlikely(rc))
1243			goto error;
1244
1245		if (unlikely(ena_rx_ctx.descs == 0))
1246			break;
1247
1248		/* First descriptor might have an offset set by the device */
1249		rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
1250		pkt_offset = ena_rx_ctx.pkt_offset;
1251		rx_info->buf_offset += pkt_offset;
1252
1253		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1254			  "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
1255			  rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
1256			  ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
1257
1258		dma_sync_single_for_cpu(rx_ring->dev,
1259					dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset,
1260					rx_ring->ena_bufs[0].len,
1261					DMA_FROM_DEVICE);
1262
1263		if (ena_xdp_present_ring(rx_ring))
1264			xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp, ena_rx_ctx.descs);
1265
1266		/* allocate skb and fill it */
1267		if (xdp_verdict == ENA_XDP_PASS)
1268			skb = ena_rx_skb(rx_ring,
1269					 rx_ring->ena_bufs,
1270					 ena_rx_ctx.descs,
1271					 &next_to_clean);
1272
1273		if (unlikely(!skb)) {
1274			for (i = 0; i < ena_rx_ctx.descs; i++) {
1275				int req_id = rx_ring->ena_bufs[i].req_id;
1276
1277				rx_ring->free_ids[next_to_clean] = req_id;
1278				next_to_clean =
1279					ENA_RX_RING_IDX_NEXT(next_to_clean,
1280							     rx_ring->ring_size);
1281
 1282				/* Packet was passed for transmission, unmap it
1283				 * from RX side.
1284				 */
1285				if (xdp_verdict & ENA_XDP_FORWARDED) {
1286					ena_unmap_rx_buff_attrs(rx_ring,
1287								&rx_ring->rx_buffer_info[req_id],
1288								DMA_ATTR_SKIP_CPU_SYNC);
1289					rx_ring->rx_buffer_info[req_id].page = NULL;
1290				}
1291			}
1292			if (xdp_verdict != ENA_XDP_PASS) {
1293				xdp_flags |= xdp_verdict;
1294				total_len += ena_rx_ctx.ena_bufs[0].len;
1295				res_budget--;
1296				continue;
1297			}
1298			break;
1299		}
1300
1301		ena_rx_checksum(rx_ring, &ena_rx_ctx, skb);
1302
1303		ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb);
1304
1305		skb_record_rx_queue(skb, rx_ring->qid);
1306
1307		if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak)
1308			rx_copybreak_pkt++;
1309
1310		total_len += skb->len;
1311
1312		napi_gro_receive(napi, skb);
1313
1314		res_budget--;
1315	} while (likely(res_budget));
1316
1317	work_done = budget - res_budget;
1318	rx_ring->per_napi_packets += work_done;
1319	u64_stats_update_begin(&rx_ring->syncp);
1320	rx_ring->rx_stats.bytes += total_len;
1321	rx_ring->rx_stats.cnt += work_done;
1322	rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt;
1323	u64_stats_update_end(&rx_ring->syncp);
1324
1325	rx_ring->next_to_clean = next_to_clean;
1326
1327	refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
1328	refill_threshold =
1329		min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
1330		      ENA_RX_REFILL_THRESH_PACKET);
1331
1332	/* Optimization, try to batch new rx buffers */
1333	if (refill_required > refill_threshold)
1334		ena_refill_rx_bufs(rx_ring, refill_required);
1335
1336	if (xdp_flags & ENA_XDP_REDIRECT)
1337		xdp_do_flush();
1338
1339	return work_done;
1340
1341error:
1342	if (xdp_flags & ENA_XDP_REDIRECT)
1343		xdp_do_flush();
1344
1345	adapter = netdev_priv(rx_ring->netdev);
1346
1347	if (rc == -ENOSPC) {
1348		ena_increase_stat(&rx_ring->rx_stats.bad_desc_num, 1, &rx_ring->syncp);
1349		ena_reset_device(adapter, ENA_REGS_RESET_TOO_MANY_RX_DESCS);
1350	} else if (rc == -EFAULT) {
1351		ena_reset_device(adapter, ENA_REGS_RESET_RX_DESCRIPTOR_MALFORMED);
1352	} else {
1353		ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1,
1354				  &rx_ring->syncp);
1355		ena_reset_device(adapter, ENA_REGS_RESET_INV_RX_REQ_ID);
1356	}
1357	return 0;
1358}
1359
1360static void ena_dim_work(struct work_struct *w)
1361{
1362	struct dim *dim = container_of(w, struct dim, work);
1363	struct dim_cq_moder cur_moder =
1364		net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
1365	struct ena_napi *ena_napi = container_of(dim, struct ena_napi, dim);
1366
1367	ena_napi->rx_ring->smoothed_interval = cur_moder.usec;
1368	dim->state = DIM_START_MEASURE;
1369}
1370
1371static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
1372{
1373	struct dim_sample dim_sample;
1374	struct ena_ring *rx_ring = ena_napi->rx_ring;
1375
1376	if (!rx_ring->per_napi_packets)
1377		return;
1378
1379	rx_ring->non_empty_napi_events++;
1380
1381	dim_update_sample(rx_ring->non_empty_napi_events,
1382			  rx_ring->rx_stats.cnt,
1383			  rx_ring->rx_stats.bytes,
1384			  &dim_sample);
1385
1386	net_dim(&ena_napi->dim, &dim_sample);
1387
1388	rx_ring->per_napi_packets = 0;
1389}
1390
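/* ena_unmask_interrupt - program the interrupt register with the current RX
 * and TX moderation intervals and unmask the MSI-X vector shared by this
 * TX/RX pair (rx_ring may be NULL for XDP-only TX queues).
 */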
1391void ena_unmask_interrupt(struct ena_ring *tx_ring,
1392			  struct ena_ring *rx_ring)
1393{
1394	u32 rx_interval = tx_ring->smoothed_interval;
1395	struct ena_eth_io_intr_reg intr_reg;
1396
 1397	/* Rx ring can be NULL for XDP tx queues, which don't have an
1398	 * accompanying rx_ring pair.
1399	 */
1400	if (rx_ring)
1401		rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ?
1402			rx_ring->smoothed_interval :
1403			ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev);
1404
1405	/* Update intr register: rx intr delay,
1406	 * tx intr delay and interrupt unmask
1407	 */
1408	ena_com_update_intr_reg(&intr_reg,
1409				rx_interval,
1410				tx_ring->smoothed_interval,
1411				true);
1412
1413	ena_increase_stat(&tx_ring->tx_stats.unmask_interrupt, 1,
1414			  &tx_ring->syncp);
1415
 1416	/* It is a shared MSI-X vector.
 1417	 * Both the Tx and Rx CQ point to it,
 1418	 * so we use one of them to reach the intr reg.
 1419	 * The Tx ring is used because rx_ring is NULL for XDP queues.
1420	 */
1421	ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg);
1422}
1423
1424void ena_update_ring_numa_node(struct ena_ring *tx_ring,
1425			       struct ena_ring *rx_ring)
1426{
1427	int cpu = get_cpu();
1428	int numa_node;
1429
1430	/* Check only one ring since the 2 rings are running on the same cpu */
1431	if (likely(tx_ring->cpu == cpu))
1432		goto out;
1433
1434	tx_ring->cpu = cpu;
1435	if (rx_ring)
1436		rx_ring->cpu = cpu;
1437
1438	numa_node = cpu_to_node(cpu);
1439
1440	if (likely(tx_ring->numa_node == numa_node))
1441		goto out;
1442
1443	put_cpu();
1444
1445	if (numa_node != NUMA_NO_NODE) {
1446		ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
1447		tx_ring->numa_node = numa_node;
1448		if (rx_ring) {
1449			rx_ring->numa_node = numa_node;
1450			ena_com_update_numa_node(rx_ring->ena_com_io_cq,
1451						 numa_node);
1452		}
1453	}
1454
1455	return;
1456out:
1457	put_cpu();
1458}
1459
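/* ena_io_poll - NAPI poll handler for a TX/RX queue pair. TX completions are
 * always cleaned; RX is cleaned only when budget is non-zero (netpoll passes
 * zero). When all work fits in the budget the handler completes NAPI, applies
 * adaptive RX moderation, refreshes the NUMA hint and unmasks the interrupt.
 */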
1460static int ena_io_poll(struct napi_struct *napi, int budget)
1461{
1462	struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
1463	struct ena_ring *tx_ring, *rx_ring;
1464	int tx_work_done;
1465	int rx_work_done = 0;
1466	int tx_budget;
1467	int napi_comp_call = 0;
1468	int ret;
1469
1470	tx_ring = ena_napi->tx_ring;
1471	rx_ring = ena_napi->rx_ring;
1472
1473	tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
1474
1475	if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1476	    test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
1477		napi_complete_done(napi, 0);
1478		return 0;
1479	}
1480
1481	tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
1482	/* On netpoll the budget is zero and the handler should only clean the
1483	 * tx completions.
1484	 */
1485	if (likely(budget))
1486		rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
1487
 1488	/* If the device is about to reset or is down, avoid unmasking
 1489	 * the interrupt and return 0 so NAPI won't reschedule
1490	 */
1491	if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1492		     test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags))) {
1493		napi_complete_done(napi, 0);
1494		ret = 0;
1495
1496	} else if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
1497		napi_comp_call = 1;
1498
 1499		/* Update numa and unmask the interrupt only when scheduled
1500		 * from the interrupt context (vs from sk_busy_loop)
1501		 */
1502		if (napi_complete_done(napi, rx_work_done) &&
1503		    READ_ONCE(ena_napi->interrupts_masked)) {
1504			smp_rmb(); /* make sure interrupts_masked is read */
1505			WRITE_ONCE(ena_napi->interrupts_masked, false);
1506			/* We apply adaptive moderation on Rx path only.
1507			 * Tx uses static interrupt moderation.
1508			 */
1509			if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
1510				ena_adjust_adaptive_rx_intr_moderation(ena_napi);
1511
1512			ena_update_ring_numa_node(tx_ring, rx_ring);
1513			ena_unmask_interrupt(tx_ring, rx_ring);
1514		}
1515
1516		ret = rx_work_done;
1517	} else {
1518		ret = budget;
1519	}
1520
1521	u64_stats_update_begin(&tx_ring->syncp);
1522	tx_ring->tx_stats.napi_comp += napi_comp_call;
1523	tx_ring->tx_stats.tx_poll++;
1524	u64_stats_update_end(&tx_ring->syncp);
1525
1526	tx_ring->tx_stats.last_napi_jiffies = jiffies;
1527
1528	return ret;
1529}
1530
1531static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data)
1532{
1533	struct ena_adapter *adapter = (struct ena_adapter *)data;
1534
1535	ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1536
1537	/* Don't call the aenq handler before probe is done */
1538	if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)))
1539		ena_com_aenq_intr_handler(adapter->ena_dev, data);
1540
1541	return IRQ_HANDLED;
1542}
1543
1544/* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx
1545 * @irq: interrupt number
1546 * @data: pointer to a network interface private napi device structure
1547 */
1548static irqreturn_t ena_intr_msix_io(int irq, void *data)
1549{
1550	struct ena_napi *ena_napi = data;
1551
1552	/* Used to check HW health */
1553	WRITE_ONCE(ena_napi->first_interrupt, true);
1554
1555	WRITE_ONCE(ena_napi->interrupts_masked, true);
1556	smp_wmb(); /* write interrupts_masked before calling napi */
1557
1558	napi_schedule_irqoff(&ena_napi->napi);
1559
1560	return IRQ_HANDLED;
1561}
1562
 1563/* Reserve a single MSI-X vector for management (admin + aenq),
 1564 * plus one vector for each potential io queue.
 1565 * The number of potential io queues is the minimum of what the device
1566 * supports and the number of vCPUs.
1567 */
1568static int ena_enable_msix(struct ena_adapter *adapter)
1569{
1570	int msix_vecs, irq_cnt;
1571
1572	if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
1573		netif_err(adapter, probe, adapter->netdev,
1574			  "Error, MSI-X is already enabled\n");
1575		return -EPERM;
1576	}
1577
 1578	/* Reserve the max msix vectors we might need */
1579	msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
1580	netif_dbg(adapter, probe, adapter->netdev,
1581		  "Trying to enable MSI-X, vectors %d\n", msix_vecs);
1582
1583	irq_cnt = pci_alloc_irq_vectors(adapter->pdev, ENA_MIN_MSIX_VEC,
1584					msix_vecs, PCI_IRQ_MSIX);
1585
1586	if (irq_cnt < 0) {
1587		netif_err(adapter, probe, adapter->netdev,
1588			  "Failed to enable MSI-X. irq_cnt %d\n", irq_cnt);
1589		return -ENOSPC;
1590	}
1591
1592	if (irq_cnt != msix_vecs) {
1593		netif_notice(adapter, probe, adapter->netdev,
1594			     "Enable only %d MSI-X (out of %d), reduce the number of queues\n",
1595			     irq_cnt, msix_vecs);
1596		adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
1597	}
1598
1599	if (ena_init_rx_cpu_rmap(adapter))
1600		netif_warn(adapter, probe, adapter->netdev,
1601			   "Failed to map IRQs to CPUs\n");
1602
1603	adapter->msix_vecs = irq_cnt;
1604	set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags);
1605
1606	return 0;
1607}
1608
1609static void ena_setup_mgmnt_intr(struct ena_adapter *adapter)
1610{
1611	u32 cpu;
1612
1613	snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
1614		 ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
1615		 pci_name(adapter->pdev));
1616	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler =
1617		ena_intr_msix_mgmnt;
1618	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
1619	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
1620		pci_irq_vector(adapter->pdev, ENA_MGMNT_IRQ_IDX);
1621	cpu = cpumask_first(cpu_online_mask);
1622	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu;
1623	cpumask_set_cpu(cpu,
1624			&adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask);
1625}
1626
1627static void ena_setup_io_intr(struct ena_adapter *adapter)
1628{
1629	struct net_device *netdev;
1630	int irq_idx, i, cpu;
1631	int io_queue_count;
1632
1633	netdev = adapter->netdev;
1634	io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
1635
1636	for (i = 0; i < io_queue_count; i++) {
1637		irq_idx = ENA_IO_IRQ_IDX(i);
1638		cpu = i % num_online_cpus();
1639
1640		snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
1641			 "%s-Tx-Rx-%d", netdev->name, i);
1642		adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io;
1643		adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i];
1644		adapter->irq_tbl[irq_idx].vector =
1645			pci_irq_vector(adapter->pdev, irq_idx);
1646		adapter->irq_tbl[irq_idx].cpu = cpu;
1647
1648		cpumask_set_cpu(cpu,
1649				&adapter->irq_tbl[irq_idx].affinity_hint_mask);
1650	}
1651}
1652
1653static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
1654{
1655	unsigned long flags = 0;
1656	struct ena_irq *irq;
1657	int rc;
1658
1659	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1660	rc = request_irq(irq->vector, irq->handler, flags, irq->name,
1661			 irq->data);
1662	if (rc) {
1663		netif_err(adapter, probe, adapter->netdev,
1664			  "Failed to request admin irq\n");
1665		return rc;
1666	}
1667
1668	netif_dbg(adapter, probe, adapter->netdev,
 1669		  "Set affinity hint of mgmnt irq to 0x%lx (irq vector: %d)\n",
1670		  irq->affinity_hint_mask.bits[0], irq->vector);
1671
1672	irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
1673
1674	return rc;
1675}
1676
1677static int ena_request_io_irq(struct ena_adapter *adapter)
1678{
1679	u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
1680	int rc = 0, i, k, irq_idx;
1681	unsigned long flags = 0;
1682	struct ena_irq *irq;
1683
1684	if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
1685		netif_err(adapter, ifup, adapter->netdev,
1686			  "Failed to request I/O IRQ: MSI-X is not enabled\n");
1687		return -EINVAL;
1688	}
1689
1690	for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
1691		irq = &adapter->irq_tbl[i];
1692		rc = request_irq(irq->vector, irq->handler, flags, irq->name,
1693				 irq->data);
1694		if (rc) {
1695			netif_err(adapter, ifup, adapter->netdev,
1696				  "Failed to request I/O IRQ. index %d rc %d\n",
1697				   i, rc);
1698			goto err;
1699		}
1700
1701		netif_dbg(adapter, ifup, adapter->netdev,
1702			  "Set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n",
1703			  i, irq->affinity_hint_mask.bits[0], irq->vector);
1704
1705		irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
1706	}
1707
1708	/* Now that IO IRQs have been successfully allocated map them to the
1709	 * corresponding IO NAPI instance. Note that the mgmnt IRQ does not
1710	 * have a NAPI, so care must be taken to correctly map IRQs to NAPIs.
1711	 */
1712	for (i = 0; i < io_queue_count; i++) {
1713		irq_idx = ENA_IO_IRQ_IDX(i);
1714		irq = &adapter->irq_tbl[irq_idx];
1715		netif_napi_set_irq(&adapter->ena_napi[i].napi, irq->vector);
1716	}
1717
1718	return rc;
1719
1720err:
1721	for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) {
1722		irq = &adapter->irq_tbl[k];
1723		free_irq(irq->vector, irq->data);
1724	}
1725
1726	return rc;
1727}
1728
1729static void ena_free_mgmnt_irq(struct ena_adapter *adapter)
1730{
1731	struct ena_irq *irq;
1732
1733	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1734	synchronize_irq(irq->vector);
1735	irq_set_affinity_hint(irq->vector, NULL);
1736	free_irq(irq->vector, irq->data);
1737}
1738
1739static void ena_free_io_irq(struct ena_adapter *adapter)
1740{
1741	u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
1742	struct ena_irq *irq;
1743	int i;
1744
1745#ifdef CONFIG_RFS_ACCEL
1746	if (adapter->msix_vecs >= 1) {
1747		free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
1748		adapter->netdev->rx_cpu_rmap = NULL;
1749	}
1750#endif /* CONFIG_RFS_ACCEL */
1751
1752	for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
1753		irq = &adapter->irq_tbl[i];
1754		irq_set_affinity_hint(irq->vector, NULL);
1755		free_irq(irq->vector, irq->data);
1756	}
1757}
1758
1759static void ena_disable_msix(struct ena_adapter *adapter)
1760{
1761	if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags))
1762		pci_free_irq_vectors(adapter->pdev);
1763}
1764
1765static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
1766{
1767	u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
1768	int i;
1769
1770	if (!netif_running(adapter->netdev))
1771		return;
1772
1773	for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++)
1774		synchronize_irq(adapter->irq_tbl[i].vector);
1775}
1776
1777static void ena_del_napi_in_range(struct ena_adapter *adapter,
1778				  int first_index,
1779				  int count)
1780{
1781	int i;
1782
1783	for (i = first_index; i < first_index + count; i++) {
1784		netif_napi_del(&adapter->ena_napi[i].napi);
1785
1786		WARN_ON(ENA_IS_XDP_INDEX(adapter, i) &&
1787			adapter->ena_napi[i].rx_ring);
1788	}
1789}
1790
1791static void ena_init_napi_in_range(struct ena_adapter *adapter,
1792				   int first_index, int count)
1793{
1794	int (*napi_handler)(struct napi_struct *napi, int budget);
1795	int i;
1796
1797	for (i = first_index; i < first_index + count; i++) {
1798		struct ena_napi *napi = &adapter->ena_napi[i];
1799		struct ena_ring *rx_ring, *tx_ring;
1800
1801		memset(napi, 0, sizeof(*napi));
1802
1803		rx_ring = &adapter->rx_ring[i];
1804		tx_ring = &adapter->tx_ring[i];
1805
1806		napi_handler = ena_io_poll;
1807		if (ENA_IS_XDP_INDEX(adapter, i))
1808			napi_handler = ena_xdp_io_poll;
1809
1810		netif_napi_add(adapter->netdev, &napi->napi, napi_handler);
1811
1812		if (!ENA_IS_XDP_INDEX(adapter, i))
1813			napi->rx_ring = rx_ring;
1814
1815		napi->tx_ring = tx_ring;
1816		napi->qid = i;
1817	}
1818}
1819
1820static void ena_napi_disable_in_range(struct ena_adapter *adapter,
1821				      int first_index,
1822				      int count)
1823{
1824	struct napi_struct *napi;
1825	int i;
1826
1827	for (i = first_index; i < first_index + count; i++) {
1828		napi = &adapter->ena_napi[i].napi;
1829		if (!ENA_IS_XDP_INDEX(adapter, i)) {
1830			/* This API is supported for non-XDP queues only */
1831			netif_queue_set_napi(adapter->netdev, i,
1832					     NETDEV_QUEUE_TYPE_TX, NULL);
1833			netif_queue_set_napi(adapter->netdev, i,
1834					     NETDEV_QUEUE_TYPE_RX, NULL);
1835		}
1836		napi_disable(napi);
1837	}
1838}
1839
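/* Mirror of ena_napi_disable_in_range(): the napi instance is enabled before
 * it is published to the queue<->napi mapping (and the mapping is cleared
 * before disabling), so the mapping never references a disabled napi.
 */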
1840static void ena_napi_enable_in_range(struct ena_adapter *adapter,
1841				     int first_index,
1842				     int count)
1843{
1844	struct napi_struct *napi;
1845	int i;
1846
1847	for (i = first_index; i < first_index + count; i++) {
1848		napi = &adapter->ena_napi[i].napi;
1849		napi_enable(napi);
1850		if (!ENA_IS_XDP_INDEX(adapter, i)) {
1851			/* This API is supported for non-XDP queues only */
1852			netif_queue_set_napi(adapter->netdev, i,
1853					     NETDEV_QUEUE_TYPE_RX, napi);
1854			netif_queue_set_napi(adapter->netdev, i,
1855					     NETDEV_QUEUE_TYPE_TX, napi);
1856		}
1857	}
1858}
1859
1860/* Configure the Rx forwarding */
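/* Ordering note: the indirection table is programmed before the hash
 * function and hash inputs, and every step treats -EOPNOTSUPP as non-fatal
 * so devices that don't expose a given RSS capability still come up.
 */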
1861static int ena_rss_configure(struct ena_adapter *adapter)
1862{
1863	struct ena_com_dev *ena_dev = adapter->ena_dev;
1864	int rc;
1865
1866	/* In case the RSS table wasn't initialized by probe */
1867	if (!ena_dev->rss.tbl_log_size) {
1868		rc = ena_rss_init_default(adapter);
1869		if (rc && (rc != -EOPNOTSUPP)) {
1870			netif_err(adapter, ifup, adapter->netdev, "Failed to init RSS rc: %d\n", rc);
1871			return rc;
1872		}
1873	}
1874
1875	/* Set indirect table */
1876	rc = ena_com_indirect_table_set(ena_dev);
1877	if (unlikely(rc && rc != -EOPNOTSUPP))
1878		return rc;
1879
1880	/* Configure hash function (if supported) */
1881	rc = ena_com_set_hash_function(ena_dev);
1882	if (unlikely(rc && (rc != -EOPNOTSUPP)))
1883		return rc;
1884
1885	/* Configure hash inputs (if supported) */
1886	rc = ena_com_set_hash_ctrl(ena_dev);
1887	if (unlikely(rc && (rc != -EOPNOTSUPP)))
1888		return rc;
1889
1890	return 0;
1891}
1892
1893static int ena_up_complete(struct ena_adapter *adapter)
1894{
1895	int rc;
1896
1897	rc = ena_rss_configure(adapter);
1898	if (rc)
1899		return rc;
1900
1901	ena_change_mtu(adapter->netdev, adapter->netdev->mtu);
1902
1903	ena_refill_all_rx_bufs(adapter);
1904
1905	/* enable transmits */
1906	netif_tx_start_all_queues(adapter->netdev);
1907
1908	ena_napi_enable_in_range(adapter,
1909				 0,
1910				 adapter->xdp_num_queues + adapter->num_io_queues);
1911
1912	return 0;
1913}
1914
1915static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
1916{
1917	struct ena_com_create_io_ctx ctx;
1918	struct ena_com_dev *ena_dev;
1919	struct ena_ring *tx_ring;
1920	u32 msix_vector;
1921	u16 ena_qid;
1922	int rc;
1923
1924	ena_dev = adapter->ena_dev;
1925
1926	tx_ring = &adapter->tx_ring[qid];
1927	msix_vector = ENA_IO_IRQ_IDX(qid);
1928	ena_qid = ENA_IO_TXQ_IDX(qid);
1929
1930	memset(&ctx, 0x0, sizeof(ctx));
1931
1932	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1933	ctx.qid = ena_qid;
1934	ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1935	ctx.msix_vector = msix_vector;
1936	ctx.queue_size = tx_ring->ring_size;
1937	ctx.numa_node = tx_ring->numa_node;
1938
1939	rc = ena_com_create_io_queue(ena_dev, &ctx);
1940	if (rc) {
1941		netif_err(adapter, ifup, adapter->netdev,
1942			  "Failed to create I/O TX queue num %d rc: %d\n",
1943			  qid, rc);
1944		return rc;
1945	}
1946
1947	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1948				     &tx_ring->ena_com_io_sq,
1949				     &tx_ring->ena_com_io_cq);
1950	if (rc) {
1951		netif_err(adapter, ifup, adapter->netdev,
1952			  "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
1953			  qid, rc);
1954		ena_com_destroy_io_queue(ena_dev, ena_qid);
1955		return rc;
1956	}
1957
1958	ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
1959	return rc;
1960}
1961
1962int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
1963				     int first_index, int count)
1964{
1965	struct ena_com_dev *ena_dev = adapter->ena_dev;
1966	int rc, i;
1967
1968	for (i = first_index; i < first_index + count; i++) {
1969		rc = ena_create_io_tx_queue(adapter, i);
1970		if (rc)
1971			goto create_err;
1972	}
1973
1974	return 0;
1975
1976create_err:
1977	while (i-- > first_index)
1978		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
1979
1980	return rc;
1981}
1982
1983static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
1984{
1985	struct ena_com_dev *ena_dev;
1986	struct ena_com_create_io_ctx ctx;
1987	struct ena_ring *rx_ring;
1988	u32 msix_vector;
1989	u16 ena_qid;
1990	int rc;
1991
1992	ena_dev = adapter->ena_dev;
1993
1994	rx_ring = &adapter->rx_ring[qid];
1995	msix_vector = ENA_IO_IRQ_IDX(qid);
1996	ena_qid = ENA_IO_RXQ_IDX(qid);
1997
1998	memset(&ctx, 0x0, sizeof(ctx));
1999
2000	ctx.qid = ena_qid;
2001	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
2002	ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2003	ctx.msix_vector = msix_vector;
2004	ctx.queue_size = rx_ring->ring_size;
2005	ctx.numa_node = rx_ring->numa_node;
2006
2007	rc = ena_com_create_io_queue(ena_dev, &ctx);
2008	if (rc) {
2009		netif_err(adapter, ifup, adapter->netdev,
2010			  "Failed to create I/O RX queue num %d rc: %d\n",
2011			  qid, rc);
2012		return rc;
2013	}
2014
2015	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
2016				     &rx_ring->ena_com_io_sq,
2017				     &rx_ring->ena_com_io_cq);
2018	if (rc) {
2019		netif_err(adapter, ifup, adapter->netdev,
2020			  "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
2021			  qid, rc);
2022		goto err;
2023	}
2024
2025	ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
2026
2027	return rc;
2028err:
2029	ena_com_destroy_io_queue(ena_dev, ena_qid);
2030	return rc;
2031}
2032
2033static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
2034{
2035	struct ena_com_dev *ena_dev = adapter->ena_dev;
2036	int rc, i;
2037
2038	for (i = 0; i < adapter->num_io_queues; i++) {
2039		rc = ena_create_io_rx_queue(adapter, i);
2040		if (rc)
2041			goto create_err;
2042		INIT_WORK(&adapter->ena_napi[i].dim.work, ena_dim_work);
2043
2044		ena_xdp_register_rxq_info(&adapter->rx_ring[i]);
2045	}
2046
2047	return 0;
2048
2049create_err:
2050	while (i--) {
2051		ena_xdp_unregister_rxq_info(&adapter->rx_ring[i]);
2052		cancel_work_sync(&adapter->ena_napi[i].dim.work);
2053		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
2054	}
2055
2056	return rc;
2057}
2058
2059static void set_io_rings_size(struct ena_adapter *adapter,
2060			      int new_tx_size,
2061			      int new_rx_size)
2062{
2063	int i;
2064
2065	for (i = 0; i < adapter->num_io_queues; i++) {
2066		adapter->tx_ring[i].ring_size = new_tx_size;
2067		adapter->rx_ring[i].ring_size = new_rx_size;
2068	}
2069}
2070
2071/* This function allows queue allocation to backoff when the system is
2072 * low on memory. If there is not enough memory to allocate io queues
2073 * the driver will try to allocate smaller queues.
2074 *
2075 * The backoff algorithm is as follows:
2076 *  1. Try to allocate the TX and RX queues.
2077 *  1.1. If successful, return success.
2078 *
2079 *  2. Halve the size of the larger of the RX and TX queues (or both if they are the same size).
2080 *
2081 *  3. If the new TX or RX size is smaller than 256,
2082 *  3.1. return failure.
2083 *  4. Otherwise,
2084 *  4.1. go back to 1.
2085 */
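/* Illustrative example (sizes are hypothetical): with requested sizes
 * TX=8192 and RX=1024, an -ENOMEM failure halves only the larger queue,
 * giving TX=4096/RX=1024, then TX=2048, TX=1024, and from there both are
 * halved together (512, then 256) until the allocation succeeds or a size
 * would drop below ENA_MIN_RING_SIZE.
 */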
2086static int create_queues_with_size_backoff(struct ena_adapter *adapter)
2087{
2088	int rc, cur_rx_ring_size, cur_tx_ring_size;
2089	int new_rx_ring_size, new_tx_ring_size;
2090
2091	/* The current queue sizes might be smaller than the requested
2092	 * ones due to past queue allocation failures.
2093	 */
2094	set_io_rings_size(adapter, adapter->requested_tx_ring_size,
2095			  adapter->requested_rx_ring_size);
2096
2097	while (1) {
2098		if (ena_xdp_present(adapter)) {
2099			rc = ena_setup_and_create_all_xdp_queues(adapter);
2100
2101			if (rc)
2102				goto err_setup_tx;
2103		}
2104		rc = ena_setup_tx_resources_in_range(adapter,
2105						     0,
2106						     adapter->num_io_queues);
2107		if (rc)
2108			goto err_setup_tx;
2109
2110		rc = ena_create_io_tx_queues_in_range(adapter,
2111						      0,
2112						      adapter->num_io_queues);
2113		if (rc)
2114			goto err_create_tx_queues;
2115
2116		rc = ena_setup_all_rx_resources(adapter);
2117		if (rc)
2118			goto err_setup_rx;
2119
2120		rc = ena_create_all_io_rx_queues(adapter);
2121		if (rc)
2122			goto err_create_rx_queues;
2123
2124		return 0;
2125
2126err_create_rx_queues:
2127		ena_free_all_io_rx_resources(adapter);
2128err_setup_rx:
2129		ena_destroy_all_tx_queues(adapter);
2130err_create_tx_queues:
2131		ena_free_all_io_tx_resources(adapter);
2132err_setup_tx:
2133		if (rc != -ENOMEM) {
2134			netif_err(adapter, ifup, adapter->netdev,
2135				  "Queue creation failed with error code %d\n",
2136				  rc);
2137			return rc;
2138		}
2139
2140		cur_tx_ring_size = adapter->tx_ring[0].ring_size;
2141		cur_rx_ring_size = adapter->rx_ring[0].ring_size;
2142
2143		netif_err(adapter, ifup, adapter->netdev,
2144			  "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
2145			  cur_tx_ring_size, cur_rx_ring_size);
2146
2147		new_tx_ring_size = cur_tx_ring_size;
2148		new_rx_ring_size = cur_rx_ring_size;
2149
2150		/* Decrease the size of the larger queue, or
2151		 * decrease both if they are the same size.
2152		 */
2153		if (cur_rx_ring_size <= cur_tx_ring_size)
2154			new_tx_ring_size = cur_tx_ring_size / 2;
2155		if (cur_rx_ring_size >= cur_tx_ring_size)
2156			new_rx_ring_size = cur_rx_ring_size / 2;
2157
2158		if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
2159		    new_rx_ring_size < ENA_MIN_RING_SIZE) {
2160			netif_err(adapter, ifup, adapter->netdev,
2161				  "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n",
2162				  ENA_MIN_RING_SIZE);
2163			return rc;
2164		}
2165
2166		netif_err(adapter, ifup, adapter->netdev,
2167			  "Retrying queue creation with sizes TX=%d, RX=%d\n",
2168			  new_tx_ring_size,
2169			  new_rx_ring_size);
2170
2171		set_io_rings_size(adapter, new_tx_ring_size,
2172				  new_rx_ring_size);
2173	}
2174}
2175
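/* Bring-up order used below: set up the IO interrupt vectors, initialize the
 * napi instances, request the IO IRQs, create the IO queues (with size
 * backoff), then complete the bring-up (RSS, MTU, Rx refill, TX/napi enable).
 */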
2176int ena_up(struct ena_adapter *adapter)
2177{
2178	int io_queue_count, rc, i;
2179
2180	netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__);
2181
2182	io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2183	ena_setup_io_intr(adapter);
2184
2185	/* napi poll functions should be initialized before running
2186	 * request_irq(), to handle a rare condition where there is a pending
2187	 * interrupt, causing the ISR to fire immediately while the poll
2188	 * function wasn't set yet, which would cause a NULL dereference
2189	 */
2190	ena_init_napi_in_range(adapter, 0, io_queue_count);
2191
2192	/* Enabling DIM needs to happen before enabling IRQs since DIM
2193	 * is run from the napi routine
2194	 */
2195	if (ena_com_interrupt_moderation_supported(adapter->ena_dev))
2196		ena_com_enable_adaptive_moderation(adapter->ena_dev);
2197
2198	rc = ena_request_io_irq(adapter);
2199	if (rc)
2200		goto err_req_irq;
2201
2202	rc = create_queues_with_size_backoff(adapter);
2203	if (rc)
2204		goto err_create_queues_with_backoff;
2205
2206	rc = ena_up_complete(adapter);
2207	if (rc)
2208		goto err_up;
2209
2210	if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
2211		netif_carrier_on(adapter->netdev);
2212
2213	ena_increase_stat(&adapter->dev_stats.interface_up, 1,
2214			  &adapter->syncp);
2215
2216	set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2217
2218	/* Enable completion queues interrupt */
2219	for (i = 0; i < adapter->num_io_queues; i++)
2220		ena_unmask_interrupt(&adapter->tx_ring[i],
2221				     &adapter->rx_ring[i]);
2222
2223	/* schedule napi in case we had pending packets
2224	 * from the last time we disabled napi
2225	 */
2226	for (i = 0; i < io_queue_count; i++)
2227		napi_schedule(&adapter->ena_napi[i].napi);
2228
2229	return rc;
2230
2231err_up:
2232	ena_destroy_all_tx_queues(adapter);
2233	ena_free_all_io_tx_resources(adapter);
2234	ena_destroy_all_rx_queues(adapter);
2235	ena_free_all_io_rx_resources(adapter);
2236err_create_queues_with_backoff:
2237	ena_free_io_irq(adapter);
2238err_req_irq:
2239	ena_del_napi_in_range(adapter, 0, io_queue_count);
2240
2241	return rc;
2242}
2243
2244void ena_down(struct ena_adapter *adapter)
2245{
2246	int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2247
2248	netif_dbg(adapter, ifdown, adapter->netdev, "%s\n", __func__);
2249
2250	clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2251
2252	ena_increase_stat(&adapter->dev_stats.interface_down, 1,
2253			  &adapter->syncp);
2254
2255	netif_carrier_off(adapter->netdev);
2256	netif_tx_disable(adapter->netdev);
2257
2258	/* After this point the napi handler won't enable the tx queue */
2259	ena_napi_disable_in_range(adapter, 0, io_queue_count);
2260
2261	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
2262		int rc;
2263
2264		rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
2265		if (rc)
2266			netif_err(adapter, ifdown, adapter->netdev,
2267				  "Device reset failed\n");
2268		/* stop submitting admin commands on a device that was reset */
2269		ena_com_set_admin_running_state(adapter->ena_dev, false);
2270	}
2271
2272	ena_destroy_all_io_queues(adapter);
2273
2274	ena_disable_io_intr_sync(adapter);
2275	ena_free_io_irq(adapter);
2276	ena_del_napi_in_range(adapter, 0, io_queue_count);
2277
2278	ena_free_all_tx_bufs(adapter);
2279	ena_free_all_rx_bufs(adapter);
2280	ena_free_all_io_tx_resources(adapter);
2281	ena_free_all_io_rx_resources(adapter);
2282}
2283
2284/* ena_open - Called when a network interface is made active
2285 * @netdev: network interface device structure
2286 *
2287 * Returns 0 on success, negative value on failure
2288 *
2289 * The open entry point is called when a network interface is made
2290 * active by the system (IFF_UP).  At this point all resources needed
2291 * for transmit and receive operations are allocated, the interrupt
2292 * handler is registered with the OS, the watchdog timer is started,
2293 * and the stack is notified that the interface is ready.
2294 */
2295static int ena_open(struct net_device *netdev)
2296{
2297	struct ena_adapter *adapter = netdev_priv(netdev);
2298	int rc;
2299
2300	/* Notify the stack of the actual queue counts. */
2301	rc = netif_set_real_num_tx_queues(netdev, adapter->num_io_queues);
2302	if (rc) {
2303		netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
2304		return rc;
2305	}
2306
2307	rc = netif_set_real_num_rx_queues(netdev, adapter->num_io_queues);
2308	if (rc) {
2309		netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
2310		return rc;
2311	}
2312
2313	rc = ena_up(adapter);
2314	if (rc)
2315		return rc;
2316
2317	return rc;
2318}
2319
2320/* ena_close - Disables a network interface
2321 * @netdev: network interface device structure
2322 *
2323 * Returns 0, this is not allowed to fail
2324 *
2325 * The close entry point is called when an interface is de-activated
2326 * by the OS.  The hardware is still under the drivers control, but
2327 * needs to be disabled.  A global MAC reset is issued to stop the
2328 * hardware, and all transmit and receive resources are freed.
2329 */
2330static int ena_close(struct net_device *netdev)
2331{
2332	struct ena_adapter *adapter = netdev_priv(netdev);
2333
2334	netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
2335
2336	if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
2337		return 0;
2338
2339	if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2340		ena_down(adapter);
2341
2342	/* Check the device status and issue a reset if needed */
2343	check_for_admin_com_state(adapter);
2344	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
2345		netif_err(adapter, ifdown, adapter->netdev,
2346			  "Destroy failure, restarting device\n");
2347		ena_dump_stats_to_dmesg(adapter);
2348		/* rtnl lock already obtained in dev_ioctl() layer */
2349		ena_destroy_device(adapter, false);
2350		ena_restore_device(adapter);
2351	}
2352
2353	return 0;
2354}
2355
2356int ena_update_queue_params(struct ena_adapter *adapter,
2357			    u32 new_tx_size,
2358			    u32 new_rx_size,
2359			    u32 new_llq_header_len)
2360{
2361	bool dev_was_up, large_llq_changed = false;
2362	int rc = 0;
2363
2364	dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2365	ena_close(adapter->netdev);
2366	adapter->requested_tx_ring_size = new_tx_size;
2367	adapter->requested_rx_ring_size = new_rx_size;
2368	ena_init_io_rings(adapter,
2369			  0,
2370			  adapter->xdp_num_queues +
2371			  adapter->num_io_queues);
2372
2373	large_llq_changed = adapter->ena_dev->tx_mem_queue_type ==
2374			    ENA_ADMIN_PLACEMENT_POLICY_DEV;
2375	large_llq_changed &=
2376		new_llq_header_len != adapter->ena_dev->tx_max_header_size;
2377
2378	/* a check that the configuration is valid is done by caller */
2379	if (large_llq_changed) {
2380		adapter->large_llq_header_enabled = !adapter->large_llq_header_enabled;
2381
2382		ena_destroy_device(adapter, false);
2383		rc = ena_restore_device(adapter);
2384	}
2385
2386	return dev_was_up && !rc ? ena_up(adapter) : rc;
2387}
2388
2389int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak)
2390{
2391	struct ena_ring *rx_ring;
2392	int i;
2393
2394	if (rx_copybreak > min_t(u16, adapter->netdev->mtu, ENA_PAGE_SIZE))
2395		return -EINVAL;
2396
2397	adapter->rx_copybreak = rx_copybreak;
2398
2399	for (i = 0; i < adapter->num_io_queues; i++) {
2400		rx_ring = &adapter->rx_ring[i];
2401		rx_ring->rx_copybreak = rx_copybreak;
2402	}
2403
2404	return 0;
2405}
2406
2407int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
2408{
2409	struct ena_com_dev *ena_dev = adapter->ena_dev;
2410	int prev_channel_count;
2411	bool dev_was_up;
2412
2413	dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2414	ena_close(adapter->netdev);
2415	prev_channel_count = adapter->num_io_queues;
2416	adapter->num_io_queues = new_channel_count;
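	/* With XDP attached, the XDP TX queues mirror the channel count:
	 * shrinking detaches the program from the rings being removed, while
	 * growing attaches it to the newly added ones before the rings are
	 * re-initialized below.
	 */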
2417	if (ena_xdp_present(adapter) &&
2418	    ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) {
2419		adapter->xdp_first_ring = new_channel_count;
2420		adapter->xdp_num_queues = new_channel_count;
2421		if (prev_channel_count > new_channel_count)
2422			ena_xdp_exchange_program_rx_in_range(adapter,
2423							     NULL,
2424							     new_channel_count,
2425							     prev_channel_count);
2426		else
2427			ena_xdp_exchange_program_rx_in_range(adapter,
2428							     adapter->xdp_bpf_prog,
2429							     prev_channel_count,
2430							     new_channel_count);
2431	}
2432
2433	/* We need to destroy the rss table so that the indirection
2434	 * table will be reinitialized by ena_up()
2435	 */
2436	ena_com_rss_destroy(ena_dev);
2437	ena_init_io_rings(adapter,
2438			  0,
2439			  adapter->xdp_num_queues +
2440			  adapter->num_io_queues);
2441	return dev_was_up ? ena_open(adapter->netdev) : 0;
2442}
2443
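/* Fill the TX offload metadata from the skb: enable TSO when gso_size is
 * set, otherwise request a partial L4 checksum, and record the L3/L4
 * protocol and header offsets. When meta caching is disabled, the (possibly
 * zeroed) metadata must be sent with every packet.
 */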
2444static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx,
2445			struct sk_buff *skb,
2446			bool disable_meta_caching)
2447{
2448	u32 mss = skb_shinfo(skb)->gso_size;
2449	struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
2450	u8 l4_protocol = 0;
2451
2452	if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) {
2453		ena_tx_ctx->l4_csum_enable = 1;
2454		if (mss) {
2455			ena_tx_ctx->tso_enable = 1;
2456			ena_meta->l4_hdr_len = tcp_hdr(skb)->doff;
2457			ena_tx_ctx->l4_csum_partial = 0;
2458		} else {
2459			ena_tx_ctx->tso_enable = 0;
2460			ena_meta->l4_hdr_len = 0;
2461			ena_tx_ctx->l4_csum_partial = 1;
2462		}
2463
2464		switch (ip_hdr(skb)->version) {
2465		case IPVERSION:
2466			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
2467			if (ip_hdr(skb)->frag_off & htons(IP_DF))
2468				ena_tx_ctx->df = 1;
2469			if (mss)
2470				ena_tx_ctx->l3_csum_enable = 1;
2471			l4_protocol = ip_hdr(skb)->protocol;
2472			break;
2473		case 6:
2474			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
2475			l4_protocol = ipv6_hdr(skb)->nexthdr;
2476			break;
2477		default:
2478			break;
2479		}
2480
2481		if (l4_protocol == IPPROTO_TCP)
2482			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
2483		else
2484			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
2485
2486		ena_meta->mss = mss;
2487		ena_meta->l3_hdr_len = skb_network_header_len(skb);
2488		ena_meta->l3_hdr_offset = skb_network_offset(skb);
2489		ena_tx_ctx->meta_valid = 1;
2490	} else if (disable_meta_caching) {
2491		memset(ena_meta, 0, sizeof(*ena_meta));
2492		ena_tx_ctx->meta_valid = 1;
2493	} else {
2494		ena_tx_ctx->meta_valid = 0;
2495	}
2496}
2497
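/* Roughly speaking, the device accepts at most sgl_size data buffers per
 * packet (one of which may be needed for the linear head), so an skb with
 * too many fragments is linearized here before mapping.
 */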
2498static int ena_check_and_linearize_skb(struct ena_ring *tx_ring,
2499				       struct sk_buff *skb)
2500{
2501	int num_frags, header_len, rc;
2502
2503	num_frags = skb_shinfo(skb)->nr_frags;
2504	header_len = skb_headlen(skb);
2505
2506	if (num_frags < tx_ring->sgl_size)
2507		return 0;
2508
2509	if ((num_frags == tx_ring->sgl_size) &&
2510	    (header_len < tx_ring->tx_max_header_size))
2511		return 0;
2512
2513	ena_increase_stat(&tx_ring->tx_stats.linearize, 1, &tx_ring->syncp);
2514
2515	rc = skb_linearize(skb);
2516	if (unlikely(rc)) {
2517		ena_increase_stat(&tx_ring->tx_stats.linearize_failed, 1,
2518				  &tx_ring->syncp);
2519	}
2520
2521	return rc;
2522}
2523
2524static int ena_tx_map_skb(struct ena_ring *tx_ring,
2525			  struct ena_tx_buffer *tx_info,
2526			  struct sk_buff *skb,
2527			  void **push_hdr,
2528			  u16 *header_len)
2529{
2530	struct ena_adapter *adapter = tx_ring->adapter;
2531	struct ena_com_buf *ena_buf;
2532	dma_addr_t dma;
2533	u32 skb_head_len, frag_len, last_frag;
2534	u16 push_len = 0;
2535	u16 delta = 0;
2536	int i = 0;
2537
2538	skb_head_len = skb_headlen(skb);
2539	tx_info->skb = skb;
2540	ena_buf = tx_info->bufs;
2541
2542	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2543		/* When the device is in LLQ mode, the driver copies
2544		 * the header into the device memory space.
2545		 * The ena_com layer assumes the header is in a linear
2546		 * memory space.
2547		 * This assumption might be wrong since part of the header
2548		 * can be in the fragmented buffers.
2549		 * Use skb_header_pointer to make sure the header is in a
2550		 * linear memory space.
2551		 */
2552
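		/* Example with hypothetical sizes: if tx_max_header_size is 96
		 * and skb_headlen() is only 64, push_len becomes min(skb->len, 96),
		 * the header is gathered via skb_header_pointer(), and the first
		 * (push_len - 64) bytes of the first fragment are skipped via
		 * 'delta' when the fragments are mapped below.
		 */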
2553		push_len = min_t(u32, skb->len, tx_ring->tx_max_header_size);
2554		*push_hdr = skb_header_pointer(skb, 0, push_len,
2555					       tx_ring->push_buf_intermediate_buf);
2556		*header_len = push_len;
2557		if (unlikely(skb->data != *push_hdr)) {
2558			ena_increase_stat(&tx_ring->tx_stats.llq_buffer_copy, 1,
2559					  &tx_ring->syncp);
2560
2561			delta = push_len - skb_head_len;
2562		}
2563	} else {
2564		*push_hdr = NULL;
2565		*header_len = min_t(u32, skb_head_len,
2566				    tx_ring->tx_max_header_size);
2567	}
2568
2569	netif_dbg(adapter, tx_queued, adapter->netdev,
2570		  "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
2571		  *push_hdr, push_len);
2572
2573	if (skb_head_len > push_len) {
2574		dma = dma_map_single(tx_ring->dev, skb->data + push_len,
2575				     skb_head_len - push_len, DMA_TO_DEVICE);
2576		if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
2577			goto error_report_dma_error;
2578
2579		ena_buf->paddr = dma;
2580		ena_buf->len = skb_head_len - push_len;
2581
2582		ena_buf++;
2583		tx_info->num_of_bufs++;
2584		tx_info->map_linear_data = 1;
2585	} else {
2586		tx_info->map_linear_data = 0;
2587	}
2588
2589	last_frag = skb_shinfo(skb)->nr_frags;
2590
2591	for (i = 0; i < last_frag; i++) {
2592		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2593
2594		frag_len = skb_frag_size(frag);
2595
2596		if (unlikely(delta >= frag_len)) {
2597			delta -= frag_len;
2598			continue;
2599		}
2600
2601		dma = skb_frag_dma_map(tx_ring->dev, frag, delta,
2602				       frag_len - delta, DMA_TO_DEVICE);
2603		if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
2604			goto error_report_dma_error;
2605
2606		ena_buf->paddr = dma;
2607		ena_buf->len = frag_len - delta;
2608		ena_buf++;
2609		tx_info->num_of_bufs++;
2610		delta = 0;
2611	}
2612
2613	return 0;
2614
2615error_report_dma_error:
2616	ena_increase_stat(&tx_ring->tx_stats.dma_mapping_err, 1,
2617			  &tx_ring->syncp);
2618	netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map skb\n");
2619
2620	tx_info->skb = NULL;
2621
2622	tx_info->num_of_bufs += i;
2623	ena_unmap_tx_buff(tx_ring, tx_info);
2624
2625	return -EINVAL;
2626}
2627
2628/* Called with netif_tx_lock. */
2629static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
2630{
2631	struct ena_adapter *adapter = netdev_priv(dev);
2632	struct ena_tx_buffer *tx_info;
2633	struct ena_com_tx_ctx ena_tx_ctx;
2634	struct ena_ring *tx_ring;
2635	struct netdev_queue *txq;
2636	void *push_hdr;
2637	u16 next_to_use, req_id, header_len;
2638	int qid, rc;
2639
2640	netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
2641	/*  Determine which tx ring we will be placed on */
2642	qid = skb_get_queue_mapping(skb);
2643	tx_ring = &adapter->tx_ring[qid];
2644	txq = netdev_get_tx_queue(dev, qid);
2645
2646	rc = ena_check_and_linearize_skb(tx_ring, skb);
2647	if (unlikely(rc))
2648		goto error_drop_packet;
2649
2650	next_to_use = tx_ring->next_to_use;
2651	req_id = tx_ring->free_ids[next_to_use];
2652	tx_info = &tx_ring->tx_buffer_info[req_id];
2653	tx_info->num_of_bufs = 0;
2654
2655	WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
2656
2657	rc = ena_tx_map_skb(tx_ring, tx_info, skb, &push_hdr, &header_len);
2658	if (unlikely(rc))
2659		goto error_drop_packet;
2660
2661	memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
2662	ena_tx_ctx.ena_bufs = tx_info->bufs;
2663	ena_tx_ctx.push_header = push_hdr;
2664	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
2665	ena_tx_ctx.req_id = req_id;
2666	ena_tx_ctx.header_len = header_len;
2667
2668	/* set flags and meta data */
2669	ena_tx_csum(&ena_tx_ctx, skb, tx_ring->disable_meta_caching);
2670
2671	rc = ena_xmit_common(adapter,
2672			     tx_ring,
2673			     tx_info,
2674			     &ena_tx_ctx,
2675			     next_to_use,
2676			     skb->len);
2677	if (rc)
2678		goto error_unmap_dma;
2679
2680	netdev_tx_sent_queue(txq, skb->len);
2681
2682	/* Stop the queue when no more space is available. The packet can use up
2683	 * to sgl_size + 2 descriptors: one for the meta descriptor and one for the header
2684	 * (if the header is larger than tx_max_header_size).
2685	 */
2686	if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
2687						   tx_ring->sgl_size + 2))) {
2688		netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
2689			  __func__, qid);
2690
2691		netif_tx_stop_queue(txq);
2692		ena_increase_stat(&tx_ring->tx_stats.queue_stop, 1,
2693				  &tx_ring->syncp);
2694
2695		/* There is a rare condition where this function decides to
2696		 * stop the queue but meanwhile clean_tx_irq updates
2697		 * next_to_completion and terminates.
2698		 * The queue would then remain stopped forever.
2699		 * To solve this issue add an mb() to make sure that the
2700		 * netif_tx_stop_queue() write is visible before checking if
2701		 * there is additional space in the queue.
2702		 */
2703		smp_mb();
2704
2705		if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
2706						 ENA_TX_WAKEUP_THRESH)) {
2707			netif_tx_wake_queue(txq);
2708			ena_increase_stat(&tx_ring->tx_stats.queue_wakeup, 1,
2709					  &tx_ring->syncp);
2710		}
2711	}
2712
2713	skb_tx_timestamp(skb);
2714
2715	if (netif_xmit_stopped(txq) || !netdev_xmit_more())
2716		/* trigger the dma engine. ena_ring_tx_doorbell()
2717		 * calls a memory barrier inside it.
2718		 */
2719		ena_ring_tx_doorbell(tx_ring);
2720
2721	return NETDEV_TX_OK;
2722
2723error_unmap_dma:
2724	ena_unmap_tx_buff(tx_ring, tx_info);
2725	tx_info->skb = NULL;
2726
2727error_drop_packet:
2728	dev_kfree_skb(skb);
2729	return NETDEV_TX_OK;
2730}
2731
2732static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
2733{
2734	struct device *dev = &pdev->dev;
2735	struct ena_admin_host_info *host_info;
2736	ssize_t ret;
2737	int rc;
2738
2739	/* Allocate only the host info */
2740	rc = ena_com_allocate_host_info(ena_dev);
2741	if (rc) {
2742		dev_err(dev, "Cannot allocate host info\n");
2743		return;
2744	}
2745
2746	host_info = ena_dev->host_attr.host_info;
2747
2748	host_info->bdf = pci_dev_id(pdev);
2749	host_info->os_type = ENA_ADMIN_OS_LINUX;
2750	host_info->kernel_ver = LINUX_VERSION_CODE;
2751	ret = strscpy(host_info->kernel_ver_str, utsname()->version,
2752		      sizeof(host_info->kernel_ver_str));
2753	if (ret < 0)
2754		dev_dbg(dev,
2755			"kernel version string will be truncated, status = %zd\n", ret);
2756
2757	host_info->os_dist = 0;
2758	ret = strscpy(host_info->os_dist_str, utsname()->release,
2759		      sizeof(host_info->os_dist_str));
2760	if (ret < 0)
2761		dev_dbg(dev,
2762			"OS distribution string will be truncated, status = %zd\n", ret);
2763
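	/* Pack the driver version bitfield: generation major in the low bits,
	 * minor and sub-minor at their ENA_ADMIN_HOST_INFO_*_SHIFT offsets,
	 * plus 'K' in the module-type field (presumably marking the in-kernel
	 * driver).
	 */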
2764	host_info->driver_version =
2765		(DRV_MODULE_GEN_MAJOR) |
2766		(DRV_MODULE_GEN_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
2767		(DRV_MODULE_GEN_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) |
2768		("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT);
2769	host_info->num_cpus = num_online_cpus();
2770
2771	host_info->driver_supported_features =
2772		ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
2773		ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK |
2774		ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK |
2775		ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK |
2776		ENA_ADMIN_HOST_INFO_RX_PAGE_REUSE_MASK;
2777
2778	rc = ena_com_set_host_attributes(ena_dev);
2779	if (rc) {
2780		if (rc == -EOPNOTSUPP)
2781			dev_warn(dev, "Cannot set host attributes\n");
2782		else
2783			dev_err(dev, "Cannot set host attributes\n");
2784
2785		goto err;
2786	}
2787
2788	return;
2789
2790err:
2791	ena_com_delete_host_info(ena_dev);
2792}
2793
2794static void ena_config_debug_area(struct ena_adapter *adapter)
2795{
2796	u32 debug_area_size;
2797	int rc, ss_count;
2798
2799	ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS);
2800	if (ss_count <= 0) {
2801		netif_err(adapter, drv, adapter->netdev,
2802			  "SS count is negative\n");
2803		return;
2804	}
2805
2806	/* Allocate 32 bytes for each string and 64 bits for each value */
2807	debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;
2808
2809	rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size);
2810	if (rc) {
2811		netif_err(adapter, drv, adapter->netdev,
2812			  "Cannot allocate debug area\n");
2813		return;
2814	}
2815
2816	rc = ena_com_set_host_attributes(adapter->ena_dev);
2817	if (rc) {
2818		if (rc == -EOPNOTSUPP)
2819			netif_warn(adapter, drv, adapter->netdev, "Cannot set host attributes\n");
2820		else
2821			netif_err(adapter, drv, adapter->netdev,
2822				  "Cannot set host attributes\n");
2823		goto err;
2824	}
2825
2826	return;
2827err:
2828	ena_com_delete_debug_area(adapter->ena_dev);
2829}
2830
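/* Per-ring counters are read under u64_stats_fetch_begin()/retry() seqcount
 * loops so the 64-bit values are tear-free even on 32-bit systems, without
 * taking any lock in the datapath.
 */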
2831static void ena_get_stats64(struct net_device *netdev,
2832			    struct rtnl_link_stats64 *stats)
2833{
2834	struct ena_adapter *adapter = netdev_priv(netdev);
2835	struct ena_ring *rx_ring, *tx_ring;
2836	u64 total_xdp_rx_drops = 0;
2837	unsigned int start;
2838	u64 rx_drops;
2839	u64 tx_drops;
2840	int i;
2841
2842	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2843		return;
2844
2845	for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
2846		u64 bytes, packets, xdp_rx_drops;
2847
2848		tx_ring = &adapter->tx_ring[i];
2849
2850		do {
2851			start = u64_stats_fetch_begin(&tx_ring->syncp);
2852			packets = tx_ring->tx_stats.cnt;
2853			bytes = tx_ring->tx_stats.bytes;
2854		} while (u64_stats_fetch_retry(&tx_ring->syncp, start));
2855
2856		stats->tx_packets += packets;
2857		stats->tx_bytes += bytes;
2858
2859		/* In XDP there isn't an RX queue counterpart */
2860		if (ENA_IS_XDP_INDEX(adapter, i))
2861			continue;
2862
2863		rx_ring = &adapter->rx_ring[i];
2864
2865		do {
2866			start = u64_stats_fetch_begin(&rx_ring->syncp);
2867			packets = rx_ring->rx_stats.cnt;
2868			bytes = rx_ring->rx_stats.bytes;
2869			xdp_rx_drops = rx_ring->rx_stats.xdp_drop;
2870		} while (u64_stats_fetch_retry(&rx_ring->syncp, start));
2871
2872		stats->rx_packets += packets;
2873		stats->rx_bytes += bytes;
2874		total_xdp_rx_drops += xdp_rx_drops;
2875	}
2876
2877	do {
2878		start = u64_stats_fetch_begin(&adapter->syncp);
2879		rx_drops = adapter->dev_stats.rx_drops;
2880		tx_drops = adapter->dev_stats.tx_drops;
2881	} while (u64_stats_fetch_retry(&adapter->syncp, start));
2882
2883	stats->rx_dropped = rx_drops + total_xdp_rx_drops;
2884	stats->tx_dropped = tx_drops;
2885
2886	stats->multicast = 0;
2887	stats->collisions = 0;
2888
2889	stats->rx_length_errors = 0;
2890	stats->rx_crc_errors = 0;
2891	stats->rx_frame_errors = 0;
2892	stats->rx_fifo_errors = 0;
2893	stats->rx_missed_errors = 0;
2894	stats->tx_window_errors = 0;
2895
2896	stats->rx_errors = 0;
2897	stats->tx_errors = 0;
2898}
2899
2900static const struct net_device_ops ena_netdev_ops = {
2901	.ndo_open		= ena_open,
2902	.ndo_stop		= ena_close,
2903	.ndo_start_xmit		= ena_start_xmit,
2904	.ndo_get_stats64	= ena_get_stats64,
2905	.ndo_tx_timeout		= ena_tx_timeout,
2906	.ndo_change_mtu		= ena_change_mtu,
2907	.ndo_validate_addr	= eth_validate_addr,
2908	.ndo_bpf		= ena_xdp,
2909	.ndo_xdp_xmit		= ena_xdp_xmit,
2910};
2911
2912static int ena_calc_io_queue_size(struct ena_adapter *adapter,
2913				  struct ena_com_dev_get_features_ctx *get_feat_ctx)
2914{
2915	struct ena_admin_feature_llq_desc *llq = &get_feat_ctx->llq;
2916	struct ena_com_dev *ena_dev = adapter->ena_dev;
2917	u32 tx_queue_size = ENA_DEFAULT_RING_SIZE;
2918	u32 rx_queue_size = ENA_DEFAULT_RING_SIZE;
2919	u32 max_tx_queue_size;
2920	u32 max_rx_queue_size;
2921
2922	/* If this function is called after driver load, the ring sizes have already
2923	 * been configured. Take it into account when recalculating ring size.
2924	 */
2925	if (adapter->tx_ring->ring_size)
2926		tx_queue_size = adapter->tx_ring->ring_size;
2927
2928	if (adapter->rx_ring->ring_size)
2929		rx_queue_size = adapter->rx_ring->ring_size;
2930
2931	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2932		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2933			&get_feat_ctx->max_queue_ext.max_queue_ext;
2934		max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth,
2935					  max_queue_ext->max_rx_sq_depth);
2936		max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
2937
2938		if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
2939			max_tx_queue_size = min_t(u32, max_tx_queue_size,
2940						  llq->max_llq_depth);
2941		else
2942			max_tx_queue_size = min_t(u32, max_tx_queue_size,
2943						  max_queue_ext->max_tx_sq_depth);
2944
2945		adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
2946						 max_queue_ext->max_per_packet_tx_descs);
2947		adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
2948						 max_queue_ext->max_per_packet_rx_descs);
2949	} else {
2950		struct ena_admin_queue_feature_desc *max_queues =
2951			&get_feat_ctx->max_queues;
2952		max_rx_queue_size = min_t(u32, max_queues->max_cq_depth,
2953					  max_queues->max_sq_depth);
2954		max_tx_queue_size = max_queues->max_cq_depth;
2955
2956		if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
2957			max_tx_queue_size = min_t(u32, max_tx_queue_size,
2958						  llq->max_llq_depth);
2959		else
2960			max_tx_queue_size = min_t(u32, max_tx_queue_size,
2961						  max_queues->max_sq_depth);
2962
2963		adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
2964						 max_queues->max_packet_tx_descs);
2965		adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
2966						 max_queues->max_packet_rx_descs);
2967	}
2968
2969	max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size);
2970	max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size);
2971
2972	if (max_tx_queue_size < ENA_MIN_RING_SIZE) {
2973		netdev_err(adapter->netdev, "Device max TX queue size: %d < minimum: %d\n",
2974			   max_tx_queue_size, ENA_MIN_RING_SIZE);
2975		return -EINVAL;
2976	}
2977
2978	if (max_rx_queue_size < ENA_MIN_RING_SIZE) {
2979		netdev_err(adapter->netdev, "Device max RX queue size: %d < minimum: %d\n",
2980			   max_rx_queue_size, ENA_MIN_RING_SIZE);
2981		return -EINVAL;
2982	}
2983
2984	/* When forcing large headers, we multiply the entry size by 2, and therefore divide
2985	 * the queue size by 2, leaving the amount of memory used by the queues unchanged.
2986	 */
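	/* For example (illustrative numbers): a device allowing 1024 LLQ
	 * entries of 128B would be limited to 512 entries once 256B (large
	 * header) entries are forced, keeping LLQ memory usage unchanged.
	 */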
2987	if (adapter->large_llq_header_enabled) {
2988		if ((llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) &&
2989		    ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2990			max_tx_queue_size /= 2;
2991			dev_info(&adapter->pdev->dev,
2992				 "Forcing large headers and decreasing maximum TX queue size to %d\n",
2993				 max_tx_queue_size);
2994		} else {
2995			dev_err(&adapter->pdev->dev,
2996				"Forcing large headers failed: LLQ is disabled or device does not support large headers\n");
2997
2998			adapter->large_llq_header_enabled = false;
2999		}
3000	}
3001
3002	tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
3003				  max_tx_queue_size);
3004	rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
3005				  max_rx_queue_size);
3006
3007	tx_queue_size = rounddown_pow_of_two(tx_queue_size);
3008	rx_queue_size = rounddown_pow_of_two(rx_queue_size);
3009
3010	adapter->max_tx_ring_size  = max_tx_queue_size;
3011	adapter->max_rx_ring_size = max_rx_queue_size;
3012	adapter->requested_tx_ring_size = tx_queue_size;
3013	adapter->requested_rx_ring_size = rx_queue_size;
3014
3015	return 0;
3016}
3017
3018static int ena_device_validate_params(struct ena_adapter *adapter,
3019				      struct ena_com_dev_get_features_ctx *get_feat_ctx)
3020{
3021	struct net_device *netdev = adapter->netdev;
3022	int rc;
3023
3024	rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr,
3025			      adapter->mac_addr);
3026	if (!rc) {
3027		netif_err(adapter, drv, netdev,
3028			  "Error, MAC addresses are different\n");
3029		return -EINVAL;
3030	}
3031
3032	if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) {
3033		netif_err(adapter, drv, netdev,
3034			  "Error, device max mtu is smaller than netdev MTU\n");
3035		return -EINVAL;
3036	}
3037
3038	return 0;
3039}
3040
3041static void set_default_llq_configurations(struct ena_adapter *adapter,
3042					   struct ena_llq_configurations *llq_config,
3043					   struct ena_admin_feature_llq_desc *llq)
3044{
3045	struct ena_com_dev *ena_dev = adapter->ena_dev;
3046
3047	llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
3048	llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
3049	llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
3050
3051	adapter->large_llq_header_supported =
3052		!!(ena_dev->supported_features & BIT(ENA_ADMIN_LLQ));
3053	adapter->large_llq_header_supported &=
3054		!!(llq->entry_size_ctrl_supported &
3055			ENA_ADMIN_LIST_ENTRY_SIZE_256B);
3056
3057	if ((llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) &&
3058	    adapter->large_llq_header_enabled) {
3059		llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_256B;
3060		llq_config->llq_ring_entry_size_value = 256;
3061	} else {
3062		llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
3063		llq_config->llq_ring_entry_size_value = 128;
3064	}
3065}
3066
3067static int ena_set_queues_placement_policy(struct pci_dev *pdev,
3068					   struct ena_com_dev *ena_dev,
3069					   struct ena_admin_feature_llq_desc *llq,
3070					   struct ena_llq_configurations *llq_default_configurations)
3071{
3072	int rc;
3073	u32 llq_feature_mask;
3074
3075	llq_feature_mask = 1 << ENA_ADMIN_LLQ;
3076	if (!(ena_dev->supported_features & llq_feature_mask)) {
3077		dev_warn(&pdev->dev,
3078			"LLQ is not supported. Fallback to host mode policy.\n");
3079		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3080		return 0;
3081	}
3082
3083	if (!ena_dev->mem_bar) {
3084		netdev_err(ena_dev->net_device,
3085			   "LLQ is advertised as supported but device doesn't expose mem bar\n");
3086		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3087		return 0;
3088	}
3089
3090	rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
3091	if (unlikely(rc)) {
3092		dev_err(&pdev->dev,
3093			"Failed to configure the device mode.  Fallback to host mode policy.\n");
3094		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3095	}
3096
3097	return 0;
3098}
3099
3100static int ena_map_llq_mem_bar(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
3101			       int bars)
3102{
3103	bool has_mem_bar = !!(bars & BIT(ENA_MEM_BAR));
3104
3105	if (!has_mem_bar)
3106		return 0;
3107
3108	ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
3109					   pci_resource_start(pdev, ENA_MEM_BAR),
3110					   pci_resource_len(pdev, ENA_MEM_BAR));
3111
3112	if (!ena_dev->mem_bar)
3113		return -EFAULT;
3114
3115	return 0;
3116}
3117
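/* One-time device initialization: set up (read-less) MMIO register access,
 * reset and validate the device, apply the DMA mask from the reported DMA
 * width, bring up the admin queue in polling mode (MSI-X isn't set up yet),
 * fetch device attributes, enable the supported AENQ groups and apply the
 * LLQ placement policy and queue sizes.
 */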
3118static int ena_device_init(struct ena_adapter *adapter, struct pci_dev *pdev,
3119			   struct ena_com_dev_get_features_ctx *get_feat_ctx,
3120			   bool *wd_state)
3121{
3122	struct ena_com_dev *ena_dev = adapter->ena_dev;
3123	struct net_device *netdev = adapter->netdev;
3124	struct ena_llq_configurations llq_config;
3125	struct device *dev = &pdev->dev;
3126	bool readless_supported;
3127	u32 aenq_groups;
3128	int dma_width;
3129	int rc;
3130
3131	rc = ena_com_mmio_reg_read_request_init(ena_dev);
3132	if (rc) {
3133		dev_err(dev, "Failed to init mmio read less\n");
3134		return rc;
3135	}
3136
3137	/* The PCIe configuration space revision id indicates whether mmio reg
3138	 * read is disabled
3139	 */
3140	readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ);
3141	ena_com_set_mmio_read_mode(ena_dev, readless_supported);
3142
3143	rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
3144	if (rc) {
3145		dev_err(dev, "Can not reset device\n");
3146		goto err_mmio_read_less;
3147	}
3148
3149	rc = ena_com_validate_version(ena_dev);
3150	if (rc) {
3151		dev_err(dev, "Device version is too low\n");
3152		goto err_mmio_read_less;
3153	}
3154
3155	dma_width = ena_com_get_dma_width(ena_dev);
3156	if (dma_width < 0) {
3157		dev_err(dev, "Invalid dma width value %d", dma_width);
3158		rc = dma_width;
3159		goto err_mmio_read_less;
3160	}
3161
3162	rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_width));
3163	if (rc) {
3164		dev_err(dev, "dma_set_mask_and_coherent failed %d\n", rc);
3165		goto err_mmio_read_less;
3166	}
3167
3168	/* ENA admin level init */
3169	rc = ena_com_admin_init(ena_dev, &aenq_handlers);
3170	if (rc) {
3171		dev_err(dev,
3172			"Can not initialize ena admin queue with device\n");
3173		goto err_mmio_read_less;
3174	}
3175
3176	/* To enable the msix interrupts, the driver needs to know the number
3177	 * of queues, so it uses polling mode to retrieve this
3178	 * information.
3179	 */
3180	ena_com_set_admin_polling_mode(ena_dev, true);
3181
3182	ena_config_host_info(ena_dev, pdev);
3183
3184	/* Get Device Attributes*/
3185	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
3186	if (rc) {
3187		dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc);
3188		goto err_admin_init;
3189	}
3190
3191	/* Try to turn on all the available aenq groups */
3192	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
3193		BIT(ENA_ADMIN_FATAL_ERROR) |
3194		BIT(ENA_ADMIN_WARNING) |
3195		BIT(ENA_ADMIN_NOTIFICATION) |
3196		BIT(ENA_ADMIN_KEEP_ALIVE);
3197
3198	aenq_groups &= get_feat_ctx->aenq.supported_groups;
3199
3200	rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
3201	if (rc) {
3202		dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc);
3203		goto err_admin_init;
3204	}
3205
3206	*wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
3207
3208	set_default_llq_configurations(adapter, &llq_config, &get_feat_ctx->llq);
3209
3210	rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq,
3211					     &llq_config);
3212	if (rc) {
3213		netdev_err(netdev, "Cannot set queues placement policy rc= %d\n", rc);
3214		goto err_admin_init;
3215	}
3216
3217	rc = ena_calc_io_queue_size(adapter, get_feat_ctx);
3218	if (unlikely(rc))
3219		goto err_admin_init;
3220
3221	return 0;
3222
3223err_admin_init:
3224	ena_com_abort_admin_commands(ena_dev);
3225	ena_com_wait_for_abort_completion(ena_dev);
3226	ena_com_delete_host_info(ena_dev);
3227	ena_com_admin_destroy(ena_dev);
3228err_mmio_read_less:
3229	ena_com_mmio_reg_read_request_destroy(ena_dev);
3230
3231	return rc;
3232}
3233
3234static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
3235{
3236	struct ena_com_dev *ena_dev = adapter->ena_dev;
3237	struct device *dev = &adapter->pdev->dev;
3238	int rc;
3239
3240	rc = ena_enable_msix(adapter);
3241	if (rc) {
3242		dev_err(dev, "Can not reserve msix vectors\n");
3243		return rc;
3244	}
3245
3246	ena_setup_mgmnt_intr(adapter);
3247
3248	rc = ena_request_mgmnt_irq(adapter);
3249	if (rc) {
3250		dev_err(dev, "Can not setup management interrupts\n");
3251		goto err_disable_msix;
3252	}
3253
3254	ena_com_set_admin_polling_mode(ena_dev, false);
3255
3256	ena_com_admin_aenq_enable(ena_dev);
3257
3258	return 0;
3259
3260err_disable_msix:
3261	ena_disable_msix(adapter);
3262
3263	return rc;
3264}
3265
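/* Tear down the device: bring the interface down if needed, reset the device
 * (unless a triggered reset already did so via ena_down()), then release the
 * mgmnt IRQ and MSI-X vectors and destroy the admin queue. @graceful skips
 * marking the admin queue as not running before the teardown.
 */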
3266static int ena_destroy_device(struct ena_adapter *adapter, bool graceful)
3267{
3268	struct net_device *netdev = adapter->netdev;
3269	struct ena_com_dev *ena_dev = adapter->ena_dev;
3270	bool dev_up;
3271	int rc = 0;
3272
3273	if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
3274		return 0;
3275
3276	netif_carrier_off(netdev);
3277
3278	del_timer_sync(&adapter->timer_service);
3279
3280	dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
3281	adapter->dev_up_before_reset = dev_up;
3282	if (!graceful)
3283		ena_com_set_admin_running_state(ena_dev, false);
3284
3285	if (dev_up)
3286		ena_down(adapter);
3287
3288	/* Stop the device from sending AENQ events (if the reset flag is set
3289	 * and the device is up, ena_down() has already reset the device).
3290	 */
3291	if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
3292		rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
3293
3294	ena_free_mgmnt_irq(adapter);
3295
3296	ena_disable_msix(adapter);
3297
3298	ena_com_abort_admin_commands(ena_dev);
3299
3300	ena_com_wait_for_abort_completion(ena_dev);
3301
3302	ena_com_admin_destroy(ena_dev);
3303
3304	ena_com_mmio_reg_read_request_destroy(ena_dev);
3305
3306	/* return reset reason to default value */
3307	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3308
3309	clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3310	clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3311
3312	return rc;
3313}
3314
3315static int ena_restore_device(struct ena_adapter *adapter)
3316{
3317	struct ena_com_dev_get_features_ctx get_feat_ctx;
3318	struct ena_com_dev *ena_dev = adapter->ena_dev;
3319	struct pci_dev *pdev = adapter->pdev;
3320	struct ena_ring *txr;
3321	int rc, count, i;
3322	bool wd_state;
3323
3324	set_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3325	rc = ena_device_init(adapter, adapter->pdev, &get_feat_ctx, &wd_state);
3326	if (rc) {
3327		dev_err(&pdev->dev, "Can not initialize device\n");
3328		goto err;
3329	}
3330	adapter->wd_state = wd_state;
3331
3332	count =  adapter->xdp_num_queues + adapter->num_io_queues;
3333	for (i = 0 ; i < count; i++) {
3334		txr = &adapter->tx_ring[i];
3335		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
3336		txr->tx_max_header_size = ena_dev->tx_max_header_size;
3337	}
3338
3339	rc = ena_device_validate_params(adapter, &get_feat_ctx);
3340	if (rc) {
3341		dev_err(&pdev->dev, "Validation of device parameters failed\n");
3342		goto err_device_destroy;
3343	}
3344
3345	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3346	if (rc) {
3347		dev_err(&pdev->dev, "Enable MSI-X failed\n");
3348		goto err_device_destroy;
3349	}
3350	/* If the interface was up before the reset, bring it up */
3351	if (adapter->dev_up_before_reset) {
3352		rc = ena_up(adapter);
3353		if (rc) {
3354			dev_err(&pdev->dev, "Failed to create I/O queues\n");
3355			goto err_disable_msix;
3356		}
3357	}
3358
3359	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3360
3361	clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3362	if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
3363		netif_carrier_on(adapter->netdev);
3364
3365	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
3366	adapter->last_keep_alive_jiffies = jiffies;
3367
3368	return rc;
3369err_disable_msix:
3370	ena_free_mgmnt_irq(adapter);
3371	ena_disable_msix(adapter);
3372err_device_destroy:
3373	ena_com_abort_admin_commands(ena_dev);
3374	ena_com_wait_for_abort_completion(ena_dev);
3375	ena_com_admin_destroy(ena_dev);
3376	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
3377	ena_com_mmio_reg_read_request_destroy(ena_dev);
3378err:
3379	clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3380	clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3381	dev_err(&pdev->dev,
3382		"Reset attempt failed. Can not reset the device\n");
3383
3384	return rc;
3385}
3386
3387static void ena_fw_reset_device(struct work_struct *work)
3388{
3389	int rc = 0;
3390
3391	struct ena_adapter *adapter =
3392		container_of(work, struct ena_adapter, reset_task);
3393
3394	rtnl_lock();
3395
3396	if (likely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
3397		rc |= ena_destroy_device(adapter, false);
3398		rc |= ena_restore_device(adapter);
3399		adapter->dev_stats.reset_fail += !!rc;
3400
3401		dev_err(&adapter->pdev->dev, "Device reset completed successfully\n");
3402	}
3403
3404	rtnl_unlock();
3405}
3406
3407static int check_for_rx_interrupt_queue(struct ena_adapter *adapter,
3408					struct ena_ring *rx_ring)
3409{
3410	struct ena_napi *ena_napi = container_of(rx_ring->napi, struct ena_napi, napi);
3411
3412	if (likely(READ_ONCE(ena_napi->first_interrupt)))
3413		return 0;
3414
3415	if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
3416		return 0;
3417
3418	rx_ring->no_interrupt_event_cnt++;
3419
3420	if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
3421		netif_err(adapter, rx_err, adapter->netdev,
3422			  "Potential MSIX issue on Rx side Queue = %d. Reset the device\n",
3423			  rx_ring->qid);
3424
3425		ena_reset_device(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
3426		return -EIO;
3427	}
3428
3429	return 0;
3430}
3431
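/* Scan the TX ring for packets whose completion hasn't arrived within
 * missing_tx_completion_to: if no interrupt was ever received on the queue
 * and twice the timeout has passed, reset with MISS_INTERRUPT; if napi is
 * scheduled but appears starved, allow a doubled grace period before a
 * SUSPECTED_POLL_STARVATION reset; otherwise count the missed completions
 * against missing_tx_completion_threshold.
 */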
3432static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
3433					  struct ena_ring *tx_ring)
3434{
3435	struct ena_napi *ena_napi = container_of(tx_ring->napi, struct ena_napi, napi);
3436	enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_MISS_TX_CMPL;
3437	unsigned int time_since_last_napi;
3438	unsigned int missing_tx_comp_to;
3439	bool is_tx_comp_time_expired;
3440	struct ena_tx_buffer *tx_buf;
3441	unsigned long last_jiffies;
3442	int napi_scheduled;
3443	u32 missed_tx = 0;
3444	int i, rc = 0;
3445
3446	missing_tx_comp_to = jiffies_to_msecs(adapter->missing_tx_completion_to);
3447
3448	for (i = 0; i < tx_ring->ring_size; i++) {
3449		tx_buf = &tx_ring->tx_buffer_info[i];
3450		last_jiffies = tx_buf->last_jiffies;
3451
3452		if (last_jiffies == 0)
3453			/* no pending Tx at this location */
3454			continue;
3455
3456		is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies +
3457			 2 * adapter->missing_tx_completion_to);
3458
3459		if (unlikely(!READ_ONCE(ena_napi->first_interrupt) && is_tx_comp_time_expired)) {
3460			/* If the interrupt is still not received after the
3461			 * grace period, we schedule a reset
3462			 */
3463			netif_err(adapter, tx_err, adapter->netdev,
3464				  "Potential MSIX issue on Tx side Queue = %d. Reset the device\n",
3465				  tx_ring->qid);
3466			ena_reset_device(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
3467			return -EIO;
3468		}
3469
3470		is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies +
3471			adapter->missing_tx_completion_to);
3472
3473		if (unlikely(is_tx_comp_time_expired)) {
3474			time_since_last_napi =
3475				jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies);
3476			napi_scheduled = !!(ena_napi->napi.state & NAPIF_STATE_SCHED);
3477
3478			if (missing_tx_comp_to < time_since_last_napi && napi_scheduled) {
3479				/* We suspect napi isn't called because the
3480				 * bottom half is not run. Require a bigger
3481				 * timeout for these cases
3482				 */
3483				if (!time_is_before_jiffies(last_jiffies +
3484					2 * adapter->missing_tx_completion_to))
3485					continue;
3486
3487				reset_reason = ENA_REGS_RESET_SUSPECTED_POLL_STARVATION;
3488			}
3489
3490			missed_tx++;
3491
3492			if (tx_buf->print_once)
3493				continue;
3494
3495			netif_notice(adapter, tx_err, adapter->netdev,
3496				     "TX hasn't completed, qid %d, index %d. %u usecs from last napi execution, napi scheduled: %d\n",
3497				     tx_ring->qid, i, time_since_last_napi, napi_scheduled);
3498
3499			tx_buf->print_once = 1;
3500		}
3501	}
3502
3503	if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
3504		netif_err(adapter, tx_err, adapter->netdev,
3505			  "Lost TX completions are above the threshold (%d > %d). Completion transmission timeout: %u.\n",
3506			  missed_tx,
3507			  adapter->missing_tx_completion_threshold,
3508			  missing_tx_comp_to);
3509		netif_err(adapter, tx_err, adapter->netdev,
3510			  "Resetting the device\n");
3511
3512		ena_reset_device(adapter, reset_reason);
3513		rc = -EIO;
3514	}
3515
3516	ena_increase_stat(&tx_ring->tx_stats.missed_tx, missed_tx,
3517			  &tx_ring->syncp);
3518
3519	return rc;
3520}
3521
3522static void check_for_missing_completions(struct ena_adapter *adapter)
3523{
3524	struct ena_ring *tx_ring;
3525	struct ena_ring *rx_ring;
3526	int qid, budget, rc;
3527	int io_queue_count;
3528
3529	io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
3530
3531	/* Make sure the driver didn't turn the device off in another process */
3532	smp_rmb();
3533
3534	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3535		return;
3536
3537	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
3538		return;
3539
3540	if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
3541		return;
3542
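	/* Check at most ENA_MONITORED_TX_QUEUES queues per invocation,
	 * round-robin, resuming after the last queue examined previously.
	 */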
3543	budget = min_t(u32, io_queue_count, ENA_MONITORED_TX_QUEUES);
3544
3545	qid = adapter->last_monitored_tx_qid;
3546
3547	while (budget) {
3548		qid = (qid + 1) % io_queue_count;
3549
3550		tx_ring = &adapter->tx_ring[qid];
3551		rx_ring = &adapter->rx_ring[qid];
3552
3553		rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
3554		if (unlikely(rc))
3555			return;
3556
3557		rc =  !ENA_IS_XDP_INDEX(adapter, qid) ?
3558			check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
3559		if (unlikely(rc))
3560			return;
3561
3562		budget--;
3563	}
3564
3565	adapter->last_monitored_tx_qid = qid;
3566}
3567
3568/* trigger napi schedule after 2 consecutive detections */
3569#define EMPTY_RX_REFILL 2
3570/* For the rare case where the device runs out of Rx descriptors and the
3571 * napi handler failed to refill new Rx descriptors (due to a lack of memory
3572 * for example).
3573 * This case will lead to a deadlock:
3574 * The device won't send interrupts since all the new Rx packets will be dropped.
3575 * The napi handler won't allocate new Rx descriptors, so the device won't be
3576 * able to deliver new Rx packets.
3577 *
3578 * This scenario can happen when the kernel's vm.min_free_kbytes is too small.
3579 * It is recommended to have at least 512MB, with a minimum of 128MB for a
3580 * constrained environment.
3581 *
3582 * When such a situation is detected - reschedule napi.
3583 */
3584static void check_for_empty_rx_ring(struct ena_adapter *adapter)
3585{
3586	struct ena_ring *rx_ring;
3587	int i, refill_required;
3588
3589	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3590		return;
3591
3592	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
3593		return;
3594
3595	for (i = 0; i < adapter->num_io_queues; i++) {
3596		rx_ring = &adapter->rx_ring[i];
3597
3598		refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
3599		if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
3600			rx_ring->empty_rx_queue++;
3601
3602			if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
3603				ena_increase_stat(&rx_ring->rx_stats.empty_rx_ring, 1,
3604						  &rx_ring->syncp);
3605
3606				netif_err(adapter, drv, adapter->netdev,
3607					  "Trigger refill for ring %d\n", i);
3608
3609				napi_schedule(rx_ring->napi);
3610				rx_ring->empty_rx_queue = 0;
3611			}
3612		} else {
3613			rx_ring->empty_rx_queue = 0;
3614		}
3615	}
3616}
3617
3618/* Check for keep alive expiration */
3619static void check_for_missing_keep_alive(struct ena_adapter *adapter)
3620{
3621	unsigned long keep_alive_expired;
3622
3623	if (!adapter->wd_state)
3624		return;
3625
3626	if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3627		return;
3628
3629	keep_alive_expired = adapter->last_keep_alive_jiffies +
3630			     adapter->keep_alive_timeout;
3631	if (unlikely(time_is_before_jiffies(keep_alive_expired))) {
3632		netif_err(adapter, drv, adapter->netdev,
3633			  "Keep alive watchdog timeout.\n");
3634		ena_increase_stat(&adapter->dev_stats.wd_expired, 1,
3635				  &adapter->syncp);
3636		ena_reset_device(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
3637	}
3638}
3639
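/* Trigger a device reset if the admin queue is no longer in a running state */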
3640static void check_for_admin_com_state(struct ena_adapter *adapter)
3641{
3642	if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) {
3643		netif_err(adapter, drv, adapter->netdev,
3644			  "ENA admin queue is not in running state!\n");
3645		ena_increase_stat(&adapter->dev_stats.admin_q_pause, 1,
3646				  &adapter->syncp);
3647		ena_reset_device(adapter, ENA_REGS_RESET_ADMIN_TO);
3648	}
3649}
3650
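/* Apply timeout and threshold hints reported by the device, either from the
 * get-features response at probe time or from a later AENQ notification.
 */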
3651static void ena_update_hints(struct ena_adapter *adapter,
3652			     struct ena_admin_ena_hw_hints *hints)
3653{
3654	struct net_device *netdev = adapter->netdev;
3655
3656	if (hints->admin_completion_tx_timeout)
3657		adapter->ena_dev->admin_queue.completion_timeout =
3658			hints->admin_completion_tx_timeout * 1000;
3659
3660	if (hints->mmio_read_timeout)
3661		/* convert to usec */
3662		adapter->ena_dev->mmio_read.reg_read_to =
3663			hints->mmio_read_timeout * 1000;
3664
3665	if (hints->missed_tx_completion_count_threshold_to_reset)
3666		adapter->missing_tx_completion_threshold =
3667			hints->missed_tx_completion_count_threshold_to_reset;
3668
3669	if (hints->missing_tx_completion_timeout) {
3670		if (hints->missing_tx_completion_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3671			adapter->missing_tx_completion_to = ENA_HW_HINTS_NO_TIMEOUT;
3672		else
3673			adapter->missing_tx_completion_to =
3674				msecs_to_jiffies(hints->missing_tx_completion_timeout);
3675	}
3676
3677	if (hints->netdev_wd_timeout)
3678		netdev->watchdog_timeo = msecs_to_jiffies(hints->netdev_wd_timeout);
3679
3680	if (hints->driver_watchdog_timeout) {
3681		if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3682			adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3683		else
3684			adapter->keep_alive_timeout =
3685				msecs_to_jiffies(hints->driver_watchdog_timeout);
3686	}
3687}
3688
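/* Mirror the netdev's current feature flags into the host info structure */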
3689static void ena_update_host_info(struct ena_admin_host_info *host_info,
3690				 struct net_device *netdev)
3691{
3692	host_info->supported_network_features[0] =
3693		netdev->features & GENMASK_ULL(31, 0);
3694	host_info->supported_network_features[1] =
3695		(netdev->features & GENMASK_ULL(63, 32)) >> 32;
3696}
3697
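/* ena_timer_service - periodic (once per second) health check
 *
 * Checks keep-alive expiration, admin queue state, missing TX completions and
 * empty RX rings, refreshes the debug area and host info, and queues the reset
 * task if a reset has been triggered. Re-arms itself for the next second.
 */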
3698static void ena_timer_service(struct timer_list *t)
3699{
3700	struct ena_adapter *adapter = from_timer(adapter, t, timer_service);
3701	u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr;
3702	struct ena_admin_host_info *host_info =
3703		adapter->ena_dev->host_attr.host_info;
3704
3705	check_for_missing_keep_alive(adapter);
3706
3707	check_for_admin_com_state(adapter);
3708
3709	check_for_missing_completions(adapter);
3710
3711	check_for_empty_rx_ring(adapter);
3712
3713	if (debug_area)
3714		ena_dump_stats_to_buf(adapter, debug_area);
3715
3716	if (host_info)
3717		ena_update_host_info(host_info, adapter->netdev);
3718
3719	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
3720		netif_err(adapter, drv, adapter->netdev,
3721			  "Trigger reset is on\n");
3722		ena_dump_stats_to_dmesg(adapter);
3723		queue_work(ena_wq, &adapter->reset_task);
3724		return;
3725	}
3726
3727	/* Reset the timer */
3728	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
3729}
3730
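/* Compute the maximum number of IO queues as the minimum of the device's
 * advertised SQ/CQ limits, the number of online CPUs, and the available
 * MSI-X vectors (one vector is reserved for management).
 */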
3731static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev,
3732				     struct ena_com_dev *ena_dev,
3733				     struct ena_com_dev_get_features_ctx *get_feat_ctx)
3734{
3735	u32 io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
3736
3737	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
3738		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
3739			&get_feat_ctx->max_queue_ext.max_queue_ext;
3740		io_rx_num = min_t(u32, max_queue_ext->max_rx_sq_num,
3741				  max_queue_ext->max_rx_cq_num);
3742
3743		io_tx_sq_num = max_queue_ext->max_tx_sq_num;
3744		io_tx_cq_num = max_queue_ext->max_tx_cq_num;
3745	} else {
3746		struct ena_admin_queue_feature_desc *max_queues =
3747			&get_feat_ctx->max_queues;
3748		io_tx_sq_num = max_queues->max_sq_num;
3749		io_tx_cq_num = max_queues->max_cq_num;
3750		io_rx_num = min_t(u32, io_tx_sq_num, io_tx_cq_num);
3751	}
3752
3753	/* In case of LLQ use the llq fields for the tx SQ/CQ */
3754	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3755		io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
3756
3757	max_num_io_queues = min_t(u32, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
3758	max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num);
3759	max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num);
3760	max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num);
3761	/* 1 IRQ for management and 1 IRQ per IO queue */
3762	max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1);
3763
3764	return max_num_io_queues;
3765}
3766
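/* Translate the device's reported offload capabilities into netdev feature flags */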
3767static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
3768				 struct net_device *netdev)
3769{
3770	netdev_features_t dev_features = 0;
3771
3772	/* Set offload features */
3773	if (feat->offload.tx &
3774		ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
3775		dev_features |= NETIF_F_IP_CSUM;
3776
3777	if (feat->offload.tx &
3778		ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
3779		dev_features |= NETIF_F_IPV6_CSUM;
3780
3781	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
3782		dev_features |= NETIF_F_TSO;
3783
3784	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK)
3785		dev_features |= NETIF_F_TSO6;
3786
3787	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK)
3788		dev_features |= NETIF_F_TSO_ECN;
3789
3790	if (feat->offload.rx_supported &
3791		ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
3792		dev_features |= NETIF_F_RXCSUM;
3793
3794	if (feat->offload.rx_supported &
3795		ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
3796		dev_features |= NETIF_F_RXCSUM;
3797
3798	netdev->features =
3799		dev_features |
3800		NETIF_F_SG |
3801		NETIF_F_RXHASH |
3802		NETIF_F_HIGHDMA;
3803
3804	netdev->hw_features |= netdev->features;
3805	netdev->vlan_features |= netdev->features;
3806}
3807
3808static void ena_set_conf_feat_params(struct ena_adapter *adapter,
3809				     struct ena_com_dev_get_features_ctx *feat)
3810{
3811	struct net_device *netdev = adapter->netdev;
3812
3813	/* Copy mac address */
3814	if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) {
3815		eth_hw_addr_random(netdev);
3816		ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
3817	} else {
3818		ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
3819		eth_hw_addr_set(netdev, adapter->mac_addr);
3820	}
3821
3822	/* Set offload features */
3823	ena_set_dev_offloads(feat, netdev);
3824
3825	adapter->max_mtu = feat->dev_attr.max_mtu;
3826	netdev->max_mtu = adapter->max_mtu;
3827	netdev->min_mtu = ENA_MIN_MTU;
3828}
3829
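/* Build a default RSS configuration: an indirection table that spreads flows
 * across all IO queues and a Toeplitz hash function.
 */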
3830static int ena_rss_init_default(struct ena_adapter *adapter)
3831{
3832	struct ena_com_dev *ena_dev = adapter->ena_dev;
3833	struct device *dev = &adapter->pdev->dev;
3834	int rc, i;
3835	u32 val;
3836
3837	rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
3838	if (unlikely(rc)) {
3839		dev_err(dev, "Cannot init indirect table\n");
3840		goto err_rss_init;
3841	}
3842
3843	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
3844		val = ethtool_rxfh_indir_default(i, adapter->num_io_queues);
3845		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
3846						       ENA_IO_RXQ_IDX(val));
3847		if (unlikely(rc)) {
3848			dev_err(dev, "Cannot fill indirect table\n");
3849			goto err_fill_indir;
3850		}
3851	}
3852
3853	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL, ENA_HASH_KEY_SIZE,
3854					0xFFFFFFFF);
3855	if (unlikely(rc && (rc != -EOPNOTSUPP))) {
3856		dev_err(dev, "Cannot fill hash function\n");
3857		goto err_fill_indir;
3858	}
3859
3860	rc = ena_com_set_default_hash_ctrl(ena_dev);
3861	if (unlikely(rc && (rc != -EOPNOTSUPP))) {
3862		dev_err(dev, "Cannot fill hash control\n");
3863		goto err_fill_indir;
3864	}
3865
3866	return 0;
3867
3868err_fill_indir:
3869	ena_com_rss_destroy(ena_dev);
3870err_rss_init:
3871
3872	return rc;
3873}
3874
3875static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
3876{
3877	int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
3878
3879	pci_release_selected_regions(pdev, release_bars);
3880}
3881
3882/* ena_probe - Device Initialization Routine
3883 * @pdev: PCI device information struct
3884 * @ent: entry in ena_pci_tbl
3885 *
3886 * Returns 0 on success, negative on failure
3887 *
3888 * ena_probe initializes an adapter identified by a pci_dev structure.
3889 * It performs the OS initialization, configures the adapter private
3890 * structure, and resets the hardware.
3891 */
3892static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
3893{
3894	struct ena_com_dev_get_features_ctx get_feat_ctx;
3895	struct ena_com_dev *ena_dev = NULL;
3896	struct ena_adapter *adapter;
3897	struct net_device *netdev;
3898	static int adapters_found;
3899	u32 max_num_io_queues;
3900	bool wd_state;
3901	int bars, rc;
3902
3903	dev_dbg(&pdev->dev, "%s\n", __func__);
3904
3905	rc = pci_enable_device_mem(pdev);
3906	if (rc) {
3907		dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
3908		return rc;
3909	}
3910
3911	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(ENA_MAX_PHYS_ADDR_SIZE_BITS));
3912	if (rc) {
3913		dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", rc);
3914		goto err_disable_device;
3915	}
3916
3917	pci_set_master(pdev);
3918
3919	ena_dev = vzalloc(sizeof(*ena_dev));
3920	if (!ena_dev) {
3921		rc = -ENOMEM;
3922		goto err_disable_device;
3923	}
3924
3925	bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
3926	rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
3927	if (rc) {
3928		dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
3929			rc);
3930		goto err_free_ena_dev;
3931	}
3932
3933	ena_dev->reg_bar = devm_ioremap(&pdev->dev,
3934					pci_resource_start(pdev, ENA_REG_BAR),
3935					pci_resource_len(pdev, ENA_REG_BAR));
3936	if (!ena_dev->reg_bar) {
3937		dev_err(&pdev->dev, "Failed to remap regs bar\n");
3938		rc = -EFAULT;
3939		goto err_free_region;
3940	}
3941
3942	ena_dev->ena_min_poll_delay_us = ENA_ADMIN_POLL_DELAY_US;
3943
3944	ena_dev->dmadev = &pdev->dev;
3945
3946	netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), ENA_MAX_RINGS);
3947	if (!netdev) {
3948		dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
3949		rc = -ENOMEM;
3950		goto err_free_region;
3951	}
3952
3953	SET_NETDEV_DEV(netdev, &pdev->dev);
3954	adapter = netdev_priv(netdev);
3955	adapter->ena_dev = ena_dev;
3956	adapter->netdev = netdev;
3957	adapter->pdev = pdev;
3958	adapter->msg_enable = DEFAULT_MSG_ENABLE;
3959
3960	ena_dev->net_device = netdev;
3961
3962	pci_set_drvdata(pdev, adapter);
3963
3964	rc = ena_com_allocate_customer_metrics_buffer(ena_dev);
3965	if (rc) {
3966		netdev_err(netdev, "ena_com_allocate_customer_metrics_buffer failed\n");
3967		goto err_netdev_destroy;
3968	}
3969
3970	rc = ena_map_llq_mem_bar(pdev, ena_dev, bars);
3971	if (rc) {
3972		dev_err(&pdev->dev, "ENA LLQ bar mapping failed\n");
3973		goto err_metrics_destroy;
3974	}
3975
3976	rc = ena_device_init(adapter, pdev, &get_feat_ctx, &wd_state);
3977	if (rc) {
3978		dev_err(&pdev->dev, "ENA device init failed\n");
3979		if (rc == -ETIME)
3980			rc = -EPROBE_DEFER;
3981		goto err_metrics_destroy;
3982	}
3983
3984	/* Initial TX and RX interrupt delay. Assumes 1 usec granularity.
3985	 * Updated during device initialization with the real granularity
3986	 */
3987	ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
3988	ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS;
3989	ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
3990	max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx);
3991	if (unlikely(!max_num_io_queues)) {
3992		rc = -EFAULT;
3993		goto err_device_destroy;
3994	}
3995
3996	ena_set_conf_feat_params(adapter, &get_feat_ctx);
3997
3998	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3999
4000	adapter->num_io_queues = max_num_io_queues;
4001	adapter->max_num_io_queues = max_num_io_queues;
4002	adapter->last_monitored_tx_qid = 0;
4003
4004	adapter->xdp_first_ring = 0;
4005	adapter->xdp_num_queues = 0;
4006
4007	adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
4008	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4009		adapter->disable_meta_caching =
4010			!!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
4011			   BIT(ENA_ADMIN_DISABLE_META_CACHING));
4012
4013	adapter->wd_state = wd_state;
4014
4015	snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);
4016
4017	rc = ena_com_init_interrupt_moderation(adapter->ena_dev);
4018	if (rc) {
4019		dev_err(&pdev->dev,
4020			"Failed to query interrupt moderation feature\n");
4021		goto err_device_destroy;
4022	}
4023
4024	ena_init_io_rings(adapter,
4025			  0,
4026			  adapter->xdp_num_queues +
4027			  adapter->num_io_queues);
4028
4029	netdev->netdev_ops = &ena_netdev_ops;
4030	netdev->watchdog_timeo = TX_TIMEOUT;
4031	ena_set_ethtool_ops(netdev);
4032
4033	netdev->priv_flags |= IFF_UNICAST_FLT;
4034
4035	u64_stats_init(&adapter->syncp);
4036
4037	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
4038	if (rc) {
4039		dev_err(&pdev->dev,
4040			"Failed to enable and set the admin interrupts\n");
4041		goto err_worker_destroy;
4042	}
4043	rc = ena_rss_init_default(adapter);
4044	if (rc && (rc != -EOPNOTSUPP)) {
4045		dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc);
4046		goto err_free_msix;
4047	}
4048
4049	ena_config_debug_area(adapter);
4050
4051	if (ena_xdp_legal_queue_count(adapter, adapter->num_io_queues))
4052		netdev->xdp_features = NETDEV_XDP_ACT_BASIC |
4053				       NETDEV_XDP_ACT_REDIRECT;
4054
4055	memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
4056
4057	netif_carrier_off(netdev);
4058
4059	rc = register_netdev(netdev);
4060	if (rc) {
4061		dev_err(&pdev->dev, "Cannot register net device\n");
4062		goto err_rss;
4063	}
4064
4065	INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
4066
4067	adapter->last_keep_alive_jiffies = jiffies;
4068	adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
4069	adapter->missing_tx_completion_to = TX_TIMEOUT;
4070	adapter->missing_tx_completion_threshold = MAX_NUM_OF_TIMEOUTED_PACKETS;
4071
4072	ena_update_hints(adapter, &get_feat_ctx.hw_hints);
4073
4074	timer_setup(&adapter->timer_service, ena_timer_service, 0);
4075	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
4076
4077	dev_info(&pdev->dev,
4078		 "%s found at mem %lx, mac addr %pM\n",
4079		 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
4080		 netdev->dev_addr);
4081
4082	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
4083
4084	adapters_found++;
4085
4086	return 0;
4087
4088err_rss:
4089	ena_com_delete_debug_area(ena_dev);
4090	ena_com_rss_destroy(ena_dev);
4091err_free_msix:
4092	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR);
4093	/* stop submitting admin commands on a device that was reset */
4094	ena_com_set_admin_running_state(ena_dev, false);
4095	ena_free_mgmnt_irq(adapter);
4096	ena_disable_msix(adapter);
4097err_worker_destroy:
4098	del_timer(&adapter->timer_service);
4099err_device_destroy:
4100	ena_com_delete_host_info(ena_dev);
4101	ena_com_admin_destroy(ena_dev);
4102err_metrics_destroy:
4103	ena_com_delete_customer_metrics_buffer(ena_dev);
4104err_netdev_destroy:
4105	free_netdev(netdev);
4106err_free_region:
4107	ena_release_bars(ena_dev, pdev);
4108err_free_ena_dev:
4109	vfree(ena_dev);
4110err_disable_device:
4111	pci_disable_device(pdev);
4112	return rc;
4113}
4114
4115/*****************************************************************************/
4116
4117/* __ena_shutoff - Helper used in both PCI remove/shutdown routines
4118 * @pdev: PCI device information struct
4119 * @shutdown: Is it a shutdown operation? If false, it is a removal
4120 *
4121 * __ena_shutoff is a helper routine that does the real work on shutdown and
4122 * removal paths; the difference between those paths is whether to detach
4123 * or unregister the netdevice.
4124 */
4125static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
4126{
4127	struct ena_adapter *adapter = pci_get_drvdata(pdev);
4128	struct ena_com_dev *ena_dev;
4129	struct net_device *netdev;
4130
4131	ena_dev = adapter->ena_dev;
4132	netdev = adapter->netdev;
4133
4134#ifdef CONFIG_RFS_ACCEL
4135	if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
4136		free_irq_cpu_rmap(netdev->rx_cpu_rmap);
4137		netdev->rx_cpu_rmap = NULL;
4138	}
4139
4140#endif /* CONFIG_RFS_ACCEL */
4141	/* Make sure timer and reset routine won't be called after
4142	 * freeing device resources.
4143	 */
4144	del_timer_sync(&adapter->timer_service);
4145	cancel_work_sync(&adapter->reset_task);
4146
4147	rtnl_lock(); /* lock released inside the below if-else block */
4148	adapter->reset_reason = ENA_REGS_RESET_SHUTDOWN;
4149	ena_destroy_device(adapter, true);
4150
4151	if (shutdown) {
4152		netif_device_detach(netdev);
4153		dev_close(netdev);
4154		rtnl_unlock();
4155	} else {
4156		rtnl_unlock();
4157		unregister_netdev(netdev);
4158		free_netdev(netdev);
4159	}
4160
4161	ena_com_rss_destroy(ena_dev);
4162
4163	ena_com_delete_debug_area(ena_dev);
4164
4165	ena_com_delete_host_info(ena_dev);
4166
4167	ena_com_delete_customer_metrics_buffer(ena_dev);
4168
4169	ena_release_bars(ena_dev, pdev);
4170
4171	pci_disable_device(pdev);
4172
4173	vfree(ena_dev);
4174}
4175
4176/* ena_remove - Device Removal Routine
4177 * @pdev: PCI device information struct
4178 *
4179 * ena_remove is called by the PCI subsystem to alert the driver
4180 * that it should release a PCI device.
4181 */
4182
4183static void ena_remove(struct pci_dev *pdev)
4184{
4185	__ena_shutoff(pdev, false);
4186}
4187
4188/* ena_shutdown - Device Shutdown Routine
4189 * @pdev: PCI device information struct
4190 *
4191 * ena_shutdown is called by the PCI subsystem to alert the driver that
4192 * a shutdown/reboot (or kexec) is happening and the device must be disabled.
4193 */
4194
4195static void ena_shutdown(struct pci_dev *pdev)
4196{
4197	__ena_shutoff(pdev, true);
4198}
4199
4200/* ena_suspend - PM suspend callback
4201 * @dev_d: Device information struct
4202 */
4203static int __maybe_unused ena_suspend(struct device *dev_d)
4204{
4205	struct pci_dev *pdev = to_pci_dev(dev_d);
4206	struct ena_adapter *adapter = pci_get_drvdata(pdev);
4207
4208	ena_increase_stat(&adapter->dev_stats.suspend, 1, &adapter->syncp);
4209
4210	rtnl_lock();
4211	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
4212		dev_err(&pdev->dev,
4213			"Ignoring device reset request as the device is being suspended\n");
4214		clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
4215	}
4216	ena_destroy_device(adapter, true);
4217	rtnl_unlock();
4218	return 0;
4219}
4220
4221/* ena_resume - PM resume callback
4222 * @dev_d: Device information struct
4223 */
4224static int __maybe_unused ena_resume(struct device *dev_d)
4225{
4226	struct ena_adapter *adapter = dev_get_drvdata(dev_d);
4227	int rc;
4228
4229	ena_increase_stat(&adapter->dev_stats.resume, 1, &adapter->syncp);
4230
4231	rtnl_lock();
4232	rc = ena_restore_device(adapter);
4233	rtnl_unlock();
4234	return rc;
4235}
4236
4237static SIMPLE_DEV_PM_OPS(ena_pm_ops, ena_suspend, ena_resume);
4238
4239static struct pci_driver ena_pci_driver = {
4240	.name		= DRV_MODULE_NAME,
4241	.id_table	= ena_pci_tbl,
4242	.probe		= ena_probe,
4243	.remove		= ena_remove,
4244	.shutdown	= ena_shutdown,
4245	.driver.pm	= &ena_pm_ops,
4246	.sriov_configure = pci_sriov_configure_simple,
4247};
4248
4249static int __init ena_init(void)
4250{
4251	int ret;
4252
4253	ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
4254	if (!ena_wq) {
4255		pr_err("Failed to create workqueue\n");
4256		return -ENOMEM;
4257	}
4258
4259	ret = pci_register_driver(&ena_pci_driver);
4260	if (ret)
4261		destroy_workqueue(ena_wq);
4262
4263	return ret;
4264}
4265
4266static void __exit ena_cleanup(void)
4267{
4268	pci_unregister_driver(&ena_pci_driver);
4269
4270	if (ena_wq) {
4271		destroy_workqueue(ena_wq);
4272		ena_wq = NULL;
4273	}
4274}
4275
4276/******************************************************************************
4277 ******************************** AENQ Handlers *******************************
4278 *****************************************************************************/
4279/* ena_update_on_link_change:
4280 * Notify the network interface about the change in link status
4281 */
4282static void ena_update_on_link_change(void *adapter_data,
4283				      struct ena_admin_aenq_entry *aenq_e)
4284{
4285	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4286	struct ena_admin_aenq_link_change_desc *aenq_desc =
4287		(struct ena_admin_aenq_link_change_desc *)aenq_e;
4288	int status = aenq_desc->flags &
4289		ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
4290
4291	if (status) {
4292		netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__);
4293		set_bit(ENA_FLAG_LINK_UP, &adapter->flags);
4294		if (!test_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags))
4295			netif_carrier_on(adapter->netdev);
4296	} else {
4297		clear_bit(ENA_FLAG_LINK_UP, &adapter->flags);
4298		netif_carrier_off(adapter->netdev);
4299	}
4300}
4301
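/* AENQ keep-alive handler: refresh the watchdog timestamp and record the
 * device-reported RX/TX drop counters.
 */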
4302static void ena_keep_alive_wd(void *adapter_data,
4303			      struct ena_admin_aenq_entry *aenq_e)
4304{
4305	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4306	struct ena_admin_aenq_keep_alive_desc *desc;
4307	u64 rx_drops;
4308	u64 tx_drops;
4309
4310	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
4311	adapter->last_keep_alive_jiffies = jiffies;
4312
4313	rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low;
4314	tx_drops = ((u64)desc->tx_drops_high << 32) | desc->tx_drops_low;
4315
4316	u64_stats_update_begin(&adapter->syncp);
4317	/* These stats are accumulated by the device, so the counters indicate
4318	 * all drops since last reset.
4319	 */
4320	adapter->dev_stats.rx_drops = rx_drops;
4321	adapter->dev_stats.tx_drops = tx_drops;
4322	u64_stats_update_end(&adapter->syncp);
4323}
4324
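/* AENQ notification handler; only hardware hint updates are currently handled */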
4325static void ena_notification(void *adapter_data,
4326			     struct ena_admin_aenq_entry *aenq_e)
4327{
4328	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4329	struct ena_admin_ena_hw_hints *hints;
4330
4331	WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
4332	     "Invalid group(%x) expected %x\n",
4333	     aenq_e->aenq_common_desc.group,
4334	     ENA_ADMIN_NOTIFICATION);
4335
4336	switch (aenq_e->aenq_common_desc.syndrome) {
4337	case ENA_ADMIN_UPDATE_HINTS:
4338		hints = (struct ena_admin_ena_hw_hints *)
4339			(&aenq_e->inline_data_w4);
4340		ena_update_hints(adapter, hints);
4341		break;
4342	default:
4343		netif_err(adapter, drv, adapter->netdev,
4344			  "Invalid aenq notification link state %d\n",
4345			  aenq_e->aenq_common_desc.syndrome);
4346	}
4347}
4348
4349/* This handler will be called for an unknown event group or unimplemented handlers */
4350static void unimplemented_aenq_handler(void *data,
4351				       struct ena_admin_aenq_entry *aenq_e)
4352{
4353	struct ena_adapter *adapter = (struct ena_adapter *)data;
4354
4355	netif_err(adapter, drv, adapter->netdev,
4356		  "Unknown event was received or event with unimplemented handler\n");
4357}
4358
4359static struct ena_aenq_handlers aenq_handlers = {
4360	.handlers = {
4361		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
4362		[ENA_ADMIN_NOTIFICATION] = ena_notification,
4363		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
4364	},
4365	.unimplemented_handler = unimplemented_aenq_handler
4366};
4367
4368module_init(ena_init);
4369module_exit(ena_cleanup);
v6.2
   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/*
   3 * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
   4 */
   5
   6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   7
   8#ifdef CONFIG_RFS_ACCEL
   9#include <linux/cpu_rmap.h>
  10#endif /* CONFIG_RFS_ACCEL */
  11#include <linux/ethtool.h>
  12#include <linux/kernel.h>
  13#include <linux/module.h>
  14#include <linux/numa.h>
  15#include <linux/pci.h>
  16#include <linux/utsname.h>
  17#include <linux/version.h>
  18#include <linux/vmalloc.h>
  19#include <net/ip.h>
  20
  21#include "ena_netdev.h"
  22#include <linux/bpf_trace.h>
  23#include "ena_pci_id_tbl.h"
 
  24
  25MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
  26MODULE_DESCRIPTION(DEVICE_NAME);
  27MODULE_LICENSE("GPL");
  28
  29/* Time in jiffies before concluding the transmitter is hung. */
  30#define TX_TIMEOUT  (5 * HZ)
  31
  32#define ENA_MAX_RINGS min_t(unsigned int, ENA_MAX_NUM_IO_QUEUES, num_possible_cpus())
  33
  34#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
  35		NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
  36
  37static struct ena_aenq_handlers aenq_handlers;
  38
  39static struct workqueue_struct *ena_wq;
  40
  41MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
  42
  43static int ena_rss_init_default(struct ena_adapter *adapter);
  44static void check_for_admin_com_state(struct ena_adapter *adapter);
  45static void ena_destroy_device(struct ena_adapter *adapter, bool graceful);
  46static int ena_restore_device(struct ena_adapter *adapter);
  47
  48static void ena_init_io_rings(struct ena_adapter *adapter,
  49			      int first_index, int count);
  50static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index,
  51				   int count);
  52static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index,
  53				  int count);
  54static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid);
  55static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
  56					   int first_index,
  57					   int count);
  58static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid);
  59static void ena_free_tx_resources(struct ena_adapter *adapter, int qid);
  60static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget);
  61static void ena_destroy_all_tx_queues(struct ena_adapter *adapter);
  62static void ena_free_all_io_tx_resources(struct ena_adapter *adapter);
  63static void ena_napi_disable_in_range(struct ena_adapter *adapter,
  64				      int first_index, int count);
  65static void ena_napi_enable_in_range(struct ena_adapter *adapter,
  66				     int first_index, int count);
  67static int ena_up(struct ena_adapter *adapter);
  68static void ena_down(struct ena_adapter *adapter);
  69static void ena_unmask_interrupt(struct ena_ring *tx_ring,
  70				 struct ena_ring *rx_ring);
  71static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
  72				      struct ena_ring *rx_ring);
  73static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
  74			      struct ena_tx_buffer *tx_info);
  75static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
  76					    int first_index, int count);
  77
  78/* Increase a stat by cnt while holding syncp seqlock on 32bit machines */
  79static void ena_increase_stat(u64 *statp, u64 cnt,
  80			      struct u64_stats_sync *syncp)
  81{
  82	u64_stats_update_begin(syncp);
  83	(*statp) += cnt;
  84	u64_stats_update_end(syncp);
  85}
  86
  87static void ena_ring_tx_doorbell(struct ena_ring *tx_ring)
  88{
  89	ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
  90	ena_increase_stat(&tx_ring->tx_stats.doorbells, 1, &tx_ring->syncp);
  91}
  92
  93static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
  94{
 
  95	struct ena_adapter *adapter = netdev_priv(dev);
 
 
 
 
 
 
 
 
  96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  97	/* Change the state of the device to trigger reset
  98	 * Check that we are not in the middle or a trigger already
  99	 */
 100
 101	if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
 102		return;
 103
 104	ena_reset_device(adapter, ENA_REGS_RESET_OS_NETDEV_WD);
 105	ena_increase_stat(&adapter->dev_stats.tx_timeout, 1, &adapter->syncp);
 106
 107	netif_err(adapter, tx_err, dev, "Transmit time out\n");
 108}
 109
 110static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
 111{
 112	int i;
 113
 114	for (i = 0; i < adapter->num_io_queues; i++)
 115		adapter->rx_ring[i].mtu = mtu;
 116}
 117
 118static int ena_change_mtu(struct net_device *dev, int new_mtu)
 119{
 120	struct ena_adapter *adapter = netdev_priv(dev);
 121	int ret;
 122
 123	ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
 124	if (!ret) {
 125		netif_dbg(adapter, drv, dev, "Set MTU to %d\n", new_mtu);
 126		update_rx_ring_mtu(adapter, new_mtu);
 127		dev->mtu = new_mtu;
 128	} else {
 129		netif_err(adapter, drv, dev, "Failed to set MTU to %d\n",
 130			  new_mtu);
 131	}
 132
 133	return ret;
 134}
 135
 136static int ena_xmit_common(struct net_device *dev,
 137			   struct ena_ring *ring,
 138			   struct ena_tx_buffer *tx_info,
 139			   struct ena_com_tx_ctx *ena_tx_ctx,
 140			   u16 next_to_use,
 141			   u32 bytes)
 142{
 143	struct ena_adapter *adapter = netdev_priv(dev);
 144	int rc, nb_hw_desc;
 145
 146	if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq,
 147						ena_tx_ctx))) {
 148		netif_dbg(adapter, tx_queued, dev,
 149			  "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
 150			  ring->qid);
 151		ena_ring_tx_doorbell(ring);
 152	}
 153
 154	/* prepare the packet's descriptors to dma engine */
 155	rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx,
 156				&nb_hw_desc);
 157
 158	/* In case there isn't enough space in the queue for the packet,
 159	 * we simply drop it. All other failure reasons of
 160	 * ena_com_prepare_tx() are fatal and therefore require a device reset.
 161	 */
 162	if (unlikely(rc)) {
 163		netif_err(adapter, tx_queued, dev,
 164			  "Failed to prepare tx bufs\n");
 165		ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1,
 166				  &ring->syncp);
 167		if (rc != -ENOMEM)
 168			ena_reset_device(adapter,
 169					 ENA_REGS_RESET_DRIVER_INVALID_STATE);
 170		return rc;
 171	}
 172
 173	u64_stats_update_begin(&ring->syncp);
 174	ring->tx_stats.cnt++;
 175	ring->tx_stats.bytes += bytes;
 176	u64_stats_update_end(&ring->syncp);
 177
 178	tx_info->tx_descs = nb_hw_desc;
 
 179	tx_info->last_jiffies = jiffies;
 180	tx_info->print_once = 0;
 181
 182	ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
 183						 ring->ring_size);
 184	return 0;
 185}
 186
 187/* This is the XDP napi callback. XDP queues use a separate napi callback
 188 * than Rx/Tx queues.
 189 */
 190static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
 191{
 192	struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
 193	u32 xdp_work_done, xdp_budget;
 194	struct ena_ring *xdp_ring;
 195	int napi_comp_call = 0;
 196	int ret;
 197
 198	xdp_ring = ena_napi->xdp_ring;
 199
 200	xdp_budget = budget;
 201
 202	if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) ||
 203	    test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) {
 204		napi_complete_done(napi, 0);
 205		return 0;
 206	}
 207
 208	xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget);
 209
 210	/* If the device is about to reset or down, avoid unmask
 211	 * the interrupt and return 0 so NAPI won't reschedule
 212	 */
 213	if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) {
 214		napi_complete_done(napi, 0);
 215		ret = 0;
 216	} else if (xdp_budget > xdp_work_done) {
 217		napi_comp_call = 1;
 218		if (napi_complete_done(napi, xdp_work_done))
 219			ena_unmask_interrupt(xdp_ring, NULL);
 220		ena_update_ring_numa_node(xdp_ring, NULL);
 221		ret = xdp_work_done;
 222	} else {
 223		ret = xdp_budget;
 224	}
 225
 226	u64_stats_update_begin(&xdp_ring->syncp);
 227	xdp_ring->tx_stats.napi_comp += napi_comp_call;
 228	xdp_ring->tx_stats.tx_poll++;
 229	u64_stats_update_end(&xdp_ring->syncp);
 230	xdp_ring->tx_stats.last_napi_jiffies = jiffies;
 231
 232	return ret;
 233}
 234
 235static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
 236				struct ena_tx_buffer *tx_info,
 237				struct xdp_frame *xdpf,
 238				struct ena_com_tx_ctx *ena_tx_ctx)
 239{
 240	struct ena_adapter *adapter = xdp_ring->adapter;
 241	struct ena_com_buf *ena_buf;
 242	int push_len = 0;
 243	dma_addr_t dma;
 244	void *data;
 245	u32 size;
 246
 247	tx_info->xdpf = xdpf;
 248	data = tx_info->xdpf->data;
 249	size = tx_info->xdpf->len;
 250
 251	if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
 252		/* Designate part of the packet for LLQ */
 253		push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
 254
 255		ena_tx_ctx->push_header = data;
 256
 257		size -= push_len;
 258		data += push_len;
 259	}
 260
 261	ena_tx_ctx->header_len = push_len;
 262
 263	if (size > 0) {
 264		dma = dma_map_single(xdp_ring->dev,
 265				     data,
 266				     size,
 267				     DMA_TO_DEVICE);
 268		if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
 269			goto error_report_dma_error;
 270
 271		tx_info->map_linear_data = 0;
 272
 273		ena_buf = tx_info->bufs;
 274		ena_buf->paddr = dma;
 275		ena_buf->len = size;
 276
 277		ena_tx_ctx->ena_bufs = ena_buf;
 278		ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1;
 279	}
 280
 281	return 0;
 282
 283error_report_dma_error:
 284	ena_increase_stat(&xdp_ring->tx_stats.dma_mapping_err, 1,
 285			  &xdp_ring->syncp);
 286	netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n");
 287
 288	return -EINVAL;
 289}
 290
 291static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
 292			      struct net_device *dev,
 293			      struct xdp_frame *xdpf,
 294			      int flags)
 295{
 296	struct ena_com_tx_ctx ena_tx_ctx = {};
 297	struct ena_tx_buffer *tx_info;
 298	u16 next_to_use, req_id;
 299	int rc;
 300
 301	next_to_use = xdp_ring->next_to_use;
 302	req_id = xdp_ring->free_ids[next_to_use];
 303	tx_info = &xdp_ring->tx_buffer_info[req_id];
 304	tx_info->num_of_bufs = 0;
 305
 306	rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx);
 307	if (unlikely(rc))
 308		return rc;
 309
 310	ena_tx_ctx.req_id = req_id;
 311
 312	rc = ena_xmit_common(dev,
 313			     xdp_ring,
 314			     tx_info,
 315			     &ena_tx_ctx,
 316			     next_to_use,
 317			     xdpf->len);
 318	if (rc)
 319		goto error_unmap_dma;
 320
 321	/* trigger the dma engine. ena_ring_tx_doorbell()
 322	 * calls a memory barrier inside it.
 323	 */
 324	if (flags & XDP_XMIT_FLUSH)
 325		ena_ring_tx_doorbell(xdp_ring);
 326
 327	return rc;
 328
 329error_unmap_dma:
 330	ena_unmap_tx_buff(xdp_ring, tx_info);
 331	tx_info->xdpf = NULL;
 332	return rc;
 333}
 334
 335static int ena_xdp_xmit(struct net_device *dev, int n,
 336			struct xdp_frame **frames, u32 flags)
 337{
 338	struct ena_adapter *adapter = netdev_priv(dev);
 339	struct ena_ring *xdp_ring;
 340	int qid, i, nxmit = 0;
 341
 342	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
 343		return -EINVAL;
 344
 345	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
 346		return -ENETDOWN;
 347
 348	/* We assume that all rings have the same XDP program */
 349	if (!READ_ONCE(adapter->rx_ring->xdp_bpf_prog))
 350		return -ENXIO;
 351
 352	qid = smp_processor_id() % adapter->xdp_num_queues;
 353	qid += adapter->xdp_first_ring;
 354	xdp_ring = &adapter->tx_ring[qid];
 355
 356	/* Other CPU ids might try to send thorugh this queue */
 357	spin_lock(&xdp_ring->xdp_tx_lock);
 358
 359	for (i = 0; i < n; i++) {
 360		if (ena_xdp_xmit_frame(xdp_ring, dev, frames[i], 0))
 361			break;
 362		nxmit++;
 363	}
 364
 365	/* Ring doorbell to make device aware of the packets */
 366	if (flags & XDP_XMIT_FLUSH)
 367		ena_ring_tx_doorbell(xdp_ring);
 368
 369	spin_unlock(&xdp_ring->xdp_tx_lock);
 370
 371	/* Return number of packets sent */
 372	return nxmit;
 373}
 374
 375static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
 376{
 377	u32 verdict = ENA_XDP_PASS;
 378	struct bpf_prog *xdp_prog;
 379	struct ena_ring *xdp_ring;
 380	struct xdp_frame *xdpf;
 381	u64 *xdp_stat;
 382
 383	xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);
 384
 385	if (!xdp_prog)
 386		goto out;
 387
 388	verdict = bpf_prog_run_xdp(xdp_prog, xdp);
 389
 390	switch (verdict) {
 391	case XDP_TX:
 392		xdpf = xdp_convert_buff_to_frame(xdp);
 393		if (unlikely(!xdpf)) {
 394			trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
 395			xdp_stat = &rx_ring->rx_stats.xdp_aborted;
 396			verdict = ENA_XDP_DROP;
 397			break;
 398		}
 399
 400		/* Find xmit queue */
 401		xdp_ring = rx_ring->xdp_ring;
 402
 403		/* The XDP queues are shared between XDP_TX and XDP_REDIRECT */
 404		spin_lock(&xdp_ring->xdp_tx_lock);
 405
 406		if (ena_xdp_xmit_frame(xdp_ring, rx_ring->netdev, xdpf,
 407				       XDP_XMIT_FLUSH))
 408			xdp_return_frame(xdpf);
 409
 410		spin_unlock(&xdp_ring->xdp_tx_lock);
 411		xdp_stat = &rx_ring->rx_stats.xdp_tx;
 412		verdict = ENA_XDP_TX;
 413		break;
 414	case XDP_REDIRECT:
 415		if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) {
 416			xdp_stat = &rx_ring->rx_stats.xdp_redirect;
 417			verdict = ENA_XDP_REDIRECT;
 418			break;
 419		}
 420		trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
 421		xdp_stat = &rx_ring->rx_stats.xdp_aborted;
 422		verdict = ENA_XDP_DROP;
 423		break;
 424	case XDP_ABORTED:
 425		trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
 426		xdp_stat = &rx_ring->rx_stats.xdp_aborted;
 427		verdict = ENA_XDP_DROP;
 428		break;
 429	case XDP_DROP:
 430		xdp_stat = &rx_ring->rx_stats.xdp_drop;
 431		verdict = ENA_XDP_DROP;
 432		break;
 433	case XDP_PASS:
 434		xdp_stat = &rx_ring->rx_stats.xdp_pass;
 435		verdict = ENA_XDP_PASS;
 436		break;
 437	default:
 438		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict);
 439		xdp_stat = &rx_ring->rx_stats.xdp_invalid;
 440		verdict = ENA_XDP_DROP;
 441	}
 442
 443	ena_increase_stat(xdp_stat, 1, &rx_ring->syncp);
 444out:
 445	return verdict;
 446}
 447
 448static void ena_init_all_xdp_queues(struct ena_adapter *adapter)
 449{
 450	adapter->xdp_first_ring = adapter->num_io_queues;
 451	adapter->xdp_num_queues = adapter->num_io_queues;
 452
 453	ena_init_io_rings(adapter,
 454			  adapter->xdp_first_ring,
 455			  adapter->xdp_num_queues);
 456}
 457
 458static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter)
 459{
 460	int rc = 0;
 461
 462	rc = ena_setup_tx_resources_in_range(adapter, adapter->xdp_first_ring,
 463					     adapter->xdp_num_queues);
 464	if (rc)
 465		goto setup_err;
 466
 467	rc = ena_create_io_tx_queues_in_range(adapter,
 468					      adapter->xdp_first_ring,
 469					      adapter->xdp_num_queues);
 470	if (rc)
 471		goto create_err;
 472
 473	return 0;
 474
 475create_err:
 476	ena_free_all_io_tx_resources(adapter);
 477setup_err:
 478	return rc;
 479}
 480
 481/* Provides a way for both kernel and bpf-prog to know
 482 * more about the RX-queue a given XDP frame arrived on.
 483 */
 484static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring)
 485{
 486	int rc;
 487
 488	rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid, 0);
 489
 490	if (rc) {
 491		netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
 492			  "Failed to register xdp rx queue info. RX queue num %d rc: %d\n",
 493			  rx_ring->qid, rc);
 494		goto err;
 495	}
 496
 497	rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED,
 498					NULL);
 499
 500	if (rc) {
 501		netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
 502			  "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n",
 503			  rx_ring->qid, rc);
 504		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
 505	}
 506
 507err:
 508	return rc;
 509}
 510
 511static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring)
 512{
 513	xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq);
 514	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
 515}
 516
 517static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
 518						 struct bpf_prog *prog,
 519						 int first, int count)
 520{
 521	struct bpf_prog *old_bpf_prog;
 522	struct ena_ring *rx_ring;
 523	int i = 0;
 524
 525	for (i = first; i < count; i++) {
 526		rx_ring = &adapter->rx_ring[i];
 527		old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog);
 528
 529		if (!old_bpf_prog && prog) {
 530			ena_xdp_register_rxq_info(rx_ring);
 531			rx_ring->rx_headroom = XDP_PACKET_HEADROOM;
 532		} else if (old_bpf_prog && !prog) {
 533			ena_xdp_unregister_rxq_info(rx_ring);
 534			rx_ring->rx_headroom = NET_SKB_PAD;
 535		}
 536	}
 537}
 538
 539static void ena_xdp_exchange_program(struct ena_adapter *adapter,
 540				     struct bpf_prog *prog)
 541{
 542	struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
 543
 544	ena_xdp_exchange_program_rx_in_range(adapter,
 545					     prog,
 546					     0,
 547					     adapter->num_io_queues);
 548
 549	if (old_bpf_prog)
 550		bpf_prog_put(old_bpf_prog);
 551}
 552
 553static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter)
 554{
 555	bool was_up;
 556	int rc;
 557
 558	was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
 559
 560	if (was_up)
 561		ena_down(adapter);
 562
 563	adapter->xdp_first_ring = 0;
 564	adapter->xdp_num_queues = 0;
 565	ena_xdp_exchange_program(adapter, NULL);
 566	if (was_up) {
 567		rc = ena_up(adapter);
 568		if (rc)
 569			return rc;
 570	}
 571	return 0;
 572}
 573
 574static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf)
 575{
 576	struct ena_adapter *adapter = netdev_priv(netdev);
 577	struct bpf_prog *prog = bpf->prog;
 578	struct bpf_prog *old_bpf_prog;
 579	int rc, prev_mtu;
 580	bool is_up;
 581
 582	is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
 583	rc = ena_xdp_allowed(adapter);
 584	if (rc == ENA_XDP_ALLOWED) {
 585		old_bpf_prog = adapter->xdp_bpf_prog;
 586		if (prog) {
 587			if (!is_up) {
 588				ena_init_all_xdp_queues(adapter);
 589			} else if (!old_bpf_prog) {
 590				ena_down(adapter);
 591				ena_init_all_xdp_queues(adapter);
 592			}
 593			ena_xdp_exchange_program(adapter, prog);
 594
 595			if (is_up && !old_bpf_prog) {
 596				rc = ena_up(adapter);
 597				if (rc)
 598					return rc;
 599			}
 600		} else if (old_bpf_prog) {
 601			rc = ena_destroy_and_free_all_xdp_queues(adapter);
 602			if (rc)
 603				return rc;
 604		}
 605
 606		prev_mtu = netdev->max_mtu;
 607		netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu;
 608
 609		if (!old_bpf_prog)
 610			netif_info(adapter, drv, adapter->netdev,
 611				   "XDP program is set, changing the max_mtu from %d to %d",
 612				   prev_mtu, netdev->max_mtu);
 613
 614	} else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) {
 615		netif_err(adapter, drv, adapter->netdev,
 616			  "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on",
 617			  netdev->mtu, ENA_XDP_MAX_MTU);
 618		NL_SET_ERR_MSG_MOD(bpf->extack,
 619				   "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info");
 620		return -EINVAL;
 621	} else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) {
 622		netif_err(adapter, drv, adapter->netdev,
 623			  "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n",
 624			  adapter->num_io_queues, adapter->max_num_io_queues);
 625		NL_SET_ERR_MSG_MOD(bpf->extack,
 626				   "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info");
 627		return -EINVAL;
 628	}
 629
 630	return 0;
 631}
 632
 633/* This is the main xdp callback, it's used by the kernel to set/unset the xdp
 634 * program as well as to query the current xdp program id.
 635 */
 636static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
 637{
 638	switch (bpf->command) {
 639	case XDP_SETUP_PROG:
 640		return ena_xdp_set(netdev, bpf);
 641	default:
 642		return -EINVAL;
 643	}
 644	return 0;
 645}
 646
 647static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
 648{
 649#ifdef CONFIG_RFS_ACCEL
 650	u32 i;
 651	int rc;
 652
 653	adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues);
 654	if (!adapter->netdev->rx_cpu_rmap)
 655		return -ENOMEM;
 656	for (i = 0; i < adapter->num_io_queues; i++) {
 657		int irq_idx = ENA_IO_IRQ_IDX(i);
 658
 659		rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
 660				      pci_irq_vector(adapter->pdev, irq_idx));
 661		if (rc) {
 662			free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
 663			adapter->netdev->rx_cpu_rmap = NULL;
 664			return rc;
 665		}
 666	}
 667#endif /* CONFIG_RFS_ACCEL */
 668	return 0;
 669}
 670
 671static void ena_init_io_rings_common(struct ena_adapter *adapter,
 672				     struct ena_ring *ring, u16 qid)
 673{
 674	ring->qid = qid;
 675	ring->pdev = adapter->pdev;
 676	ring->dev = &adapter->pdev->dev;
 677	ring->netdev = adapter->netdev;
 678	ring->napi = &adapter->ena_napi[qid].napi;
 679	ring->adapter = adapter;
 680	ring->ena_dev = adapter->ena_dev;
 681	ring->per_napi_packets = 0;
 682	ring->cpu = 0;
 683	ring->numa_node = 0;
 684	ring->no_interrupt_event_cnt = 0;
 685	u64_stats_init(&ring->syncp);
 686}
 687
 688static void ena_init_io_rings(struct ena_adapter *adapter,
 689			      int first_index, int count)
 690{
 691	struct ena_com_dev *ena_dev;
 692	struct ena_ring *txr, *rxr;
 693	int i;
 694
 695	ena_dev = adapter->ena_dev;
 696
 697	for (i = first_index; i < first_index + count; i++) {
 698		txr = &adapter->tx_ring[i];
 699		rxr = &adapter->rx_ring[i];
 700
 701		/* TX common ring state */
 702		ena_init_io_rings_common(adapter, txr, i);
 703
 704		/* TX specific ring state */
 705		txr->ring_size = adapter->requested_tx_ring_size;
 706		txr->tx_max_header_size = ena_dev->tx_max_header_size;
 707		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
 708		txr->sgl_size = adapter->max_tx_sgl_size;
 709		txr->smoothed_interval =
 710			ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
 711		txr->disable_meta_caching = adapter->disable_meta_caching;
 712		spin_lock_init(&txr->xdp_tx_lock);
 713
 714		/* Don't init RX queues for xdp queues */
 715		if (!ENA_IS_XDP_INDEX(adapter, i)) {
 716			/* RX common ring state */
 717			ena_init_io_rings_common(adapter, rxr, i);
 718
 719			/* RX specific ring state */
 720			rxr->ring_size = adapter->requested_rx_ring_size;
 721			rxr->rx_copybreak = adapter->rx_copybreak;
 722			rxr->sgl_size = adapter->max_rx_sgl_size;
 723			rxr->smoothed_interval =
 724				ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
 725			rxr->empty_rx_queue = 0;
 726			rxr->rx_headroom = NET_SKB_PAD;
 727			adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 728			rxr->xdp_ring = &adapter->tx_ring[i + adapter->num_io_queues];
 729		}
 730	}
 731}
 732
 733/* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors)
 734 * @adapter: network interface device structure
 735 * @qid: queue index
 736 *
 737 * Return 0 on success, negative on failure
 738 */
 739static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
 740{
 741	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
 742	struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
 743	int size, i, node;
 744
 745	if (tx_ring->tx_buffer_info) {
 746		netif_err(adapter, ifup,
 747			  adapter->netdev, "tx_buffer_info info is not NULL");
 748		return -EEXIST;
 749	}
 750
 751	size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
 752	node = cpu_to_node(ena_irq->cpu);
 753
 754	tx_ring->tx_buffer_info = vzalloc_node(size, node);
 755	if (!tx_ring->tx_buffer_info) {
 756		tx_ring->tx_buffer_info = vzalloc(size);
 757		if (!tx_ring->tx_buffer_info)
 758			goto err_tx_buffer_info;
 759	}
 760
 761	size = sizeof(u16) * tx_ring->ring_size;
 762	tx_ring->free_ids = vzalloc_node(size, node);
 763	if (!tx_ring->free_ids) {
 764		tx_ring->free_ids = vzalloc(size);
 765		if (!tx_ring->free_ids)
 766			goto err_tx_free_ids;
 767	}
 768
 769	size = tx_ring->tx_max_header_size;
 770	tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node);
 771	if (!tx_ring->push_buf_intermediate_buf) {
 772		tx_ring->push_buf_intermediate_buf = vzalloc(size);
 773		if (!tx_ring->push_buf_intermediate_buf)
 774			goto err_push_buf_intermediate_buf;
 775	}
 776
 777	/* Req id ring for TX out of order completions */
 778	for (i = 0; i < tx_ring->ring_size; i++)
 779		tx_ring->free_ids[i] = i;
 780
 781	/* Reset tx statistics */
 782	memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats));
 783
 784	tx_ring->next_to_use = 0;
 785	tx_ring->next_to_clean = 0;
 786	tx_ring->cpu = ena_irq->cpu;
 787	tx_ring->numa_node = node;
 788	return 0;
 789
 790err_push_buf_intermediate_buf:
 791	vfree(tx_ring->free_ids);
 792	tx_ring->free_ids = NULL;
 793err_tx_free_ids:
 794	vfree(tx_ring->tx_buffer_info);
 795	tx_ring->tx_buffer_info = NULL;
 796err_tx_buffer_info:
 797	return -ENOMEM;
 798}
 799
 800/* ena_free_tx_resources - Free I/O Tx Resources per Queue
 801 * @adapter: network interface device structure
 802 * @qid: queue index
 803 *
 804 * Free all transmit software resources
 805 */
 806static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
 807{
 808	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
 809
 810	vfree(tx_ring->tx_buffer_info);
 811	tx_ring->tx_buffer_info = NULL;
 812
 813	vfree(tx_ring->free_ids);
 814	tx_ring->free_ids = NULL;
 815
 816	vfree(tx_ring->push_buf_intermediate_buf);
 817	tx_ring->push_buf_intermediate_buf = NULL;
 818}
 819
 820static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
 821					   int first_index,
 822					   int count)
 823{
 824	int i, rc = 0;
 825
 826	for (i = first_index; i < first_index + count; i++) {
 827		rc = ena_setup_tx_resources(adapter, i);
 828		if (rc)
 829			goto err_setup_tx;
 830	}
 831
 832	return 0;
 833
 834err_setup_tx:
 835
 836	netif_err(adapter, ifup, adapter->netdev,
 837		  "Tx queue %d: allocation failed\n", i);
 838
 839	/* rewind the index freeing the rings as we go */
 840	while (first_index < i--)
 841		ena_free_tx_resources(adapter, i);
 842	return rc;
 843}
 844
 845static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
 846						  int first_index, int count)
 847{
 848	int i;
 849
 850	for (i = first_index; i < first_index + count; i++)
 851		ena_free_tx_resources(adapter, i);
 852}
 853
 854/* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
 855 * @adapter: board private structure
 856 *
 857 * Free all transmit software resources
 858 */
 859static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
 860{
 861	ena_free_all_io_tx_resources_in_range(adapter,
 862					      0,
 863					      adapter->xdp_num_queues +
 864					      adapter->num_io_queues);
 865}
 866
 867/* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors)
 868 * @adapter: network interface device structure
 869 * @qid: queue index
 870 *
 871 * Returns 0 on success, negative on failure
 872 */
 873static int ena_setup_rx_resources(struct ena_adapter *adapter,
 874				  u32 qid)
 875{
 876	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
 877	struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
 878	int size, node, i;
 879
 880	if (rx_ring->rx_buffer_info) {
 881		netif_err(adapter, ifup, adapter->netdev,
 882			  "rx_buffer_info is not NULL");
 883		return -EEXIST;
 884	}
 885
 886	/* alloc extra element so in rx path
 887	 * we can always prefetch rx_info + 1
 888	 */
 889	size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1);
 890	node = cpu_to_node(ena_irq->cpu);
 891
 892	rx_ring->rx_buffer_info = vzalloc_node(size, node);
 893	if (!rx_ring->rx_buffer_info) {
 894		rx_ring->rx_buffer_info = vzalloc(size);
 895		if (!rx_ring->rx_buffer_info)
 896			return -ENOMEM;
 897	}
 898
 899	size = sizeof(u16) * rx_ring->ring_size;
 900	rx_ring->free_ids = vzalloc_node(size, node);
 901	if (!rx_ring->free_ids) {
 902		rx_ring->free_ids = vzalloc(size);
 903		if (!rx_ring->free_ids) {
 904			vfree(rx_ring->rx_buffer_info);
 905			rx_ring->rx_buffer_info = NULL;
 906			return -ENOMEM;
 907		}
 908	}
 909
 910	/* Req id ring for receiving RX pkts out of order */
 911	for (i = 0; i < rx_ring->ring_size; i++)
 912		rx_ring->free_ids[i] = i;
 913
 914	/* Reset rx statistics */
 915	memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats));
 916
 917	rx_ring->next_to_clean = 0;
 918	rx_ring->next_to_use = 0;
 919	rx_ring->cpu = ena_irq->cpu;
 920	rx_ring->numa_node = node;
 921
 922	return 0;
 923}
 924
 925/* ena_free_rx_resources - Free I/O Rx Resources
 926 * @adapter: network interface device structure
 927 * @qid: queue index
 928 *
 929 * Free all receive software resources
 930 */
 931static void ena_free_rx_resources(struct ena_adapter *adapter,
 932				  u32 qid)
 933{
 934	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
 935
 936	vfree(rx_ring->rx_buffer_info);
 937	rx_ring->rx_buffer_info = NULL;
 938
 939	vfree(rx_ring->free_ids);
 940	rx_ring->free_ids = NULL;
 941}
 942
 943/* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues
 944 * @adapter: board private structure
 945 *
 946 * Return 0 on success, negative on failure
 947 */
 948static int ena_setup_all_rx_resources(struct ena_adapter *adapter)
 949{
 950	int i, rc = 0;
 951
 952	for (i = 0; i < adapter->num_io_queues; i++) {
 953		rc = ena_setup_rx_resources(adapter, i);
 954		if (rc)
 955			goto err_setup_rx;
 956	}
 957
 958	return 0;
 959
 960err_setup_rx:
 961
 962	netif_err(adapter, ifup, adapter->netdev,
 963		  "Rx queue %d: allocation failed\n", i);
 964
 965	/* rewind the index freeing the rings as we go */
 966	while (i--)
 967		ena_free_rx_resources(adapter, i);
 968	return rc;
 969}
 970
 971/* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues
 972 * @adapter: board private structure
 973 *
 974 * Free all receive software resources
 975 */
 976static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
 977{
 978	int i;
 979
 980	for (i = 0; i < adapter->num_io_queues; i++)
 981		ena_free_rx_resources(adapter, i);
 982}
 983
 984static struct page *ena_alloc_map_page(struct ena_ring *rx_ring,
 985				       dma_addr_t *dma)
 986{
 987	struct page *page;
 988
 989	/* This would allocate the page on the same NUMA node the executing code
 990	 * is running on.
 991	 */
 992	page = dev_alloc_page();
 993	if (!page) {
 994		ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1,
 995				  &rx_ring->syncp);
 996		return ERR_PTR(-ENOSPC);
 997	}
 998
 999	/* To enable NIC-side port-mirroring, AKA SPAN port,
1000	 * we make the buffer readable from the nic as well
1001	 */
1002	*dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE,
1003			    DMA_BIDIRECTIONAL);
1004	if (unlikely(dma_mapping_error(rx_ring->dev, *dma))) {
1005		ena_increase_stat(&rx_ring->rx_stats.dma_mapping_err, 1,
1006				  &rx_ring->syncp);
1007		__free_page(page);
1008		return ERR_PTR(-EIO);
1009	}
1010
1011	return page;
1012}
1013
1014static int ena_alloc_rx_buffer(struct ena_ring *rx_ring,
1015			       struct ena_rx_buffer *rx_info)
1016{
1017	int headroom = rx_ring->rx_headroom;
1018	struct ena_com_buf *ena_buf;
1019	struct page *page;
1020	dma_addr_t dma;
1021	int tailroom;
1022
1023	/* restore page offset value in case it has been changed by device */
1024	rx_info->page_offset = headroom;
1025
1026	/* if the previously allocated page was not consumed yet, reuse it */
1027	if (unlikely(rx_info->page))
1028		return 0;
1029
1030	/* Allocate a page and DMA-map it */
1031	page = ena_alloc_map_page(rx_ring, &dma);
1032	if (unlikely(IS_ERR(page)))
1033		return PTR_ERR(page);
1034
1035	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1036		  "Allocate page %p, rx_info %p\n", page, rx_info);
1037
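	/* Reserve tailroom for struct skb_shared_info so the page can later
	 * be turned into an skb via napi_build_skb() without an extra copy.
	 */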
1038	tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1039
1040	rx_info->page = page;
1041	ena_buf = &rx_info->ena_buf;
1042	ena_buf->paddr = dma + headroom;
1043	ena_buf->len = ENA_PAGE_SIZE - headroom - tailroom;
1044
1045	return 0;
1046}
1047
1048static void ena_unmap_rx_buff(struct ena_ring *rx_ring,
1049			      struct ena_rx_buffer *rx_info)
1050{
1051	struct ena_com_buf *ena_buf = &rx_info->ena_buf;
1052
1053	dma_unmap_page(rx_ring->dev, ena_buf->paddr - rx_ring->rx_headroom,
1054		       ENA_PAGE_SIZE,
1055		       DMA_BIDIRECTIONAL);
1056}
1057
1058static void ena_free_rx_page(struct ena_ring *rx_ring,
1059			     struct ena_rx_buffer *rx_info)
1060{
1061	struct page *page = rx_info->page;
1062
1063	if (unlikely(!page)) {
1064		netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
1065			   "Trying to free unallocated buffer\n");
1066		return;
1067	}
1068
1069	ena_unmap_rx_buff(rx_ring, rx_info);
1070
1071	__free_page(page);
1072	rx_info->page = NULL;
1073}
1074
1075static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
1076{
1077	u16 next_to_use, req_id;
1078	u32 i;
1079	int rc;
1080
1081	next_to_use = rx_ring->next_to_use;
1082
1083	for (i = 0; i < num; i++) {
1084		struct ena_rx_buffer *rx_info;
1085
1086		req_id = rx_ring->free_ids[next_to_use];
1087
1088		rx_info = &rx_ring->rx_buffer_info[req_id];
1089
1090		rc = ena_alloc_rx_buffer(rx_ring, rx_info);
1091		if (unlikely(rc < 0)) {
1092			netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
1093				   "Failed to allocate buffer for rx queue %d\n",
1094				   rx_ring->qid);
1095			break;
1096		}
1097		rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
1098						&rx_info->ena_buf,
1099						req_id);
1100		if (unlikely(rc)) {
1101			netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
1102				   "Failed to add buffer for rx queue %d\n",
1103				   rx_ring->qid);
1104			break;
1105		}
1106		next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
1107						   rx_ring->ring_size);
1108	}
1109
1110	if (unlikely(i < num)) {
1111		ena_increase_stat(&rx_ring->rx_stats.refil_partial, 1,
1112				  &rx_ring->syncp);
1113		netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
1114			   "Refilled rx qid %d with only %d buffers (from %d)\n",
1115			   rx_ring->qid, i, num);
1116	}
1117
1118	/* ena_com_write_sq_doorbell issues a wmb() */
1119	if (likely(i))
1120		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
1121
1122	rx_ring->next_to_use = next_to_use;
1123
1124	return i;
1125}
1126
1127static void ena_free_rx_bufs(struct ena_adapter *adapter,
1128			     u32 qid)
1129{
1130	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
1131	u32 i;
1132
1133	for (i = 0; i < rx_ring->ring_size; i++) {
1134		struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
1135
1136		if (rx_info->page)
1137			ena_free_rx_page(rx_ring, rx_info);
1138	}
1139}
1140
1141/* ena_refill_all_rx_bufs - allocate Rx buffers for all the queues
1142 * @adapter: board private structure
1143 */
1144static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
1145{
1146	struct ena_ring *rx_ring;
1147	int i, rc, bufs_num;
1148
1149	for (i = 0; i < adapter->num_io_queues; i++) {
1150		rx_ring = &adapter->rx_ring[i];
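		/* Fill all but one descriptor (ring_size - 1), presumably so a
		 * completely full ring can be told apart from an empty one.
		 */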
1151		bufs_num = rx_ring->ring_size - 1;
1152		rc = ena_refill_rx_bufs(rx_ring, bufs_num);
1153
1154		if (unlikely(rc != bufs_num))
1155			netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
1156				   "Refilling Queue %d failed. allocated %d buffers from: %d\n",
1157				   i, rc, bufs_num);
1158	}
1159}
1160
1161static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
1162{
1163	int i;
1164
1165	for (i = 0; i < adapter->num_io_queues; i++)
1166		ena_free_rx_bufs(adapter, i);
1167}
1168
1169static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
1170			      struct ena_tx_buffer *tx_info)
1171{
1172	struct ena_com_buf *ena_buf;
1173	u32 cnt;
1174	int i;
1175
1176	ena_buf = tx_info->bufs;
1177	cnt = tx_info->num_of_bufs;
1178
1179	if (unlikely(!cnt))
1180		return;
1181
1182	if (tx_info->map_linear_data) {
1183		dma_unmap_single(tx_ring->dev,
1184				 dma_unmap_addr(ena_buf, paddr),
1185				 dma_unmap_len(ena_buf, len),
1186				 DMA_TO_DEVICE);
1187		ena_buf++;
1188		cnt--;
1189	}
1190
1191	/* unmap remaining mapped pages */
1192	for (i = 0; i < cnt; i++) {
1193		dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
1194			       dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
1195		ena_buf++;
1196	}
1197}
1198
1199/* ena_free_tx_bufs - Free Tx Buffers per Queue
1200 * @tx_ring: TX ring for which the buffers are to be freed
1201 */
1202static void ena_free_tx_bufs(struct ena_ring *tx_ring)
1203{
1204	bool print_once = true;
1205	u32 i;
1206
1207	for (i = 0; i < tx_ring->ring_size; i++) {
1208		struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
1209
1210		if (!tx_info->skb)
1211			continue;
1212
1213		if (print_once) {
1214			netif_notice(tx_ring->adapter, ifdown, tx_ring->netdev,
1215				     "Free uncompleted tx skb qid %d idx 0x%x\n",
1216				     tx_ring->qid, i);
1217			print_once = false;
1218		} else {
1219			netif_dbg(tx_ring->adapter, ifdown, tx_ring->netdev,
1220				  "Free uncompleted tx skb qid %d idx 0x%x\n",
1221				  tx_ring->qid, i);
1222		}
1223
1224		ena_unmap_tx_buff(tx_ring, tx_info);
1225
1226		dev_kfree_skb_any(tx_info->skb);
1227	}
1228	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
1229						  tx_ring->qid));
1230}
1231
1232static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
1233{
1234	struct ena_ring *tx_ring;
1235	int i;
1236
1237	for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
1238		tx_ring = &adapter->tx_ring[i];
1239		ena_free_tx_bufs(tx_ring);
1240	}
1241}
1242
1243static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
1244{
1245	u16 ena_qid;
1246	int i;
1247
1248	for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
1249		ena_qid = ENA_IO_TXQ_IDX(i);
1250		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1251	}
1252}
1253
1254static void ena_destroy_all_rx_queues(struct ena_adapter *adapter)
1255{
1256	u16 ena_qid;
1257	int i;
1258
1259	for (i = 0; i < adapter->num_io_queues; i++) {
1260		ena_qid = ENA_IO_RXQ_IDX(i);
1261		cancel_work_sync(&adapter->ena_napi[i].dim.work);
1262		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1263	}
1264}
1265
1266static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
1267{
1268	ena_destroy_all_tx_queues(adapter);
1269	ena_destroy_all_rx_queues(adapter);
1270}
1271
1272static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
1273				 struct ena_tx_buffer *tx_info, bool is_xdp)
1274{
1275	if (tx_info)
1276		netif_err(ring->adapter,
1277			  tx_done,
1278			  ring->netdev,
1279			  "tx_info doesn't have valid %s. qid %u req_id %u",
1280			   is_xdp ? "xdp frame" : "skb", ring->qid, req_id);
1281	else
1282		netif_err(ring->adapter,
1283			  tx_done,
1284			  ring->netdev,
1285			  "Invalid req_id %u in qid %u\n",
1286			  req_id, ring->qid);
1287
1288	ena_increase_stat(&ring->tx_stats.bad_req_id, 1, &ring->syncp);
1289	ena_reset_device(ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
1290
1291	return -EFAULT;
1292}
1293
1294static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
1295{
1296	struct ena_tx_buffer *tx_info;
1297
1298	tx_info = &tx_ring->tx_buffer_info[req_id];
1299	if (likely(tx_info->skb))
1300		return 0;
1301
1302	return handle_invalid_req_id(tx_ring, req_id, tx_info, false);
1303}
1304
1305static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id)
1306{
1307	struct ena_tx_buffer *tx_info;
1308
1309	tx_info = &xdp_ring->tx_buffer_info[req_id];
1310	if (likely(tx_info->xdpf))
1311		return 0;
1312
1313	return handle_invalid_req_id(xdp_ring, req_id, tx_info, true);
1314}
1315
1316static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
1317{
1318	struct netdev_queue *txq;
1319	bool above_thresh;
1320	u32 tx_bytes = 0;
1321	u32 total_done = 0;
1322	u16 next_to_clean;
1323	u16 req_id;
1324	int tx_pkts = 0;
1325	int rc;
1326
1327	next_to_clean = tx_ring->next_to_clean;
1328	txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid);
1329
1330	while (tx_pkts < budget) {
1331		struct ena_tx_buffer *tx_info;
1332		struct sk_buff *skb;
1333
1334		rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
1335						&req_id);
1336		if (rc) {
1337			if (unlikely(rc == -EINVAL))
1338				handle_invalid_req_id(tx_ring, req_id, NULL,
1339						      false);
1340			break;
1341		}
1342
1343		/* validate that the request id points to a valid skb */
1344		rc = validate_tx_req_id(tx_ring, req_id);
1345		if (rc)
1346			break;
1347
1348		tx_info = &tx_ring->tx_buffer_info[req_id];
1349		skb = tx_info->skb;
1350
1351		/* prefetch skb_end_pointer() to speed up skb_shinfo(skb) */
1352		prefetch(&skb->end);
1353
1354		tx_info->skb = NULL;
1355		tx_info->last_jiffies = 0;
1356
1357		ena_unmap_tx_buff(tx_ring, tx_info);
1358
1359		netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
1360			  "tx_poll: q %d skb %p completed\n", tx_ring->qid,
1361			  skb);
1362
1363		tx_bytes += skb->len;
1364		dev_kfree_skb(skb);
1365		tx_pkts++;
1366		total_done += tx_info->tx_descs;
1367
1368		tx_ring->free_ids[next_to_clean] = req_id;
1369		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
1370						     tx_ring->ring_size);
1371	}
1372
1373	tx_ring->next_to_clean = next_to_clean;
1374	ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
1375	ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
1376
1377	netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
1378
1379	netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
1380		  "tx_poll: q %d done. total pkts: %d\n",
1381		  tx_ring->qid, tx_pkts);
1382
1383	/* Make the ring's circular update visible to
1384	 * ena_start_xmit() before checking netif_queue_stopped().
1385	 */
1386	smp_mb();
1387
1388	above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1389						    ENA_TX_WAKEUP_THRESH);
1390	if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
1391		__netif_tx_lock(txq, smp_processor_id());
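		/* Re-check under the tx lock to serialize against
		 * ena_start_xmit(), which may stop or wake the queue
		 * concurrently; only wake it if there is still enough room
		 * and the device is up.
		 */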
1392		above_thresh =
1393			ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1394						     ENA_TX_WAKEUP_THRESH);
1395		if (netif_tx_queue_stopped(txq) && above_thresh &&
1396		    test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
1397			netif_tx_wake_queue(txq);
1398			ena_increase_stat(&tx_ring->tx_stats.queue_wakeup, 1,
1399					  &tx_ring->syncp);
1400		}
1401		__netif_tx_unlock(txq);
1402	}
1403
1404	return tx_pkts;
1405}
1406
1407static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag)
1408{
1409	struct sk_buff *skb;
1410
1411	if (!first_frag)
1412		skb = napi_alloc_skb(rx_ring->napi, rx_ring->rx_copybreak);
1413	else
1414		skb = napi_build_skb(first_frag, ENA_PAGE_SIZE);
1415
1416	if (unlikely(!skb)) {
1417		ena_increase_stat(&rx_ring->rx_stats.skb_alloc_fail, 1,
1418				  &rx_ring->syncp);
1419
1420		netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
1421			  "Failed to allocate skb. first_frag %s\n",
1422			  first_frag ? "provided" : "not provided");
1423		return NULL;
1424	}
1425
1426	return skb;
1427}
1428
1429static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
1430				  struct ena_com_rx_buf_info *ena_bufs,
1431				  u32 descs,
1432				  u16 *next_to_clean)
1433{
1434	struct ena_rx_buffer *rx_info;
1435	struct ena_adapter *adapter;
1436	u16 len, req_id, buf = 0;
1437	struct sk_buff *skb;
1438	void *page_addr;
1439	u32 page_offset;
1440	void *data_addr;
1441
1442	len = ena_bufs[buf].len;
1443	req_id = ena_bufs[buf].req_id;
1444
1445	rx_info = &rx_ring->rx_buffer_info[req_id];
1446
1447	if (unlikely(!rx_info->page)) {
1448		adapter = rx_ring->adapter;
1449		netif_err(adapter, rx_err, rx_ring->netdev,
1450			  "Page is NULL. qid %u req_id %u\n", rx_ring->qid, req_id);
1451		ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1, &rx_ring->syncp);
1452		ena_reset_device(adapter, ENA_REGS_RESET_INV_RX_REQ_ID);
1453		return NULL;
1454	}
1455
1456	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1457		  "rx_info %p page %p\n",
1458		  rx_info, rx_info->page);
1459
1460	/* save virt address of first buffer */
1461	page_addr = page_address(rx_info->page);
1462	page_offset = rx_info->page_offset;
1463	data_addr = page_addr + page_offset;
1464
1465	prefetch(data_addr);
1466
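	/* Small packets are copied into a freshly allocated skb so the mapped
	 * page stays in place and can be reused; larger packets are unmapped
	 * and the page itself is attached to the skb.
	 */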
1467	if (len <= rx_ring->rx_copybreak) {
1468		skb = ena_alloc_skb(rx_ring, NULL);
1469		if (unlikely(!skb))
1470			return NULL;
1471
1472		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1473			  "RX allocated small packet. len %d. data_len %d\n",
1474			  skb->len, skb->data_len);
1475
1476		/* sync this buffer for CPU use */
1477		dma_sync_single_for_cpu(rx_ring->dev,
1478					dma_unmap_addr(&rx_info->ena_buf, paddr),
1479					len,
1480					DMA_FROM_DEVICE);
1481		skb_copy_to_linear_data(skb, data_addr, len);
1482		dma_sync_single_for_device(rx_ring->dev,
1483					   dma_unmap_addr(&rx_info->ena_buf, paddr),
1484					   len,
1485					   DMA_FROM_DEVICE);
1486
1487		skb_put(skb, len);
1488		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1489		rx_ring->free_ids[*next_to_clean] = req_id;
1490		*next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs,
1491						     rx_ring->ring_size);
1492		return skb;
1493	}
1494
1495	ena_unmap_rx_buff(rx_ring, rx_info);
1496
1497	skb = ena_alloc_skb(rx_ring, page_addr);
1498	if (unlikely(!skb))
1499		return NULL;
1500
1501	/* Populate skb's linear part */
1502	skb_reserve(skb, page_offset);
1503	skb_put(skb, len);
1504	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1505
1506	do {
1507		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1508			  "RX skb updated. len %d. data_len %d\n",
1509			  skb->len, skb->data_len);
1510
1511		rx_info->page = NULL;
1512
1513		rx_ring->free_ids[*next_to_clean] = req_id;
1514		*next_to_clean =
1515			ENA_RX_RING_IDX_NEXT(*next_to_clean,
1516					     rx_ring->ring_size);
1517		if (likely(--descs == 0))
1518			break;
1519
1520		buf++;
1521		len = ena_bufs[buf].len;
1522		req_id = ena_bufs[buf].req_id;
1523
1524		rx_info = &rx_ring->rx_buffer_info[req_id];
1525
1526		ena_unmap_rx_buff(rx_ring, rx_info);
1527
1528		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
1529				rx_info->page_offset, len, ENA_PAGE_SIZE);
1530
1531	} while (1);
1532
1533	return skb;
1534}
1535
1536/* ena_rx_checksum - indicate in skb if hw indicated a good cksum
1537 * @rx_ring: the ring from which the packet was received
1538 * @ena_rx_ctx: received packet context/metadata
1539 * @skb: skb currently being received and modified
1540 */
1541static void ena_rx_checksum(struct ena_ring *rx_ring,
1542				   struct ena_com_rx_ctx *ena_rx_ctx,
1543				   struct sk_buff *skb)
1544{
1545	/* Rx csum disabled */
1546	if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) {
1547		skb->ip_summed = CHECKSUM_NONE;
1548		return;
1549	}
1550
1551	/* For fragmented packets the checksum isn't valid */
1552	if (ena_rx_ctx->frag) {
1553		skb->ip_summed = CHECKSUM_NONE;
1554		return;
1555	}
1556
1557	/* if IP and error */
1558	if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
1559		     (ena_rx_ctx->l3_csum_err))) {
1560		/* ipv4 checksum error */
1561		skb->ip_summed = CHECKSUM_NONE;
1562		ena_increase_stat(&rx_ring->rx_stats.csum_bad, 1,
1563				  &rx_ring->syncp);
1564		netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
1565			  "RX IPv4 header checksum error\n");
1566		return;
1567	}
1568
1569	/* if TCP/UDP */
1570	if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1571		   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) {
1572		if (unlikely(ena_rx_ctx->l4_csum_err)) {
1573			/* TCP/UDP checksum error */
1574			ena_increase_stat(&rx_ring->rx_stats.csum_bad, 1,
1575					  &rx_ring->syncp);
1576			netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
1577				  "RX L4 checksum error\n");
1578			skb->ip_summed = CHECKSUM_NONE;
1579			return;
1580		}
1581
1582		if (likely(ena_rx_ctx->l4_csum_checked)) {
1583			skb->ip_summed = CHECKSUM_UNNECESSARY;
1584			ena_increase_stat(&rx_ring->rx_stats.csum_good, 1,
1585					  &rx_ring->syncp);
1586		} else {
1587			ena_increase_stat(&rx_ring->rx_stats.csum_unchecked, 1,
1588					  &rx_ring->syncp);
1589			skb->ip_summed = CHECKSUM_NONE;
1590		}
1591	} else {
1592		skb->ip_summed = CHECKSUM_NONE;
1594	}
1596}
1597
1598static void ena_set_rx_hash(struct ena_ring *rx_ring,
1599			    struct ena_com_rx_ctx *ena_rx_ctx,
1600			    struct sk_buff *skb)
1601{
1602	enum pkt_hash_types hash_type;
1603
1604	if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) {
1605		if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1606			   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)))
1607
1608			hash_type = PKT_HASH_TYPE_L4;
1609		else
1610			hash_type = PKT_HASH_TYPE_NONE;
1611
1612		/* Override hash type if the packet is fragmented */
1613		if (ena_rx_ctx->frag)
1614			hash_type = PKT_HASH_TYPE_NONE;
1615
1616		skb_set_hash(skb, ena_rx_ctx->hash, hash_type);
1617	}
1618}
1619
1620static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp)
1621{
1622	struct ena_rx_buffer *rx_info;
1623	int ret;
1624
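
	/* Build an xdp_buff around the first Rx buffer and run the attached
	 * XDP program on it.
	 */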
1625	rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
1626	xdp_prepare_buff(xdp, page_address(rx_info->page),
1627			 rx_info->page_offset,
1628			 rx_ring->ena_bufs[0].len, false);
1629	/* If for some reason we received a bigger packet than
1630	 * we expected, simply drop it.
1631	 */
1632	if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU))
1633		return ENA_XDP_DROP;
1634
1635	ret = ena_xdp_execute(rx_ring, xdp);
1636
1637	/* The xdp program might expand the headers */
1638	if (ret == ENA_XDP_PASS) {
1639		rx_info->page_offset = xdp->data - xdp->data_hard_start;
1640		rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data;
1641	}
1642
1643	return ret;
1644}
1645/* ena_clean_rx_irq - Cleanup RX irq
1646 * @rx_ring: RX ring to clean
1647 * @napi: napi handler
1648 * @budget: how many packets driver is allowed to clean
1649 *
1650 * Returns the number of cleaned buffers.
1651 */
1652static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
1653			    u32 budget)
1654{
1655	u16 next_to_clean = rx_ring->next_to_clean;
1656	struct ena_com_rx_ctx ena_rx_ctx;
1657	struct ena_rx_buffer *rx_info;
1658	struct ena_adapter *adapter;
1659	u32 res_budget, work_done;
1660	int rx_copybreak_pkt = 0;
1661	int refill_threshold;
1662	struct sk_buff *skb;
1663	int refill_required;
1664	struct xdp_buff xdp;
1665	int xdp_flags = 0;
1666	int total_len = 0;
1667	int xdp_verdict;
1668	int rc = 0;
1669	int i;
1670
1671	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1672		  "%s qid %d\n", __func__, rx_ring->qid);
1673	res_budget = budget;
1674	xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq);
1675
1676	do {
1677		xdp_verdict = ENA_XDP_PASS;
1678		skb = NULL;
1679		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
1680		ena_rx_ctx.max_bufs = rx_ring->sgl_size;
1681		ena_rx_ctx.descs = 0;
1682		ena_rx_ctx.pkt_offset = 0;
1683		rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
1684				    rx_ring->ena_com_io_sq,
1685				    &ena_rx_ctx);
1686		if (unlikely(rc))
1687			goto error;
1688
1689		if (unlikely(ena_rx_ctx.descs == 0))
1690			break;
1691
1692		/* First descriptor might have an offset set by the device */
1693		rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
1694		rx_info->page_offset += ena_rx_ctx.pkt_offset;
1695
1696		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1697			  "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
1698			  rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
1699			  ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
1700
1701		if (ena_xdp_present_ring(rx_ring))
1702			xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp);
1703
1704		/* allocate skb and fill it */
1705		if (xdp_verdict == ENA_XDP_PASS)
1706			skb = ena_rx_skb(rx_ring,
1707					 rx_ring->ena_bufs,
1708					 ena_rx_ctx.descs,
1709					 &next_to_clean);
1710
1711		if (unlikely(!skb)) {
1712			for (i = 0; i < ena_rx_ctx.descs; i++) {
1713				int req_id = rx_ring->ena_bufs[i].req_id;
1714
1715				rx_ring->free_ids[next_to_clean] = req_id;
1716				next_to_clean =
1717					ENA_RX_RING_IDX_NEXT(next_to_clean,
1718							     rx_ring->ring_size);
1719
1720				/* Packet was passed for transmission, unmap it
1721				 * from the RX side.
1722				 */
1723				if (xdp_verdict & ENA_XDP_FORWARDED) {
1724					ena_unmap_rx_buff(rx_ring,
1725							  &rx_ring->rx_buffer_info[req_id]);
1726					rx_ring->rx_buffer_info[req_id].page = NULL;
1727				}
1728			}
1729			if (xdp_verdict != ENA_XDP_PASS) {
1730				xdp_flags |= xdp_verdict;
1731				total_len += ena_rx_ctx.ena_bufs[0].len;
1732				res_budget--;
1733				continue;
1734			}
1735			break;
1736		}
1737
1738		ena_rx_checksum(rx_ring, &ena_rx_ctx, skb);
1739
1740		ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb);
1741
1742		skb_record_rx_queue(skb, rx_ring->qid);
1743
1744		if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak)
1745			rx_copybreak_pkt++;
1746
1747		total_len += skb->len;
1748
1749		napi_gro_receive(napi, skb);
1750
1751		res_budget--;
1752	} while (likely(res_budget));
1753
1754	work_done = budget - res_budget;
1755	rx_ring->per_napi_packets += work_done;
1756	u64_stats_update_begin(&rx_ring->syncp);
1757	rx_ring->rx_stats.bytes += total_len;
1758	rx_ring->rx_stats.cnt += work_done;
1759	rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt;
1760	u64_stats_update_end(&rx_ring->syncp);
1761
1762	rx_ring->next_to_clean = next_to_clean;
1763
1764	refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
1765	refill_threshold =
1766		min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
1767		      ENA_RX_REFILL_THRESH_PACKET);
1768
1769	/* Optimization, try to batch new rx buffers */
1770	if (refill_required > refill_threshold) {
1771		ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
1772		ena_refill_rx_bufs(rx_ring, refill_required);
1773	}
1774
1775	if (xdp_flags & ENA_XDP_REDIRECT)
1776		xdp_do_flush_map();
1777
1778	return work_done;
1779
1780error:
1781	adapter = netdev_priv(rx_ring->netdev);
1782
1783	if (rc == -ENOSPC) {
1784		ena_increase_stat(&rx_ring->rx_stats.bad_desc_num, 1,
1785				  &rx_ring->syncp);
1786		ena_reset_device(adapter, ENA_REGS_RESET_TOO_MANY_RX_DESCS);
1787	} else {
1788		ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1,
1789				  &rx_ring->syncp);
1790		ena_reset_device(adapter, ENA_REGS_RESET_INV_RX_REQ_ID);
1791	}
1792	return 0;
1793}
1794
1795static void ena_dim_work(struct work_struct *w)
1796{
1797	struct dim *dim = container_of(w, struct dim, work);
1798	struct dim_cq_moder cur_moder =
1799		net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
1800	struct ena_napi *ena_napi = container_of(dim, struct ena_napi, dim);
1801
1802	ena_napi->rx_ring->smoothed_interval = cur_moder.usec;
1803	dim->state = DIM_START_MEASURE;
1804}
1805
1806static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
1807{
1808	struct dim_sample dim_sample;
1809	struct ena_ring *rx_ring = ena_napi->rx_ring;
1810
1811	if (!rx_ring->per_napi_packets)
1812		return;
1813
1814	rx_ring->non_empty_napi_events++;
1815
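	/* Feed the accumulated packet/byte counters into net_dim(); the
	 * moderation profile it selects is applied later from ena_dim_work().
	 */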
1816	dim_update_sample(rx_ring->non_empty_napi_events,
1817			  rx_ring->rx_stats.cnt,
1818			  rx_ring->rx_stats.bytes,
1819			  &dim_sample);
1820
1821	net_dim(&ena_napi->dim, dim_sample);
1822
1823	rx_ring->per_napi_packets = 0;
1824}
1825
1826static void ena_unmask_interrupt(struct ena_ring *tx_ring,
1827					struct ena_ring *rx_ring)
1828{
1829	u32 rx_interval = tx_ring->smoothed_interval;
1830	struct ena_eth_io_intr_reg intr_reg;
1831
1832	/* Rx ring can be NULL for XDP tx queues, which don't have an
1833	 * accompanying rx_ring pair.
1834	 */
1835	if (rx_ring)
1836		rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ?
1837			rx_ring->smoothed_interval :
1838			ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev);
1839
1840	/* Update intr register: rx intr delay,
1841	 * tx intr delay and interrupt unmask
1842	 */
1843	ena_com_update_intr_reg(&intr_reg,
1844				rx_interval,
1845				tx_ring->smoothed_interval,
1846				true);
1847
1848	ena_increase_stat(&tx_ring->tx_stats.unmask_interrupt, 1,
1849			  &tx_ring->syncp);
1850
1851	/* The MSI-X vector is shared:
1852	 * both the Tx and Rx CQs hold a pointer to it,
1853	 * so either one can be used to reach the interrupt register.
1854	 * The Tx ring is used because rx_ring is NULL for XDP queues.
1855	 */
1856	ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg);
1857}
1858
1859static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
1860					     struct ena_ring *rx_ring)
1861{
1862	int cpu = get_cpu();
1863	int numa_node;
1864
1865	/* Check only one ring since the 2 rings are running on the same cpu */
1866	if (likely(tx_ring->cpu == cpu))
1867		goto out;
1868
1869	tx_ring->cpu = cpu;
1870	if (rx_ring)
1871		rx_ring->cpu = cpu;
1872
1873	numa_node = cpu_to_node(cpu);
1874
1875	if (likely(tx_ring->numa_node == numa_node))
1876		goto out;
1877
1878	put_cpu();
1879
1880	if (numa_node != NUMA_NO_NODE) {
1881		ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
1882		tx_ring->numa_node = numa_node;
1883		if (rx_ring) {
1884			rx_ring->numa_node = numa_node;
1885			ena_com_update_numa_node(rx_ring->ena_com_io_cq,
1886						 numa_node);
1887		}
1888	}
1889
1890	return;
1891out:
1892	put_cpu();
1893}
1894
1895static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget)
1896{
1897	u32 total_done = 0;
1898	u16 next_to_clean;
1899	u32 tx_bytes = 0;
1900	int tx_pkts = 0;
1901	u16 req_id;
1902	int rc;
1903
1904	if (unlikely(!xdp_ring))
1905		return 0;
1906	next_to_clean = xdp_ring->next_to_clean;
1907
1908	while (tx_pkts < budget) {
1909		struct ena_tx_buffer *tx_info;
1910		struct xdp_frame *xdpf;
1911
1912		rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq,
1913						&req_id);
1914		if (rc) {
1915			if (unlikely(rc == -EINVAL))
1916				handle_invalid_req_id(xdp_ring, req_id, NULL,
1917						      true);
1918			break;
1919		}
1920
1921		/* validate that the request id points to a valid xdp_frame */
1922		rc = validate_xdp_req_id(xdp_ring, req_id);
1923		if (rc)
1924			break;
1925
1926		tx_info = &xdp_ring->tx_buffer_info[req_id];
1927		xdpf = tx_info->xdpf;
1928
1929		tx_info->xdpf = NULL;
1930		tx_info->last_jiffies = 0;
1931		ena_unmap_tx_buff(xdp_ring, tx_info);
1932
1933		netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
1934			  "tx_poll: q %d skb %p completed\n", xdp_ring->qid,
1935			  xdpf);
1936
1937		tx_bytes += xdpf->len;
1938		tx_pkts++;
1939		total_done += tx_info->tx_descs;
1940
1941		xdp_return_frame(xdpf);
1942		xdp_ring->free_ids[next_to_clean] = req_id;
1943		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
1944						     xdp_ring->ring_size);
1945	}
1946
1947	xdp_ring->next_to_clean = next_to_clean;
1948	ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done);
1949	ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq);
1950
1951	netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
1952		  "tx_poll: q %d done. total pkts: %d\n",
1953		  xdp_ring->qid, tx_pkts);
1954
1955	return tx_pkts;
1956}
1957
1958static int ena_io_poll(struct napi_struct *napi, int budget)
1959{
1960	struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
1961	struct ena_ring *tx_ring, *rx_ring;
1962	int tx_work_done;
1963	int rx_work_done = 0;
1964	int tx_budget;
1965	int napi_comp_call = 0;
1966	int ret;
1967
1968	tx_ring = ena_napi->tx_ring;
1969	rx_ring = ena_napi->rx_ring;
1970
1971	tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
1972
1973	if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1974	    test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
1975		napi_complete_done(napi, 0);
1976		return 0;
1977	}
1978
1979	tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
1980	/* On netpoll the budget is zero and the handler should only clean the
1981	 * tx completions.
1982	 */
1983	if (likely(budget))
1984		rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
1985
1986	/* If the device is about to reset or is down, avoid unmasking
1987	 * the interrupt and return 0 so NAPI won't reschedule.
1988	 */
1989	if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1990		     test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags))) {
1991		napi_complete_done(napi, 0);
1992		ret = 0;
1993
1994	} else if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
1995		napi_comp_call = 1;
1996
1997		/* Update the NUMA node and unmask the interrupt only when
1998		 * scheduled from interrupt context (vs. from sk_busy_loop).
1999		 */
2000		if (napi_complete_done(napi, rx_work_done) &&
2001		    READ_ONCE(ena_napi->interrupts_masked)) {
2002			smp_rmb(); /* make sure interrupts_masked is read */
2003			WRITE_ONCE(ena_napi->interrupts_masked, false);
2004			/* We apply adaptive moderation on Rx path only.
2005			 * Tx uses static interrupt moderation.
2006			 */
2007			if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
2008				ena_adjust_adaptive_rx_intr_moderation(ena_napi);
2009
2010			ena_update_ring_numa_node(tx_ring, rx_ring);
2011			ena_unmask_interrupt(tx_ring, rx_ring);
2012		}
2013
2014		ret = rx_work_done;
2015	} else {
2016		ret = budget;
2017	}
2018
2019	u64_stats_update_begin(&tx_ring->syncp);
2020	tx_ring->tx_stats.napi_comp += napi_comp_call;
2021	tx_ring->tx_stats.tx_poll++;
2022	u64_stats_update_end(&tx_ring->syncp);
2023
2024	tx_ring->tx_stats.last_napi_jiffies = jiffies;
2025
2026	return ret;
2027}
2028
2029static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data)
2030{
2031	struct ena_adapter *adapter = (struct ena_adapter *)data;
2032
2033	ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
2034
2035	/* Don't call the aenq handler before probe is done */
2036	if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)))
2037		ena_com_aenq_intr_handler(adapter->ena_dev, data);
2038
2039	return IRQ_HANDLED;
2040}
2041
2042/* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx
2043 * @irq: interrupt number
2044 * @data: pointer to a network interface private napi device structure
2045 */
2046static irqreturn_t ena_intr_msix_io(int irq, void *data)
2047{
2048	struct ena_napi *ena_napi = data;
2049
2050	/* Used to check HW health */
2051	WRITE_ONCE(ena_napi->first_interrupt, true);
2052
2053	WRITE_ONCE(ena_napi->interrupts_masked, true);
2054	smp_wmb(); /* write interrupts_masked before calling napi */
2055
2056	napi_schedule_irqoff(&ena_napi->napi);
2057
2058	return IRQ_HANDLED;
2059}
2060
2061/* Reserve a single MSI-X vector for management (admin + AENQ),
2062 * plus one vector for each potential I/O queue.
2063 * The number of potential I/O queues is the minimum of what the device
2064 * supports and the number of vCPUs.
2065 */
2066static int ena_enable_msix(struct ena_adapter *adapter)
2067{
2068	int msix_vecs, irq_cnt;
2069
2070	if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
2071		netif_err(adapter, probe, adapter->netdev,
2072			  "Error, MSI-X is already enabled\n");
2073		return -EPERM;
2074	}
2075
2076	/* Reserve the maximum number of MSI-X vectors we might need */
2077	msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
2078	netif_dbg(adapter, probe, adapter->netdev,
2079		  "Trying to enable MSI-X, vectors %d\n", msix_vecs);
2080
2081	irq_cnt = pci_alloc_irq_vectors(adapter->pdev, ENA_MIN_MSIX_VEC,
2082					msix_vecs, PCI_IRQ_MSIX);
2083
2084	if (irq_cnt < 0) {
2085		netif_err(adapter, probe, adapter->netdev,
2086			  "Failed to enable MSI-X. irq_cnt %d\n", irq_cnt);
2087		return -ENOSPC;
2088	}
2089
2090	if (irq_cnt != msix_vecs) {
2091		netif_notice(adapter, probe, adapter->netdev,
2092			     "Enable only %d MSI-X (out of %d), reduce the number of queues\n",
2093			     irq_cnt, msix_vecs);
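		/* One vector is reserved for management (admin + AENQ);
		 * the remaining ones serve the I/O queues.
		 */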
2094		adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
2095	}
2096
2097	if (ena_init_rx_cpu_rmap(adapter))
2098		netif_warn(adapter, probe, adapter->netdev,
2099			   "Failed to map IRQs to CPUs\n");
2100
2101	adapter->msix_vecs = irq_cnt;
2102	set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags);
2103
2104	return 0;
2105}
2106
2107static void ena_setup_mgmnt_intr(struct ena_adapter *adapter)
2108{
2109	u32 cpu;
2110
2111	snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
2112		 ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
2113		 pci_name(adapter->pdev));
2114	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler =
2115		ena_intr_msix_mgmnt;
2116	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
2117	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
2118		pci_irq_vector(adapter->pdev, ENA_MGMNT_IRQ_IDX);
2119	cpu = cpumask_first(cpu_online_mask);
2120	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu;
2121	cpumask_set_cpu(cpu,
2122			&adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask);
2123}
2124
2125static void ena_setup_io_intr(struct ena_adapter *adapter)
2126{
2127	struct net_device *netdev;
2128	int irq_idx, i, cpu;
2129	int io_queue_count;
2130
2131	netdev = adapter->netdev;
2132	io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2133
2134	for (i = 0; i < io_queue_count; i++) {
2135		irq_idx = ENA_IO_IRQ_IDX(i);
2136		cpu = i % num_online_cpus();
2137
2138		snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
2139			 "%s-Tx-Rx-%d", netdev->name, i);
2140		adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io;
2141		adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i];
2142		adapter->irq_tbl[irq_idx].vector =
2143			pci_irq_vector(adapter->pdev, irq_idx);
2144		adapter->irq_tbl[irq_idx].cpu = cpu;
2145
2146		cpumask_set_cpu(cpu,
2147				&adapter->irq_tbl[irq_idx].affinity_hint_mask);
2148	}
2149}
2150
2151static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
2152{
2153	unsigned long flags = 0;
2154	struct ena_irq *irq;
2155	int rc;
2156
2157	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
2158	rc = request_irq(irq->vector, irq->handler, flags, irq->name,
2159			 irq->data);
2160	if (rc) {
2161		netif_err(adapter, probe, adapter->netdev,
2162			  "Failed to request admin irq\n");
2163		return rc;
2164	}
2165
2166	netif_dbg(adapter, probe, adapter->netdev,
2167		  "Set affinity hint of mgmnt irq to 0x%lx (irq vector: %d)\n",
2168		  irq->affinity_hint_mask.bits[0], irq->vector);
2169
2170	irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
2171
2172	return rc;
2173}
2174
2175static int ena_request_io_irq(struct ena_adapter *adapter)
2176{
2177	u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2178	unsigned long flags = 0;
2179	struct ena_irq *irq;
2180	int rc = 0, i, k;
2181
2182	if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
2183		netif_err(adapter, ifup, adapter->netdev,
2184			  "Failed to request I/O IRQ: MSI-X is not enabled\n");
2185		return -EINVAL;
2186	}
2187
2188	for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
2189		irq = &adapter->irq_tbl[i];
2190		rc = request_irq(irq->vector, irq->handler, flags, irq->name,
2191				 irq->data);
2192		if (rc) {
2193			netif_err(adapter, ifup, adapter->netdev,
2194				  "Failed to request I/O IRQ. index %d rc %d\n",
2195				   i, rc);
2196			goto err;
2197		}
2198
2199		netif_dbg(adapter, ifup, adapter->netdev,
2200			  "Set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n",
2201			  i, irq->affinity_hint_mask.bits[0], irq->vector);
2202
2203		irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
2204	}
2205
2206	return rc;
2207
2208err:
2209	for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) {
2210		irq = &adapter->irq_tbl[k];
2211		free_irq(irq->vector, irq->data);
2212	}
2213
2214	return rc;
2215}
2216
2217static void ena_free_mgmnt_irq(struct ena_adapter *adapter)
2218{
2219	struct ena_irq *irq;
2220
2221	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
2222	synchronize_irq(irq->vector);
2223	irq_set_affinity_hint(irq->vector, NULL);
2224	free_irq(irq->vector, irq->data);
2225}
2226
2227static void ena_free_io_irq(struct ena_adapter *adapter)
2228{
2229	u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2230	struct ena_irq *irq;
2231	int i;
2232
2233#ifdef CONFIG_RFS_ACCEL
2234	if (adapter->msix_vecs >= 1) {
2235		free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
2236		adapter->netdev->rx_cpu_rmap = NULL;
2237	}
2238#endif /* CONFIG_RFS_ACCEL */
2239
2240	for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
2241		irq = &adapter->irq_tbl[i];
2242		irq_set_affinity_hint(irq->vector, NULL);
2243		free_irq(irq->vector, irq->data);
2244	}
2245}
2246
2247static void ena_disable_msix(struct ena_adapter *adapter)
2248{
2249	if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags))
2250		pci_free_irq_vectors(adapter->pdev);
2251}
2252
2253static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
2254{
2255	u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2256	int i;
2257
2258	if (!netif_running(adapter->netdev))
2259		return;
2260
2261	for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++)
2262		synchronize_irq(adapter->irq_tbl[i].vector);
2263}
2264
2265static void ena_del_napi_in_range(struct ena_adapter *adapter,
2266				  int first_index,
2267				  int count)
2268{
2269	int i;
2270
2271	for (i = first_index; i < first_index + count; i++) {
2272		netif_napi_del(&adapter->ena_napi[i].napi);
2273
2274		WARN_ON(!ENA_IS_XDP_INDEX(adapter, i) &&
2275			adapter->ena_napi[i].xdp_ring);
2276	}
2277}
2278
2279static void ena_init_napi_in_range(struct ena_adapter *adapter,
2280				   int first_index, int count)
2281{
2282	int i;
2283
2284	for (i = first_index; i < first_index + count; i++) {
2285		struct ena_napi *napi = &adapter->ena_napi[i];
2286
2287		netif_napi_add(adapter->netdev, &napi->napi,
2288			       ENA_IS_XDP_INDEX(adapter, i) ? ena_xdp_io_poll : ena_io_poll);
2289
2290		if (!ENA_IS_XDP_INDEX(adapter, i)) {
2291			napi->rx_ring = &adapter->rx_ring[i];
2292			napi->tx_ring = &adapter->tx_ring[i];
2293		} else {
2294			napi->xdp_ring = &adapter->tx_ring[i];
2295		}
2296		napi->qid = i;
2297	}
2298}
2299
2300static void ena_napi_disable_in_range(struct ena_adapter *adapter,
2301				      int first_index,
2302				      int count)
2303{
2304	int i;
2305
2306	for (i = first_index; i < first_index + count; i++)
2307		napi_disable(&adapter->ena_napi[i].napi);
2308}
2309
2310static void ena_napi_enable_in_range(struct ena_adapter *adapter,
2311				     int first_index,
2312				     int count)
2313{
2314	int i;
2315
2316	for (i = first_index; i < first_index + count; i++)
2317		napi_enable(&adapter->ena_napi[i].napi);
2318}
2319
2320/* Configure the Rx forwarding */
2321static int ena_rss_configure(struct ena_adapter *adapter)
2322{
2323	struct ena_com_dev *ena_dev = adapter->ena_dev;
2324	int rc;
2325
2326	/* In case the RSS table wasn't initialized by probe */
2327	if (!ena_dev->rss.tbl_log_size) {
2328		rc = ena_rss_init_default(adapter);
2329		if (rc && (rc != -EOPNOTSUPP)) {
2330			netif_err(adapter, ifup, adapter->netdev,
2331				  "Failed to init RSS rc: %d\n", rc);
2332			return rc;
2333		}
2334	}
2335
2336	/* Set indirect table */
2337	rc = ena_com_indirect_table_set(ena_dev);
2338	if (unlikely(rc && rc != -EOPNOTSUPP))
2339		return rc;
2340
2341	/* Configure hash function (if supported) */
2342	rc = ena_com_set_hash_function(ena_dev);
2343	if (unlikely(rc && (rc != -EOPNOTSUPP)))
2344		return rc;
2345
2346	/* Configure hash inputs (if supported) */
2347	rc = ena_com_set_hash_ctrl(ena_dev);
2348	if (unlikely(rc && (rc != -EOPNOTSUPP)))
2349		return rc;
2350
2351	return 0;
2352}
2353
2354static int ena_up_complete(struct ena_adapter *adapter)
2355{
2356	int rc;
2357
2358	rc = ena_rss_configure(adapter);
2359	if (rc)
2360		return rc;
2361
2362	ena_change_mtu(adapter->netdev, adapter->netdev->mtu);
2363
2364	ena_refill_all_rx_bufs(adapter);
2365
2366	/* enable transmits */
2367	netif_tx_start_all_queues(adapter->netdev);
2368
2369	ena_napi_enable_in_range(adapter,
2370				 0,
2371				 adapter->xdp_num_queues + adapter->num_io_queues);
2372
2373	return 0;
2374}
2375
2376static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
2377{
2378	struct ena_com_create_io_ctx ctx;
2379	struct ena_com_dev *ena_dev;
2380	struct ena_ring *tx_ring;
2381	u32 msix_vector;
2382	u16 ena_qid;
2383	int rc;
2384
2385	ena_dev = adapter->ena_dev;
2386
2387	tx_ring = &adapter->tx_ring[qid];
2388	msix_vector = ENA_IO_IRQ_IDX(qid);
2389	ena_qid = ENA_IO_TXQ_IDX(qid);
2390
2391	memset(&ctx, 0x0, sizeof(ctx));
2392
2393	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
2394	ctx.qid = ena_qid;
2395	ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
2396	ctx.msix_vector = msix_vector;
2397	ctx.queue_size = tx_ring->ring_size;
2398	ctx.numa_node = tx_ring->numa_node;
2399
2400	rc = ena_com_create_io_queue(ena_dev, &ctx);
2401	if (rc) {
2402		netif_err(adapter, ifup, adapter->netdev,
2403			  "Failed to create I/O TX queue num %d rc: %d\n",
2404			  qid, rc);
2405		return rc;
2406	}
2407
2408	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
2409				     &tx_ring->ena_com_io_sq,
2410				     &tx_ring->ena_com_io_cq);
2411	if (rc) {
2412		netif_err(adapter, ifup, adapter->netdev,
2413			  "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
2414			  qid, rc);
2415		ena_com_destroy_io_queue(ena_dev, ena_qid);
2416		return rc;
2417	}
2418
2419	ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
2420	return rc;
2421}
2422
2423static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
2424					    int first_index, int count)
2425{
2426	struct ena_com_dev *ena_dev = adapter->ena_dev;
2427	int rc, i;
2428
2429	for (i = first_index; i < first_index + count; i++) {
2430		rc = ena_create_io_tx_queue(adapter, i);
2431		if (rc)
2432			goto create_err;
2433	}
2434
2435	return 0;
2436
2437create_err:
2438	while (i-- > first_index)
2439		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
2440
2441	return rc;
2442}
2443
2444static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
2445{
2446	struct ena_com_dev *ena_dev;
2447	struct ena_com_create_io_ctx ctx;
2448	struct ena_ring *rx_ring;
2449	u32 msix_vector;
2450	u16 ena_qid;
2451	int rc;
2452
2453	ena_dev = adapter->ena_dev;
2454
2455	rx_ring = &adapter->rx_ring[qid];
2456	msix_vector = ENA_IO_IRQ_IDX(qid);
2457	ena_qid = ENA_IO_RXQ_IDX(qid);
2458
2459	memset(&ctx, 0x0, sizeof(ctx));
2460
2461	ctx.qid = ena_qid;
2462	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
2463	ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2464	ctx.msix_vector = msix_vector;
2465	ctx.queue_size = rx_ring->ring_size;
2466	ctx.numa_node = rx_ring->numa_node;
2467
2468	rc = ena_com_create_io_queue(ena_dev, &ctx);
2469	if (rc) {
2470		netif_err(adapter, ifup, adapter->netdev,
2471			  "Failed to create I/O RX queue num %d rc: %d\n",
2472			  qid, rc);
2473		return rc;
2474	}
2475
2476	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
2477				     &rx_ring->ena_com_io_sq,
2478				     &rx_ring->ena_com_io_cq);
2479	if (rc) {
2480		netif_err(adapter, ifup, adapter->netdev,
2481			  "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
2482			  qid, rc);
2483		goto err;
2484	}
2485
2486	ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
2487
2488	return rc;
2489err:
2490	ena_com_destroy_io_queue(ena_dev, ena_qid);
2491	return rc;
2492}
2493
2494static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
2495{
2496	struct ena_com_dev *ena_dev = adapter->ena_dev;
2497	int rc, i;
2498
2499	for (i = 0; i < adapter->num_io_queues; i++) {
2500		rc = ena_create_io_rx_queue(adapter, i);
2501		if (rc)
2502			goto create_err;
2503		INIT_WORK(&adapter->ena_napi[i].dim.work, ena_dim_work);
2504	}
2505
2506	return 0;
2507
2508create_err:
2509	while (i--) {
2510		cancel_work_sync(&adapter->ena_napi[i].dim.work);
2511		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
2512	}
2513
2514	return rc;
2515}
2516
2517static void set_io_rings_size(struct ena_adapter *adapter,
2518			      int new_tx_size,
2519			      int new_rx_size)
2520{
2521	int i;
2522
2523	for (i = 0; i < adapter->num_io_queues; i++) {
2524		adapter->tx_ring[i].ring_size = new_tx_size;
2525		adapter->rx_ring[i].ring_size = new_rx_size;
2526	}
2527}
2528
2529/* This function allows queue allocation to back off when the system is
2530 * low on memory. If there is not enough memory to allocate I/O queues,
2531 * the driver will retry with smaller queues.
2532 *
2533 * The backoff algorithm is as follows:
2534 *  1. Try to allocate the TX and RX queues.
2535 *  1.1. If successful, return success.
2536 *
2537 *  2. Halve the size of the larger of the RX and TX queues (or both, if they are the same size).
2538 *
2539 *  3. If TX or RX is now smaller than the minimum ring size (256),
2540 *  3.1. return failure.
2541 *  4. else
2542 *  4.1. go back to step 1.
2543 */
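/* Worked example: with TX=2048 and RX=1024 requested, a failed attempt halves
 * TX to 1024; if allocation keeps failing, both queues are then halved in step
 * (512, then 256) until a further halving would drop below ENA_MIN_RING_SIZE,
 * at which point the driver gives up.
 */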
2544static int create_queues_with_size_backoff(struct ena_adapter *adapter)
2545{
2546	int rc, cur_rx_ring_size, cur_tx_ring_size;
2547	int new_rx_ring_size, new_tx_ring_size;
2548
2549	/* The current queue sizes might be smaller than the requested
2550	 * ones due to past queue allocation failures.
2551	 */
2552	set_io_rings_size(adapter, adapter->requested_tx_ring_size,
2553			  adapter->requested_rx_ring_size);
2554
2555	while (1) {
2556		if (ena_xdp_present(adapter)) {
2557			rc = ena_setup_and_create_all_xdp_queues(adapter);
2558
2559			if (rc)
2560				goto err_setup_tx;
2561		}
2562		rc = ena_setup_tx_resources_in_range(adapter,
2563						     0,
2564						     adapter->num_io_queues);
2565		if (rc)
2566			goto err_setup_tx;
2567
2568		rc = ena_create_io_tx_queues_in_range(adapter,
2569						      0,
2570						      adapter->num_io_queues);
2571		if (rc)
2572			goto err_create_tx_queues;
2573
2574		rc = ena_setup_all_rx_resources(adapter);
2575		if (rc)
2576			goto err_setup_rx;
2577
2578		rc = ena_create_all_io_rx_queues(adapter);
2579		if (rc)
2580			goto err_create_rx_queues;
2581
2582		return 0;
2583
2584err_create_rx_queues:
2585		ena_free_all_io_rx_resources(adapter);
2586err_setup_rx:
2587		ena_destroy_all_tx_queues(adapter);
2588err_create_tx_queues:
2589		ena_free_all_io_tx_resources(adapter);
2590err_setup_tx:
2591		if (rc != -ENOMEM) {
2592			netif_err(adapter, ifup, adapter->netdev,
2593				  "Queue creation failed with error code %d\n",
2594				  rc);
2595			return rc;
2596		}
2597
2598		cur_tx_ring_size = adapter->tx_ring[0].ring_size;
2599		cur_rx_ring_size = adapter->rx_ring[0].ring_size;
2600
2601		netif_err(adapter, ifup, adapter->netdev,
2602			  "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
2603			  cur_tx_ring_size, cur_rx_ring_size);
2604
2605		new_tx_ring_size = cur_tx_ring_size;
2606		new_rx_ring_size = cur_rx_ring_size;
2607
2608		/* Decrease the size of the larger queue, or
2609		 * decrease both if they are the same size.
2610		 */
2611		if (cur_rx_ring_size <= cur_tx_ring_size)
2612			new_tx_ring_size = cur_tx_ring_size / 2;
2613		if (cur_rx_ring_size >= cur_tx_ring_size)
2614			new_rx_ring_size = cur_rx_ring_size / 2;
2615
2616		if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
2617		    new_rx_ring_size < ENA_MIN_RING_SIZE) {
2618			netif_err(adapter, ifup, adapter->netdev,
2619				  "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n",
2620				  ENA_MIN_RING_SIZE);
2621			return rc;
2622		}
2623
2624		netif_err(adapter, ifup, adapter->netdev,
2625			  "Retrying queue creation with sizes TX=%d, RX=%d\n",
2626			  new_tx_ring_size,
2627			  new_rx_ring_size);
2628
2629		set_io_rings_size(adapter, new_tx_ring_size,
2630				  new_rx_ring_size);
2631	}
2632}
2633
2634static int ena_up(struct ena_adapter *adapter)
2635{
2636	int io_queue_count, rc, i;
2637
2638	netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__);
2639
2640	io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2641	ena_setup_io_intr(adapter);
2642
2643	/* NAPI poll functions must be initialized before calling
2644	 * request_irq() to handle the rare case where a pending interrupt
2645	 * causes the ISR to fire immediately, before the poll function has
2646	 * been set, which would lead to a NULL dereference.
2647	 */
2648	ena_init_napi_in_range(adapter, 0, io_queue_count);
2649
2650	rc = ena_request_io_irq(adapter);
2651	if (rc)
2652		goto err_req_irq;
2653
2654	rc = create_queues_with_size_backoff(adapter);
2655	if (rc)
2656		goto err_create_queues_with_backoff;
2657
2658	rc = ena_up_complete(adapter);
2659	if (rc)
2660		goto err_up;
2661
2662	if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
2663		netif_carrier_on(adapter->netdev);
2664
2665	ena_increase_stat(&adapter->dev_stats.interface_up, 1,
2666			  &adapter->syncp);
2667
2668	set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2669
2670	/* Enable completion queues interrupt */
2671	for (i = 0; i < adapter->num_io_queues; i++)
2672		ena_unmask_interrupt(&adapter->tx_ring[i],
2673				     &adapter->rx_ring[i]);
2674
2675	/* schedule napi in case we had pending packets
2676	 * from the last time we disabled napi
2677	 */
2678	for (i = 0; i < io_queue_count; i++)
2679		napi_schedule(&adapter->ena_napi[i].napi);
2680
2681	return rc;
2682
2683err_up:
2684	ena_destroy_all_tx_queues(adapter);
2685	ena_free_all_io_tx_resources(adapter);
2686	ena_destroy_all_rx_queues(adapter);
2687	ena_free_all_io_rx_resources(adapter);
2688err_create_queues_with_backoff:
2689	ena_free_io_irq(adapter);
2690err_req_irq:
2691	ena_del_napi_in_range(adapter, 0, io_queue_count);
2692
2693	return rc;
2694}
2695
2696static void ena_down(struct ena_adapter *adapter)
2697{
2698	int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2699
2700	netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__);
2701
2702	clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2703
2704	ena_increase_stat(&adapter->dev_stats.interface_down, 1,
2705			  &adapter->syncp);
2706
2707	netif_carrier_off(adapter->netdev);
2708	netif_tx_disable(adapter->netdev);
2709
2710	/* After this point the napi handler won't enable the tx queue */
2711	ena_napi_disable_in_range(adapter, 0, io_queue_count);
2712
2713	/* After the queues are destroyed there won't be any new interrupts */
2714
2715	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
2716		int rc;
2717
2718		rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
2719		if (rc)
2720			netif_err(adapter, ifdown, adapter->netdev,
2721				  "Device reset failed\n");
2722		/* stop submitting admin commands on a device that was reset */
2723		ena_com_set_admin_running_state(adapter->ena_dev, false);
2724	}
2725
2726	ena_destroy_all_io_queues(adapter);
2727
2728	ena_disable_io_intr_sync(adapter);
2729	ena_free_io_irq(adapter);
2730	ena_del_napi_in_range(adapter, 0, io_queue_count);
2731
2732	ena_free_all_tx_bufs(adapter);
2733	ena_free_all_rx_bufs(adapter);
2734	ena_free_all_io_tx_resources(adapter);
2735	ena_free_all_io_rx_resources(adapter);
2736}
2737
2738/* ena_open - Called when a network interface is made active
2739 * @netdev: network interface device structure
2740 *
2741 * Returns 0 on success, negative value on failure
2742 *
2743 * The open entry point is called when a network interface is made
2744 * active by the system (IFF_UP).  At this point all resources needed
2745 * for transmit and receive operations are allocated, the interrupt
2746 * handler is registered with the OS, the watchdog timer is started,
2747 * and the stack is notified that the interface is ready.
2748 */
2749static int ena_open(struct net_device *netdev)
2750{
2751	struct ena_adapter *adapter = netdev_priv(netdev);
2752	int rc;
2753
2754	/* Notify the stack of the actual queue counts. */
2755	rc = netif_set_real_num_tx_queues(netdev, adapter->num_io_queues);
2756	if (rc) {
2757		netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
2758		return rc;
2759	}
2760
2761	rc = netif_set_real_num_rx_queues(netdev, adapter->num_io_queues);
2762	if (rc) {
2763		netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
2764		return rc;
2765	}
2766
2767	rc = ena_up(adapter);
2768	if (rc)
2769		return rc;
2770
2771	return rc;
2772}
2773
2774/* ena_close - Disables a network interface
2775 * @netdev: network interface device structure
2776 *
2777 * Returns 0, this is not allowed to fail
2778 *
2779 * The close entry point is called when an interface is de-activated
2780 * by the OS.  The hardware is still under the drivers control, but
2781 * needs to be disabled.  A global MAC reset is issued to stop the
2782 * hardware, and all transmit and receive resources are freed.
2783 */
2784static int ena_close(struct net_device *netdev)
2785{
2786	struct ena_adapter *adapter = netdev_priv(netdev);
2787
2788	netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
2789
2790	if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
2791		return 0;
2792
2793	if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2794		ena_down(adapter);
2795
2796	/* Check the device status and issue a reset if needed */
2797	check_for_admin_com_state(adapter);
2798	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
2799		netif_err(adapter, ifdown, adapter->netdev,
2800			  "Destroy failure, restarting device\n");
2801		ena_dump_stats_to_dmesg(adapter);
2802		/* rtnl lock already obtained in dev_ioctl() layer */
2803		ena_destroy_device(adapter, false);
2804		ena_restore_device(adapter);
2805	}
2806
2807	return 0;
2808}
2809
2810int ena_update_queue_sizes(struct ena_adapter *adapter,
2811			   u32 new_tx_size,
2812			   u32 new_rx_size)
2813{
2814	bool dev_was_up;
2815
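	/* Bring the interface down, record the new requested sizes and
	 * re-initialize the rings so that bringing it back up creates the
	 * queues with the new sizes.
	 */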
2816	dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2817	ena_close(adapter->netdev);
2818	adapter->requested_tx_ring_size = new_tx_size;
2819	adapter->requested_rx_ring_size = new_rx_size;
2820	ena_init_io_rings(adapter,
2821			  0,
2822			  adapter->xdp_num_queues +
2823			  adapter->num_io_queues);
2824	return dev_was_up ? ena_up(adapter) : 0;
2825}
2826
2827int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak)
2828{
2829	struct ena_ring *rx_ring;
2830	int i;
2831
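	/* Cap the copybreak length at the MTU and at a single mapped page;
	 * only packets that fit in one Rx buffer are candidates for copying.
	 */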
2832	if (rx_copybreak > min_t(u16, adapter->netdev->mtu, ENA_PAGE_SIZE))
2833		return -EINVAL;
2834
2835	adapter->rx_copybreak = rx_copybreak;
2836
2837	for (i = 0; i < adapter->num_io_queues; i++) {
2838		rx_ring = &adapter->rx_ring[i];
2839		rx_ring->rx_copybreak = rx_copybreak;
2840	}
2841
2842	return 0;
2843}
2844
2845int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
2846{
2847	struct ena_com_dev *ena_dev = adapter->ena_dev;
2848	int prev_channel_count;
2849	bool dev_was_up;
2850
2851	dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2852	ena_close(adapter->netdev);
2853	prev_channel_count = adapter->num_io_queues;
2854	adapter->num_io_queues = new_channel_count;
2855	if (ena_xdp_present(adapter) &&
2856	    ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) {
2857		adapter->xdp_first_ring = new_channel_count;
2858		adapter->xdp_num_queues = new_channel_count;
2859		if (prev_channel_count > new_channel_count)
2860			ena_xdp_exchange_program_rx_in_range(adapter,
2861							     NULL,
2862							     new_channel_count,
2863							     prev_channel_count);
2864		else
2865			ena_xdp_exchange_program_rx_in_range(adapter,
2866							     adapter->xdp_bpf_prog,
2867							     prev_channel_count,
2868							     new_channel_count);
2869	}
2870
2871	/* We need to destroy the rss table so that the indirection
2872	 * table will be reinitialized by ena_up()
2873	 */
2874	ena_com_rss_destroy(ena_dev);
2875	ena_init_io_rings(adapter,
2876			  0,
2877			  adapter->xdp_num_queues +
2878			  adapter->num_io_queues);
2879	return dev_was_up ? ena_open(adapter->netdev) : 0;
2880}
2881
2882static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx,
2883			struct sk_buff *skb,
2884			bool disable_meta_caching)
2885{
2886	u32 mss = skb_shinfo(skb)->gso_size;
2887	struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
2888	u8 l4_protocol = 0;
2889
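	/* Checksum offload / TSO metadata is needed only when the stack
	 * requested checksum offload (CHECKSUM_PARTIAL) or this is a TSO
	 * packet (non-zero gso_size).
	 */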
2890	if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) {
2891		ena_tx_ctx->l4_csum_enable = 1;
2892		if (mss) {
2893			ena_tx_ctx->tso_enable = 1;
2894			ena_meta->l4_hdr_len = tcp_hdr(skb)->doff;
2895			ena_tx_ctx->l4_csum_partial = 0;
2896		} else {
2897			ena_tx_ctx->tso_enable = 0;
2898			ena_meta->l4_hdr_len = 0;
2899			ena_tx_ctx->l4_csum_partial = 1;
2900		}
2901
2902		switch (ip_hdr(skb)->version) {
2903		case IPVERSION:
2904			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
2905			if (ip_hdr(skb)->frag_off & htons(IP_DF))
2906				ena_tx_ctx->df = 1;
2907			if (mss)
2908				ena_tx_ctx->l3_csum_enable = 1;
2909			l4_protocol = ip_hdr(skb)->protocol;
2910			break;
2911		case 6:
2912			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
2913			l4_protocol = ipv6_hdr(skb)->nexthdr;
2914			break;
2915		default:
2916			break;
2917		}
2918
2919		if (l4_protocol == IPPROTO_TCP)
2920			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
2921		else
2922			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
2923
2924		ena_meta->mss = mss;
2925		ena_meta->l3_hdr_len = skb_network_header_len(skb);
2926		ena_meta->l3_hdr_offset = skb_network_offset(skb);
2927		ena_tx_ctx->meta_valid = 1;
2928	} else if (disable_meta_caching) {
2929		memset(ena_meta, 0, sizeof(*ena_meta));
2930		ena_tx_ctx->meta_valid = 1;
2931	} else {
2932		ena_tx_ctx->meta_valid = 0;
2933	}
2934}
2935
2936static int ena_check_and_linearize_skb(struct ena_ring *tx_ring,
2937				       struct sk_buff *skb)
2938{
2939	int num_frags, header_len, rc;
2940
2941	num_frags = skb_shinfo(skb)->nr_frags;
2942	header_len = skb_headlen(skb);
2943
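	/* Linearize only if the fragments wouldn't fit in the Tx SGL. With
	 * exactly sgl_size fragments the skb can still be sent, provided the
	 * linear part is small enough to be pushed as the packet header.
	 */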
2944	if (num_frags < tx_ring->sgl_size)
2945		return 0;
2946
2947	if ((num_frags == tx_ring->sgl_size) &&
2948	    (header_len < tx_ring->tx_max_header_size))
2949		return 0;
2950
2951	ena_increase_stat(&tx_ring->tx_stats.linearize, 1, &tx_ring->syncp);
2952
2953	rc = skb_linearize(skb);
2954	if (unlikely(rc)) {
2955		ena_increase_stat(&tx_ring->tx_stats.linearize_failed, 1,
2956				  &tx_ring->syncp);
2957	}
2958
2959	return rc;
2960}
2961
2962static int ena_tx_map_skb(struct ena_ring *tx_ring,
2963			  struct ena_tx_buffer *tx_info,
2964			  struct sk_buff *skb,
2965			  void **push_hdr,
2966			  u16 *header_len)
2967{
2968	struct ena_adapter *adapter = tx_ring->adapter;
2969	struct ena_com_buf *ena_buf;
2970	dma_addr_t dma;
2971	u32 skb_head_len, frag_len, last_frag;
2972	u16 push_len = 0;
2973	u16 delta = 0;
2974	int i = 0;
2975
2976	skb_head_len = skb_headlen(skb);
2977	tx_info->skb = skb;
2978	ena_buf = tx_info->bufs;
2979
2980	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2981		/* When the device is in LLQ mode, the driver copies the
2982		 * header into the device memory space.
2983		 * The ena_com layer assumes the header is in a linear
2984		 * memory space.
2985		 * This assumption might be wrong since part of the header
2986		 * can be in the fragmented buffers.
2987		 * Use skb_header_pointer to make sure the header is in a
2988		 * linear memory space.
2989		 */
2990
2991		push_len = min_t(u32, skb->len, tx_ring->tx_max_header_size);
2992		*push_hdr = skb_header_pointer(skb, 0, push_len,
2993					       tx_ring->push_buf_intermediate_buf);
2994		*header_len = push_len;
2995		if (unlikely(skb->data != *push_hdr)) {
2996			ena_increase_stat(&tx_ring->tx_stats.llq_buffer_copy, 1,
2997					  &tx_ring->syncp);
2998
2999			delta = push_len - skb_head_len;
3000		}
3001	} else {
3002		*push_hdr = NULL;
3003		*header_len = min_t(u32, skb_head_len,
3004				    tx_ring->tx_max_header_size);
3005	}
3006
3007	netif_dbg(adapter, tx_queued, adapter->netdev,
3008		  "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
3009		  *push_hdr, push_len);
3010
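	/* Map whatever remains of the linear part beyond the pushed header;
	 * the paged fragments are mapped below.
	 */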
3011	if (skb_head_len > push_len) {
3012		dma = dma_map_single(tx_ring->dev, skb->data + push_len,
3013				     skb_head_len - push_len, DMA_TO_DEVICE);
3014		if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
3015			goto error_report_dma_error;
3016
3017		ena_buf->paddr = dma;
3018		ena_buf->len = skb_head_len - push_len;
3019
3020		ena_buf++;
3021		tx_info->num_of_bufs++;
3022		tx_info->map_linear_data = 1;
3023	} else {
3024		tx_info->map_linear_data = 0;
3025	}
3026
3027	last_frag = skb_shinfo(skb)->nr_frags;
3028
3029	for (i = 0; i < last_frag; i++) {
3030		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
3031
3032		frag_len = skb_frag_size(frag);
3033
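		/* The first 'delta' bytes of the fragments were already copied
		 * into the pushed header, so skip them when mapping.
		 */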
3034		if (unlikely(delta >= frag_len)) {
3035			delta -= frag_len;
3036			continue;
3037		}
3038
3039		dma = skb_frag_dma_map(tx_ring->dev, frag, delta,
3040				       frag_len - delta, DMA_TO_DEVICE);
3041		if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
3042			goto error_report_dma_error;
3043
3044		ena_buf->paddr = dma;
3045		ena_buf->len = frag_len - delta;
3046		ena_buf++;
3047		tx_info->num_of_bufs++;
3048		delta = 0;
3049	}
3050
3051	return 0;
3052
3053error_report_dma_error:
3054	ena_increase_stat(&tx_ring->tx_stats.dma_mapping_err, 1,
3055			  &tx_ring->syncp);
3056	netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map skb\n");
3057
3058	tx_info->skb = NULL;
3059
3060	tx_info->num_of_bufs += i;
3061	ena_unmap_tx_buff(tx_ring, tx_info);
3062
3063	return -EINVAL;
3064}
3065
3066/* Called with netif_tx_lock. */
3067static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
3068{
3069	struct ena_adapter *adapter = netdev_priv(dev);
3070	struct ena_tx_buffer *tx_info;
3071	struct ena_com_tx_ctx ena_tx_ctx;
3072	struct ena_ring *tx_ring;
3073	struct netdev_queue *txq;
3074	void *push_hdr;
3075	u16 next_to_use, req_id, header_len;
3076	int qid, rc;
3077
3078	netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
3079	/* Determine which Tx ring the skb will be placed on */
3080	qid = skb_get_queue_mapping(skb);
3081	tx_ring = &adapter->tx_ring[qid];
3082	txq = netdev_get_tx_queue(dev, qid);
3083
3084	rc = ena_check_and_linearize_skb(tx_ring, skb);
3085	if (unlikely(rc))
3086		goto error_drop_packet;
3087
3088	skb_tx_timestamp(skb);
3089
3090	next_to_use = tx_ring->next_to_use;
3091	req_id = tx_ring->free_ids[next_to_use];
3092	tx_info = &tx_ring->tx_buffer_info[req_id];
3093	tx_info->num_of_bufs = 0;
3094
3095	WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
3096
3097	rc = ena_tx_map_skb(tx_ring, tx_info, skb, &push_hdr, &header_len);
3098	if (unlikely(rc))
3099		goto error_drop_packet;
3100
3101	memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
3102	ena_tx_ctx.ena_bufs = tx_info->bufs;
3103	ena_tx_ctx.push_header = push_hdr;
3104	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
3105	ena_tx_ctx.req_id = req_id;
3106	ena_tx_ctx.header_len = header_len;
3107
3108	/* set flags and meta data */
3109	ena_tx_csum(&ena_tx_ctx, skb, tx_ring->disable_meta_caching);
3110
3111	rc = ena_xmit_common(dev,
3112			     tx_ring,
3113			     tx_info,
3114			     &ena_tx_ctx,
3115			     next_to_use,
3116			     skb->len);
3117	if (rc)
3118		goto error_unmap_dma;
3119
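	/* Account the transmitted bytes for BQL (byte queue limits) */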
3120	netdev_tx_sent_queue(txq, skb->len);
3121
3122	/* Stop the queue when no more space is available. The packet can use
3123	 * up to sgl_size + 2 descriptors: one for the meta descriptor and one
3124	 * for the header (if the header is larger than tx_max_header_size).
3125	 */
3126	if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
3127						   tx_ring->sgl_size + 2))) {
3128		netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
3129			  __func__, qid);
3130
3131		netif_tx_stop_queue(txq);
3132		ena_increase_stat(&tx_ring->tx_stats.queue_stop, 1,
3133				  &tx_ring->syncp);
3134
3135		/* There is a rare condition where this function decides to
3136		 * stop the queue but meanwhile clean_tx_irq updates
3137		 * next_to_completion and terminates.
3138		 * The queue would then remain stopped forever.
3139		 * To solve this issue add an mb() to make sure that the
3140		 * netif_tx_stop_queue() write is visible before checking if
3141		 * there is additional space in the queue.
3142		 */
3143		smp_mb();
3144
3145		if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
3146						 ENA_TX_WAKEUP_THRESH)) {
3147			netif_tx_wake_queue(txq);
3148			ena_increase_stat(&tx_ring->tx_stats.queue_wakeup, 1,
3149					  &tx_ring->syncp);
3150		}
3151	}
3152
3153	if (netif_xmit_stopped(txq) || !netdev_xmit_more())
3154		/* trigger the dma engine. ena_ring_tx_doorbell()
3155		 * calls a memory barrier inside it.
3156		 */
3157		ena_ring_tx_doorbell(tx_ring);
3158
3159	return NETDEV_TX_OK;
3160
3161error_unmap_dma:
3162	ena_unmap_tx_buff(tx_ring, tx_info);
3163	tx_info->skb = NULL;
3164
3165error_drop_packet:
3166	dev_kfree_skb(skb);
3167	return NETDEV_TX_OK;
3168}
3169
3170static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
3171			    struct net_device *sb_dev)
3172{
3173	u16 qid;
3174	/* We suspect that this is good for in-kernel network services that
3175	 * want to loop an incoming skb's Rx queue back to Tx. With normal
3176	 * user-generated traffic we will most probably not get here.
3177	 */
3178	if (skb_rx_queue_recorded(skb))
3179		qid = skb_get_rx_queue(skb);
3180	else
3181		qid = netdev_pick_tx(dev, skb, NULL);
3182
3183	return qid;
3184}
3185
3186static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
3187{
3188	struct device *dev = &pdev->dev;
3189	struct ena_admin_host_info *host_info;
3190	int rc;
3191
3192	/* Allocate only the host info */
3193	rc = ena_com_allocate_host_info(ena_dev);
3194	if (rc) {
3195		dev_err(dev, "Cannot allocate host info\n");
3196		return;
3197	}
3198
3199	host_info = ena_dev->host_attr.host_info;
3200
3201	host_info->bdf = (pdev->bus->number << 8) | pdev->devfn;
3202	host_info->os_type = ENA_ADMIN_OS_LINUX;
3203	host_info->kernel_ver = LINUX_VERSION_CODE;
3204	strscpy(host_info->kernel_ver_str, utsname()->version,
3205		sizeof(host_info->kernel_ver_str) - 1);
3206	host_info->os_dist = 0;
3207	strncpy(host_info->os_dist_str, utsname()->release,
3208		sizeof(host_info->os_dist_str) - 1);
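	/* Pack the driver generation numbers and the module type character
	 * ('K', presumably marking the in-kernel driver) into the single
	 * driver_version word reported to the device.
	 */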
3209	host_info->driver_version =
3210		(DRV_MODULE_GEN_MAJOR) |
3211		(DRV_MODULE_GEN_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
3212		(DRV_MODULE_GEN_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) |
3213		("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT);
3214	host_info->num_cpus = num_online_cpus();
3215
3216	host_info->driver_supported_features =
3217		ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
3218		ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK |
3219		ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK |
3220		ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK;
3221
3222	rc = ena_com_set_host_attributes(ena_dev);
3223	if (rc) {
3224		if (rc == -EOPNOTSUPP)
3225			dev_warn(dev, "Cannot set host attributes\n");
3226		else
3227			dev_err(dev, "Cannot set host attributes\n");
3228
3229		goto err;
3230	}
3231
3232	return;
3233
3234err:
3235	ena_com_delete_host_info(ena_dev);
3236}
3237
3238static void ena_config_debug_area(struct ena_adapter *adapter)
3239{
3240	u32 debug_area_size;
3241	int rc, ss_count;
3242
3243	ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS);
3244	if (ss_count <= 0) {
3245		netif_err(adapter, drv, adapter->netdev,
3246			  "SS count is negative\n");
3247		return;
3248	}
3249
3250	/* Allocate 32 bytes for each string and 64 bits for each value */
3251	debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;
3252
3253	rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size);
3254	if (rc) {
3255		netif_err(adapter, drv, adapter->netdev,
3256			  "Cannot allocate debug area\n");
3257		return;
3258	}
3259
3260	rc = ena_com_set_host_attributes(adapter->ena_dev);
3261	if (rc) {
3262		if (rc == -EOPNOTSUPP)
3263			netif_warn(adapter, drv, adapter->netdev,
3264				   "Cannot set host attributes\n");
3265		else
3266			netif_err(adapter, drv, adapter->netdev,
3267				  "Cannot set host attributes\n");
3268		goto err;
3269	}
3270
3271	return;
3272err:
3273	ena_com_delete_debug_area(adapter->ena_dev);
3274}
3275
3276int ena_update_hw_stats(struct ena_adapter *adapter)
3277{
3278	int rc;
3279
3280	rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_stats);
3281	if (rc) {
3282		netdev_err(adapter->netdev, "Failed to get ENI stats\n");
3283		return rc;
3284	}
3285
3286	return 0;
3287}
3288
3289static void ena_get_stats64(struct net_device *netdev,
3290			    struct rtnl_link_stats64 *stats)
3291{
3292	struct ena_adapter *adapter = netdev_priv(netdev);
3293	struct ena_ring *rx_ring, *tx_ring;
3294	unsigned int start;
3295	u64 rx_drops;
3296	u64 tx_drops;
3297	int i;
3298
3299	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3300		return;
3301
3302	for (i = 0; i < adapter->num_io_queues; i++) {
3303		u64 bytes, packets;
3304
3305		tx_ring = &adapter->tx_ring[i];
3306
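		/* Snapshot the per-ring counters; retry if the writer updated
		 * the u64_stats seqcount in the meantime.
		 */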
3307		do {
3308			start = u64_stats_fetch_begin(&tx_ring->syncp);
3309			packets = tx_ring->tx_stats.cnt;
3310			bytes = tx_ring->tx_stats.bytes;
3311		} while (u64_stats_fetch_retry(&tx_ring->syncp, start));
3312
3313		stats->tx_packets += packets;
3314		stats->tx_bytes += bytes;
3315
3316		rx_ring = &adapter->rx_ring[i];
3317
3318		do {
3319			start = u64_stats_fetch_begin(&rx_ring->syncp);
3320			packets = rx_ring->rx_stats.cnt;
3321			bytes = rx_ring->rx_stats.bytes;
3322		} while (u64_stats_fetch_retry(&rx_ring->syncp, start));
3323
3324		stats->rx_packets += packets;
3325		stats->rx_bytes += bytes;
3326	}
3327
3328	do {
3329		start = u64_stats_fetch_begin(&adapter->syncp);
3330		rx_drops = adapter->dev_stats.rx_drops;
3331		tx_drops = adapter->dev_stats.tx_drops;
3332	} while (u64_stats_fetch_retry(&adapter->syncp, start));
3333
3334	stats->rx_dropped = rx_drops;
3335	stats->tx_dropped = tx_drops;
3336
3337	stats->multicast = 0;
3338	stats->collisions = 0;
3339
3340	stats->rx_length_errors = 0;
3341	stats->rx_crc_errors = 0;
3342	stats->rx_frame_errors = 0;
3343	stats->rx_fifo_errors = 0;
3344	stats->rx_missed_errors = 0;
3345	stats->tx_window_errors = 0;
3346
3347	stats->rx_errors = 0;
3348	stats->tx_errors = 0;
3349}
3350
3351static const struct net_device_ops ena_netdev_ops = {
3352	.ndo_open		= ena_open,
3353	.ndo_stop		= ena_close,
3354	.ndo_start_xmit		= ena_start_xmit,
3355	.ndo_select_queue	= ena_select_queue,
3356	.ndo_get_stats64	= ena_get_stats64,
3357	.ndo_tx_timeout		= ena_tx_timeout,
3358	.ndo_change_mtu		= ena_change_mtu,
3359	.ndo_set_mac_address	= NULL,
3360	.ndo_validate_addr	= eth_validate_addr,
3361	.ndo_bpf		= ena_xdp,
3362	.ndo_xdp_xmit		= ena_xdp_xmit,
3363};
3364
3365static int ena_device_validate_params(struct ena_adapter *adapter,
3366				      struct ena_com_dev_get_features_ctx *get_feat_ctx)
3367{
3368	struct net_device *netdev = adapter->netdev;
3369	int rc;
3370
3371	rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr,
3372			      adapter->mac_addr);
3373	if (!rc) {
3374		netif_err(adapter, drv, netdev,
3375			  "Error, mac address are different\n");
3376		return -EINVAL;
3377	}
3378
3379	if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) {
3380		netif_err(adapter, drv, netdev,
3381			  "Error, device max mtu is smaller than netdev MTU\n");
3382		return -EINVAL;
3383	}
3384
3385	return 0;
3386}
3387
3388static void set_default_llq_configurations(struct ena_llq_configurations *llq_config)
3389{
3390	llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
3391	llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
3392	llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
3393	llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
3394	llq_config->llq_ring_entry_size_value = 128;
3395}
3396
3397static int ena_set_queues_placement_policy(struct pci_dev *pdev,
3398					   struct ena_com_dev *ena_dev,
3399					   struct ena_admin_feature_llq_desc *llq,
3400					   struct ena_llq_configurations *llq_default_configurations)
3401{
3402	int rc;
3403	u32 llq_feature_mask;
3404
3405	llq_feature_mask = 1 << ENA_ADMIN_LLQ;
3406	if (!(ena_dev->supported_features & llq_feature_mask)) {
3407		dev_warn(&pdev->dev,
3408			"LLQ is not supported Fallback to host mode policy.\n");
3409		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3410		return 0;
3411	}
3412
3413	rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
3414	if (unlikely(rc)) {
3415		dev_err(&pdev->dev,
3416			"Failed to configure the device mode.  Fallback to host mode policy.\n");
3417		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3418	}
3419
3420	return 0;
3421}
3422
3423static int ena_map_llq_mem_bar(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
3424			       int bars)
3425{
3426	bool has_mem_bar = !!(bars & BIT(ENA_MEM_BAR));
3427
3428	if (!has_mem_bar) {
3429		if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
3430			dev_err(&pdev->dev,
3431				"ENA device does not expose LLQ bar. Fallback to host mode policy.\n");
3432			ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3433		}
3434
3435		return 0;
3436	}
3437
3438	ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
3439					   pci_resource_start(pdev, ENA_MEM_BAR),
3440					   pci_resource_len(pdev, ENA_MEM_BAR));
3441
3442	if (!ena_dev->mem_bar)
3443		return -EFAULT;
3444
3445	return 0;
3446}
3447
3448static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
3449			   struct ena_com_dev_get_features_ctx *get_feat_ctx,
3450			   bool *wd_state)
3451{
3452	struct ena_llq_configurations llq_config;
3453	struct device *dev = &pdev->dev;
3454	bool readless_supported;
3455	u32 aenq_groups;
3456	int dma_width;
3457	int rc;
3458
3459	rc = ena_com_mmio_reg_read_request_init(ena_dev);
3460	if (rc) {
3461		dev_err(dev, "Failed to init mmio read less\n");
3462		return rc;
3463	}
3464
3465	/* The PCIe configuration space revision id indicates whether mmio
3466	 * register read is disabled
3467	 */
3468	readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ);
3469	ena_com_set_mmio_read_mode(ena_dev, readless_supported);
3470
3471	rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
3472	if (rc) {
3473		dev_err(dev, "Can not reset device\n");
3474		goto err_mmio_read_less;
3475	}
3476
3477	rc = ena_com_validate_version(ena_dev);
3478	if (rc) {
3479		dev_err(dev, "Device version is too low\n");
3480		goto err_mmio_read_less;
3481	}
3482
3483	dma_width = ena_com_get_dma_width(ena_dev);
3484	if (dma_width < 0) {
3485		dev_err(dev, "Invalid dma width value %d", dma_width);
3486		rc = dma_width;
3487		goto err_mmio_read_less;
3488	}
3489
3490	rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_width));
3491	if (rc) {
3492		dev_err(dev, "dma_set_mask_and_coherent failed %d\n", rc);
3493		goto err_mmio_read_less;
3494	}
3495
3496	/* ENA admin level init */
3497	rc = ena_com_admin_init(ena_dev, &aenq_handlers);
3498	if (rc) {
3499		dev_err(dev,
3500			"Can not initialize ena admin queue with device\n");
3501		goto err_mmio_read_less;
3502	}
3503
3504	/* To enable the MSI-X interrupts the driver needs to know the number
3505	 * of queues, so it uses polling mode to retrieve this
3506	 * information
3507	 */
3508	ena_com_set_admin_polling_mode(ena_dev, true);
3509
3510	ena_config_host_info(ena_dev, pdev);
3511
3512	/* Get Device Attributes */
3513	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
3514	if (rc) {
3515		dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc);
3516		goto err_admin_init;
3517	}
3518
3519	/* Try to turn on all the available aenq groups */
3520	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
3521		BIT(ENA_ADMIN_FATAL_ERROR) |
3522		BIT(ENA_ADMIN_WARNING) |
3523		BIT(ENA_ADMIN_NOTIFICATION) |
3524		BIT(ENA_ADMIN_KEEP_ALIVE);
3525
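	/* Advertise only the AENQ groups that both the driver and the device support */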
3526	aenq_groups &= get_feat_ctx->aenq.supported_groups;
3527
3528	rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
3529	if (rc) {
3530		dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc);
3531		goto err_admin_init;
3532	}
3533
3534	*wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
3535
3536	set_default_llq_configurations(&llq_config);
3537
3538	rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq,
3539					     &llq_config);
3540	if (rc) {
3541		dev_err(dev, "ENA device init failed\n");
3542		goto err_admin_init;
3543	}
3544
3545	return 0;
3546
3547err_admin_init:
3548	ena_com_delete_host_info(ena_dev);
3549	ena_com_admin_destroy(ena_dev);
3550err_mmio_read_less:
3551	ena_com_mmio_reg_read_request_destroy(ena_dev);
3552
3553	return rc;
3554}
3555
3556static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
3557{
3558	struct ena_com_dev *ena_dev = adapter->ena_dev;
3559	struct device *dev = &adapter->pdev->dev;
3560	int rc;
3561
3562	rc = ena_enable_msix(adapter);
3563	if (rc) {
3564		dev_err(dev, "Can not reserve msix vectors\n");
3565		return rc;
3566	}
3567
3568	ena_setup_mgmnt_intr(adapter);
3569
3570	rc = ena_request_mgmnt_irq(adapter);
3571	if (rc) {
3572		dev_err(dev, "Can not setup management interrupts\n");
3573		goto err_disable_msix;
3574	}
3575
3576	ena_com_set_admin_polling_mode(ena_dev, false);
3577
3578	ena_com_admin_aenq_enable(ena_dev);
3579
3580	return 0;
3581
3582err_disable_msix:
3583	ena_disable_msix(adapter);
3584
3585	return rc;
3586}
3587
3588static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
3589{
3590	struct net_device *netdev = adapter->netdev;
3591	struct ena_com_dev *ena_dev = adapter->ena_dev;
3592	bool dev_up;
3593
3594	if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
3595		return;
3596
3597	netif_carrier_off(netdev);
3598
3599	del_timer_sync(&adapter->timer_service);
3600
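	/* Remember whether the interface was up so ena_restore_device() can
	 * bring it back up after the reset.
	 */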
3601	dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
3602	adapter->dev_up_before_reset = dev_up;
3603	if (!graceful)
3604		ena_com_set_admin_running_state(ena_dev, false);
3605
3606	if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3607		ena_down(adapter);
3608
3609	/* Stop the device from sending AENQ events (in case the reset flag is
3610	 * set and the device is up, ena_down() has already reset the device).
3611	 */
3612	if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
3613		ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
3614
3615	ena_free_mgmnt_irq(adapter);
3616
3617	ena_disable_msix(adapter);
3618
3619	ena_com_abort_admin_commands(ena_dev);
3620
3621	ena_com_wait_for_abort_completion(ena_dev);
3622
3623	ena_com_admin_destroy(ena_dev);
3624
3625	ena_com_mmio_reg_read_request_destroy(ena_dev);
3626
3627	/* return reset reason to default value */
3628	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3629
3630	clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3631	clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3632}
3633
3634static int ena_restore_device(struct ena_adapter *adapter)
3635{
3636	struct ena_com_dev_get_features_ctx get_feat_ctx;
3637	struct ena_com_dev *ena_dev = adapter->ena_dev;
3638	struct pci_dev *pdev = adapter->pdev;
3639	bool wd_state;
3640	int rc;
3641
3642	set_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3643	rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
3644	if (rc) {
3645		dev_err(&pdev->dev, "Can not initialize device\n");
3646		goto err;
3647	}
3648	adapter->wd_state = wd_state;
3649
3650	rc = ena_device_validate_params(adapter, &get_feat_ctx);
3651	if (rc) {
3652		dev_err(&pdev->dev, "Validation of device parameters failed\n");
3653		goto err_device_destroy;
3654	}
3655
3656	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3657	if (rc) {
3658		dev_err(&pdev->dev, "Enable MSI-X failed\n");
3659		goto err_device_destroy;
3660	}
3661	/* If the interface was up before the reset, bring it up */
3662	if (adapter->dev_up_before_reset) {
3663		rc = ena_up(adapter);
3664		if (rc) {
3665			dev_err(&pdev->dev, "Failed to create I/O queues\n");
3666			goto err_disable_msix;
3667		}
3668	}
3669
3670	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3671
3672	clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3673	if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
3674		netif_carrier_on(adapter->netdev);
3675
3676	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
3677	adapter->last_keep_alive_jiffies = jiffies;
3678
3679	return rc;
3680err_disable_msix:
3681	ena_free_mgmnt_irq(adapter);
3682	ena_disable_msix(adapter);
3683err_device_destroy:
3684	ena_com_abort_admin_commands(ena_dev);
3685	ena_com_wait_for_abort_completion(ena_dev);
3686	ena_com_admin_destroy(ena_dev);
3687	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
3688	ena_com_mmio_reg_read_request_destroy(ena_dev);
3689err:
3690	clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3691	clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3692	dev_err(&pdev->dev,
3693		"Reset attempt failed. Can not reset the device\n");
3694
3695	return rc;
3696}
3697
3698static void ena_fw_reset_device(struct work_struct *work)
3699{
3700	struct ena_adapter *adapter =
3701		container_of(work, struct ena_adapter, reset_task);
3702
3703	rtnl_lock();
3704
3705	if (likely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
3706		ena_destroy_device(adapter, false);
3707		ena_restore_device(adapter);
3708
3709		dev_err(&adapter->pdev->dev, "Device reset completed successfully\n");
3710	}
3711
3712	rtnl_unlock();
3713}
3714
3715static int check_for_rx_interrupt_queue(struct ena_adapter *adapter,
3716					struct ena_ring *rx_ring)
3717{
3718	struct ena_napi *ena_napi = container_of(rx_ring->napi, struct ena_napi, napi);
3719
3720	if (likely(READ_ONCE(ena_napi->first_interrupt)))
3721		return 0;
3722
3723	if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
3724		return 0;
3725
3726	rx_ring->no_interrupt_event_cnt++;
3727
3728	if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
3729		netif_err(adapter, rx_err, adapter->netdev,
3730			  "Potential MSIX issue on Rx side Queue = %d. Reset the device\n",
3731			  rx_ring->qid);
3732
3733		ena_reset_device(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
3734		return -EIO;
3735	}
3736
3737	return 0;
3738}
3739
3740static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
3741					  struct ena_ring *tx_ring)
3742{
3743	struct ena_napi *ena_napi = container_of(tx_ring->napi, struct ena_napi, napi);
3744	unsigned int time_since_last_napi;
3745	unsigned int missing_tx_comp_to;
3746	bool is_tx_comp_time_expired;
3747	struct ena_tx_buffer *tx_buf;
3748	unsigned long last_jiffies;
3749	u32 missed_tx = 0;
3750	int i, rc = 0;
3751
3752	for (i = 0; i < tx_ring->ring_size; i++) {
3753		tx_buf = &tx_ring->tx_buffer_info[i];
3754		last_jiffies = tx_buf->last_jiffies;
3755
3756		if (last_jiffies == 0)
3757			/* no pending Tx at this location */
3758			continue;
3759
3760		is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies +
3761			 2 * adapter->missing_tx_completion_to);
3762
3763		if (unlikely(!READ_ONCE(ena_napi->first_interrupt) && is_tx_comp_time_expired)) {
3764			/* If after the graceful period the interrupt is still
3765			 * not received, we schedule a reset
3766			 */
3767			netif_err(adapter, tx_err, adapter->netdev,
3768				  "Potential MSIX issue on Tx side Queue = %d. Reset the device\n",
3769				  tx_ring->qid);
3770			ena_reset_device(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
3771			return -EIO;
3772		}
3773
3774		is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies +
3775			adapter->missing_tx_completion_to);
3776
3777		if (unlikely(is_tx_comp_time_expired)) {
3778			if (!tx_buf->print_once) {
3779				time_since_last_napi = jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies);
3780				missing_tx_comp_to = jiffies_to_msecs(adapter->missing_tx_completion_to);
3781				netif_notice(adapter, tx_err, adapter->netdev,
3782					     "Found a Tx that wasn't completed on time, qid %d, index %d. %u usecs have passed since last napi execution. Missing Tx timeout value %u msecs\n",
3783					     tx_ring->qid, i, time_since_last_napi, missing_tx_comp_to);
3784			}
3785
3786			tx_buf->print_once = 1;
3787			missed_tx++;
3788		}
3789	}
3790
3791	if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
3792		netif_err(adapter, tx_err, adapter->netdev,
3793			  "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
3794			  missed_tx,
3795			  adapter->missing_tx_completion_threshold);
3796		ena_reset_device(adapter, ENA_REGS_RESET_MISS_TX_CMPL);
3797		rc = -EIO;
3798	}
3799
3800	ena_increase_stat(&tx_ring->tx_stats.missed_tx, missed_tx,
3801			  &tx_ring->syncp);
3802
3803	return rc;
3804}
3805
3806static void check_for_missing_completions(struct ena_adapter *adapter)
3807{
3808	struct ena_ring *tx_ring;
3809	struct ena_ring *rx_ring;
3810	int i, budget, rc;
3811	int io_queue_count;
3812
3813	io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
3814	/* Make sure the driver doesn't turn the device on/off in another process */
3815	smp_rmb();
3816
3817	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3818		return;
3819
3820	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
3821		return;
3822
3823	if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
3824		return;
3825
3826	budget = ENA_MONITORED_TX_QUEUES;
3827
3828	for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) {
3829		tx_ring = &adapter->tx_ring[i];
3830		rx_ring = &adapter->rx_ring[i];
3831
3832		rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
3833		if (unlikely(rc))
3834			return;
3835
3836		rc = !ENA_IS_XDP_INDEX(adapter, i) ?
3837			check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
3838		if (unlikely(rc))
3839			return;
3840
3841		budget--;
3842		if (!budget)
3843			break;
3844	}
3845
3846	adapter->last_monitored_tx_qid = i % io_queue_count;
3847}
3848
3849/* trigger napi schedule after 2 consecutive detections */
3850#define EMPTY_RX_REFILL 2
3851/* For the rare case where the device runs out of Rx descriptors and the
3852 * napi handler failed to refill new Rx descriptors (due to a lack of memory
3853 * for example).
3854 * This case will lead to a deadlock:
3855 * The device won't send interrupts since all the new Rx packets will be dropped
3856 * The napi handler won't allocate new Rx descriptors so the device won't
3857 * be able to send new packets.
3858 *
3859 * This scenario can happen when the kernel's vm.min_free_kbytes is too small.
3860 * It is recommended to have at least 512MB, with a minimum of 128MB for
3861 * constrained environments.
3862 *
3863 * When such a situation is detected - Reschedule napi
3864 */
3865static void check_for_empty_rx_ring(struct ena_adapter *adapter)
3866{
3867	struct ena_ring *rx_ring;
3868	int i, refill_required;
3869
3870	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3871		return;
3872
3873	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
3874		return;
3875
3876	for (i = 0; i < adapter->num_io_queues; i++) {
3877		rx_ring = &adapter->rx_ring[i];
3878
3879		refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
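		/* Needing ring_size - 1 refills means the Rx ring is
		 * effectively empty.
		 */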
3880		if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
3881			rx_ring->empty_rx_queue++;
3882
3883			if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
3884				ena_increase_stat(&rx_ring->rx_stats.empty_rx_ring, 1,
3885						  &rx_ring->syncp);
3886
3887				netif_err(adapter, drv, adapter->netdev,
3888					  "Trigger refill for ring %d\n", i);
3889
3890				napi_schedule(rx_ring->napi);
3891				rx_ring->empty_rx_queue = 0;
3892			}
3893		} else {
3894			rx_ring->empty_rx_queue = 0;
3895		}
3896	}
3897}
3898
3899/* Check for keep alive expiration */
3900static void check_for_missing_keep_alive(struct ena_adapter *adapter)
3901{
3902	unsigned long keep_alive_expired;
3903
3904	if (!adapter->wd_state)
3905		return;
3906
3907	if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3908		return;
3909
3910	keep_alive_expired = adapter->last_keep_alive_jiffies +
3911			     adapter->keep_alive_timeout;
3912	if (unlikely(time_is_before_jiffies(keep_alive_expired))) {
3913		netif_err(adapter, drv, adapter->netdev,
3914			  "Keep alive watchdog timeout.\n");
3915		ena_increase_stat(&adapter->dev_stats.wd_expired, 1,
3916				  &adapter->syncp);
3917		ena_reset_device(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
3918	}
3919}
3920
3921static void check_for_admin_com_state(struct ena_adapter *adapter)
3922{
3923	if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) {
3924		netif_err(adapter, drv, adapter->netdev,
3925			  "ENA admin queue is not in running state!\n");
3926		ena_increase_stat(&adapter->dev_stats.admin_q_pause, 1,
3927				  &adapter->syncp);
3928		ena_reset_device(adapter, ENA_REGS_RESET_ADMIN_TO);
3929	}
3930}
3931
3932static void ena_update_hints(struct ena_adapter *adapter,
3933			     struct ena_admin_ena_hw_hints *hints)
3934{
3935	struct net_device *netdev = adapter->netdev;
3936
3937	if (hints->admin_completion_tx_timeout)
3938		adapter->ena_dev->admin_queue.completion_timeout =
3939			hints->admin_completion_tx_timeout * 1000;
3940
3941	if (hints->mmio_read_timeout)
3942		/* convert to usec */
3943		adapter->ena_dev->mmio_read.reg_read_to =
3944			hints->mmio_read_timeout * 1000;
3945
3946	if (hints->missed_tx_completion_count_threshold_to_reset)
3947		adapter->missing_tx_completion_threshold =
3948			hints->missed_tx_completion_count_threshold_to_reset;
3949
3950	if (hints->missing_tx_completion_timeout) {
3951		if (hints->missing_tx_completion_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3952			adapter->missing_tx_completion_to = ENA_HW_HINTS_NO_TIMEOUT;
3953		else
3954			adapter->missing_tx_completion_to =
3955				msecs_to_jiffies(hints->missing_tx_completion_timeout);
3956	}
3957
3958	if (hints->netdev_wd_timeout)
3959		netdev->watchdog_timeo = msecs_to_jiffies(hints->netdev_wd_timeout);
3960
3961	if (hints->driver_watchdog_timeout) {
3962		if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3963			adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3964		else
3965			adapter->keep_alive_timeout =
3966				msecs_to_jiffies(hints->driver_watchdog_timeout);
3967	}
3968}
3969
3970static void ena_update_host_info(struct ena_admin_host_info *host_info,
3971				 struct net_device *netdev)
3972{
3973	host_info->supported_network_features[0] =
3974		netdev->features & GENMASK_ULL(31, 0);
3975	host_info->supported_network_features[1] =
3976		(netdev->features & GENMASK_ULL(63, 32)) >> 32;
3977}
3978
3979static void ena_timer_service(struct timer_list *t)
3980{
3981	struct ena_adapter *adapter = from_timer(adapter, t, timer_service);
3982	u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr;
3983	struct ena_admin_host_info *host_info =
3984		adapter->ena_dev->host_attr.host_info;
3985
3986	check_for_missing_keep_alive(adapter);
3987
3988	check_for_admin_com_state(adapter);
3989
3990	check_for_missing_completions(adapter);
3991
3992	check_for_empty_rx_ring(adapter);
3993
3994	if (debug_area)
3995		ena_dump_stats_to_buf(adapter, debug_area);
3996
3997	if (host_info)
3998		ena_update_host_info(host_info, adapter->netdev);
3999
4000	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
4001		netif_err(adapter, drv, adapter->netdev,
4002			  "Trigger reset is on\n");
4003		ena_dump_stats_to_dmesg(adapter);
4004		queue_work(ena_wq, &adapter->reset_task);
4005		return;
4006	}
4007
4008	/* Reset the timer */
4009	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
4010}
4011
4012static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev,
4013				     struct ena_com_dev *ena_dev,
4014				     struct ena_com_dev_get_features_ctx *get_feat_ctx)
4015{
4016	u32 io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
4017
4018	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
4019		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
4020			&get_feat_ctx->max_queue_ext.max_queue_ext;
4021		io_rx_num = min_t(u32, max_queue_ext->max_rx_sq_num,
4022				  max_queue_ext->max_rx_cq_num);
4023
4024		io_tx_sq_num = max_queue_ext->max_tx_sq_num;
4025		io_tx_cq_num = max_queue_ext->max_tx_cq_num;
4026	} else {
4027		struct ena_admin_queue_feature_desc *max_queues =
4028			&get_feat_ctx->max_queues;
4029		io_tx_sq_num = max_queues->max_sq_num;
4030		io_tx_cq_num = max_queues->max_cq_num;
4031		io_rx_num = min_t(u32, io_tx_sq_num, io_tx_cq_num);
4032	}
4033
4034	/* In case of LLQ use the llq fields for the tx SQ/CQ */
4035	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4036		io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
4037
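	/* Bound the number of IO queues by the online CPUs, the device queue
	 * limits and the available MSI-X vectors (one vector is reserved for
	 * management).
	 */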
4038	max_num_io_queues = min_t(u32, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
4039	max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num);
4040	max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num);
4041	max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num);
4042	/* 1 IRQ for mgmnt and 1 IRQ for each IO direction */
4043	max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1);
4044
4045	return max_num_io_queues;
4046}
4047
4048static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
4049				 struct net_device *netdev)
4050{
4051	netdev_features_t dev_features = 0;
4052
4053	/* Set offload features */
4054	if (feat->offload.tx &
4055		ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
4056		dev_features |= NETIF_F_IP_CSUM;
4057
4058	if (feat->offload.tx &
4059		ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
4060		dev_features |= NETIF_F_IPV6_CSUM;
4061
4062	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
4063		dev_features |= NETIF_F_TSO;
4064
4065	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK)
4066		dev_features |= NETIF_F_TSO6;
4067
4068	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK)
4069		dev_features |= NETIF_F_TSO_ECN;
4070
4071	if (feat->offload.rx_supported &
4072		ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
4073		dev_features |= NETIF_F_RXCSUM;
4074
4075	if (feat->offload.rx_supported &
4076		ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
4077		dev_features |= NETIF_F_RXCSUM;
4078
4079	netdev->features =
4080		dev_features |
4081		NETIF_F_SG |
4082		NETIF_F_RXHASH |
4083		NETIF_F_HIGHDMA;
4084
4085	netdev->hw_features |= netdev->features;
4086	netdev->vlan_features |= netdev->features;
4087}
4088
4089static void ena_set_conf_feat_params(struct ena_adapter *adapter,
4090				     struct ena_com_dev_get_features_ctx *feat)
4091{
4092	struct net_device *netdev = adapter->netdev;
4093
4094	/* Copy mac address */
4095	if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) {
4096		eth_hw_addr_random(netdev);
4097		ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
4098	} else {
4099		ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
4100		eth_hw_addr_set(netdev, adapter->mac_addr);
4101	}
4102
4103	/* Set offload features */
4104	ena_set_dev_offloads(feat, netdev);
4105
4106	adapter->max_mtu = feat->dev_attr.max_mtu;
4107	netdev->max_mtu = adapter->max_mtu;
4108	netdev->min_mtu = ENA_MIN_MTU;
4109}
4110
4111static int ena_rss_init_default(struct ena_adapter *adapter)
4112{
4113	struct ena_com_dev *ena_dev = adapter->ena_dev;
4114	struct device *dev = &adapter->pdev->dev;
4115	int rc, i;
4116	u32 val;
4117
4118	rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
4119	if (unlikely(rc)) {
4120		dev_err(dev, "Cannot init indirect table\n");
4121		goto err_rss_init;
4122	}
4123
4124	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
4125		val = ethtool_rxfh_indir_default(i, adapter->num_io_queues);
4126		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
4127						       ENA_IO_RXQ_IDX(val));
4128		if (unlikely(rc)) {
4129			dev_err(dev, "Cannot fill indirect table\n");
4130			goto err_fill_indir;
4131		}
4132	}
4133
4134	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL,
4135					ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
4136	if (unlikely(rc && (rc != -EOPNOTSUPP))) {
4137		dev_err(dev, "Cannot fill hash function\n");
4138		goto err_fill_indir;
4139	}
4140
4141	rc = ena_com_set_default_hash_ctrl(ena_dev);
4142	if (unlikely(rc && (rc != -EOPNOTSUPP))) {
4143		dev_err(dev, "Cannot fill hash control\n");
4144		goto err_fill_indir;
4145	}
4146
4147	return 0;
4148
4149err_fill_indir:
4150	ena_com_rss_destroy(ena_dev);
4151err_rss_init:
4152
4153	return rc;
4154}
4155
4156static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
4157{
4158	int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
4159
4160	pci_release_selected_regions(pdev, release_bars);
4161}
4162
4163
4164static void ena_calc_io_queue_size(struct ena_adapter *adapter,
4165				   struct ena_com_dev_get_features_ctx *get_feat_ctx)
4166{
4167	struct ena_admin_feature_llq_desc *llq = &get_feat_ctx->llq;
4168	struct ena_com_dev *ena_dev = adapter->ena_dev;
4169	u32 tx_queue_size = ENA_DEFAULT_RING_SIZE;
4170	u32 rx_queue_size = ENA_DEFAULT_RING_SIZE;
4171	u32 max_tx_queue_size;
4172	u32 max_rx_queue_size;
4173
4174	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
4175		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
4176			&get_feat_ctx->max_queue_ext.max_queue_ext;
4177		max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth,
4178					  max_queue_ext->max_rx_sq_depth);
4179		max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
4180
4181		if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4182			max_tx_queue_size = min_t(u32, max_tx_queue_size,
4183						  llq->max_llq_depth);
4184		else
4185			max_tx_queue_size = min_t(u32, max_tx_queue_size,
4186						  max_queue_ext->max_tx_sq_depth);
4187
4188		adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4189						 max_queue_ext->max_per_packet_tx_descs);
4190		adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4191						 max_queue_ext->max_per_packet_rx_descs);
4192	} else {
4193		struct ena_admin_queue_feature_desc *max_queues =
4194			&get_feat_ctx->max_queues;
4195		max_rx_queue_size = min_t(u32, max_queues->max_cq_depth,
4196					  max_queues->max_sq_depth);
4197		max_tx_queue_size = max_queues->max_cq_depth;
4198
4199		if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4200			max_tx_queue_size = min_t(u32, max_tx_queue_size,
4201						  llq->max_llq_depth);
4202		else
4203			max_tx_queue_size = min_t(u32, max_tx_queue_size,
4204						  max_queues->max_sq_depth);
4205
4206		adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4207						 max_queues->max_packet_tx_descs);
4208		adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4209						 max_queues->max_packet_rx_descs);
4210	}
4211
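	/* Ring sizes must be powers of two: round the device limits down,
	 * then clamp the default sizes into the allowed range and round them
	 * down as well.
	 */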
4212	max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size);
4213	max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size);
4214
4215	tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
4216				  max_tx_queue_size);
4217	rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
4218				  max_rx_queue_size);
4219
4220	tx_queue_size = rounddown_pow_of_two(tx_queue_size);
4221	rx_queue_size = rounddown_pow_of_two(rx_queue_size);
4222
4223	adapter->max_tx_ring_size  = max_tx_queue_size;
4224	adapter->max_rx_ring_size = max_rx_queue_size;
4225	adapter->requested_tx_ring_size = tx_queue_size;
4226	adapter->requested_rx_ring_size = rx_queue_size;
4227}
4228
4229/* ena_probe - Device Initialization Routine
4230 * @pdev: PCI device information struct
4231 * @ent: entry in ena_pci_tbl
4232 *
4233 * Returns 0 on success, negative on failure
4234 *
4235 * ena_probe initializes an adapter identified by a pci_dev structure.
4236 * The OS initialization, configuring of the adapter private structure,
4237 * and a hardware reset occur.
4238 */
4239static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
4240{
4241	struct ena_com_dev_get_features_ctx get_feat_ctx;
4242	struct ena_com_dev *ena_dev = NULL;
4243	struct ena_adapter *adapter;
4244	struct net_device *netdev;
4245	static int adapters_found;
4246	u32 max_num_io_queues;
4247	bool wd_state;
4248	int bars, rc;
4249
4250	dev_dbg(&pdev->dev, "%s\n", __func__);
4251
4252	rc = pci_enable_device_mem(pdev);
4253	if (rc) {
4254		dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
4255		return rc;
4256	}
4257
4258	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(ENA_MAX_PHYS_ADDR_SIZE_BITS));
4259	if (rc) {
4260		dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", rc);
4261		goto err_disable_device;
4262	}
4263
4264	pci_set_master(pdev);
4265
4266	ena_dev = vzalloc(sizeof(*ena_dev));
4267	if (!ena_dev) {
4268		rc = -ENOMEM;
4269		goto err_disable_device;
4270	}
4271
4272	bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
4273	rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
4274	if (rc) {
4275		dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
4276			rc);
4277		goto err_free_ena_dev;
4278	}
4279
4280	ena_dev->reg_bar = devm_ioremap(&pdev->dev,
4281					pci_resource_start(pdev, ENA_REG_BAR),
4282					pci_resource_len(pdev, ENA_REG_BAR));
4283	if (!ena_dev->reg_bar) {
4284		dev_err(&pdev->dev, "Failed to remap regs bar\n");
4285		rc = -EFAULT;
4286		goto err_free_region;
4287	}
4288
4289	ena_dev->ena_min_poll_delay_us = ENA_ADMIN_POLL_DELAY_US;
4290
4291	ena_dev->dmadev = &pdev->dev;
4292
4293	netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), ENA_MAX_RINGS);
4294	if (!netdev) {
4295		dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
4296		rc = -ENOMEM;
4297		goto err_free_region;
4298	}
4299
4300	SET_NETDEV_DEV(netdev, &pdev->dev);
4301	adapter = netdev_priv(netdev);
4302	adapter->ena_dev = ena_dev;
4303	adapter->netdev = netdev;
4304	adapter->pdev = pdev;
4305	adapter->msg_enable = DEFAULT_MSG_ENABLE;
4306
4307	ena_dev->net_device = netdev;
4308
4309	pci_set_drvdata(pdev, adapter);
4310
4311	rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state);
4312	if (rc) {
4313		dev_err(&pdev->dev, "ENA device init failed\n");
4314		if (rc == -ETIME)
4315			rc = -EPROBE_DEFER;
4316		goto err_netdev_destroy;
4317	}
4318
4319	rc = ena_map_llq_mem_bar(pdev, ena_dev, bars);
4320	if (rc) {
4321		dev_err(&pdev->dev, "ENA llq bar mapping failed\n");
4322		goto err_device_destroy;
4323	}
4324
4325	/* Initial TX and RX interrupt delay. Assumes 1 usec granularity.
4326	 * Updated during device initialization with the real granularity
4327	 */
4328	ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
4329	ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS;
4330	ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
4331	max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx);
4332	ena_calc_io_queue_size(adapter, &get_feat_ctx);
4333	if (unlikely(!max_num_io_queues)) {
4334		rc = -EFAULT;
4335		goto err_device_destroy;
4336	}
4337
4338	ena_set_conf_feat_params(adapter, &get_feat_ctx);
4339
4340	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
4341
4342	adapter->num_io_queues = max_num_io_queues;
4343	adapter->max_num_io_queues = max_num_io_queues;
4344	adapter->last_monitored_tx_qid = 0;
4345
4346	adapter->xdp_first_ring = 0;
4347	adapter->xdp_num_queues = 0;
4348
4349	adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
4350	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4351		adapter->disable_meta_caching =
4352			!!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
4353			   BIT(ENA_ADMIN_DISABLE_META_CACHING));
4354
4355	adapter->wd_state = wd_state;
4356
4357	snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);
4358
4359	rc = ena_com_init_interrupt_moderation(adapter->ena_dev);
4360	if (rc) {
4361		dev_err(&pdev->dev,
4362			"Failed to query interrupt moderation feature\n");
4363		goto err_device_destroy;
4364	}
4365	ena_init_io_rings(adapter,
4366			  0,
4367			  adapter->xdp_num_queues +
4368			  adapter->num_io_queues);
4369
4370	netdev->netdev_ops = &ena_netdev_ops;
4371	netdev->watchdog_timeo = TX_TIMEOUT;
4372	ena_set_ethtool_ops(netdev);
4373
4374	netdev->priv_flags |= IFF_UNICAST_FLT;
4375
4376	u64_stats_init(&adapter->syncp);
4377
4378	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
4379	if (rc) {
4380		dev_err(&pdev->dev,
4381			"Failed to enable and set the admin interrupts\n");
4382		goto err_worker_destroy;
4383	}
4384	rc = ena_rss_init_default(adapter);
4385	if (rc && (rc != -EOPNOTSUPP)) {
4386		dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc);
4387		goto err_free_msix;
4388	}
4389
4390	ena_config_debug_area(adapter);
4391
4392	memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
4393
4394	netif_carrier_off(netdev);
4395
4396	rc = register_netdev(netdev);
4397	if (rc) {
4398		dev_err(&pdev->dev, "Cannot register net device\n");
4399		goto err_rss;
4400	}
4401
4402	INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
4403
4404	adapter->last_keep_alive_jiffies = jiffies;
4405	adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
4406	adapter->missing_tx_completion_to = TX_TIMEOUT;
4407	adapter->missing_tx_completion_threshold = MAX_NUM_OF_TIMEOUTED_PACKETS;
4408
4409	ena_update_hints(adapter, &get_feat_ctx.hw_hints);
4410
4411	timer_setup(&adapter->timer_service, ena_timer_service, 0);
4412	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
4413
4414	dev_info(&pdev->dev,
4415		 "%s found at mem %lx, mac addr %pM\n",
4416		 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
4417		 netdev->dev_addr);
4418
4419	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
4420
4421	adapters_found++;
4422
4423	return 0;
4424
4425err_rss:
4426	ena_com_delete_debug_area(ena_dev);
4427	ena_com_rss_destroy(ena_dev);
4428err_free_msix:
4429	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR);
4430	/* stop submitting admin commands on a device that was reset */
4431	ena_com_set_admin_running_state(ena_dev, false);
4432	ena_free_mgmnt_irq(adapter);
4433	ena_disable_msix(adapter);
4434err_worker_destroy:
4435	del_timer(&adapter->timer_service);
4436err_device_destroy:
4437	ena_com_delete_host_info(ena_dev);
4438	ena_com_admin_destroy(ena_dev);
4439err_netdev_destroy:
4440	free_netdev(netdev);
4441err_free_region:
4442	ena_release_bars(ena_dev, pdev);
4443err_free_ena_dev:
4444	vfree(ena_dev);
4445err_disable_device:
4446	pci_disable_device(pdev);
4447	return rc;
4448}
4449
4450/*****************************************************************************/
4451
4452/* __ena_shutoff - Helper used in both PCI remove/shutdown routines
4453 * @pdev: PCI device information struct
4454 * @shutdown: Is it a shutdown operation? If false, it means it is a removal
4455 *
4456 * __ena_shutoff is a helper routine that does the real work on the shutdown
4457 * and removal paths; the difference between those paths is whether to detach
4458 * or unregister the netdevice.
4459 */
4460static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
4461{
4462	struct ena_adapter *adapter = pci_get_drvdata(pdev);
4463	struct ena_com_dev *ena_dev;
4464	struct net_device *netdev;
4465
4466	ena_dev = adapter->ena_dev;
4467	netdev = adapter->netdev;
4468
4469#ifdef CONFIG_RFS_ACCEL
4470	if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
4471		free_irq_cpu_rmap(netdev->rx_cpu_rmap);
4472		netdev->rx_cpu_rmap = NULL;
4473	}
4474#endif /* CONFIG_RFS_ACCEL */
4475
4476	/* Make sure timer and reset routine won't be called after
4477	 * freeing device resources.
4478	 */
4479	del_timer_sync(&adapter->timer_service);
4480	cancel_work_sync(&adapter->reset_task);
4481
4482	rtnl_lock(); /* lock released inside the below if-else block */
4483	adapter->reset_reason = ENA_REGS_RESET_SHUTDOWN;
4484	ena_destroy_device(adapter, true);
4485	if (shutdown) {
4486		netif_device_detach(netdev);
4487		dev_close(netdev);
4488		rtnl_unlock();
4489	} else {
4490		rtnl_unlock();
4491		unregister_netdev(netdev);
4492		free_netdev(netdev);
4493	}
4494
4495	ena_com_rss_destroy(ena_dev);
4496
4497	ena_com_delete_debug_area(ena_dev);
4498
4499	ena_com_delete_host_info(ena_dev);
4500
4501	ena_release_bars(ena_dev, pdev);
4502
4503	pci_disable_device(pdev);
4504
4505	vfree(ena_dev);
4506}
4507
4508/* ena_remove - Device Removal Routine
4509 * @pdev: PCI device information struct
4510 *
4511 * ena_remove is called by the PCI subsystem to alert the driver
4512 * that it should release a PCI device.
4513 */
4514
4515static void ena_remove(struct pci_dev *pdev)
4516{
4517	__ena_shutoff(pdev, false);
4518}
4519
4520/* ena_shutdown - Device Shutdown Routine
4521 * @pdev: PCI device information struct
4522 *
4523 * ena_shutdown is called by the PCI subsystem to alert the driver that
4524 * a shutdown/reboot (or kexec) is happening and the device must be disabled.
4525 */
4526
4527static void ena_shutdown(struct pci_dev *pdev)
4528{
4529	__ena_shutoff(pdev, true);
4530}
4531
4532/* ena_suspend - PM suspend callback
4533 * @dev_d: Device information struct
4534 */
4535static int __maybe_unused ena_suspend(struct device *dev_d)
4536{
4537	struct pci_dev *pdev = to_pci_dev(dev_d);
4538	struct ena_adapter *adapter = pci_get_drvdata(pdev);
4539
4540	ena_increase_stat(&adapter->dev_stats.suspend, 1, &adapter->syncp);
4541
4542	rtnl_lock();
4543	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
4544		dev_err(&pdev->dev,
4545			"Ignoring device reset request as the device is being suspended\n");
4546		clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
4547	}
4548	ena_destroy_device(adapter, true);
4549	rtnl_unlock();
4550	return 0;
4551}
4552
4553/* ena_resume - PM resume callback
4554 * @dev_d: Device information struct
4555 */
4556static int __maybe_unused ena_resume(struct device *dev_d)
4557{
4558	struct ena_adapter *adapter = dev_get_drvdata(dev_d);
4559	int rc;
4560
4561	ena_increase_stat(&adapter->dev_stats.resume, 1, &adapter->syncp);
4562
4563	rtnl_lock();
4564	rc = ena_restore_device(adapter);
4565	rtnl_unlock();
4566	return rc;
4567}
4568
4569static SIMPLE_DEV_PM_OPS(ena_pm_ops, ena_suspend, ena_resume);
4570
4571static struct pci_driver ena_pci_driver = {
4572	.name		= DRV_MODULE_NAME,
4573	.id_table	= ena_pci_tbl,
4574	.probe		= ena_probe,
4575	.remove		= ena_remove,
4576	.shutdown	= ena_shutdown,
4577	.driver.pm	= &ena_pm_ops,
4578	.sriov_configure = pci_sriov_configure_simple,
4579};
4580
4581static int __init ena_init(void)
4582{
4583	int ret;
4584
4585	ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
4586	if (!ena_wq) {
4587		pr_err("Failed to create workqueue\n");
4588		return -ENOMEM;
4589	}
4590
4591	ret = pci_register_driver(&ena_pci_driver);
4592	if (ret)
4593		destroy_workqueue(ena_wq);
4594
4595	return ret;
4596}
4597
4598static void __exit ena_cleanup(void)
4599{
4600	pci_unregister_driver(&ena_pci_driver);
4601
4602	if (ena_wq) {
4603		destroy_workqueue(ena_wq);
4604		ena_wq = NULL;
4605	}
4606}
4607
4608/******************************************************************************
4609 ******************************** AENQ Handlers *******************************
4610 *****************************************************************************/
4611/* ena_update_on_link_change:
4612 * Notify the network interface about the change in link status
4613 */
4614static void ena_update_on_link_change(void *adapter_data,
4615				      struct ena_admin_aenq_entry *aenq_e)
4616{
4617	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4618	struct ena_admin_aenq_link_change_desc *aenq_desc =
4619		(struct ena_admin_aenq_link_change_desc *)aenq_e;
4620	int status = aenq_desc->flags &
4621		ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
4622
4623	if (status) {
4624		netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__);
4625		set_bit(ENA_FLAG_LINK_UP, &adapter->flags);
4626		if (!test_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags))
4627			netif_carrier_on(adapter->netdev);
4628	} else {
4629		clear_bit(ENA_FLAG_LINK_UP, &adapter->flags);
4630		netif_carrier_off(adapter->netdev);
4631	}
4632}
4633
4634static void ena_keep_alive_wd(void *adapter_data,
4635			      struct ena_admin_aenq_entry *aenq_e)
4636{
4637	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4638	struct ena_admin_aenq_keep_alive_desc *desc;
4639	u64 rx_drops;
4640	u64 tx_drops;
4641
4642	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
4643	adapter->last_keep_alive_jiffies = jiffies;
4644
4645	rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low;
4646	tx_drops = ((u64)desc->tx_drops_high << 32) | desc->tx_drops_low;
4647
4648	u64_stats_update_begin(&adapter->syncp);
4649	/* These stats are accumulated by the device, so the counters indicate
4650	 * all drops since last reset.
4651	 */
4652	adapter->dev_stats.rx_drops = rx_drops;
4653	adapter->dev_stats.tx_drops = tx_drops;
4654	u64_stats_update_end(&adapter->syncp);
4655}
4656
4657static void ena_notification(void *adapter_data,
4658			     struct ena_admin_aenq_entry *aenq_e)
4659{
4660	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4661	struct ena_admin_ena_hw_hints *hints;
4662
4663	WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
4664	     "Invalid group(%x) expected %x\n",
4665	     aenq_e->aenq_common_desc.group,
4666	     ENA_ADMIN_NOTIFICATION);
4667
4668	switch (aenq_e->aenq_common_desc.syndrome) {
4669	case ENA_ADMIN_UPDATE_HINTS:
4670		hints = (struct ena_admin_ena_hw_hints *)
4671			(&aenq_e->inline_data_w4);
4672		ena_update_hints(adapter, hints);
4673		break;
4674	default:
4675		netif_err(adapter, drv, adapter->netdev,
4676			  "Invalid aenq notification link state %d\n",
4677			  aenq_e->aenq_common_desc.syndrome);
4678	}
4679}
4680
4681/* This handler will be called for an unknown event group or unimplemented handlers */
4682static void unimplemented_aenq_handler(void *data,
4683				       struct ena_admin_aenq_entry *aenq_e)
4684{
4685	struct ena_adapter *adapter = (struct ena_adapter *)data;
4686
4687	netif_err(adapter, drv, adapter->netdev,
4688		  "Unknown event was received or event with unimplemented handler\n");
4689}
4690
4691static struct ena_aenq_handlers aenq_handlers = {
4692	.handlers = {
4693		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
4694		[ENA_ADMIN_NOTIFICATION] = ena_notification,
4695		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
4696	},
4697	.unimplemented_handler = unimplemented_aenq_handler
4698};
4699
4700module_init(ena_init);
4701module_exit(ena_cleanup);