Linux Audio

Check our new training course

Loading...
v6.8
   1/*
   2 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 *
  32 */
  33
  34#include <asm/page.h>
  35#include <linux/mlx4/cq.h>
  36#include <linux/slab.h>
  37#include <linux/mlx4/qp.h>
  38#include <linux/skbuff.h>
  39#include <linux/if_vlan.h>
  40#include <linux/prefetch.h>
  41#include <linux/vmalloc.h>
  42#include <linux/tcp.h>
  43#include <linux/ip.h>
  44#include <linux/ipv6.h>
  45#include <linux/indirect_call_wrapper.h>
  46#include <net/ipv6.h>
  47
  48#include "mlx4_en.h"
  49
  50int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
  51			   struct mlx4_en_tx_ring **pring, u32 size,
  52			   u16 stride, int node, int queue_index)
  53{
  54	struct mlx4_en_dev *mdev = priv->mdev;
  55	struct mlx4_en_tx_ring *ring;
  56	int tmp;
  57	int err;
  58
  59	ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
  60	if (!ring) {
  61		en_err(priv, "Failed allocating TX ring\n");
  62		return -ENOMEM;
  63	}
  64
  65	ring->size = size;
  66	ring->size_mask = size - 1;
  67	ring->sp_stride = stride;
  68	ring->full_size = ring->size - HEADROOM - MLX4_MAX_DESC_TXBBS;
  69
  70	tmp = size * sizeof(struct mlx4_en_tx_info);
  71	ring->tx_info = kvmalloc_node(tmp, GFP_KERNEL, node);
  72	if (!ring->tx_info) {
  73		err = -ENOMEM;
  74		goto err_ring;
  75	}
  76
  77	en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
  78		 ring->tx_info, tmp);
  79
  80	ring->bounce_buf = kmalloc_node(MLX4_TX_BOUNCE_BUFFER_SIZE,
  81					GFP_KERNEL, node);
  82	if (!ring->bounce_buf) {
  83		ring->bounce_buf = kmalloc(MLX4_TX_BOUNCE_BUFFER_SIZE,
  84					   GFP_KERNEL);
  85		if (!ring->bounce_buf) {
  86			err = -ENOMEM;
  87			goto err_info;
  88		}
  89	}
  90	ring->buf_size = ALIGN(size * ring->sp_stride, MLX4_EN_PAGE_SIZE);
  91
  92	/* Allocate HW buffers on provided NUMA node */
  93	set_dev_node(&mdev->dev->persist->pdev->dev, node);
  94	err = mlx4_alloc_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
  95	set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
  96	if (err) {
  97		en_err(priv, "Failed allocating hwq resources\n");
  98		goto err_bounce;
  99	}
 100
 101	ring->buf = ring->sp_wqres.buf.direct.buf;
 102
 103	en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n",
 104	       ring, ring->buf, ring->size, ring->buf_size,
 105	       (unsigned long long) ring->sp_wqres.buf.direct.map);
 106
 107	err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn,
 108				    MLX4_RESERVE_ETH_BF_QP,
 109				    MLX4_RES_USAGE_DRIVER);
 110	if (err) {
 111		en_err(priv, "failed reserving qp for TX ring\n");
 112		goto err_hwq_res;
 113	}
 114
 115	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->sp_qp);
 116	if (err) {
 117		en_err(priv, "Failed allocating qp %d\n", ring->qpn);
 118		goto err_reserve;
 119	}
 120	ring->sp_qp.event = mlx4_en_sqp_event;
 121
 122	err = mlx4_bf_alloc(mdev->dev, &ring->bf, node);
 123	if (err) {
 124		en_dbg(DRV, priv, "working without blueflame (%d)\n", err);
 125		ring->bf.uar = &mdev->priv_uar;
 126		ring->bf.uar->map = mdev->uar_map;
 127		ring->bf_enabled = false;
 128		ring->bf_alloced = false;
 129		priv->pflags &= ~MLX4_EN_PRIV_FLAGS_BLUEFLAME;
 130	} else {
 131		ring->bf_alloced = true;
 132		ring->bf_enabled = !!(priv->pflags &
 133				      MLX4_EN_PRIV_FLAGS_BLUEFLAME);
 134	}
 135	ring->doorbell_address = ring->bf.uar->map + MLX4_SEND_DOORBELL;
 136
 137	ring->hwtstamp_tx_type = priv->hwtstamp_config.tx_type;
 138	ring->queue_index = queue_index;
 139
 140	if (queue_index < priv->num_tx_rings_p_up)
 141		cpumask_set_cpu(cpumask_local_spread(queue_index,
 142						     priv->mdev->dev->numa_node),
 143				&ring->sp_affinity_mask);
 144
 145	*pring = ring;
 146	return 0;
 147
 148err_reserve:
 149	mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
 150err_hwq_res:
 151	mlx4_free_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
 152err_bounce:
 153	kfree(ring->bounce_buf);
 154	ring->bounce_buf = NULL;
 155err_info:
 156	kvfree(ring->tx_info);
 157	ring->tx_info = NULL;
 158err_ring:
 159	kfree(ring);
 160	*pring = NULL;
 161	return err;
 162}
 163
 164void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
 165			     struct mlx4_en_tx_ring **pring)
 166{
 167	struct mlx4_en_dev *mdev = priv->mdev;
 168	struct mlx4_en_tx_ring *ring = *pring;
 169	en_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn);
 170
 171	if (ring->bf_alloced)
 172		mlx4_bf_free(mdev->dev, &ring->bf);
 173	mlx4_qp_remove(mdev->dev, &ring->sp_qp);
 174	mlx4_qp_free(mdev->dev, &ring->sp_qp);
 175	mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
 176	mlx4_free_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
 177	kfree(ring->bounce_buf);
 178	ring->bounce_buf = NULL;
 179	kvfree(ring->tx_info);
 180	ring->tx_info = NULL;
 181	kfree(ring);
 182	*pring = NULL;
 183}
 184
 185int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
 186			     struct mlx4_en_tx_ring *ring,
 187			     int cq, int user_prio)
 188{
 189	struct mlx4_en_dev *mdev = priv->mdev;
 190	int err;
 191
 192	ring->sp_cqn = cq;
 193	ring->prod = 0;
 194	ring->cons = 0xffffffff;
 195	ring->last_nr_txbb = 1;
 196	memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info));
 197	memset(ring->buf, 0, ring->buf_size);
 198	ring->free_tx_desc = mlx4_en_free_tx_desc;
 199
 200	ring->sp_qp_state = MLX4_QP_STATE_RST;
 201	ring->doorbell_qpn = cpu_to_be32(ring->sp_qp.qpn << 8);
 202	ring->mr_key = cpu_to_be32(mdev->mr.key);
 203
 204	mlx4_en_fill_qp_context(priv, ring->size, ring->sp_stride, 1, 0, ring->qpn,
 205				ring->sp_cqn, user_prio, &ring->sp_context);
 206	if (ring->bf_alloced)
 207		ring->sp_context.usr_page =
 208			cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
 209							 ring->bf.uar->index));
 210
 211	err = mlx4_qp_to_ready(mdev->dev, &ring->sp_wqres.mtt, &ring->sp_context,
 212			       &ring->sp_qp, &ring->sp_qp_state);
 213	if (!cpumask_empty(&ring->sp_affinity_mask))
 214		netif_set_xps_queue(priv->dev, &ring->sp_affinity_mask,
 215				    ring->queue_index);
 216
 217	return err;
 218}
 219
 220void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
 221				struct mlx4_en_tx_ring *ring)
 222{
 223	struct mlx4_en_dev *mdev = priv->mdev;
 224
 225	mlx4_qp_modify(mdev->dev, NULL, ring->sp_qp_state,
 226		       MLX4_QP_STATE_RST, NULL, 0, 0, &ring->sp_qp);
 227}
 228
 229static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring)
 230{
 231	u32 used = READ_ONCE(ring->prod) - READ_ONCE(ring->cons);
 232
 233	return used > ring->full_size;
 234}
 235
 236static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
 237			      struct mlx4_en_tx_ring *ring, int index,
 238			      u8 owner)
 239{
 240	__be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT));
 241	struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
 242	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
 243	void *end = ring->buf + ring->buf_size;
 244	__be32 *ptr = (__be32 *)tx_desc;
 245	int i;
 246
 247	/* Optimize the common case when there are no wraparounds */
 248	if (likely((void *)tx_desc +
 249		   (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) {
 250		/* Stamp the freed descriptor */
 251		for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE;
 252		     i += STAMP_STRIDE) {
 253			*ptr = stamp;
 254			ptr += STAMP_DWORDS;
 255		}
 256	} else {
 257		/* Stamp the freed descriptor */
 258		for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE;
 259		     i += STAMP_STRIDE) {
 260			*ptr = stamp;
 261			ptr += STAMP_DWORDS;
 262			if ((void *)ptr >= end) {
 263				ptr = ring->buf;
 264				stamp ^= cpu_to_be32(0x80000000);
 265			}
 266		}
 267	}
 268}
 269
 270INDIRECT_CALLABLE_DECLARE(u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 271						   struct mlx4_en_tx_ring *ring,
 272						   int index, u64 timestamp,
 273						   int napi_mode));
 274
 275u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 276			 struct mlx4_en_tx_ring *ring,
 277			 int index, u64 timestamp,
 278			 int napi_mode)
 279{
 280	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
 281	struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
 282	struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
 283	void *end = ring->buf + ring->buf_size;
 284	struct sk_buff *skb = tx_info->skb;
 285	int nr_maps = tx_info->nr_maps;
 286	int i;
 287
 288	/* We do not touch skb here, so prefetch skb->users location
 289	 * to speedup consume_skb()
 290	 */
 291	prefetchw(&skb->users);
 292
 293	if (unlikely(timestamp)) {
 294		struct skb_shared_hwtstamps hwts;
 295
 296		mlx4_en_fill_hwtstamps(priv->mdev, &hwts, timestamp);
 297		skb_tstamp_tx(skb, &hwts);
 298	}
 299
 300	if (!tx_info->inl) {
 301		if (tx_info->linear)
 302			dma_unmap_single(priv->ddev,
 303					 tx_info->map0_dma,
 304					 tx_info->map0_byte_count,
 305					 DMA_TO_DEVICE);
 306		else
 307			dma_unmap_page(priv->ddev,
 308				       tx_info->map0_dma,
 309				       tx_info->map0_byte_count,
 310				       DMA_TO_DEVICE);
 311		/* Optimize the common case when there are no wraparounds */
 312		if (likely((void *)tx_desc +
 313			   (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) {
 314			for (i = 1; i < nr_maps; i++) {
 315				data++;
 316				dma_unmap_page(priv->ddev,
 317					(dma_addr_t)be64_to_cpu(data->addr),
 318					be32_to_cpu(data->byte_count),
 319					DMA_TO_DEVICE);
 320			}
 321		} else {
 322			if ((void *)data >= end)
 323				data = ring->buf + ((void *)data - end);
 324
 325			for (i = 1; i < nr_maps; i++) {
 326				data++;
 327				/* Check for wraparound before unmapping */
 328				if ((void *) data >= end)
 329					data = ring->buf;
 330				dma_unmap_page(priv->ddev,
 331					(dma_addr_t)be64_to_cpu(data->addr),
 332					be32_to_cpu(data->byte_count),
 333					DMA_TO_DEVICE);
 334			}
 335		}
 336	}
 337	napi_consume_skb(skb, napi_mode);
 338
 339	return tx_info->nr_txbb;
 340}
 341
 342INDIRECT_CALLABLE_DECLARE(u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
 343						      struct mlx4_en_tx_ring *ring,
 344						      int index, u64 timestamp,
 345						      int napi_mode));
 346
 347u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
 348			    struct mlx4_en_tx_ring *ring,
 349			    int index, u64 timestamp,
 350			    int napi_mode)
 351{
 352	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
 353	struct mlx4_en_rx_alloc frame = {
 354		.page = tx_info->page,
 355		.dma = tx_info->map0_dma,
 356	};
 357
 358	if (!napi_mode || !mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
 359		dma_unmap_page(priv->ddev, tx_info->map0_dma,
 360			       PAGE_SIZE, priv->dma_dir);
 361		put_page(tx_info->page);
 362	}
 363
 364	return tx_info->nr_txbb;
 365}
 366
 367int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
 368{
 369	struct mlx4_en_priv *priv = netdev_priv(dev);
 370	int cnt = 0;
 371
 372	/* Skip last polled descriptor */
 373	ring->cons += ring->last_nr_txbb;
 374	en_dbg(DRV, priv, "Freeing Tx buf - cons:0x%x prod:0x%x\n",
 375		 ring->cons, ring->prod);
 376
 377	if ((u32) (ring->prod - ring->cons) > ring->size) {
 378		if (netif_msg_tx_err(priv))
 379			en_warn(priv, "Tx consumer passed producer!\n");
 380		return 0;
 381	}
 382
 383	while (ring->cons != ring->prod) {
 384		ring->last_nr_txbb = ring->free_tx_desc(priv, ring,
 385						ring->cons & ring->size_mask,
 386						0, 0 /* Non-NAPI caller */);
 387		ring->cons += ring->last_nr_txbb;
 388		cnt++;
 389	}
 390
 391	if (ring->tx_queue)
 392		netdev_tx_reset_queue(ring->tx_queue);
 393
 394	if (cnt)
 395		en_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt);
 396
 397	return cnt;
 398}
 399
 400static void mlx4_en_handle_err_cqe(struct mlx4_en_priv *priv, struct mlx4_err_cqe *err_cqe,
 401				   u16 cqe_index, struct mlx4_en_tx_ring *ring)
 402{
 403	struct mlx4_en_dev *mdev = priv->mdev;
 404	struct mlx4_en_tx_info *tx_info;
 405	struct mlx4_en_tx_desc *tx_desc;
 406	u16 wqe_index;
 407	int desc_size;
 408
 409	en_err(priv, "CQE error - cqn 0x%x, ci 0x%x, vendor syndrome: 0x%x syndrome: 0x%x\n",
 410	       ring->sp_cqn, cqe_index, err_cqe->vendor_err_syndrome, err_cqe->syndrome);
 411	print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, err_cqe, sizeof(*err_cqe),
 412		       false);
 413
 414	wqe_index = be16_to_cpu(err_cqe->wqe_index) & ring->size_mask;
 415	tx_info = &ring->tx_info[wqe_index];
 416	desc_size = tx_info->nr_txbb << LOG_TXBB_SIZE;
 417	en_err(priv, "Related WQE - qpn 0x%x, wqe index 0x%x, wqe size 0x%x\n", ring->qpn,
 418	       wqe_index, desc_size);
 419	tx_desc = ring->buf + (wqe_index << LOG_TXBB_SIZE);
 420	print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, tx_desc, desc_size, false);
 421
 422	if (test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state))
 423		return;
 424
 425	en_err(priv, "Scheduling port restart\n");
 426	queue_work(mdev->workqueue, &priv->restart_task);
 427}
 428
 429int mlx4_en_process_tx_cq(struct net_device *dev,
 430			  struct mlx4_en_cq *cq, int napi_budget)
 431{
 432	struct mlx4_en_priv *priv = netdev_priv(dev);
 433	struct mlx4_cq *mcq = &cq->mcq;
 434	struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring];
 435	struct mlx4_cqe *cqe;
 436	u16 index, ring_index, stamp_index;
 437	u32 txbbs_skipped = 0;
 438	u32 txbbs_stamp = 0;
 439	u32 cons_index = mcq->cons_index;
 440	int size = cq->size;
 441	u32 size_mask = ring->size_mask;
 442	struct mlx4_cqe *buf = cq->buf;
 443	u32 packets = 0;
 444	u32 bytes = 0;
 445	int factor = priv->cqe_factor;
 446	int done = 0;
 447	int budget = priv->tx_work_limit;
 448	u32 last_nr_txbb;
 449	u32 ring_cons;
 450
 451	if (unlikely(!priv->port_up))
 452		return 0;
 453
 454	netdev_txq_bql_complete_prefetchw(ring->tx_queue);
 455
 456	index = cons_index & size_mask;
 457	cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
 458	last_nr_txbb = READ_ONCE(ring->last_nr_txbb);
 459	ring_cons = READ_ONCE(ring->cons);
 460	ring_index = ring_cons & size_mask;
 461	stamp_index = ring_index;
 462
 463	/* Process all completed CQEs */
 464	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
 465			cons_index & size) && (done < budget)) {
 466		u16 new_index;
 467
 468		/*
 469		 * make sure we read the CQE after we read the
 470		 * ownership bit
 471		 */
 472		dma_rmb();
 473
 474		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
 475			     MLX4_CQE_OPCODE_ERROR))
 476			if (!test_and_set_bit(MLX4_EN_TX_RING_STATE_RECOVERING, &ring->state))
 477				mlx4_en_handle_err_cqe(priv, (struct mlx4_err_cqe *)cqe, index,
 478						       ring);
 479
 480		/* Skip over last polled CQE */
 481		new_index = be16_to_cpu(cqe->wqe_index) & size_mask;
 482
 483		do {
 484			u64 timestamp = 0;
 485
 486			txbbs_skipped += last_nr_txbb;
 487			ring_index = (ring_index + last_nr_txbb) & size_mask;
 488
 489			if (unlikely(ring->tx_info[ring_index].ts_requested))
 490				timestamp = mlx4_en_get_cqe_ts(cqe);
 491
 492			/* free next descriptor */
 493			last_nr_txbb = INDIRECT_CALL_2(ring->free_tx_desc,
 494						       mlx4_en_free_tx_desc,
 495						       mlx4_en_recycle_tx_desc,
 496					priv, ring, ring_index,
 497					timestamp, napi_budget);
 498
 499			mlx4_en_stamp_wqe(priv, ring, stamp_index,
 500					  !!((ring_cons + txbbs_stamp) &
 501						ring->size));
 502			stamp_index = ring_index;
 503			txbbs_stamp = txbbs_skipped;
 504			packets++;
 505			bytes += ring->tx_info[ring_index].nr_bytes;
 506		} while ((++done < budget) && (ring_index != new_index));
 507
 508		++cons_index;
 509		index = cons_index & size_mask;
 510		cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
 511	}
 512
 513	/*
 514	 * To prevent CQ overflow we first update CQ consumer and only then
 515	 * the ring consumer.
 516	 */
 517	mcq->cons_index = cons_index;
 518	mlx4_cq_set_ci(mcq);
 519	wmb();
 520
 521	/* we want to dirty this cache line once */
 522	WRITE_ONCE(ring->last_nr_txbb, last_nr_txbb);
 523	WRITE_ONCE(ring->cons, ring_cons + txbbs_skipped);
 524
 525	if (cq->type == TX_XDP)
 526		return done;
 527
 528	netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
 529
 530	/* Wakeup Tx queue if this stopped, and ring is not full.
 531	 */
 532	if (netif_tx_queue_stopped(ring->tx_queue) &&
 533	    !mlx4_en_is_tx_ring_full(ring)) {
 534		netif_tx_wake_queue(ring->tx_queue);
 535		ring->wake_queue++;
 536	}
 537
 538	return done;
 539}
 540
 541void mlx4_en_tx_irq(struct mlx4_cq *mcq)
 542{
 543	struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
 544	struct mlx4_en_priv *priv = netdev_priv(cq->dev);
 545
 546	if (likely(priv->port_up))
 547		napi_schedule_irqoff(&cq->napi);
 548	else
 549		mlx4_en_arm_cq(priv, cq);
 550}
 551
 552/* TX CQ polling - called by NAPI */
 553int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
 554{
 555	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
 556	struct net_device *dev = cq->dev;
 557	struct mlx4_en_priv *priv = netdev_priv(dev);
 558	int work_done;
 559
 560	work_done = mlx4_en_process_tx_cq(dev, cq, budget);
 561	if (work_done >= budget)
 562		return budget;
 563
 564	if (napi_complete_done(napi, work_done))
 565		mlx4_en_arm_cq(priv, cq);
 566
 567	return 0;
 568}
 569
 570static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
 571						      struct mlx4_en_tx_ring *ring,
 572						      u32 index,
 573						      unsigned int desc_size)
 574{
 575	u32 copy = (ring->size - index) << LOG_TXBB_SIZE;
 576	int i;
 577
 578	for (i = desc_size - copy - 4; i >= 0; i -= 4) {
 579		if ((i & (TXBB_SIZE - 1)) == 0)
 580			wmb();
 581
 582		*((u32 *) (ring->buf + i)) =
 583			*((u32 *) (ring->bounce_buf + copy + i));
 584	}
 585
 586	for (i = copy - 4; i >= 4 ; i -= 4) {
 587		if ((i & (TXBB_SIZE - 1)) == 0)
 588			wmb();
 589
 590		*((u32 *)(ring->buf + (index << LOG_TXBB_SIZE) + i)) =
 591			*((u32 *) (ring->bounce_buf + i));
 592	}
 593
 594	/* Return real descriptor location */
 595	return ring->buf + (index << LOG_TXBB_SIZE);
 596}
 597
 598/* Decide if skb can be inlined in tx descriptor to avoid dma mapping
 599 *
 600 * It seems strange we do not simply use skb_copy_bits().
 601 * This would allow to inline all skbs iff skb->len <= inline_thold
 602 *
 603 * Note that caller already checked skb was not a gso packet
 604 */
 605static bool is_inline(int inline_thold, const struct sk_buff *skb,
 606		      const struct skb_shared_info *shinfo,
 607		      void **pfrag)
 608{
 609	void *ptr;
 610
 611	if (skb->len > inline_thold || !inline_thold)
 612		return false;
 613
 614	if (shinfo->nr_frags == 1) {
 615		ptr = skb_frag_address_safe(&shinfo->frags[0]);
 616		if (unlikely(!ptr))
 617			return false;
 618		*pfrag = ptr;
 619		return true;
 620	}
 621	if (shinfo->nr_frags)
 622		return false;
 623	return true;
 624}
 625
 626static int inline_size(const struct sk_buff *skb)
 627{
 628	if (skb->len + CTRL_SIZE + sizeof(struct mlx4_wqe_inline_seg)
 629	    <= MLX4_INLINE_ALIGN)
 630		return ALIGN(skb->len + CTRL_SIZE +
 631			     sizeof(struct mlx4_wqe_inline_seg), 16);
 632	else
 633		return ALIGN(skb->len + CTRL_SIZE + 2 *
 634			     sizeof(struct mlx4_wqe_inline_seg), 16);
 635}
 636
 637static int get_real_size(const struct sk_buff *skb,
 638			 const struct skb_shared_info *shinfo,
 639			 struct net_device *dev,
 640			 int *lso_header_size,
 641			 bool *inline_ok,
 642			 void **pfrag,
 643			 int *hopbyhop)
 644{
 645	struct mlx4_en_priv *priv = netdev_priv(dev);
 646	int real_size;
 647
 648	if (shinfo->gso_size) {
 649		*inline_ok = false;
 650		*hopbyhop = 0;
 651		if (skb->encapsulation) {
 652			*lso_header_size = skb_inner_tcp_all_headers(skb);
 653		} else {
 654			/* Detects large IPV6 TCP packets and prepares for removal of
 655			 * HBH header that has been pushed by ip6_xmit(),
 656			 * mainly so that tcpdump can dissect them.
 657			 */
 658			if (ipv6_has_hopopt_jumbo(skb))
 659				*hopbyhop = sizeof(struct hop_jumbo_hdr);
 660			*lso_header_size = skb_tcp_all_headers(skb);
 661		}
 662		real_size = CTRL_SIZE + shinfo->nr_frags * DS_SIZE +
 663			ALIGN(*lso_header_size - *hopbyhop + 4, DS_SIZE);
 664		if (unlikely(*lso_header_size != skb_headlen(skb))) {
 665			/* We add a segment for the skb linear buffer only if
 666			 * it contains data */
 667			if (*lso_header_size < skb_headlen(skb))
 668				real_size += DS_SIZE;
 669			else {
 670				if (netif_msg_tx_err(priv))
 671					en_warn(priv, "Non-linear headers\n");
 672				return 0;
 673			}
 674		}
 675	} else {
 676		*lso_header_size = 0;
 677		*inline_ok = is_inline(priv->prof->inline_thold, skb,
 678				       shinfo, pfrag);
 679
 680		if (*inline_ok)
 681			real_size = inline_size(skb);
 682		else
 683			real_size = CTRL_SIZE +
 684				    (shinfo->nr_frags + 1) * DS_SIZE;
 685	}
 686
 687	return real_size;
 688}
 689
 690static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
 691			     const struct sk_buff *skb,
 692			     const struct skb_shared_info *shinfo,
 693			     void *fragptr)
 694{
 695	struct mlx4_wqe_inline_seg *inl = &tx_desc->inl;
 696	int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof(*inl);
 697	unsigned int hlen = skb_headlen(skb);
 698
 699	if (skb->len <= spc) {
 700		if (likely(skb->len >= MIN_PKT_LEN)) {
 701			inl->byte_count = cpu_to_be32(1 << 31 | skb->len);
 702		} else {
 703			inl->byte_count = cpu_to_be32(1 << 31 | MIN_PKT_LEN);
 704			memset(inl->data + skb->len, 0,
 705			       MIN_PKT_LEN - skb->len);
 706		}
 707		skb_copy_from_linear_data(skb, inl->data, hlen);
 708		if (shinfo->nr_frags)
 709			memcpy(inl->data + hlen, fragptr,
 710			       skb_frag_size(&shinfo->frags[0]));
 711
 712	} else {
 713		inl->byte_count = cpu_to_be32(1 << 31 | spc);
 714		if (hlen <= spc) {
 715			skb_copy_from_linear_data(skb, inl->data, hlen);
 716			if (hlen < spc) {
 717				memcpy(inl->data + hlen,
 718				       fragptr, spc - hlen);
 719				fragptr +=  spc - hlen;
 720			}
 721			inl = (void *)inl->data + spc;
 722			memcpy(inl->data, fragptr, skb->len - spc);
 723		} else {
 724			skb_copy_from_linear_data(skb, inl->data, spc);
 725			inl = (void *)inl->data + spc;
 726			skb_copy_from_linear_data_offset(skb, spc, inl->data,
 727							 hlen - spc);
 728			if (shinfo->nr_frags)
 729				memcpy(inl->data + hlen - spc,
 730				       fragptr,
 731				       skb_frag_size(&shinfo->frags[0]));
 732		}
 733
 734		dma_wmb();
 735		inl->byte_count = cpu_to_be32(1 << 31 | (skb->len - spc));
 736	}
 737}
 738
 739u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
 740			 struct net_device *sb_dev)
 741{
 742	struct mlx4_en_priv *priv = netdev_priv(dev);
 743	u16 rings_p_up = priv->num_tx_rings_p_up;
 744
 745	if (netdev_get_num_tc(dev))
 746		return netdev_pick_tx(dev, skb, NULL);
 747
 748	return netdev_pick_tx(dev, skb, NULL) % rings_p_up;
 749}
 750
 751static void mlx4_bf_copy(void __iomem *dst, const void *src,
 752			 unsigned int bytecnt)
 753{
 754	__iowrite64_copy(dst, src, bytecnt / 8);
 755}
 756
 757void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring)
 758{
 759	wmb();
 760	/* Since there is no iowrite*_native() that writes the
 761	 * value as is, without byteswapping - using the one
 762	 * the doesn't do byteswapping in the relevant arch
 763	 * endianness.
 764	 */
 765#if defined(__LITTLE_ENDIAN)
 766	iowrite32(
 767#else
 768	iowrite32be(
 769#endif
 770		  (__force u32)ring->doorbell_qpn, ring->doorbell_address);
 771}
 772
 773static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring,
 774				  struct mlx4_en_tx_desc *tx_desc,
 775				  union mlx4_wqe_qpn_vlan qpn_vlan,
 776				  int desc_size, int bf_index,
 777				  __be32 op_own, bool bf_ok,
 778				  bool send_doorbell)
 779{
 780	tx_desc->ctrl.qpn_vlan = qpn_vlan;
 781
 782	if (bf_ok) {
 783		op_own |= htonl((bf_index & 0xffff) << 8);
 784		/* Ensure new descriptor hits memory
 785		 * before setting ownership of this descriptor to HW
 786		 */
 787		dma_wmb();
 788		tx_desc->ctrl.owner_opcode = op_own;
 789
 790		wmb();
 791
 792		mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl,
 793			     desc_size);
 794
 795		wmb();
 796
 797		ring->bf.offset ^= ring->bf.buf_size;
 798	} else {
 799		/* Ensure new descriptor hits memory
 800		 * before setting ownership of this descriptor to HW
 801		 */
 802		dma_wmb();
 803		tx_desc->ctrl.owner_opcode = op_own;
 804		if (send_doorbell)
 805			mlx4_en_xmit_doorbell(ring);
 806		else
 807			ring->xmit_more++;
 808	}
 809}
 810
 811static bool mlx4_en_build_dma_wqe(struct mlx4_en_priv *priv,
 812				  struct skb_shared_info *shinfo,
 813				  struct mlx4_wqe_data_seg *data,
 814				  struct sk_buff *skb,
 815				  int lso_header_size,
 816				  __be32 mr_key,
 817				  struct mlx4_en_tx_info *tx_info)
 818{
 819	struct device *ddev = priv->ddev;
 820	dma_addr_t dma = 0;
 821	u32 byte_count = 0;
 822	int i_frag;
 823
 824	/* Map fragments if any */
 825	for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) {
 826		const skb_frag_t *frag = &shinfo->frags[i_frag];
 827		byte_count = skb_frag_size(frag);
 828		dma = skb_frag_dma_map(ddev, frag,
 829				       0, byte_count,
 830				       DMA_TO_DEVICE);
 831		if (dma_mapping_error(ddev, dma))
 832			goto tx_drop_unmap;
 833
 834		data->addr = cpu_to_be64(dma);
 835		data->lkey = mr_key;
 836		dma_wmb();
 837		data->byte_count = cpu_to_be32(byte_count);
 838		--data;
 839	}
 840
 841	/* Map linear part if needed */
 842	if (tx_info->linear) {
 843		byte_count = skb_headlen(skb) - lso_header_size;
 844
 845		dma = dma_map_single(ddev, skb->data +
 846				     lso_header_size, byte_count,
 847				     DMA_TO_DEVICE);
 848		if (dma_mapping_error(ddev, dma))
 849			goto tx_drop_unmap;
 850
 851		data->addr = cpu_to_be64(dma);
 852		data->lkey = mr_key;
 853		dma_wmb();
 854		data->byte_count = cpu_to_be32(byte_count);
 855	}
 856	/* tx completion can avoid cache line miss for common cases */
 857	tx_info->map0_dma = dma;
 858	tx_info->map0_byte_count = byte_count;
 859
 860	return true;
 861
 862tx_drop_unmap:
 863	en_err(priv, "DMA mapping error\n");
 864
 865	while (++i_frag < shinfo->nr_frags) {
 866		++data;
 867		dma_unmap_page(ddev, (dma_addr_t)be64_to_cpu(data->addr),
 868			       be32_to_cpu(data->byte_count),
 869			       DMA_TO_DEVICE);
 870	}
 871
 872	return false;
 873}
 874
 875netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 876{
 877	struct skb_shared_info *shinfo = skb_shinfo(skb);
 878	struct mlx4_en_priv *priv = netdev_priv(dev);
 879	union mlx4_wqe_qpn_vlan	qpn_vlan = {};
 880	struct mlx4_en_tx_ring *ring;
 881	struct mlx4_en_tx_desc *tx_desc;
 882	struct mlx4_wqe_data_seg *data;
 883	struct mlx4_en_tx_info *tx_info;
 884	u32 __maybe_unused ring_cons;
 885	int tx_ind;
 886	int nr_txbb;
 887	int desc_size;
 888	int real_size;
 889	u32 index, bf_index;
 890	struct ipv6hdr *h6;
 891	__be32 op_own;
 892	int lso_header_size;
 893	void *fragptr = NULL;
 894	bool bounce = false;
 895	bool send_doorbell;
 896	bool stop_queue;
 897	bool inline_ok;
 898	u8 data_offset;
 899	int hopbyhop;
 900	bool bf_ok;
 901
 902	tx_ind = skb_get_queue_mapping(skb);
 903	ring = priv->tx_ring[TX][tx_ind];
 904
 905	if (unlikely(!priv->port_up))
 906		goto tx_drop;
 907
 908	real_size = get_real_size(skb, shinfo, dev, &lso_header_size,
 909				  &inline_ok, &fragptr, &hopbyhop);
 910	if (unlikely(!real_size))
 911		goto tx_drop_count;
 912
 913	/* Align descriptor to TXBB size */
 914	desc_size = ALIGN(real_size, TXBB_SIZE);
 915	nr_txbb = desc_size >> LOG_TXBB_SIZE;
 916
 917	bf_ok = ring->bf_enabled;
 918	if (skb_vlan_tag_present(skb)) {
 919		u16 vlan_proto;
 920
 921		qpn_vlan.vlan_tag = cpu_to_be16(skb_vlan_tag_get(skb));
 922		vlan_proto = be16_to_cpu(skb->vlan_proto);
 923		if (vlan_proto == ETH_P_8021AD)
 924			qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN;
 925		else if (vlan_proto == ETH_P_8021Q)
 926			qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN;
 927		else
 928			qpn_vlan.ins_vlan = 0;
 929		bf_ok = false;
 930	}
 931
 932	netdev_txq_bql_enqueue_prefetchw(ring->tx_queue);
 933
 934	/* Packet is good - grab an index and transmit it */
 935	index = ring->prod & ring->size_mask;
 936	bf_index = ring->prod;
 937
 938	/* See if we have enough space for whole descriptor TXBB for setting
 939	 * SW ownership on next descriptor; if not, use a bounce buffer. */
 940	if (likely(index + nr_txbb <= ring->size))
 941		tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
 942	else {
 943		if (unlikely(nr_txbb > MLX4_MAX_DESC_TXBBS)) {
 944			if (netif_msg_tx_err(priv))
 945				en_warn(priv, "Oversized header or SG list\n");
 946			goto tx_drop_count;
 947		}
 948		tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf;
 949		bounce = true;
 950		bf_ok = false;
 951	}
 952
 953	/* Save skb in tx_info ring */
 954	tx_info = &ring->tx_info[index];
 955	tx_info->skb = skb;
 956	tx_info->nr_txbb = nr_txbb;
 957
 958	if (!lso_header_size) {
 959		data = &tx_desc->data;
 960		data_offset = offsetof(struct mlx4_en_tx_desc, data);
 961	} else {
 962		int lso_align = ALIGN(lso_header_size - hopbyhop + 4, DS_SIZE);
 963
 964		data = (void *)&tx_desc->lso + lso_align;
 965		data_offset = offsetof(struct mlx4_en_tx_desc, lso) + lso_align;
 966	}
 967
 968	/* valid only for none inline segments */
 969	tx_info->data_offset = data_offset;
 970
 971	tx_info->inl = inline_ok;
 972
 973	tx_info->linear = lso_header_size < skb_headlen(skb) && !inline_ok;
 974
 975	tx_info->nr_maps = shinfo->nr_frags + tx_info->linear;
 976	data += tx_info->nr_maps - 1;
 977
 978	if (!tx_info->inl)
 979		if (!mlx4_en_build_dma_wqe(priv, shinfo, data, skb,
 980					   lso_header_size, ring->mr_key,
 981					   tx_info))
 982			goto tx_drop_count;
 983
 984	/*
 985	 * For timestamping add flag to skb_shinfo and
 986	 * set flag for further reference
 987	 */
 988	tx_info->ts_requested = 0;
 989	if (unlikely(ring->hwtstamp_tx_type == HWTSTAMP_TX_ON &&
 990		     shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
 991		shinfo->tx_flags |= SKBTX_IN_PROGRESS;
 992		tx_info->ts_requested = 1;
 993	}
 994
 995	/* Prepare ctrl segement apart opcode+ownership, which depends on
 996	 * whether LSO is used */
 997	tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
 998	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 999		if (!skb->encapsulation)
1000			tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
1001								 MLX4_WQE_CTRL_TCP_UDP_CSUM);
1002		else
1003			tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM);
1004		ring->tx_csum++;
1005	}
1006
1007	if (priv->flags & MLX4_EN_FLAG_ENABLE_HW_LOOPBACK) {
1008		struct ethhdr *ethh;
1009
1010		/* Copy dst mac address to wqe. This allows loopback in eSwitch,
1011		 * so that VFs and PF can communicate with each other
1012		 */
1013		ethh = (struct ethhdr *)skb->data;
1014		tx_desc->ctrl.srcrb_flags16[0] = get_unaligned((__be16 *)ethh->h_dest);
1015		tx_desc->ctrl.imm = get_unaligned((__be32 *)(ethh->h_dest + 2));
1016	}
1017
1018	/* Handle LSO (TSO) packets */
1019	if (lso_header_size) {
1020		int i;
1021
1022		/* Mark opcode as LSO */
1023		op_own = cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)) |
1024			((ring->prod & ring->size) ?
1025				cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
1026
1027		lso_header_size -= hopbyhop;
1028		/* Fill in the LSO prefix */
1029		tx_desc->lso.mss_hdr_size = cpu_to_be32(
1030			shinfo->gso_size << 16 | lso_header_size);
1031
1032
1033		if (unlikely(hopbyhop)) {
1034			/* remove the HBH header.
1035			 * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
1036			 */
1037			memcpy(tx_desc->lso.header, skb->data, ETH_HLEN + sizeof(*h6));
1038			h6 = (struct ipv6hdr *)((char *)tx_desc->lso.header + ETH_HLEN);
1039			h6->nexthdr = IPPROTO_TCP;
1040			/* Copy the TCP header after the IPv6 one */
1041			memcpy(h6 + 1,
1042			       skb->data + ETH_HLEN + sizeof(*h6) +
1043					sizeof(struct hop_jumbo_hdr),
1044			       tcp_hdrlen(skb));
1045			/* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
1046		} else {
1047			/* Copy headers;
1048			 * note that we already verified that it is linear
1049			 */
1050			memcpy(tx_desc->lso.header, skb->data, lso_header_size);
1051		}
1052		ring->tso_packets++;
1053
1054		i = shinfo->gso_segs;
1055		tx_info->nr_bytes = skb->len + (i - 1) * lso_header_size;
1056		ring->packets += i;
1057	} else {
1058		/* Normal (Non LSO) packet */
1059		op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
1060			((ring->prod & ring->size) ?
1061			 cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
1062		tx_info->nr_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
1063		ring->packets++;
1064	}
1065	ring->bytes += tx_info->nr_bytes;
1066
1067	if (tx_info->inl)
1068		build_inline_wqe(tx_desc, skb, shinfo, fragptr);
1069
1070	if (skb->encapsulation) {
1071		union {
1072			struct iphdr *v4;
1073			struct ipv6hdr *v6;
1074			unsigned char *hdr;
1075		} ip;
1076		u8 proto;
1077
1078		ip.hdr = skb_inner_network_header(skb);
1079		proto = (ip.v4->version == 4) ? ip.v4->protocol :
1080						ip.v6->nexthdr;
1081
1082		if (proto == IPPROTO_TCP || proto == IPPROTO_UDP)
1083			op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP | MLX4_WQE_CTRL_ILP);
1084		else
1085			op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP);
1086	}
1087
1088	WRITE_ONCE(ring->prod, ring->prod + nr_txbb);
1089
1090	/* If we used a bounce buffer then copy descriptor back into place */
1091	if (unlikely(bounce))
1092		tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size);
1093
1094	skb_tx_timestamp(skb);
1095
1096	/* Check available TXBBs And 2K spare for prefetch */
1097	stop_queue = mlx4_en_is_tx_ring_full(ring);
1098	if (unlikely(stop_queue)) {
1099		netif_tx_stop_queue(ring->tx_queue);
1100		ring->queue_stopped++;
1101	}
1102
1103	send_doorbell = __netdev_tx_sent_queue(ring->tx_queue,
1104					       tx_info->nr_bytes,
1105					       netdev_xmit_more());
1106
1107	real_size = (real_size / 16) & 0x3f;
1108
1109	bf_ok &= desc_size <= MAX_BF && send_doorbell;
1110
1111	if (bf_ok)
1112		qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size);
1113	else
1114		qpn_vlan.fence_size = real_size;
1115
1116	mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, desc_size, bf_index,
1117			      op_own, bf_ok, send_doorbell);
1118
1119	if (unlikely(stop_queue)) {
1120		/* If queue was emptied after the if (stop_queue) , and before
1121		 * the netif_tx_stop_queue() - need to wake the queue,
1122		 * or else it will remain stopped forever.
1123		 * Need a memory barrier to make sure ring->cons was not
1124		 * updated before queue was stopped.
1125		 */
1126		smp_rmb();
1127
1128		if (unlikely(!mlx4_en_is_tx_ring_full(ring))) {
1129			netif_tx_wake_queue(ring->tx_queue);
1130			ring->wake_queue++;
1131		}
1132	}
1133	return NETDEV_TX_OK;
1134
1135tx_drop_count:
1136	ring->tx_dropped++;
1137tx_drop:
1138	dev_kfree_skb_any(skb);
1139	return NETDEV_TX_OK;
1140}
1141
/* Number of TXBBs advanced per XDP transmit descriptor. */
#define MLX4_EN_XDP_TX_NRTXBB  1

/* Size of an XDP TX descriptor (CTRL_SIZE plus one DS_SIZE per TXBB) in
 * 16-byte units, limited to 6 bits. Written to ctrl.qpn_vlan.fence_size.
 */
#define MLX4_EN_XDP_TX_REAL_SZ \
	(((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) / 16) & 0x3f)
1145
1146void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
1147				    struct mlx4_en_tx_ring *ring)
1148{
1149	int i;
1150
1151	for (i = 0; i < ring->size; i++) {
1152		struct mlx4_en_tx_info *tx_info = &ring->tx_info[i];
1153		struct mlx4_en_tx_desc *tx_desc = ring->buf +
1154			(i << LOG_TXBB_SIZE);
1155
1156		tx_info->map0_byte_count = PAGE_SIZE;
1157		tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
1158		tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
1159		tx_info->ts_requested = 0;
1160		tx_info->nr_maps = 1;
1161		tx_info->linear = 1;
1162		tx_info->inl = 0;
1163
1164		tx_desc->data.lkey = ring->mr_key;
1165		tx_desc->ctrl.qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
1166		tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
1167	}
1168}
1169
1170netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
1171			       struct mlx4_en_rx_alloc *frame,
1172			       struct mlx4_en_priv *priv, unsigned int length,
1173			       int tx_ind, bool *doorbell_pending)
1174{
1175	struct mlx4_en_tx_desc *tx_desc;
1176	struct mlx4_en_tx_info *tx_info;
1177	struct mlx4_wqe_data_seg *data;
1178	struct mlx4_en_tx_ring *ring;
1179	dma_addr_t dma;
1180	__be32 op_own;
1181	int index;
1182
1183	if (unlikely(!priv->port_up))
1184		goto tx_drop;
1185
1186	ring = priv->tx_ring[TX_XDP][tx_ind];
1187
1188	if (unlikely(mlx4_en_is_tx_ring_full(ring)))
1189		goto tx_drop_count;
1190
1191	index = ring->prod & ring->size_mask;
1192	tx_info = &ring->tx_info[index];
1193
1194	tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
1195	data = &tx_desc->data;
1196
1197	dma = frame->dma;
1198
1199	tx_info->page = frame->page;
1200	frame->page = NULL;
1201	tx_info->map0_dma = dma;
1202	tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
1203
1204	dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
1205					 length, DMA_TO_DEVICE);
1206
1207	data->addr = cpu_to_be64(dma + frame->page_offset);
1208	dma_wmb();
1209	data->byte_count = cpu_to_be32(length);
1210
1211	/* tx completion can avoid cache line miss for common cases */
1212
1213	op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
1214		((ring->prod & ring->size) ?
1215		 cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
1216
1217	rx_ring->xdp_tx++;
1218
1219	WRITE_ONCE(ring->prod, ring->prod + MLX4_EN_XDP_TX_NRTXBB);
1220
1221	/* Ensure new descriptor hits memory
1222	 * before setting ownership of this descriptor to HW
1223	 */
1224	dma_wmb();
1225	tx_desc->ctrl.owner_opcode = op_own;
1226	ring->xmit_more++;
1227
1228	*doorbell_pending = true;
1229
1230	return NETDEV_TX_OK;
1231
1232tx_drop_count:
1233	rx_ring->xdp_tx_full++;
1234	*doorbell_pending = true;
1235tx_drop:
1236	return NETDEV_TX_BUSY;
1237}
v6.13.7
   1/*
   2 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 *
  32 */
  33
  34#include <asm/page.h>
  35#include <linux/mlx4/cq.h>
  36#include <linux/slab.h>
  37#include <linux/mlx4/qp.h>
  38#include <linux/skbuff.h>
  39#include <linux/if_vlan.h>
  40#include <linux/prefetch.h>
  41#include <linux/vmalloc.h>
  42#include <linux/tcp.h>
  43#include <linux/ip.h>
  44#include <linux/ipv6.h>
  45#include <linux/indirect_call_wrapper.h>
  46#include <net/ipv6.h>
  47
  48#include "mlx4_en.h"
  49
  50int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
  51			   struct mlx4_en_tx_ring **pring, u32 size,
  52			   u16 stride, int node, int queue_index)
  53{
  54	struct mlx4_en_dev *mdev = priv->mdev;
  55	struct mlx4_en_tx_ring *ring;
  56	int tmp;
  57	int err;
  58
  59	ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
  60	if (!ring) {
  61		en_err(priv, "Failed allocating TX ring\n");
  62		return -ENOMEM;
  63	}
  64
  65	ring->size = size;
  66	ring->size_mask = size - 1;
  67	ring->sp_stride = stride;
  68	ring->full_size = ring->size - HEADROOM - MLX4_MAX_DESC_TXBBS;
  69
  70	tmp = size * sizeof(struct mlx4_en_tx_info);
  71	ring->tx_info = kvmalloc_node(tmp, GFP_KERNEL, node);
  72	if (!ring->tx_info) {
  73		err = -ENOMEM;
  74		goto err_ring;
  75	}
  76
  77	en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
  78		 ring->tx_info, tmp);
  79
  80	ring->bounce_buf = kmalloc_node(MLX4_TX_BOUNCE_BUFFER_SIZE,
  81					GFP_KERNEL, node);
  82	if (!ring->bounce_buf) {
  83		ring->bounce_buf = kmalloc(MLX4_TX_BOUNCE_BUFFER_SIZE,
  84					   GFP_KERNEL);
  85		if (!ring->bounce_buf) {
  86			err = -ENOMEM;
  87			goto err_info;
  88		}
  89	}
  90	ring->buf_size = ALIGN(size * ring->sp_stride, MLX4_EN_PAGE_SIZE);
  91
  92	/* Allocate HW buffers on provided NUMA node */
  93	set_dev_node(&mdev->dev->persist->pdev->dev, node);
  94	err = mlx4_alloc_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
  95	set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
  96	if (err) {
  97		en_err(priv, "Failed allocating hwq resources\n");
  98		goto err_bounce;
  99	}
 100
 101	ring->buf = ring->sp_wqres.buf.direct.buf;
 102
 103	en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n",
 104	       ring, ring->buf, ring->size, ring->buf_size,
 105	       (unsigned long long) ring->sp_wqres.buf.direct.map);
 106
 107	err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn,
 108				    MLX4_RESERVE_ETH_BF_QP,
 109				    MLX4_RES_USAGE_DRIVER);
 110	if (err) {
 111		en_err(priv, "failed reserving qp for TX ring\n");
 112		goto err_hwq_res;
 113	}
 114
 115	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->sp_qp);
 116	if (err) {
 117		en_err(priv, "Failed allocating qp %d\n", ring->qpn);
 118		goto err_reserve;
 119	}
 120	ring->sp_qp.event = mlx4_en_sqp_event;
 121
 122	err = mlx4_bf_alloc(mdev->dev, &ring->bf, node);
 123	if (err) {
 124		en_dbg(DRV, priv, "working without blueflame (%d)\n", err);
 125		ring->bf.uar = &mdev->priv_uar;
 126		ring->bf.uar->map = mdev->uar_map;
 127		ring->bf_enabled = false;
 128		ring->bf_alloced = false;
 129		priv->pflags &= ~MLX4_EN_PRIV_FLAGS_BLUEFLAME;
 130	} else {
 131		ring->bf_alloced = true;
 132		ring->bf_enabled = !!(priv->pflags &
 133				      MLX4_EN_PRIV_FLAGS_BLUEFLAME);
 134	}
 135	ring->doorbell_address = ring->bf.uar->map + MLX4_SEND_DOORBELL;
 136
 137	ring->hwtstamp_tx_type = priv->hwtstamp_config.tx_type;
 138	ring->queue_index = queue_index;
 139
 140	if (queue_index < priv->num_tx_rings_p_up)
 141		cpumask_set_cpu(cpumask_local_spread(queue_index,
 142						     priv->mdev->dev->numa_node),
 143				&ring->sp_affinity_mask);
 144
 145	*pring = ring;
 146	return 0;
 147
 148err_reserve:
 149	mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
 150err_hwq_res:
 151	mlx4_free_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
 152err_bounce:
 153	kfree(ring->bounce_buf);
 154	ring->bounce_buf = NULL;
 155err_info:
 156	kvfree(ring->tx_info);
 157	ring->tx_info = NULL;
 158err_ring:
 159	kfree(ring);
 160	*pring = NULL;
 161	return err;
 162}
 163
 164void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
 165			     struct mlx4_en_tx_ring **pring)
 166{
 167	struct mlx4_en_dev *mdev = priv->mdev;
 168	struct mlx4_en_tx_ring *ring = *pring;
 169	en_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn);
 170
 171	if (ring->bf_alloced)
 172		mlx4_bf_free(mdev->dev, &ring->bf);
 173	mlx4_qp_remove(mdev->dev, &ring->sp_qp);
 174	mlx4_qp_free(mdev->dev, &ring->sp_qp);
 175	mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
 176	mlx4_free_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
 177	kfree(ring->bounce_buf);
 178	ring->bounce_buf = NULL;
 179	kvfree(ring->tx_info);
 180	ring->tx_info = NULL;
 181	kfree(ring);
 182	*pring = NULL;
 183}
 184
 185int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
 186			     struct mlx4_en_tx_ring *ring,
 187			     int cq, int user_prio)
 188{
 189	struct mlx4_en_dev *mdev = priv->mdev;
 190	int err;
 191
 192	ring->sp_cqn = cq;
 193	ring->prod = 0;
 194	ring->cons = 0xffffffff;
 195	ring->last_nr_txbb = 1;
 196	memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info));
 197	memset(ring->buf, 0, ring->buf_size);
 198	ring->free_tx_desc = mlx4_en_free_tx_desc;
 199
 200	ring->sp_qp_state = MLX4_QP_STATE_RST;
 201	ring->doorbell_qpn = cpu_to_be32(ring->sp_qp.qpn << 8);
 202	ring->mr_key = cpu_to_be32(mdev->mr.key);
 203
 204	mlx4_en_fill_qp_context(priv, ring->size, ring->sp_stride, 1, 0, ring->qpn,
 205				ring->sp_cqn, user_prio, &ring->sp_context);
 206	if (ring->bf_alloced)
 207		ring->sp_context.usr_page =
 208			cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
 209							 ring->bf.uar->index));
 210
 211	err = mlx4_qp_to_ready(mdev->dev, &ring->sp_wqres.mtt, &ring->sp_context,
 212			       &ring->sp_qp, &ring->sp_qp_state);
 213	if (!cpumask_empty(&ring->sp_affinity_mask))
 214		netif_set_xps_queue(priv->dev, &ring->sp_affinity_mask,
 215				    ring->queue_index);
 216
 217	return err;
 218}
 219
 220void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
 221				struct mlx4_en_tx_ring *ring)
 222{
 223	struct mlx4_en_dev *mdev = priv->mdev;
 224
 225	mlx4_qp_modify(mdev->dev, NULL, ring->sp_qp_state,
 226		       MLX4_QP_STATE_RST, NULL, 0, 0, &ring->sp_qp);
 227}
 228
 229static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring)
 230{
 231	u32 used = READ_ONCE(ring->prod) - READ_ONCE(ring->cons);
 232
 233	return used > ring->full_size;
 234}
 235
 236static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
 237			      struct mlx4_en_tx_ring *ring, int index,
 238			      u8 owner)
 239{
 240	__be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT));
 241	struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
 242	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
 243	void *end = ring->buf + ring->buf_size;
 244	__be32 *ptr = (__be32 *)tx_desc;
 245	int i;
 246
 247	/* Optimize the common case when there are no wraparounds */
 248	if (likely((void *)tx_desc +
 249		   (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) {
 250		/* Stamp the freed descriptor */
 251		for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE;
 252		     i += STAMP_STRIDE) {
 253			*ptr = stamp;
 254			ptr += STAMP_DWORDS;
 255		}
 256	} else {
 257		/* Stamp the freed descriptor */
 258		for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE;
 259		     i += STAMP_STRIDE) {
 260			*ptr = stamp;
 261			ptr += STAMP_DWORDS;
 262			if ((void *)ptr >= end) {
 263				ptr = ring->buf;
 264				stamp ^= cpu_to_be32(0x80000000);
 265			}
 266		}
 267	}
 268}
 269
 270INDIRECT_CALLABLE_DECLARE(u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 271						   struct mlx4_en_tx_ring *ring,
 272						   int index, u64 timestamp,
 273						   int napi_mode));
 274
 275u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 276			 struct mlx4_en_tx_ring *ring,
 277			 int index, u64 timestamp,
 278			 int napi_mode)
 279{
 280	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
 281	struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
 282	struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
 283	void *end = ring->buf + ring->buf_size;
 284	struct sk_buff *skb = tx_info->skb;
 285	int nr_maps = tx_info->nr_maps;
 286	int i;
 287
 288	/* We do not touch skb here, so prefetch skb->users location
 289	 * to speedup consume_skb()
 290	 */
 291	prefetchw(&skb->users);
 292
 293	if (unlikely(timestamp)) {
 294		struct skb_shared_hwtstamps hwts;
 295
 296		mlx4_en_fill_hwtstamps(priv->mdev, &hwts, timestamp);
 297		skb_tstamp_tx(skb, &hwts);
 298	}
 299
 300	if (!tx_info->inl) {
 301		if (tx_info->linear)
 302			dma_unmap_single(priv->ddev,
 303					 tx_info->map0_dma,
 304					 tx_info->map0_byte_count,
 305					 DMA_TO_DEVICE);
 306		else
 307			dma_unmap_page(priv->ddev,
 308				       tx_info->map0_dma,
 309				       tx_info->map0_byte_count,
 310				       DMA_TO_DEVICE);
 311		/* Optimize the common case when there are no wraparounds */
 312		if (likely((void *)tx_desc +
 313			   (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) {
 314			for (i = 1; i < nr_maps; i++) {
 315				data++;
 316				dma_unmap_page(priv->ddev,
 317					(dma_addr_t)be64_to_cpu(data->addr),
 318					be32_to_cpu(data->byte_count),
 319					DMA_TO_DEVICE);
 320			}
 321		} else {
 322			if ((void *)data >= end)
 323				data = ring->buf + ((void *)data - end);
 324
 325			for (i = 1; i < nr_maps; i++) {
 326				data++;
 327				/* Check for wraparound before unmapping */
 328				if ((void *) data >= end)
 329					data = ring->buf;
 330				dma_unmap_page(priv->ddev,
 331					(dma_addr_t)be64_to_cpu(data->addr),
 332					be32_to_cpu(data->byte_count),
 333					DMA_TO_DEVICE);
 334			}
 335		}
 336	}
 337	napi_consume_skb(skb, napi_mode);
 338
 339	return tx_info->nr_txbb;
 340}
 341
 342INDIRECT_CALLABLE_DECLARE(u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
 343						      struct mlx4_en_tx_ring *ring,
 344						      int index, u64 timestamp,
 345						      int napi_mode));
 346
 347u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
 348			    struct mlx4_en_tx_ring *ring,
 349			    int index, u64 timestamp,
 350			    int napi_mode)
 351{
 352	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
 353	struct mlx4_en_rx_alloc frame = {
 354		.page = tx_info->page,
 355		.dma = tx_info->map0_dma,
 356	};
 357
 358	if (!napi_mode || !mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
 359		dma_unmap_page(priv->ddev, tx_info->map0_dma,
 360			       PAGE_SIZE, priv->dma_dir);
 361		put_page(tx_info->page);
 362	}
 363
 364	return tx_info->nr_txbb;
 365}
 366
 367int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
 368{
 369	struct mlx4_en_priv *priv = netdev_priv(dev);
 370	int cnt = 0;
 371
 372	/* Skip last polled descriptor */
 373	ring->cons += ring->last_nr_txbb;
 374	en_dbg(DRV, priv, "Freeing Tx buf - cons:0x%x prod:0x%x\n",
 375		 ring->cons, ring->prod);
 376
 377	if ((u32) (ring->prod - ring->cons) > ring->size) {
 378		if (netif_msg_tx_err(priv))
 379			en_warn(priv, "Tx consumer passed producer!\n");
 380		return 0;
 381	}
 382
 383	while (ring->cons != ring->prod) {
 384		ring->last_nr_txbb = ring->free_tx_desc(priv, ring,
 385						ring->cons & ring->size_mask,
 386						0, 0 /* Non-NAPI caller */);
 387		ring->cons += ring->last_nr_txbb;
 388		cnt++;
 389	}
 390
 391	if (ring->tx_queue)
 392		netdev_tx_reset_queue(ring->tx_queue);
 393
 394	if (cnt)
 395		en_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt);
 396
 397	return cnt;
 398}
 399
 400static void mlx4_en_handle_err_cqe(struct mlx4_en_priv *priv, struct mlx4_err_cqe *err_cqe,
 401				   u16 cqe_index, struct mlx4_en_tx_ring *ring)
 402{
 403	struct mlx4_en_dev *mdev = priv->mdev;
 404	struct mlx4_en_tx_info *tx_info;
 405	struct mlx4_en_tx_desc *tx_desc;
 406	u16 wqe_index;
 407	int desc_size;
 408
 409	en_err(priv, "CQE error - cqn 0x%x, ci 0x%x, vendor syndrome: 0x%x syndrome: 0x%x\n",
 410	       ring->sp_cqn, cqe_index, err_cqe->vendor_err_syndrome, err_cqe->syndrome);
 411	print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, err_cqe, sizeof(*err_cqe),
 412		       false);
 413
 414	wqe_index = be16_to_cpu(err_cqe->wqe_index) & ring->size_mask;
 415	tx_info = &ring->tx_info[wqe_index];
 416	desc_size = tx_info->nr_txbb << LOG_TXBB_SIZE;
 417	en_err(priv, "Related WQE - qpn 0x%x, wqe index 0x%x, wqe size 0x%x\n", ring->qpn,
 418	       wqe_index, desc_size);
 419	tx_desc = ring->buf + (wqe_index << LOG_TXBB_SIZE);
 420	print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, tx_desc, desc_size, false);
 421
 422	if (test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state))
 423		return;
 424
 425	en_err(priv, "Scheduling port restart\n");
 426	queue_work(mdev->workqueue, &priv->restart_task);
 427}
 428
 429int mlx4_en_process_tx_cq(struct net_device *dev,
 430			  struct mlx4_en_cq *cq, int napi_budget)
 431{
 432	struct mlx4_en_priv *priv = netdev_priv(dev);
 433	struct mlx4_cq *mcq = &cq->mcq;
 434	struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring];
 435	struct mlx4_cqe *cqe;
 436	u16 index, ring_index, stamp_index;
 437	u32 txbbs_skipped = 0;
 438	u32 txbbs_stamp = 0;
 439	u32 cons_index = mcq->cons_index;
 440	int size = cq->size;
 441	u32 size_mask = ring->size_mask;
 442	struct mlx4_cqe *buf = cq->buf;
 443	u32 packets = 0;
 444	u32 bytes = 0;
 445	int factor = priv->cqe_factor;
 446	int done = 0;
 447	int budget = priv->tx_work_limit;
 448	u32 last_nr_txbb;
 449	u32 ring_cons;
 450
 451	if (unlikely(!priv->port_up))
 452		return 0;
 453
 454	netdev_txq_bql_complete_prefetchw(ring->tx_queue);
 455
 456	index = cons_index & size_mask;
 457	cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
 458	last_nr_txbb = READ_ONCE(ring->last_nr_txbb);
 459	ring_cons = READ_ONCE(ring->cons);
 460	ring_index = ring_cons & size_mask;
 461	stamp_index = ring_index;
 462
 463	/* Process all completed CQEs */
 464	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
 465			cons_index & size) && (done < budget)) {
 466		u16 new_index;
 467
 468		/*
 469		 * make sure we read the CQE after we read the
 470		 * ownership bit
 471		 */
 472		dma_rmb();
 473
 474		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
 475			     MLX4_CQE_OPCODE_ERROR))
 476			if (!test_and_set_bit(MLX4_EN_TX_RING_STATE_RECOVERING, &ring->state))
 477				mlx4_en_handle_err_cqe(priv, (struct mlx4_err_cqe *)cqe, index,
 478						       ring);
 479
 480		/* Skip over last polled CQE */
 481		new_index = be16_to_cpu(cqe->wqe_index) & size_mask;
 482
 483		do {
 484			u64 timestamp = 0;
 485
 486			txbbs_skipped += last_nr_txbb;
 487			ring_index = (ring_index + last_nr_txbb) & size_mask;
 488
 489			if (unlikely(ring->tx_info[ring_index].ts_requested))
 490				timestamp = mlx4_en_get_cqe_ts(cqe);
 491
 492			/* free next descriptor */
 493			last_nr_txbb = INDIRECT_CALL_2(ring->free_tx_desc,
 494						       mlx4_en_free_tx_desc,
 495						       mlx4_en_recycle_tx_desc,
 496					priv, ring, ring_index,
 497					timestamp, napi_budget);
 498
 499			mlx4_en_stamp_wqe(priv, ring, stamp_index,
 500					  !!((ring_cons + txbbs_stamp) &
 501						ring->size));
 502			stamp_index = ring_index;
 503			txbbs_stamp = txbbs_skipped;
 504			packets++;
 505			bytes += ring->tx_info[ring_index].nr_bytes;
 506		} while ((++done < budget) && (ring_index != new_index));
 507
 508		++cons_index;
 509		index = cons_index & size_mask;
 510		cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
 511	}
 512
 513	/*
 514	 * To prevent CQ overflow we first update CQ consumer and only then
 515	 * the ring consumer.
 516	 */
 517	mcq->cons_index = cons_index;
 518	mlx4_cq_set_ci(mcq);
 519	wmb();
 520
 521	/* we want to dirty this cache line once */
 522	WRITE_ONCE(ring->last_nr_txbb, last_nr_txbb);
 523	WRITE_ONCE(ring->cons, ring_cons + txbbs_skipped);
 524
 525	if (cq->type == TX_XDP)
 526		return done;
 527
 528	netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
 529
 530	/* Wakeup Tx queue if this stopped, and ring is not full.
 531	 */
 532	if (netif_tx_queue_stopped(ring->tx_queue) &&
 533	    !mlx4_en_is_tx_ring_full(ring)) {
 534		netif_tx_wake_queue(ring->tx_queue);
 535		ring->wake_queue++;
 536	}
 537
 538	return done;
 539}
 540
 541void mlx4_en_tx_irq(struct mlx4_cq *mcq)
 542{
 543	struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
 544	struct mlx4_en_priv *priv = netdev_priv(cq->dev);
 545
 546	if (likely(priv->port_up))
 547		napi_schedule_irqoff(&cq->napi);
 548	else
 549		mlx4_en_arm_cq(priv, cq);
 550}
 551
 552/* TX CQ polling - called by NAPI */
 553int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
 554{
 555	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
 556	struct net_device *dev = cq->dev;
 557	struct mlx4_en_priv *priv = netdev_priv(dev);
 558	int work_done;
 559
 560	work_done = mlx4_en_process_tx_cq(dev, cq, budget);
 561	if (work_done >= budget)
 562		return budget;
 563
 564	if (napi_complete_done(napi, work_done))
 565		mlx4_en_arm_cq(priv, cq);
 566
 567	return 0;
 568}
 569
 570static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
 571						      struct mlx4_en_tx_ring *ring,
 572						      u32 index,
 573						      unsigned int desc_size)
 574{
 575	u32 copy = (ring->size - index) << LOG_TXBB_SIZE;
 576	int i;
 577
 578	for (i = desc_size - copy - 4; i >= 0; i -= 4) {
 579		if ((i & (TXBB_SIZE - 1)) == 0)
 580			wmb();
 581
 582		*((u32 *) (ring->buf + i)) =
 583			*((u32 *) (ring->bounce_buf + copy + i));
 584	}
 585
 586	for (i = copy - 4; i >= 4 ; i -= 4) {
 587		if ((i & (TXBB_SIZE - 1)) == 0)
 588			wmb();
 589
 590		*((u32 *)(ring->buf + (index << LOG_TXBB_SIZE) + i)) =
 591			*((u32 *) (ring->bounce_buf + i));
 592	}
 593
 594	/* Return real descriptor location */
 595	return ring->buf + (index << LOG_TXBB_SIZE);
 596}
 597
 598/* Decide if skb can be inlined in tx descriptor to avoid dma mapping
 599 *
 600 * It seems strange we do not simply use skb_copy_bits().
 601 * This would allow to inline all skbs iff skb->len <= inline_thold
 602 *
 603 * Note that caller already checked skb was not a gso packet
 604 */
 605static bool is_inline(int inline_thold, const struct sk_buff *skb,
 606		      const struct skb_shared_info *shinfo,
 607		      void **pfrag)
 608{
 609	void *ptr;
 610
 611	if (skb->len > inline_thold || !inline_thold)
 612		return false;
 613
 614	if (shinfo->nr_frags == 1) {
 615		ptr = skb_frag_address_safe(&shinfo->frags[0]);
 616		if (unlikely(!ptr))
 617			return false;
 618		*pfrag = ptr;
 619		return true;
 620	}
 621	if (shinfo->nr_frags)
 622		return false;
 623	return true;
 624}
 625
 626static int inline_size(const struct sk_buff *skb)
 627{
 628	if (skb->len + CTRL_SIZE + sizeof(struct mlx4_wqe_inline_seg)
 629	    <= MLX4_INLINE_ALIGN)
 630		return ALIGN(skb->len + CTRL_SIZE +
 631			     sizeof(struct mlx4_wqe_inline_seg), 16);
 632	else
 633		return ALIGN(skb->len + CTRL_SIZE + 2 *
 634			     sizeof(struct mlx4_wqe_inline_seg), 16);
 635}
 636
 637static int get_real_size(const struct sk_buff *skb,
 638			 const struct skb_shared_info *shinfo,
 639			 struct net_device *dev,
 640			 int *lso_header_size,
 641			 bool *inline_ok,
 642			 void **pfrag,
 643			 int *hopbyhop)
 644{
 645	struct mlx4_en_priv *priv = netdev_priv(dev);
 646	int real_size;
 647
 648	if (shinfo->gso_size) {
 649		*inline_ok = false;
 650		*hopbyhop = 0;
 651		if (skb->encapsulation) {
 652			*lso_header_size = skb_inner_tcp_all_headers(skb);
 653		} else {
 654			/* Detects large IPV6 TCP packets and prepares for removal of
 655			 * HBH header that has been pushed by ip6_xmit(),
 656			 * mainly so that tcpdump can dissect them.
 657			 */
 658			if (ipv6_has_hopopt_jumbo(skb))
 659				*hopbyhop = sizeof(struct hop_jumbo_hdr);
 660			*lso_header_size = skb_tcp_all_headers(skb);
 661		}
 662		real_size = CTRL_SIZE + shinfo->nr_frags * DS_SIZE +
 663			ALIGN(*lso_header_size - *hopbyhop + 4, DS_SIZE);
 664		if (unlikely(*lso_header_size != skb_headlen(skb))) {
 665			/* We add a segment for the skb linear buffer only if
 666			 * it contains data */
 667			if (*lso_header_size < skb_headlen(skb))
 668				real_size += DS_SIZE;
 669			else {
 670				if (netif_msg_tx_err(priv))
 671					en_warn(priv, "Non-linear headers\n");
 672				return 0;
 673			}
 674		}
 675	} else {
 676		*lso_header_size = 0;
 677		*inline_ok = is_inline(priv->prof->inline_thold, skb,
 678				       shinfo, pfrag);
 679
 680		if (*inline_ok)
 681			real_size = inline_size(skb);
 682		else
 683			real_size = CTRL_SIZE +
 684				    (shinfo->nr_frags + 1) * DS_SIZE;
 685	}
 686
 687	return real_size;
 688}
 689
 690static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
 691			     const struct sk_buff *skb,
 692			     const struct skb_shared_info *shinfo,
 693			     void *fragptr)
 694{
 695	struct mlx4_wqe_inline_seg *inl = &tx_desc->inl;
 696	int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof(*inl);
 697	unsigned int hlen = skb_headlen(skb);
 698
 699	if (skb->len <= spc) {
 700		if (likely(skb->len >= MIN_PKT_LEN)) {
 701			inl->byte_count = cpu_to_be32(1 << 31 | skb->len);
 702		} else {
 703			inl->byte_count = cpu_to_be32(1 << 31 | MIN_PKT_LEN);
 704			memset(inl->data + skb->len, 0,
 705			       MIN_PKT_LEN - skb->len);
 706		}
 707		skb_copy_from_linear_data(skb, inl->data, hlen);
 708		if (shinfo->nr_frags)
 709			memcpy(inl->data + hlen, fragptr,
 710			       skb_frag_size(&shinfo->frags[0]));
 711
 712	} else {
 713		inl->byte_count = cpu_to_be32(1 << 31 | spc);
 714		if (hlen <= spc) {
 715			skb_copy_from_linear_data(skb, inl->data, hlen);
 716			if (hlen < spc) {
 717				memcpy(inl->data + hlen,
 718				       fragptr, spc - hlen);
 719				fragptr +=  spc - hlen;
 720			}
 721			inl = (void *)inl->data + spc;
 722			memcpy(inl->data, fragptr, skb->len - spc);
 723		} else {
 724			skb_copy_from_linear_data(skb, inl->data, spc);
 725			inl = (void *)inl->data + spc;
 726			skb_copy_from_linear_data_offset(skb, spc, inl->data,
 727							 hlen - spc);
 728			if (shinfo->nr_frags)
 729				memcpy(inl->data + hlen - spc,
 730				       fragptr,
 731				       skb_frag_size(&shinfo->frags[0]));
 732		}
 733
 734		dma_wmb();
 735		inl->byte_count = cpu_to_be32(1 << 31 | (skb->len - spc));
 736	}
 737}
 738
 739u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
 740			 struct net_device *sb_dev)
 741{
 742	struct mlx4_en_priv *priv = netdev_priv(dev);
 743	u16 rings_p_up = priv->num_tx_rings_p_up;
 744
 745	if (netdev_get_num_tc(dev))
 746		return netdev_pick_tx(dev, skb, NULL);
 747
 748	return netdev_pick_tx(dev, skb, NULL) % rings_p_up;
 749}
 750
 751static void mlx4_bf_copy(void __iomem *dst, const void *src,
 752			 unsigned int bytecnt)
 753{
 754	__iowrite64_copy(dst, src, bytecnt / 8);
 755}
 756
 757void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring)
 758{
 759	wmb();
 760	/* Since there is no iowrite*_native() that writes the
 761	 * value as is, without byteswapping - using the one
 762	 * the doesn't do byteswapping in the relevant arch
 763	 * endianness.
 764	 */
 765#if defined(__LITTLE_ENDIAN)
 766	iowrite32(
 767#else
 768	iowrite32be(
 769#endif
 770		  (__force u32)ring->doorbell_qpn, ring->doorbell_address);
 771}
 772
 773static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring,
 774				  struct mlx4_en_tx_desc *tx_desc,
 775				  union mlx4_wqe_qpn_vlan qpn_vlan,
 776				  int desc_size, int bf_index,
 777				  __be32 op_own, bool bf_ok,
 778				  bool send_doorbell)
 779{
 780	tx_desc->ctrl.qpn_vlan = qpn_vlan;
 781
 782	if (bf_ok) {
 783		op_own |= htonl((bf_index & 0xffff) << 8);
 784		/* Ensure new descriptor hits memory
 785		 * before setting ownership of this descriptor to HW
 786		 */
 787		dma_wmb();
 788		tx_desc->ctrl.owner_opcode = op_own;
 789
 790		wmb();
 791
 792		mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl,
 793			     desc_size);
 794
 795		wmb();
 796
 797		ring->bf.offset ^= ring->bf.buf_size;
 798	} else {
 799		/* Ensure new descriptor hits memory
 800		 * before setting ownership of this descriptor to HW
 801		 */
 802		dma_wmb();
 803		tx_desc->ctrl.owner_opcode = op_own;
 804		if (send_doorbell)
 805			mlx4_en_xmit_doorbell(ring);
 806		else
 807			ring->xmit_more++;
 808	}
 809}
 810
 811static bool mlx4_en_build_dma_wqe(struct mlx4_en_priv *priv,
 812				  struct skb_shared_info *shinfo,
 813				  struct mlx4_wqe_data_seg *data,
 814				  struct sk_buff *skb,
 815				  int lso_header_size,
 816				  __be32 mr_key,
 817				  struct mlx4_en_tx_info *tx_info)
 818{
 819	struct device *ddev = priv->ddev;
 820	dma_addr_t dma = 0;
 821	u32 byte_count = 0;
 822	int i_frag;
 823
 824	/* Map fragments if any */
 825	for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) {
 826		const skb_frag_t *frag = &shinfo->frags[i_frag];
 827		byte_count = skb_frag_size(frag);
 828		dma = skb_frag_dma_map(ddev, frag,
 829				       0, byte_count,
 830				       DMA_TO_DEVICE);
 831		if (dma_mapping_error(ddev, dma))
 832			goto tx_drop_unmap;
 833
 834		data->addr = cpu_to_be64(dma);
 835		data->lkey = mr_key;
 836		dma_wmb();
 837		data->byte_count = cpu_to_be32(byte_count);
 838		--data;
 839	}
 840
 841	/* Map linear part if needed */
 842	if (tx_info->linear) {
 843		byte_count = skb_headlen(skb) - lso_header_size;
 844
 845		dma = dma_map_single(ddev, skb->data +
 846				     lso_header_size, byte_count,
 847				     DMA_TO_DEVICE);
 848		if (dma_mapping_error(ddev, dma))
 849			goto tx_drop_unmap;
 850
 851		data->addr = cpu_to_be64(dma);
 852		data->lkey = mr_key;
 853		dma_wmb();
 854		data->byte_count = cpu_to_be32(byte_count);
 855	}
 856	/* tx completion can avoid cache line miss for common cases */
 857	tx_info->map0_dma = dma;
 858	tx_info->map0_byte_count = byte_count;
 859
 860	return true;
 861
 862tx_drop_unmap:
 863	en_err(priv, "DMA mapping error\n");
 864
 865	while (++i_frag < shinfo->nr_frags) {
 866		++data;
 867		dma_unmap_page(ddev, (dma_addr_t)be64_to_cpu(data->addr),
 868			       be32_to_cpu(data->byte_count),
 869			       DMA_TO_DEVICE);
 870	}
 871
 872	return false;
 873}
 874
 875netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 876{
 877	struct skb_shared_info *shinfo = skb_shinfo(skb);
 878	struct mlx4_en_priv *priv = netdev_priv(dev);
 879	union mlx4_wqe_qpn_vlan	qpn_vlan = {};
 880	struct mlx4_en_tx_ring *ring;
 881	struct mlx4_en_tx_desc *tx_desc;
 882	struct mlx4_wqe_data_seg *data;
 883	struct mlx4_en_tx_info *tx_info;
 884	u32 __maybe_unused ring_cons;
 885	int tx_ind;
 886	int nr_txbb;
 887	int desc_size;
 888	int real_size;
 889	u32 index, bf_index;
 890	struct ipv6hdr *h6;
 891	__be32 op_own;
 892	int lso_header_size;
 893	void *fragptr = NULL;
 894	bool bounce = false;
 895	bool send_doorbell;
 896	bool stop_queue;
 897	bool inline_ok;
 898	u8 data_offset;
 899	int hopbyhop;
 900	bool bf_ok;
 901
 902	tx_ind = skb_get_queue_mapping(skb);
 903	ring = priv->tx_ring[TX][tx_ind];
 904
 905	if (unlikely(!priv->port_up))
 906		goto tx_drop;
 907
 908	real_size = get_real_size(skb, shinfo, dev, &lso_header_size,
 909				  &inline_ok, &fragptr, &hopbyhop);
 910	if (unlikely(!real_size))
 911		goto tx_drop_count;
 912
 913	/* Align descriptor to TXBB size */
 914	desc_size = ALIGN(real_size, TXBB_SIZE);
 915	nr_txbb = desc_size >> LOG_TXBB_SIZE;
 916
 917	bf_ok = ring->bf_enabled;
 918	if (skb_vlan_tag_present(skb)) {
 919		u16 vlan_proto;
 920
 921		qpn_vlan.vlan_tag = cpu_to_be16(skb_vlan_tag_get(skb));
 922		vlan_proto = be16_to_cpu(skb->vlan_proto);
 923		if (vlan_proto == ETH_P_8021AD)
 924			qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN;
 925		else if (vlan_proto == ETH_P_8021Q)
 926			qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN;
 927		else
 928			qpn_vlan.ins_vlan = 0;
 929		bf_ok = false;
 930	}
 931
 932	netdev_txq_bql_enqueue_prefetchw(ring->tx_queue);
 933
 934	/* Packet is good - grab an index and transmit it */
 935	index = ring->prod & ring->size_mask;
 936	bf_index = ring->prod;
 937
 938	/* See if we have enough space for whole descriptor TXBB for setting
 939	 * SW ownership on next descriptor; if not, use a bounce buffer. */
 940	if (likely(index + nr_txbb <= ring->size))
 941		tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
 942	else {
 943		if (unlikely(nr_txbb > MLX4_MAX_DESC_TXBBS)) {
 944			if (netif_msg_tx_err(priv))
 945				en_warn(priv, "Oversized header or SG list\n");
 946			goto tx_drop_count;
 947		}
 948		tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf;
 949		bounce = true;
 950		bf_ok = false;
 951	}
 952
 953	/* Save skb in tx_info ring */
 954	tx_info = &ring->tx_info[index];
 955	tx_info->skb = skb;
 956	tx_info->nr_txbb = nr_txbb;
 957
 958	if (!lso_header_size) {
 959		data = &tx_desc->data;
 960		data_offset = offsetof(struct mlx4_en_tx_desc, data);
 961	} else {
 962		int lso_align = ALIGN(lso_header_size - hopbyhop + 4, DS_SIZE);
 963
 964		data = (void *)&tx_desc->lso + lso_align;
 965		data_offset = offsetof(struct mlx4_en_tx_desc, lso) + lso_align;
 966	}
 967
 968	/* valid only for none inline segments */
 969	tx_info->data_offset = data_offset;
 970
 971	tx_info->inl = inline_ok;
 972
 973	tx_info->linear = lso_header_size < skb_headlen(skb) && !inline_ok;
 974
 975	tx_info->nr_maps = shinfo->nr_frags + tx_info->linear;
 976	data += tx_info->nr_maps - 1;
 977
 978	if (!tx_info->inl)
 979		if (!mlx4_en_build_dma_wqe(priv, shinfo, data, skb,
 980					   lso_header_size, ring->mr_key,
 981					   tx_info))
 982			goto tx_drop_count;
 983
 984	/*
 985	 * For timestamping add flag to skb_shinfo and
 986	 * set flag for further reference
 987	 */
 988	tx_info->ts_requested = 0;
 989	if (unlikely(ring->hwtstamp_tx_type == HWTSTAMP_TX_ON &&
 990		     shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
 991		shinfo->tx_flags |= SKBTX_IN_PROGRESS;
 992		tx_info->ts_requested = 1;
 993	}
 994
 995	/* Prepare ctrl segment apart opcode+ownership, which depends on
 996	 * whether LSO is used */
 997	tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
 998	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 999		if (!skb->encapsulation)
1000			tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
1001								 MLX4_WQE_CTRL_TCP_UDP_CSUM);
1002		else
1003			tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM);
1004		ring->tx_csum++;
1005	}
1006
1007	if (priv->flags & MLX4_EN_FLAG_ENABLE_HW_LOOPBACK) {
1008		struct ethhdr *ethh;
1009
1010		/* Copy dst mac address to wqe. This allows loopback in eSwitch,
1011		 * so that VFs and PF can communicate with each other
1012		 */
1013		ethh = (struct ethhdr *)skb->data;
1014		tx_desc->ctrl.srcrb_flags16[0] = get_unaligned((__be16 *)ethh->h_dest);
1015		tx_desc->ctrl.imm = get_unaligned((__be32 *)(ethh->h_dest + 2));
1016	}
1017
1018	/* Handle LSO (TSO) packets */
1019	if (lso_header_size) {
1020		int i;
1021
1022		/* Mark opcode as LSO */
1023		op_own = cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)) |
1024			((ring->prod & ring->size) ?
1025				cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
1026
1027		lso_header_size -= hopbyhop;
1028		/* Fill in the LSO prefix */
1029		tx_desc->lso.mss_hdr_size = cpu_to_be32(
1030			shinfo->gso_size << 16 | lso_header_size);
1031
1032
1033		if (unlikely(hopbyhop)) {
1034			/* remove the HBH header.
1035			 * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
1036			 */
1037			memcpy(tx_desc->lso.header, skb->data, ETH_HLEN + sizeof(*h6));
1038			h6 = (struct ipv6hdr *)((char *)tx_desc->lso.header + ETH_HLEN);
1039			h6->nexthdr = IPPROTO_TCP;
1040			/* Copy the TCP header after the IPv6 one */
1041			memcpy(h6 + 1,
1042			       skb->data + ETH_HLEN + sizeof(*h6) +
1043					sizeof(struct hop_jumbo_hdr),
1044			       tcp_hdrlen(skb));
1045			/* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
1046		} else {
1047			/* Copy headers;
1048			 * note that we already verified that it is linear
1049			 */
1050			memcpy(tx_desc->lso.header, skb->data, lso_header_size);
1051		}
1052		ring->tso_packets++;
1053
1054		i = shinfo->gso_segs;
1055		tx_info->nr_bytes = skb->len + (i - 1) * lso_header_size;
1056		ring->packets += i;
1057	} else {
1058		/* Normal (Non LSO) packet */
1059		op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
1060			((ring->prod & ring->size) ?
1061			 cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
1062		tx_info->nr_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
1063		ring->packets++;
1064	}
1065	ring->bytes += tx_info->nr_bytes;
1066
1067	if (tx_info->inl)
1068		build_inline_wqe(tx_desc, skb, shinfo, fragptr);
1069
1070	if (skb->encapsulation) {
1071		union {
1072			struct iphdr *v4;
1073			struct ipv6hdr *v6;
1074			unsigned char *hdr;
1075		} ip;
1076		u8 proto;
1077
1078		ip.hdr = skb_inner_network_header(skb);
1079		proto = (ip.v4->version == 4) ? ip.v4->protocol :
1080						ip.v6->nexthdr;
1081
1082		if (proto == IPPROTO_TCP || proto == IPPROTO_UDP)
1083			op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP | MLX4_WQE_CTRL_ILP);
1084		else
1085			op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP);
1086	}
1087
1088	WRITE_ONCE(ring->prod, ring->prod + nr_txbb);
1089
1090	/* If we used a bounce buffer then copy descriptor back into place */
1091	if (unlikely(bounce))
1092		tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size);
1093
1094	skb_tx_timestamp(skb);
1095
1096	/* Check available TXBBs And 2K spare for prefetch */
1097	stop_queue = mlx4_en_is_tx_ring_full(ring);
1098	if (unlikely(stop_queue)) {
1099		netif_tx_stop_queue(ring->tx_queue);
1100		ring->queue_stopped++;
1101	}
1102
1103	send_doorbell = __netdev_tx_sent_queue(ring->tx_queue,
1104					       tx_info->nr_bytes,
1105					       netdev_xmit_more());
1106
1107	real_size = (real_size / 16) & 0x3f;
1108
1109	bf_ok &= desc_size <= MAX_BF && send_doorbell;
1110
1111	if (bf_ok)
1112		qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size);
1113	else
1114		qpn_vlan.fence_size = real_size;
1115
1116	mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, desc_size, bf_index,
1117			      op_own, bf_ok, send_doorbell);
1118
1119	if (unlikely(stop_queue)) {
1120		/* If queue was emptied after the if (stop_queue) , and before
1121		 * the netif_tx_stop_queue() - need to wake the queue,
1122		 * or else it will remain stopped forever.
1123		 * Need a memory barrier to make sure ring->cons was not
1124		 * updated before queue was stopped.
1125		 */
1126		smp_rmb();
1127
1128		if (unlikely(!mlx4_en_is_tx_ring_full(ring))) {
1129			netif_tx_wake_queue(ring->tx_queue);
1130			ring->wake_queue++;
1131		}
1132	}
1133	return NETDEV_TX_OK;
1134
1135tx_drop_count:
1136	ring->tx_dropped++;
1137tx_drop:
1138	dev_kfree_skb_any(skb);
1139	return NETDEV_TX_OK;
1140}
1141
1142#define MLX4_EN_XDP_TX_NRTXBB  1
1143#define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \
1144				 / 16) & 0x3f)
1145
1146void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
1147				    struct mlx4_en_tx_ring *ring)
1148{
1149	int i;
1150
1151	for (i = 0; i < ring->size; i++) {
1152		struct mlx4_en_tx_info *tx_info = &ring->tx_info[i];
1153		struct mlx4_en_tx_desc *tx_desc = ring->buf +
1154			(i << LOG_TXBB_SIZE);
1155
1156		tx_info->map0_byte_count = PAGE_SIZE;
1157		tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
1158		tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
1159		tx_info->ts_requested = 0;
1160		tx_info->nr_maps = 1;
1161		tx_info->linear = 1;
1162		tx_info->inl = 0;
1163
1164		tx_desc->data.lkey = ring->mr_key;
1165		tx_desc->ctrl.qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
1166		tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
1167	}
1168}
1169
1170netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
1171			       struct mlx4_en_rx_alloc *frame,
1172			       struct mlx4_en_priv *priv, unsigned int length,
1173			       int tx_ind, bool *doorbell_pending)
1174{
1175	struct mlx4_en_tx_desc *tx_desc;
1176	struct mlx4_en_tx_info *tx_info;
1177	struct mlx4_wqe_data_seg *data;
1178	struct mlx4_en_tx_ring *ring;
1179	dma_addr_t dma;
1180	__be32 op_own;
1181	int index;
1182
1183	if (unlikely(!priv->port_up))
1184		goto tx_drop;
1185
1186	ring = priv->tx_ring[TX_XDP][tx_ind];
1187
1188	if (unlikely(mlx4_en_is_tx_ring_full(ring)))
1189		goto tx_drop_count;
1190
1191	index = ring->prod & ring->size_mask;
1192	tx_info = &ring->tx_info[index];
1193
1194	tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
1195	data = &tx_desc->data;
1196
1197	dma = frame->dma;
1198
1199	tx_info->page = frame->page;
1200	frame->page = NULL;
1201	tx_info->map0_dma = dma;
1202	tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
1203
1204	dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
1205					 length, DMA_TO_DEVICE);
1206
1207	data->addr = cpu_to_be64(dma + frame->page_offset);
1208	dma_wmb();
1209	data->byte_count = cpu_to_be32(length);
1210
1211	/* tx completion can avoid cache line miss for common cases */
1212
1213	op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
1214		((ring->prod & ring->size) ?
1215		 cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
1216
1217	rx_ring->xdp_tx++;
1218
1219	WRITE_ONCE(ring->prod, ring->prod + MLX4_EN_XDP_TX_NRTXBB);
1220
1221	/* Ensure new descriptor hits memory
1222	 * before setting ownership of this descriptor to HW
1223	 */
1224	dma_wmb();
1225	tx_desc->ctrl.owner_opcode = op_own;
1226	ring->xmit_more++;
1227
1228	*doorbell_pending = true;
1229
1230	return NETDEV_TX_OK;
1231
1232tx_drop_count:
1233	rx_ring->xdp_tx_full++;
1234	*doorbell_pending = true;
1235tx_drop:
1236	return NETDEV_TX_BUSY;
1237}