   1// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
   2/* Copyright (C) 2015-2019 Netronome Systems, Inc. */
   3
   4/*
   5 * nfp_net_common.c
   6 * Netronome network device driver: Common functions between PF and VF
   7 * Authors: Jakub Kicinski <jakub.kicinski@netronome.com>
   8 *          Jason McMullan <jason.mcmullan@netronome.com>
   9 *          Rolf Neugebauer <rolf.neugebauer@netronome.com>
  10 *          Brad Petrus <brad.petrus@netronome.com>
  11 *          Chris Telfer <chris.telfer@netronome.com>
  12 */
  13
  14#include <linux/bitfield.h>
  15#include <linux/bpf.h>
  16#include <linux/module.h>
  17#include <linux/kernel.h>
  18#include <linux/init.h>
  19#include <linux/fs.h>
  20#include <linux/netdevice.h>
  21#include <linux/etherdevice.h>
  22#include <linux/interrupt.h>
  23#include <linux/ip.h>
  24#include <linux/ipv6.h>
  25#include <linux/mm.h>
  26#include <linux/overflow.h>
  27#include <linux/page_ref.h>
  28#include <linux/pci.h>
  29#include <linux/pci_regs.h>
  30#include <linux/ethtool.h>
  31#include <linux/log2.h>
  32#include <linux/if_vlan.h>
  33#include <linux/if_bridge.h>
  34#include <linux/random.h>
  35#include <linux/vmalloc.h>
  36#include <linux/ktime.h>
  37
  38#include <net/tls.h>
  39#include <net/vxlan.h>
  40#include <net/xdp_sock_drv.h>
  41#include <net/xfrm.h>
  42
  43#include "nfpcore/nfp_dev.h"
  44#include "nfpcore/nfp_nsp.h"
  45#include "ccm.h"
  46#include "nfp_app.h"
  47#include "nfp_net_ctrl.h"
  48#include "nfp_net.h"
  49#include "nfp_net_dp.h"
  50#include "nfp_net_sriov.h"
  51#include "nfp_net_xsk.h"
  52#include "nfp_port.h"
  53#include "crypto/crypto.h"
  54#include "crypto/fw.h"
  55
  56static int nfp_net_mc_unsync(struct net_device *netdev, const unsigned char *addr);
  57
  58/**
  59 * nfp_net_get_fw_version() - Read and parse the FW version
  60 * @fw_ver:	Output fw_version structure to read to
  61 * @ctrl_bar:	Mapped address of the control BAR
  62 */
  63void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
  64			    void __iomem *ctrl_bar)
  65{
  66	u32 reg;
  67
  68	reg = readl(ctrl_bar + NFP_NET_CFG_VERSION);
  69	put_unaligned_le32(reg, fw_ver);
  70}
  71
  72u32 nfp_qcp_queue_offset(const struct nfp_dev_info *dev_info, u16 queue)
  73{
  74	queue &= dev_info->qc_idx_mask;
  75	return dev_info->qc_addr_offset + NFP_QCP_QUEUE_ADDR_SZ * queue;
  76}
  77
  78/* Firmware reconfig
  79 *
  80 * Firmware reconfig may take a while so we have two versions of it -
  81 * synchronous and asynchronous (posted).  All synchronous callers are holding
  82 * RTNL so we don't have to worry about serializing them.
  83 */
  84static void nfp_net_reconfig_start(struct nfp_net *nn, u32 update)
  85{
  86	nn_writel(nn, NFP_NET_CFG_UPDATE, update);
  87	/* ensure update is written before pinging HW */
  88	nn_pci_flush(nn);
  89	nfp_qcp_wr_ptr_add(nn->qcp_cfg, 1);
  90	nn->reconfig_in_progress_update = update;
  91}
  92
  93/* Pass 0 as update to run posted reconfigs. */
  94static void nfp_net_reconfig_start_async(struct nfp_net *nn, u32 update)
  95{
  96	update |= nn->reconfig_posted;
  97	nn->reconfig_posted = 0;
  98
  99	nfp_net_reconfig_start(nn, update);
 100
 101	nn->reconfig_timer_active = true;
 102	mod_timer(&nn->reconfig_timer, jiffies + NFP_NET_POLL_TIMEOUT * HZ);
 103}
 104
 105static bool nfp_net_reconfig_check_done(struct nfp_net *nn, bool last_check)
 106{
 107	u32 reg;
 108
 109	reg = nn_readl(nn, NFP_NET_CFG_UPDATE);
 110	if (reg == 0)
 111		return true;
 112	if (reg & NFP_NET_CFG_UPDATE_ERR) {
 113		nn_err(nn, "Reconfig error (status: 0x%08x update: 0x%08x ctrl: 0x%08x)\n",
 114		       reg, nn->reconfig_in_progress_update,
 115		       nn_readl(nn, NFP_NET_CFG_CTRL));
 116		return true;
 117	} else if (last_check) {
 118		nn_err(nn, "Reconfig timeout (status: 0x%08x update: 0x%08x ctrl: 0x%08x)\n",
 119		       reg, nn->reconfig_in_progress_update,
 120		       nn_readl(nn, NFP_NET_CFG_CTRL));
 121		return true;
 122	}
 123
 124	return false;
 125}
 126
 127static bool __nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
 128{
 129	bool timed_out = false;
 130	int i;
 131
 132	/* Poll update field, waiting for NFP to ack the config.
  133	 * Do an opportunistic busy-wait loop first, afterwards sleep.
 134	 */
 135	for (i = 0; i < 50; i++) {
 136		if (nfp_net_reconfig_check_done(nn, false))
 137			return false;
 138		udelay(4);
 139	}
 140
 141	while (!nfp_net_reconfig_check_done(nn, timed_out)) {
 142		usleep_range(250, 500);
 143		timed_out = time_is_before_eq_jiffies(deadline);
 144	}
 145
 146	return timed_out;
 147}
 148
 149static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
 150{
 151	if (__nfp_net_reconfig_wait(nn, deadline))
 152		return -EIO;
 153
 154	if (nn_readl(nn, NFP_NET_CFG_UPDATE) & NFP_NET_CFG_UPDATE_ERR)
 155		return -EIO;
 156
 157	return 0;
 158}
 159
 160static void nfp_net_reconfig_timer(struct timer_list *t)
 161{
 162	struct nfp_net *nn = from_timer(nn, t, reconfig_timer);
 163
 164	spin_lock_bh(&nn->reconfig_lock);
 165
 166	nn->reconfig_timer_active = false;
 167
 168	/* If sync caller is present it will take over from us */
 169	if (nn->reconfig_sync_present)
 170		goto done;
 171
 172	/* Read reconfig status and report errors */
 173	nfp_net_reconfig_check_done(nn, true);
 174
 175	if (nn->reconfig_posted)
 176		nfp_net_reconfig_start_async(nn, 0);
 177done:
 178	spin_unlock_bh(&nn->reconfig_lock);
 179}
 180
 181/**
 182 * nfp_net_reconfig_post() - Post async reconfig request
 183 * @nn:      NFP Net device to reconfigure
 184 * @update:  The value for the update field in the BAR config
 185 *
 186 * Record FW reconfiguration request.  Reconfiguration will be kicked off
 187 * whenever reconfiguration machinery is idle.  Multiple requests can be
 188 * merged together!
 189 */
 190static void nfp_net_reconfig_post(struct nfp_net *nn, u32 update)
 191{
 192	spin_lock_bh(&nn->reconfig_lock);
 193
 194	/* Sync caller will kick off async reconf when it's done, just post */
 195	if (nn->reconfig_sync_present) {
 196		nn->reconfig_posted |= update;
 197		goto done;
 198	}
 199
 200	/* Opportunistically check if the previous command is done */
 201	if (!nn->reconfig_timer_active ||
 202	    nfp_net_reconfig_check_done(nn, false))
 203		nfp_net_reconfig_start_async(nn, update);
 204	else
 205		nn->reconfig_posted |= update;
 206done:
 207	spin_unlock_bh(&nn->reconfig_lock);
 208}
 209
 210static void nfp_net_reconfig_sync_enter(struct nfp_net *nn)
 211{
 212	bool cancelled_timer = false;
 213	u32 pre_posted_requests;
 214
 215	spin_lock_bh(&nn->reconfig_lock);
 216
 217	WARN_ON(nn->reconfig_sync_present);
 218	nn->reconfig_sync_present = true;
 219
 220	if (nn->reconfig_timer_active) {
 221		nn->reconfig_timer_active = false;
 222		cancelled_timer = true;
 223	}
 224	pre_posted_requests = nn->reconfig_posted;
 225	nn->reconfig_posted = 0;
 226
 227	spin_unlock_bh(&nn->reconfig_lock);
 228
 229	if (cancelled_timer) {
 230		del_timer_sync(&nn->reconfig_timer);
 231		nfp_net_reconfig_wait(nn, nn->reconfig_timer.expires);
 232	}
 233
 234	/* Run the posted reconfigs which were issued before we started */
 235	if (pre_posted_requests) {
 236		nfp_net_reconfig_start(nn, pre_posted_requests);
 237		nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT);
 238	}
 239}
 240
 241static void nfp_net_reconfig_wait_posted(struct nfp_net *nn)
 242{
 243	nfp_net_reconfig_sync_enter(nn);
 244
 245	spin_lock_bh(&nn->reconfig_lock);
 246	nn->reconfig_sync_present = false;
 247	spin_unlock_bh(&nn->reconfig_lock);
 248}
 249
 250/**
 251 * __nfp_net_reconfig() - Reconfigure the firmware
 252 * @nn:      NFP Net device to reconfigure
 253 * @update:  The value for the update field in the BAR config
 254 *
  255 * Write the update word to the BAR and ping the reconfig queue.  Then
 256 * poll until the firmware has acknowledged the update by zeroing the
 257 * update word.
 258 *
 259 * Return: Negative errno on error, 0 on success
 260 */
 261int __nfp_net_reconfig(struct nfp_net *nn, u32 update)
 262{
 263	int ret;
 264
 265	nfp_net_reconfig_sync_enter(nn);
 266
 267	nfp_net_reconfig_start(nn, update);
 268	ret = nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT);
 269
 270	spin_lock_bh(&nn->reconfig_lock);
 271
 272	if (nn->reconfig_posted)
 273		nfp_net_reconfig_start_async(nn, 0);
 274
 275	nn->reconfig_sync_present = false;
 276
 277	spin_unlock_bh(&nn->reconfig_lock);
 278
 279	return ret;
 280}
 281
 282int nfp_net_reconfig(struct nfp_net *nn, u32 update)
 283{
 284	int ret;
 285
 286	nn_ctrl_bar_lock(nn);
 287	ret = __nfp_net_reconfig(nn, update);
 288	nn_ctrl_bar_unlock(nn);
 289
 290	return ret;
 291}
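/* A minimal usage sketch (not part of the driver; the update mask is chosen
 * only for illustration): synchronous callers take the control BAR lock and
 * wait for the firmware to acknowledge the update,
 *
 *	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
 *	if (err)
 *		nn_err(nn, "reconfig failed: %d\n", err);
 *
 * while callers which cannot block post the request instead and let the
 * reconfig timer or a later synchronous caller kick it off:
 *
 *	nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_GEN);
 */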
 292
 293int nfp_net_mbox_lock(struct nfp_net *nn, unsigned int data_size)
 294{
 295	if (nn->tlv_caps.mbox_len < NFP_NET_CFG_MBOX_SIMPLE_VAL + data_size) {
 296		nn_err(nn, "mailbox too small for %u of data (%u)\n",
 297		       data_size, nn->tlv_caps.mbox_len);
 298		return -EIO;
 299	}
 300
 301	nn_ctrl_bar_lock(nn);
 302	return 0;
 303}
 304
 305/**
 306 * nfp_net_mbox_reconfig() - Reconfigure the firmware via the mailbox
 307 * @nn:        NFP Net device to reconfigure
 308 * @mbox_cmd:  The value for the mailbox command
 309 *
 310 * Helper function for mailbox updates
 311 *
 312 * Return: Negative errno on error, 0 on success
 313 */
 314int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd)
 315{
 316	u32 mbox = nn->tlv_caps.mbox_off;
 317	int ret;
 318
 319	nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd);
 320
 321	ret = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MBOX);
 322	if (ret) {
 323		nn_err(nn, "Mailbox update error\n");
 324		return ret;
 325	}
 326
 327	return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET);
 328}
 329
 330void nfp_net_mbox_reconfig_post(struct nfp_net *nn, u32 mbox_cmd)
 331{
 332	u32 mbox = nn->tlv_caps.mbox_off;
 333
 334	nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd);
 335
 336	nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_MBOX);
 337}
 338
 339int nfp_net_mbox_reconfig_wait_posted(struct nfp_net *nn)
 340{
 341	u32 mbox = nn->tlv_caps.mbox_off;
 342
 343	nfp_net_reconfig_wait_posted(nn);
 344
 345	return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET);
 346}
 347
 348int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd)
 349{
 350	int ret;
 351
 352	ret = nfp_net_mbox_reconfig(nn, mbox_cmd);
 353	nn_ctrl_bar_unlock(nn);
 354	return ret;
 355}
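/* A minimal mailbox usage sketch (command and payload are assumed, not part
 * of the driver): lock with the payload size so the TLV-advertised mailbox
 * length is checked, write the payload after NFP_NET_CFG_MBOX_SIMPLE_VAL,
 * then let nfp_net_mbox_reconfig_and_unlock() write the command, run the
 * reconfig and return the firmware's return code:
 *
 *	err = nfp_net_mbox_lock(nn, sizeof(u32));
 *	if (err)
 *		return err;
 *	nn_writel(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL,
 *		  some_payload);
 *	return nfp_net_mbox_reconfig_and_unlock(nn, some_mbox_cmd);
 *
 * nfp_net_mc_cfg() later in this file follows this pattern.
 */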
 356
 357/* Interrupt configuration and handling
 358 */
 359
 360/**
 361 * nfp_net_irqs_alloc() - allocates MSI-X irqs
 362 * @pdev:        PCI device structure
 363 * @irq_entries: Array to be initialized and used to hold the irq entries
 364 * @min_irqs:    Minimal acceptable number of interrupts
 365 * @wanted_irqs: Target number of interrupts to allocate
 366 *
 367 * Return: Number of irqs obtained or 0 on error.
 368 */
 369unsigned int
 370nfp_net_irqs_alloc(struct pci_dev *pdev, struct msix_entry *irq_entries,
 371		   unsigned int min_irqs, unsigned int wanted_irqs)
 372{
 373	unsigned int i;
 374	int got_irqs;
 375
 376	for (i = 0; i < wanted_irqs; i++)
 377		irq_entries[i].entry = i;
 378
 379	got_irqs = pci_enable_msix_range(pdev, irq_entries,
 380					 min_irqs, wanted_irqs);
 381	if (got_irqs < 0) {
 382		dev_err(&pdev->dev, "Failed to enable %d-%d MSI-X (err=%d)\n",
 383			min_irqs, wanted_irqs, got_irqs);
 384		return 0;
 385	}
 386
 387	if (got_irqs < wanted_irqs)
 388		dev_warn(&pdev->dev, "Unable to allocate %d IRQs got only %d\n",
 389			 wanted_irqs, got_irqs);
 390
 391	return got_irqs;
 392}
 393
 394/**
 395 * nfp_net_irqs_assign() - Assign interrupts allocated externally to netdev
 396 * @nn:		 NFP Network structure
 397 * @irq_entries: Table of allocated interrupts
 398 * @n:		 Size of @irq_entries (number of entries to grab)
 399 *
 400 * After interrupts are allocated with nfp_net_irqs_alloc() this function
 401 * should be called to assign them to a specific netdev (port).
 402 */
 403void
 404nfp_net_irqs_assign(struct nfp_net *nn, struct msix_entry *irq_entries,
 405		    unsigned int n)
 406{
 407	struct nfp_net_dp *dp = &nn->dp;
 408
 409	nn->max_r_vecs = n - NFP_NET_NON_Q_VECTORS;
 410	dp->num_r_vecs = nn->max_r_vecs;
 411
 412	memcpy(nn->irq_entries, irq_entries, sizeof(*irq_entries) * n);
 413
 414	if (dp->num_rx_rings > dp->num_r_vecs ||
 415	    dp->num_tx_rings > dp->num_r_vecs)
 416		dev_warn(nn->dp.dev, "More rings (%d,%d) than vectors (%d).\n",
 417			 dp->num_rx_rings, dp->num_tx_rings,
 418			 dp->num_r_vecs);
 419
 420	dp->num_rx_rings = min(dp->num_r_vecs, dp->num_rx_rings);
 421	dp->num_tx_rings = min(dp->num_r_vecs, dp->num_tx_rings);
 422	dp->num_stack_tx_rings = dp->num_tx_rings;
 423}
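/* Illustrative call sequence (array size and minimum are made up): vectors
 * are allocated once from the PCI layer and then handed to the vNIC, which
 * reserves NFP_NET_NON_Q_VECTORS of them for the LSC and EXN interrupts and
 * uses the remainder for ring vectors:
 *
 *	struct msix_entry entries[NFP_NET_NON_Q_VECTORS + 8];
 *	unsigned int n;
 *
 *	n = nfp_net_irqs_alloc(pdev, entries, NFP_NET_NON_Q_VECTORS + 1,
 *			       ARRAY_SIZE(entries));
 *	if (!n)
 *		return -ENODEV;
 *	nfp_net_irqs_assign(nn, entries, n);
 */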
 424
 425/**
 426 * nfp_net_irqs_disable() - Disable interrupts
 427 * @pdev:        PCI device structure
 428 *
 429 * Undoes what @nfp_net_irqs_alloc() does.
 430 */
 431void nfp_net_irqs_disable(struct pci_dev *pdev)
 432{
 433	pci_disable_msix(pdev);
 434}
 435
 436/**
 437 * nfp_net_irq_rxtx() - Interrupt service routine for RX/TX rings.
 438 * @irq:      Interrupt
 439 * @data:     Opaque data structure
 440 *
 441 * Return: Indicate if the interrupt has been handled.
 442 */
 443static irqreturn_t nfp_net_irq_rxtx(int irq, void *data)
 444{
 445	struct nfp_net_r_vector *r_vec = data;
 446
  447	/* Currently we cannot tell whether this is an RX or TX interrupt.
  448	 * Since DIM does not need an accurate event_ctr for its calculation,
  449	 * we simply use this one counter for both RX and TX DIM.
 450	 */
 451	r_vec->event_ctr++;
 452
 453	napi_schedule_irqoff(&r_vec->napi);
 454
 455	/* The FW auto-masks any interrupt, either via the MASK bit in
 456	 * the MSI-X table or via the per entry ICR field.  So there
 457	 * is no need to disable interrupts here.
 458	 */
 459	return IRQ_HANDLED;
 460}
 461
 462static irqreturn_t nfp_ctrl_irq_rxtx(int irq, void *data)
 463{
 464	struct nfp_net_r_vector *r_vec = data;
 465
 466	tasklet_schedule(&r_vec->tasklet);
 467
 468	return IRQ_HANDLED;
 469}
 470
 471/**
 472 * nfp_net_read_link_status() - Reread link status from control BAR
 473 * @nn:       NFP Network structure
 474 */
 475static void nfp_net_read_link_status(struct nfp_net *nn)
 476{
 477	unsigned long flags;
 478	bool link_up;
 479	u16 sts;
 480
 481	spin_lock_irqsave(&nn->link_status_lock, flags);
 482
 483	sts = nn_readw(nn, NFP_NET_CFG_STS);
 484	link_up = !!(sts & NFP_NET_CFG_STS_LINK);
 485
 486	if (nn->link_up == link_up)
 487		goto out;
 488
 489	nn->link_up = link_up;
 490	if (nn->port) {
 491		set_bit(NFP_PORT_CHANGED, &nn->port->flags);
 492		if (nn->port->link_cb)
 493			nn->port->link_cb(nn->port);
 494	}
 495
 496	if (nn->link_up) {
 497		netif_carrier_on(nn->dp.netdev);
 498		netdev_info(nn->dp.netdev, "NIC Link is Up\n");
 499	} else {
 500		netif_carrier_off(nn->dp.netdev);
 501		netdev_info(nn->dp.netdev, "NIC Link is Down\n");
 502	}
 503out:
 504	spin_unlock_irqrestore(&nn->link_status_lock, flags);
 505}
 506
 507/**
 508 * nfp_net_irq_lsc() - Interrupt service routine for link state changes
 509 * @irq:      Interrupt
 510 * @data:     Opaque data structure
 511 *
 512 * Return: Indicate if the interrupt has been handled.
 513 */
 514static irqreturn_t nfp_net_irq_lsc(int irq, void *data)
 515{
 516	struct nfp_net *nn = data;
 517	struct msix_entry *entry;
 518
 519	entry = &nn->irq_entries[NFP_NET_IRQ_LSC_IDX];
 520
 521	nfp_net_read_link_status(nn);
 522
 523	nfp_net_irq_unmask(nn, entry->entry);
 524
 525	return IRQ_HANDLED;
 526}
 527
 528/**
 529 * nfp_net_irq_exn() - Interrupt service routine for exceptions
 530 * @irq:      Interrupt
 531 * @data:     Opaque data structure
 532 *
 533 * Return: Indicate if the interrupt has been handled.
 534 */
 535static irqreturn_t nfp_net_irq_exn(int irq, void *data)
 536{
 537	struct nfp_net *nn = data;
 538
 539	nn_err(nn, "%s: UNIMPLEMENTED.\n", __func__);
 540	/* XXX TO BE IMPLEMENTED */
 541	return IRQ_HANDLED;
 542}
 543
 544/**
 545 * nfp_net_aux_irq_request() - Request an auxiliary interrupt (LSC or EXN)
 546 * @nn:		NFP Network structure
 547 * @ctrl_offset: Control BAR offset where IRQ configuration should be written
 548 * @format:	printf-style format to construct the interrupt name
 549 * @name:	Pointer to allocated space for interrupt name
 550 * @name_sz:	Size of space for interrupt name
 551 * @vector_idx:	Index of MSI-X vector used for this interrupt
 552 * @handler:	IRQ handler to register for this interrupt
 553 */
 554static int
 555nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset,
 556			const char *format, char *name, size_t name_sz,
 557			unsigned int vector_idx, irq_handler_t handler)
 558{
 559	struct msix_entry *entry;
 560	int err;
 561
 562	entry = &nn->irq_entries[vector_idx];
 563
 564	snprintf(name, name_sz, format, nfp_net_name(nn));
 565	err = request_irq(entry->vector, handler, 0, name, nn);
 566	if (err) {
 567		nn_err(nn, "Failed to request IRQ %d (err=%d).\n",
 568		       entry->vector, err);
 569		return err;
 570	}
 571	nn_writeb(nn, ctrl_offset, entry->entry);
 572	nfp_net_irq_unmask(nn, entry->entry);
 573
 574	return 0;
 575}
 576
 577/**
 578 * nfp_net_aux_irq_free() - Free an auxiliary interrupt (LSC or EXN)
 579 * @nn:		NFP Network structure
 580 * @ctrl_offset: Control BAR offset where IRQ configuration should be written
 581 * @vector_idx:	Index of MSI-X vector used for this interrupt
 582 */
 583static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset,
 584				 unsigned int vector_idx)
 585{
 586	nn_writeb(nn, ctrl_offset, 0xff);
 587	nn_pci_flush(nn);
 588	free_irq(nn->irq_entries[vector_idx].vector, nn);
 589}
 590
 591struct sk_buff *
 592nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
 593	       struct sk_buff *skb, u64 *tls_handle, int *nr_frags)
 594{
 595#ifdef CONFIG_TLS_DEVICE
 596	struct nfp_net_tls_offload_ctx *ntls;
 597	struct sk_buff *nskb;
 598	bool resync_pending;
 599	u32 datalen, seq;
 600
 601	if (likely(!dp->ktls_tx))
 602		return skb;
 603	if (!tls_is_skb_tx_device_offloaded(skb))
 604		return skb;
 605
 606	datalen = skb->len - skb_tcp_all_headers(skb);
 607	seq = ntohl(tcp_hdr(skb)->seq);
 608	ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
 609	resync_pending = tls_offload_tx_resync_pending(skb->sk);
 610	if (unlikely(resync_pending || ntls->next_seq != seq)) {
 611		/* Pure ACK out of order already */
 612		if (!datalen)
 613			return skb;
 614
 615		u64_stats_update_begin(&r_vec->tx_sync);
 616		r_vec->tls_tx_fallback++;
 617		u64_stats_update_end(&r_vec->tx_sync);
 618
 619		nskb = tls_encrypt_skb(skb);
 620		if (!nskb) {
 621			u64_stats_update_begin(&r_vec->tx_sync);
 622			r_vec->tls_tx_no_fallback++;
 623			u64_stats_update_end(&r_vec->tx_sync);
 624			return NULL;
 625		}
 626		/* encryption wasn't necessary */
 627		if (nskb == skb)
 628			return skb;
 629		/* we don't re-check ring space */
 630		if (unlikely(skb_is_nonlinear(nskb))) {
 631			nn_dp_warn(dp, "tls_encrypt_skb() produced fragmented frame\n");
 632			u64_stats_update_begin(&r_vec->tx_sync);
 633			r_vec->tx_errors++;
 634			u64_stats_update_end(&r_vec->tx_sync);
 635			dev_kfree_skb_any(nskb);
 636			return NULL;
 637		}
 638
 639		/* jump forward, a TX may have gotten lost, need to sync TX */
 640		if (!resync_pending && seq - ntls->next_seq < U32_MAX / 4)
 641			tls_offload_tx_resync_request(nskb->sk, seq,
 642						      ntls->next_seq);
 643
 644		*nr_frags = 0;
 645		return nskb;
 646	}
 647
 648	if (datalen) {
 649		u64_stats_update_begin(&r_vec->tx_sync);
 650		if (!skb_is_gso(skb))
 651			r_vec->hw_tls_tx++;
 652		else
 653			r_vec->hw_tls_tx += skb_shinfo(skb)->gso_segs;
 654		u64_stats_update_end(&r_vec->tx_sync);
 655	}
 656
 657	memcpy(tls_handle, ntls->fw_handle, sizeof(ntls->fw_handle));
 658	ntls->next_seq += datalen;
 659#endif
 660	return skb;
 661}
 662
 663void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle)
 664{
 665#ifdef CONFIG_TLS_DEVICE
 666	struct nfp_net_tls_offload_ctx *ntls;
 667	u32 datalen, seq;
 668
 669	if (!tls_handle)
 670		return;
 671	if (WARN_ON_ONCE(!tls_is_skb_tx_device_offloaded(skb)))
 672		return;
 673
 674	datalen = skb->len - skb_tcp_all_headers(skb);
 675	seq = ntohl(tcp_hdr(skb)->seq);
 676
 677	ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
 678	if (ntls->next_seq == seq + datalen)
 679		ntls->next_seq = seq;
 680	else
 681		WARN_ON_ONCE(1);
 682#endif
 683}
 684
 685static void nfp_net_tx_timeout(struct net_device *netdev, unsigned int txqueue)
 686{
 687	struct nfp_net *nn = netdev_priv(netdev);
 688
 689	nn_warn(nn, "TX watchdog timeout on ring: %u\n", txqueue);
 690}
 691
 692/* Receive processing */
 693static unsigned int
 694nfp_net_calc_fl_bufsz_data(struct nfp_net_dp *dp)
 695{
 696	unsigned int fl_bufsz = 0;
 697
 698	if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
 699		fl_bufsz += NFP_NET_MAX_PREPEND;
 700	else
 701		fl_bufsz += dp->rx_offset;
 702	fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + dp->mtu;
 703
 704	return fl_bufsz;
 705}
 706
 707static unsigned int nfp_net_calc_fl_bufsz(struct nfp_net_dp *dp)
 708{
 709	unsigned int fl_bufsz;
 710
 711	fl_bufsz = NFP_NET_RX_BUF_HEADROOM;
 712	fl_bufsz += dp->rx_dma_off;
 713	fl_bufsz += nfp_net_calc_fl_bufsz_data(dp);
 714
 715	fl_bufsz = SKB_DATA_ALIGN(fl_bufsz);
 716	fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 717
 718	return fl_bufsz;
 719}
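/* The resulting buffer layout, symbolically (the constants are defined in
 * the driver headers):
 *
 *	fl_bufsz = SKB_DATA_ALIGN(NFP_NET_RX_BUF_HEADROOM + rx_dma_off +
 *				  prepend + ETH_HLEN + 2 * VLAN_HLEN + mtu) +
 *		   SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
 *
 * where prepend is NFP_NET_MAX_PREPEND when the RX offset is dynamic and
 * dp->rx_offset otherwise.
 */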
 720
 721static unsigned int nfp_net_calc_fl_bufsz_xsk(struct nfp_net_dp *dp)
 722{
 723	unsigned int fl_bufsz;
 724
 725	fl_bufsz = XDP_PACKET_HEADROOM;
 726	fl_bufsz += nfp_net_calc_fl_bufsz_data(dp);
 727
 728	return fl_bufsz;
 729}
 730
 731/* Setup and Configuration
 732 */
 733
 734/**
 735 * nfp_net_vecs_init() - Assign IRQs and setup rvecs.
 736 * @nn:		NFP Network structure
 737 */
 738static void nfp_net_vecs_init(struct nfp_net *nn)
 739{
 740	int numa_node = dev_to_node(&nn->pdev->dev);
 741	struct nfp_net_r_vector *r_vec;
 742	unsigned int r;
 743
 744	nn->lsc_handler = nfp_net_irq_lsc;
 745	nn->exn_handler = nfp_net_irq_exn;
 746
 747	for (r = 0; r < nn->max_r_vecs; r++) {
 748		struct msix_entry *entry;
 749
 750		entry = &nn->irq_entries[NFP_NET_NON_Q_VECTORS + r];
 751
 752		r_vec = &nn->r_vecs[r];
 753		r_vec->nfp_net = nn;
 754		r_vec->irq_entry = entry->entry;
 755		r_vec->irq_vector = entry->vector;
 756
 757		if (nn->dp.netdev) {
 758			r_vec->handler = nfp_net_irq_rxtx;
 759		} else {
 760			r_vec->handler = nfp_ctrl_irq_rxtx;
 761
 762			__skb_queue_head_init(&r_vec->queue);
 763			spin_lock_init(&r_vec->lock);
 764			tasklet_setup(&r_vec->tasklet, nn->dp.ops->ctrl_poll);
 765			tasklet_disable(&r_vec->tasklet);
 766		}
 767
 768		cpumask_set_cpu(cpumask_local_spread(r, numa_node), &r_vec->affinity_mask);
 769	}
 770}
 771
 772static void
 773nfp_net_napi_add(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, int idx)
 774{
 775	if (dp->netdev)
 776		netif_napi_add(dp->netdev, &r_vec->napi,
 777			       nfp_net_has_xsk_pool_slow(dp, idx) ? dp->ops->xsk_poll : dp->ops->poll);
 778	else
 779		tasklet_enable(&r_vec->tasklet);
 780}
 781
 782static void
 783nfp_net_napi_del(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec)
 784{
 785	if (dp->netdev)
 786		netif_napi_del(&r_vec->napi);
 787	else
 788		tasklet_disable(&r_vec->tasklet);
 789}
 790
 791static void
 792nfp_net_vector_assign_rings(struct nfp_net_dp *dp,
 793			    struct nfp_net_r_vector *r_vec, int idx)
 794{
 795	r_vec->rx_ring = idx < dp->num_rx_rings ? &dp->rx_rings[idx] : NULL;
 796	r_vec->tx_ring =
 797		idx < dp->num_stack_tx_rings ? &dp->tx_rings[idx] : NULL;
 798
 799	r_vec->xdp_ring = idx < dp->num_tx_rings - dp->num_stack_tx_rings ?
 800		&dp->tx_rings[dp->num_stack_tx_rings + idx] : NULL;
 801
 802	if (nfp_net_has_xsk_pool_slow(dp, idx) || r_vec->xsk_pool) {
 803		r_vec->xsk_pool = dp->xdp_prog ? dp->xsk_pools[idx] : NULL;
 804
 805		if (r_vec->xsk_pool)
 806			xsk_pool_set_rxq_info(r_vec->xsk_pool,
 807					      &r_vec->rx_ring->xdp_rxq);
 808
 809		nfp_net_napi_del(dp, r_vec);
 810		nfp_net_napi_add(dp, r_vec, idx);
 811	}
 812}
 813
 814static int
 815nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
 816		       int idx)
 817{
 818	int err;
 819
 820	nfp_net_napi_add(&nn->dp, r_vec, idx);
 821
 822	snprintf(r_vec->name, sizeof(r_vec->name),
 823		 "%s-rxtx-%d", nfp_net_name(nn), idx);
 824	err = request_irq(r_vec->irq_vector, r_vec->handler, 0, r_vec->name,
 825			  r_vec);
 826	if (err) {
 827		nfp_net_napi_del(&nn->dp, r_vec);
 828		nn_err(nn, "Error requesting IRQ %d\n", r_vec->irq_vector);
 829		return err;
 830	}
 831	disable_irq(r_vec->irq_vector);
 832
 833	irq_set_affinity_hint(r_vec->irq_vector, &r_vec->affinity_mask);
 834
 835	nn_dbg(nn, "RV%02d: irq=%03d/%03d\n", idx, r_vec->irq_vector,
 836	       r_vec->irq_entry);
 837
 838	return 0;
 839}
 840
 841static void
 842nfp_net_cleanup_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec)
 843{
 844	irq_set_affinity_hint(r_vec->irq_vector, NULL);
 845	nfp_net_napi_del(&nn->dp, r_vec);
 846	free_irq(r_vec->irq_vector, r_vec);
 847}
 848
 849/**
 850 * nfp_net_rss_write_itbl() - Write RSS indirection table to device
 851 * @nn:      NFP Net device to reconfigure
 852 */
 853void nfp_net_rss_write_itbl(struct nfp_net *nn)
 854{
 855	int i;
 856
 857	for (i = 0; i < NFP_NET_CFG_RSS_ITBL_SZ; i += 4)
 858		nn_writel(nn, NFP_NET_CFG_RSS_ITBL + i,
 859			  get_unaligned_le32(nn->rss_itbl + i));
 860}
 861
 862/**
 863 * nfp_net_rss_write_key() - Write RSS hash key to device
 864 * @nn:      NFP Net device to reconfigure
 865 */
 866void nfp_net_rss_write_key(struct nfp_net *nn)
 867{
 868	int i;
 869
 870	for (i = 0; i < nfp_net_rss_key_sz(nn); i += 4)
 871		nn_writel(nn, NFP_NET_CFG_RSS_KEY + i,
 872			  get_unaligned_le32(nn->rss_key + i));
 873}
 874
 875/**
 876 * nfp_net_coalesce_write_cfg() - Write irq coalescence configuration to HW
 877 * @nn:      NFP Net device to reconfigure
 878 */
 879void nfp_net_coalesce_write_cfg(struct nfp_net *nn)
 880{
 881	u8 i;
 882	u32 factor;
 883	u32 value;
 884
 885	/* Compute factor used to convert coalesce '_usecs' parameters to
 886	 * ME timestamp ticks.  There are 16 ME clock cycles for each timestamp
 887	 * count.
 888	 */
 889	factor = nn->tlv_caps.me_freq_mhz / 16;
 890
 891	/* copy RX interrupt coalesce parameters */
 892	value = (nn->rx_coalesce_max_frames << 16) |
 893		(factor * nn->rx_coalesce_usecs);
 894	for (i = 0; i < nn->dp.num_rx_rings; i++)
 895		nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(i), value);
 896
 897	/* copy TX interrupt coalesce parameters */
 898	value = (nn->tx_coalesce_max_frames << 16) |
 899		(factor * nn->tx_coalesce_usecs);
 900	for (i = 0; i < nn->dp.num_tx_rings; i++)
 901		nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(i), value);
 902}
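/* Worked example (the ME frequency is assumed): with tlv_caps.me_freq_mhz
 * of 800 the factor is 800 / 16 = 50, so rx_coalesce_usecs = 50 becomes
 * 2500 ME timestamp ticks in the low 16 bits of the IRQ_MOD word, with
 * rx_coalesce_max_frames packed into the upper 16 bits.
 */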
 903
 904/**
 905 * nfp_net_write_mac_addr() - Write mac address to the device control BAR
 906 * @nn:      NFP Net device to reconfigure
 907 * @addr:    MAC address to write
 908 *
 909 * Writes the MAC address from the netdev to the device control BAR.  Does not
  910 * perform the required reconfig.  We do a bit of a byte-swapping dance
  911 * because the firmware is little-endian.
 912 */
 913static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *addr)
 914{
 915	nn_writel(nn, NFP_NET_CFG_MACADDR + 0, get_unaligned_be32(addr));
 916	nn_writew(nn, NFP_NET_CFG_MACADDR + 6, get_unaligned_be16(addr + 4));
 917}
 918
 919/**
 920 * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP
 921 * @nn:      NFP Net device to reconfigure
 922 *
 923 * Warning: must be fully idempotent.
 924 */
 925static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
 926{
 927	u32 new_ctrl, new_ctrl_w1, update;
 928	unsigned int r;
 929	int err;
 930
 931	new_ctrl = nn->dp.ctrl;
 932	new_ctrl &= ~NFP_NET_CFG_CTRL_ENABLE;
 933	update = NFP_NET_CFG_UPDATE_GEN;
 934	update |= NFP_NET_CFG_UPDATE_MSIX;
 935	update |= NFP_NET_CFG_UPDATE_RING;
 936
 937	if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
 938		new_ctrl &= ~NFP_NET_CFG_CTRL_RINGCFG;
 939
 940	if (!(nn->cap_w1 & NFP_NET_CFG_CTRL_FREELIST_EN)) {
 941		nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
 942		nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
 943	}
 944
 945	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
 946	err = nfp_net_reconfig(nn, update);
 947	if (err)
 948		nn_err(nn, "Could not disable device: %d\n", err);
 949
 950	if (nn->cap_w1 & NFP_NET_CFG_CTRL_FREELIST_EN) {
 951		new_ctrl_w1 = nn->dp.ctrl_w1;
 952		new_ctrl_w1 &= ~NFP_NET_CFG_CTRL_FREELIST_EN;
 953		nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
 954		nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
 955
 956		nn_writel(nn, NFP_NET_CFG_CTRL_WORD1, new_ctrl_w1);
 957		err = nfp_net_reconfig(nn, update);
 958		if (err)
 959			nn_err(nn, "Could not disable FREELIST_EN: %d\n", err);
 960		nn->dp.ctrl_w1 = new_ctrl_w1;
 961	}
 962
 963	for (r = 0; r < nn->dp.num_rx_rings; r++) {
 964		nfp_net_rx_ring_reset(&nn->dp.rx_rings[r]);
 965		if (nfp_net_has_xsk_pool_slow(&nn->dp, nn->dp.rx_rings[r].idx))
 966			nfp_net_xsk_rx_bufs_free(&nn->dp.rx_rings[r]);
 967	}
 968	for (r = 0; r < nn->dp.num_tx_rings; r++)
 969		nfp_net_tx_ring_reset(&nn->dp, &nn->dp.tx_rings[r]);
 970	for (r = 0; r < nn->dp.num_r_vecs; r++)
 971		nfp_net_vec_clear_ring_data(nn, r);
 972
 973	nn->dp.ctrl = new_ctrl;
 974}
 975
 976/**
 977 * nfp_net_set_config_and_enable() - Write control BAR and enable NFP
 978 * @nn:      NFP Net device to reconfigure
 979 */
 980static int nfp_net_set_config_and_enable(struct nfp_net *nn)
 981{
 982	u32 bufsz, new_ctrl, new_ctrl_w1, update = 0;
 983	unsigned int r;
 984	int err;
 985
 986	new_ctrl = nn->dp.ctrl;
 987	new_ctrl_w1 = nn->dp.ctrl_w1;
 988
 989	if (nn->dp.ctrl & NFP_NET_CFG_CTRL_RSS_ANY) {
 990		nfp_net_rss_write_key(nn);
 991		nfp_net_rss_write_itbl(nn);
 992		nn_writel(nn, NFP_NET_CFG_RSS_CTRL, nn->rss_cfg);
 993		update |= NFP_NET_CFG_UPDATE_RSS;
 994	}
 995
 996	if (nn->dp.ctrl & NFP_NET_CFG_CTRL_IRQMOD) {
 997		nfp_net_coalesce_write_cfg(nn);
 998		update |= NFP_NET_CFG_UPDATE_IRQMOD;
 999	}
1000
1001	for (r = 0; r < nn->dp.num_tx_rings; r++)
1002		nfp_net_tx_ring_hw_cfg_write(nn, &nn->dp.tx_rings[r], r);
1003	for (r = 0; r < nn->dp.num_rx_rings; r++)
1004		nfp_net_rx_ring_hw_cfg_write(nn, &nn->dp.rx_rings[r], r);
1005
1006	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE,
1007		  U64_MAX >> (64 - nn->dp.num_tx_rings));
1008
1009	nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE,
1010		  U64_MAX >> (64 - nn->dp.num_rx_rings));
1011
1012	if (nn->dp.netdev)
1013		nfp_net_write_mac_addr(nn, nn->dp.netdev->dev_addr);
1014
1015	nn_writel(nn, NFP_NET_CFG_MTU, nn->dp.mtu);
1016
1017	bufsz = nn->dp.fl_bufsz - nn->dp.rx_dma_off - NFP_NET_RX_BUF_NON_DATA;
1018	nn_writel(nn, NFP_NET_CFG_FLBUFSZ, bufsz);
1019
1020	/* Enable device
 1021	 * Step 1: Replace CTRL_ENABLE with NFP_NET_CFG_CTRL_FREELIST_EN if
 1022	 * FREELIST_EN exists.
1023	 */
1024	if (nn->cap_w1 & NFP_NET_CFG_CTRL_FREELIST_EN)
1025		new_ctrl_w1 |= NFP_NET_CFG_CTRL_FREELIST_EN;
1026	else
1027		new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
1028	update |= NFP_NET_CFG_UPDATE_GEN;
1029	update |= NFP_NET_CFG_UPDATE_MSIX;
1030	update |= NFP_NET_CFG_UPDATE_RING;
1031	if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
1032		new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;
1033
1034	/* Step 2: Send the configuration and write the freelist.
 1035	 * - The freelist only needs to be written once.
1036	 */
1037	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
1038	nn_writel(nn, NFP_NET_CFG_CTRL_WORD1, new_ctrl_w1);
1039	err = nfp_net_reconfig(nn, update);
1040	if (err) {
1041		nfp_net_clear_config_and_disable(nn);
1042		return err;
1043	}
1044
1045	nn->dp.ctrl = new_ctrl;
1046	nn->dp.ctrl_w1 = new_ctrl_w1;
1047
1048	for (r = 0; r < nn->dp.num_rx_rings; r++)
1049		nfp_net_rx_ring_fill_freelist(&nn->dp, &nn->dp.rx_rings[r]);
1050
 1051	/* Step 3: Set NFP_NET_CFG_CTRL_ENABLE and send the configuration.
1052	 */
1053	if (nn->cap_w1 & NFP_NET_CFG_CTRL_FREELIST_EN) {
1054		new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
1055		nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
1056
1057		err = nfp_net_reconfig(nn, update);
1058		if (err) {
1059			nfp_net_clear_config_and_disable(nn);
1060			return err;
1061		}
1062		nn->dp.ctrl = new_ctrl;
1063	}
1064
1065	return 0;
1066}
1067
1068/**
1069 * nfp_net_close_stack() - Quiesce the stack (part of close)
1070 * @nn:	     NFP Net device to reconfigure
1071 */
1072static void nfp_net_close_stack(struct nfp_net *nn)
1073{
1074	struct nfp_net_r_vector *r_vec;
1075	unsigned int r;
1076
1077	disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
1078	netif_carrier_off(nn->dp.netdev);
1079	nn->link_up = false;
1080
1081	for (r = 0; r < nn->dp.num_r_vecs; r++) {
1082		r_vec = &nn->r_vecs[r];
1083
1084		disable_irq(r_vec->irq_vector);
1085		napi_disable(&r_vec->napi);
1086
1087		if (r_vec->rx_ring)
1088			cancel_work_sync(&r_vec->rx_dim.work);
1089
1090		if (r_vec->tx_ring)
1091			cancel_work_sync(&r_vec->tx_dim.work);
1092	}
1093
1094	netif_tx_disable(nn->dp.netdev);
1095}
1096
1097/**
1098 * nfp_net_close_free_all() - Free all runtime resources
1099 * @nn:      NFP Net device to reconfigure
1100 */
1101static void nfp_net_close_free_all(struct nfp_net *nn)
1102{
1103	unsigned int r;
1104
1105	nfp_net_tx_rings_free(&nn->dp);
1106	nfp_net_rx_rings_free(&nn->dp);
1107
1108	for (r = 0; r < nn->dp.num_r_vecs; r++)
1109		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
1110
1111	nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
1112	nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
1113}
1114
1115/**
1116 * nfp_net_netdev_close() - Called when the device is downed
1117 * @netdev:      netdev structure
1118 */
1119static int nfp_net_netdev_close(struct net_device *netdev)
1120{
1121	struct nfp_net *nn = netdev_priv(netdev);
1122
1123	/* Step 1: Disable RX and TX rings from the Linux kernel perspective
1124	 */
1125	nfp_net_close_stack(nn);
1126
1127	/* Step 2: Tell NFP
1128	 */
1129	if (nn->cap_w1 & NFP_NET_CFG_CTRL_MCAST_FILTER)
1130		__dev_mc_unsync(netdev, nfp_net_mc_unsync);
1131
1132	nfp_net_clear_config_and_disable(nn);
1133	nfp_port_configure(netdev, false);
1134
1135	/* Step 3: Free resources
1136	 */
1137	nfp_net_close_free_all(nn);
1138
1139	nn_dbg(nn, "%s down", netdev->name);
1140	return 0;
1141}
1142
1143void nfp_ctrl_close(struct nfp_net *nn)
1144{
1145	int r;
1146
1147	rtnl_lock();
1148
1149	for (r = 0; r < nn->dp.num_r_vecs; r++) {
1150		disable_irq(nn->r_vecs[r].irq_vector);
1151		tasklet_disable(&nn->r_vecs[r].tasklet);
1152	}
1153
1154	nfp_net_clear_config_and_disable(nn);
1155
1156	nfp_net_close_free_all(nn);
1157
1158	rtnl_unlock();
1159}
1160
1161static void nfp_net_rx_dim_work(struct work_struct *work)
1162{
1163	struct nfp_net_r_vector *r_vec;
1164	unsigned int factor, value;
1165	struct dim_cq_moder moder;
1166	struct nfp_net *nn;
1167	struct dim *dim;
1168
1169	dim = container_of(work, struct dim, work);
1170	moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
1171	r_vec = container_of(dim, struct nfp_net_r_vector, rx_dim);
1172	nn = r_vec->nfp_net;
1173
1174	/* Compute factor used to convert coalesce '_usecs' parameters to
1175	 * ME timestamp ticks.  There are 16 ME clock cycles for each timestamp
1176	 * count.
1177	 */
1178	factor = nn->tlv_caps.me_freq_mhz / 16;
1179	if (nfp_net_coalesce_para_check(factor * moder.usec) ||
1180	    nfp_net_coalesce_para_check(moder.pkts))
1181		return;
1182
1183	/* copy RX interrupt coalesce parameters */
1184	value = (moder.pkts << 16) | (factor * moder.usec);
1185	nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(r_vec->rx_ring->idx), value);
1186	(void)nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD);
1187
1188	dim->state = DIM_START_MEASURE;
1189}
1190
1191static void nfp_net_tx_dim_work(struct work_struct *work)
1192{
1193	struct nfp_net_r_vector *r_vec;
1194	unsigned int factor, value;
1195	struct dim_cq_moder moder;
1196	struct nfp_net *nn;
1197	struct dim *dim;
1198
1199	dim = container_of(work, struct dim, work);
1200	moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix);
1201	r_vec = container_of(dim, struct nfp_net_r_vector, tx_dim);
1202	nn = r_vec->nfp_net;
1203
1204	/* Compute factor used to convert coalesce '_usecs' parameters to
1205	 * ME timestamp ticks.  There are 16 ME clock cycles for each timestamp
1206	 * count.
1207	 */
1208	factor = nn->tlv_caps.me_freq_mhz / 16;
1209	if (nfp_net_coalesce_para_check(factor * moder.usec) ||
1210	    nfp_net_coalesce_para_check(moder.pkts))
1211		return;
1212
1213	/* copy TX interrupt coalesce parameters */
1214	value = (moder.pkts << 16) | (factor * moder.usec);
1215	nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(r_vec->tx_ring->idx), value);
1216	(void)nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD);
1217
1218	dim->state = DIM_START_MEASURE;
1219}
1220
1221/**
1222 * nfp_net_open_stack() - Start the device from stack's perspective
1223 * @nn:      NFP Net device to reconfigure
1224 */
1225static void nfp_net_open_stack(struct nfp_net *nn)
1226{
1227	struct nfp_net_r_vector *r_vec;
1228	unsigned int r;
1229
1230	for (r = 0; r < nn->dp.num_r_vecs; r++) {
1231		r_vec = &nn->r_vecs[r];
1232
1233		if (r_vec->rx_ring) {
1234			INIT_WORK(&r_vec->rx_dim.work, nfp_net_rx_dim_work);
1235			r_vec->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
1236		}
1237
1238		if (r_vec->tx_ring) {
1239			INIT_WORK(&r_vec->tx_dim.work, nfp_net_tx_dim_work);
1240			r_vec->tx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
1241		}
1242
1243		napi_enable(&r_vec->napi);
1244		enable_irq(r_vec->irq_vector);
1245	}
1246
1247	netif_tx_wake_all_queues(nn->dp.netdev);
1248
1249	enable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
1250	nfp_net_read_link_status(nn);
1251}
1252
1253static int nfp_net_open_alloc_all(struct nfp_net *nn)
1254{
1255	int err, r;
1256
1257	err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_EXN, "%s-exn",
1258				      nn->exn_name, sizeof(nn->exn_name),
1259				      NFP_NET_IRQ_EXN_IDX, nn->exn_handler);
1260	if (err)
1261		return err;
1262	err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_LSC, "%s-lsc",
1263				      nn->lsc_name, sizeof(nn->lsc_name),
1264				      NFP_NET_IRQ_LSC_IDX, nn->lsc_handler);
1265	if (err)
1266		goto err_free_exn;
1267	disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
1268
1269	for (r = 0; r < nn->dp.num_r_vecs; r++) {
1270		err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
1271		if (err)
1272			goto err_cleanup_vec_p;
1273	}
1274
1275	err = nfp_net_rx_rings_prepare(nn, &nn->dp);
1276	if (err)
1277		goto err_cleanup_vec;
1278
1279	err = nfp_net_tx_rings_prepare(nn, &nn->dp);
1280	if (err)
1281		goto err_free_rx_rings;
1282
1283	for (r = 0; r < nn->max_r_vecs; r++)
1284		nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
1285
1286	return 0;
1287
1288err_free_rx_rings:
1289	nfp_net_rx_rings_free(&nn->dp);
1290err_cleanup_vec:
1291	r = nn->dp.num_r_vecs;
1292err_cleanup_vec_p:
1293	while (r--)
1294		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
1295	nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
1296err_free_exn:
1297	nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
1298	return err;
1299}
1300
1301static int nfp_net_netdev_open(struct net_device *netdev)
1302{
1303	struct nfp_net *nn = netdev_priv(netdev);
1304	int err;
1305
1306	/* Step 1: Allocate resources for rings and the like
1307	 * - Request interrupts
1308	 * - Allocate RX and TX ring resources
1309	 * - Setup initial RSS table
1310	 */
1311	err = nfp_net_open_alloc_all(nn);
1312	if (err)
1313		return err;
1314
1315	err = netif_set_real_num_tx_queues(netdev, nn->dp.num_stack_tx_rings);
1316	if (err)
1317		goto err_free_all;
1318
1319	err = netif_set_real_num_rx_queues(netdev, nn->dp.num_rx_rings);
1320	if (err)
1321		goto err_free_all;
1322
1323	/* Step 2: Configure the NFP
1324	 * - Ifup the physical interface if it exists
1325	 * - Enable rings from 0 to tx_rings/rx_rings - 1.
1326	 * - Write MAC address (in case it changed)
1327	 * - Set the MTU
1328	 * - Set the Freelist buffer size
1329	 * - Enable the FW
1330	 */
1331	err = nfp_port_configure(netdev, true);
1332	if (err)
1333		goto err_free_all;
1334
1335	err = nfp_net_set_config_and_enable(nn);
1336	if (err)
1337		goto err_port_disable;
1338
1339	/* Step 3: Enable for kernel
1340	 * - put some freelist descriptors on each RX ring
1341	 * - enable NAPI on each ring
1342	 * - enable all TX queues
1343	 * - set link state
1344	 */
1345	nfp_net_open_stack(nn);
1346
1347	return 0;
1348
1349err_port_disable:
1350	nfp_port_configure(netdev, false);
1351err_free_all:
1352	nfp_net_close_free_all(nn);
1353	return err;
1354}
1355
1356int nfp_ctrl_open(struct nfp_net *nn)
1357{
1358	int err, r;
1359
1360	/* ring dumping depends on vNICs being opened/closed under rtnl */
1361	rtnl_lock();
1362
1363	err = nfp_net_open_alloc_all(nn);
1364	if (err)
1365		goto err_unlock;
1366
1367	err = nfp_net_set_config_and_enable(nn);
1368	if (err)
1369		goto err_free_all;
1370
1371	for (r = 0; r < nn->dp.num_r_vecs; r++)
1372		enable_irq(nn->r_vecs[r].irq_vector);
1373
1374	rtnl_unlock();
1375
1376	return 0;
1377
1378err_free_all:
1379	nfp_net_close_free_all(nn);
1380err_unlock:
1381	rtnl_unlock();
1382	return err;
1383}
1384
1385int nfp_net_sched_mbox_amsg_work(struct nfp_net *nn, u32 cmd, const void *data, size_t len,
1386				 int (*cb)(struct nfp_net *, struct nfp_mbox_amsg_entry *))
1387{
1388	struct nfp_mbox_amsg_entry *entry;
1389
1390	entry = kmalloc(sizeof(*entry) + len, GFP_ATOMIC);
1391	if (!entry)
1392		return -ENOMEM;
1393
1394	memcpy(entry->msg, data, len);
1395	entry->cmd = cmd;
1396	entry->cfg = cb;
1397
1398	spin_lock_bh(&nn->mbox_amsg.lock);
1399	list_add_tail(&entry->list, &nn->mbox_amsg.list);
1400	spin_unlock_bh(&nn->mbox_amsg.lock);
1401
1402	schedule_work(&nn->mbox_amsg.work);
1403
1404	return 0;
1405}
1406
1407static void nfp_net_mbox_amsg_work(struct work_struct *work)
1408{
1409	struct nfp_net *nn = container_of(work, struct nfp_net, mbox_amsg.work);
1410	struct nfp_mbox_amsg_entry *entry, *tmp;
1411	struct list_head tmp_list;
1412
1413	INIT_LIST_HEAD(&tmp_list);
1414
1415	spin_lock_bh(&nn->mbox_amsg.lock);
1416	list_splice_init(&nn->mbox_amsg.list, &tmp_list);
1417	spin_unlock_bh(&nn->mbox_amsg.lock);
1418
1419	list_for_each_entry_safe(entry, tmp, &tmp_list, list) {
1420		int err = entry->cfg(nn, entry);
1421
1422		if (err)
1423			nn_err(nn, "Config cmd %d to HW failed %d.\n", entry->cmd, err);
1424
1425		list_del(&entry->list);
1426		kfree(entry);
1427	}
1428}
1429
1430static int nfp_net_mc_cfg(struct nfp_net *nn, struct nfp_mbox_amsg_entry *entry)
1431{
1432	unsigned char *addr = entry->msg;
1433	int ret;
1434
1435	ret = nfp_net_mbox_lock(nn, NFP_NET_CFG_MULTICAST_SZ);
1436	if (ret)
1437		return ret;
1438
1439	nn_writel(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_MULTICAST_MAC_HI,
1440		  get_unaligned_be32(addr));
1441	nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_MULTICAST_MAC_LO,
1442		  get_unaligned_be16(addr + 4));
1443
1444	return nfp_net_mbox_reconfig_and_unlock(nn, entry->cmd);
1445}
1446
1447static int nfp_net_mc_sync(struct net_device *netdev, const unsigned char *addr)
1448{
1449	struct nfp_net *nn = netdev_priv(netdev);
1450
1451	if (netdev_mc_count(netdev) > NFP_NET_CFG_MAC_MC_MAX) {
1452		nn_err(nn, "Requested number of MC addresses (%d) exceeds maximum (%d).\n",
1453		       netdev_mc_count(netdev), NFP_NET_CFG_MAC_MC_MAX);
1454		return -EINVAL;
1455	}
1456
1457	return nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD, addr,
1458					    NFP_NET_CFG_MULTICAST_SZ, nfp_net_mc_cfg);
1459}
1460
1461static int nfp_net_mc_unsync(struct net_device *netdev, const unsigned char *addr)
1462{
1463	struct nfp_net *nn = netdev_priv(netdev);
1464
1465	return nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL, addr,
1466					    NFP_NET_CFG_MULTICAST_SZ, nfp_net_mc_cfg);
1467}
1468
1469static void nfp_net_set_rx_mode(struct net_device *netdev)
1470{
1471	struct nfp_net *nn = netdev_priv(netdev);
1472	u32 new_ctrl, new_ctrl_w1;
1473
1474	new_ctrl = nn->dp.ctrl;
1475	new_ctrl_w1 = nn->dp.ctrl_w1;
1476
1477	if (!netdev_mc_empty(netdev) || netdev->flags & IFF_ALLMULTI)
1478		new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_L2MC;
1479	else
1480		new_ctrl &= ~NFP_NET_CFG_CTRL_L2MC;
1481
1482	if (netdev->flags & IFF_ALLMULTI)
1483		new_ctrl_w1 &= ~NFP_NET_CFG_CTRL_MCAST_FILTER;
1484	else
1485		new_ctrl_w1 |= nn->cap_w1 & NFP_NET_CFG_CTRL_MCAST_FILTER;
1486
1487	if (netdev->flags & IFF_PROMISC) {
1488		if (nn->cap & NFP_NET_CFG_CTRL_PROMISC)
1489			new_ctrl |= NFP_NET_CFG_CTRL_PROMISC;
1490		else
1491			nn_warn(nn, "FW does not support promiscuous mode\n");
1492	} else {
1493		new_ctrl &= ~NFP_NET_CFG_CTRL_PROMISC;
1494	}
1495
1496	if ((nn->cap_w1 & NFP_NET_CFG_CTRL_MCAST_FILTER) &&
1497	    __dev_mc_sync(netdev, nfp_net_mc_sync, nfp_net_mc_unsync))
1498		netdev_err(netdev, "Sync mc address failed\n");
1499
1500	if (new_ctrl == nn->dp.ctrl && new_ctrl_w1 == nn->dp.ctrl_w1)
1501		return;
1502
1503	if (new_ctrl != nn->dp.ctrl)
1504		nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
1505	if (new_ctrl_w1 != nn->dp.ctrl_w1)
1506		nn_writel(nn, NFP_NET_CFG_CTRL_WORD1, new_ctrl_w1);
1507	nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_GEN);
1508
1509	nn->dp.ctrl = new_ctrl;
1510	nn->dp.ctrl_w1 = new_ctrl_w1;
1511}
1512
1513static void nfp_net_rss_init_itbl(struct nfp_net *nn)
1514{
1515	int i;
1516
1517	for (i = 0; i < sizeof(nn->rss_itbl); i++)
1518		nn->rss_itbl[i] =
1519			ethtool_rxfh_indir_default(i, nn->dp.num_rx_rings);
1520}
1521
1522static void nfp_net_dp_swap(struct nfp_net *nn, struct nfp_net_dp *dp)
1523{
1524	struct nfp_net_dp new_dp = *dp;
1525
1526	*dp = nn->dp;
1527	nn->dp = new_dp;
1528
1529	nn->dp.netdev->mtu = new_dp.mtu;
1530
1531	if (!netif_is_rxfh_configured(nn->dp.netdev))
1532		nfp_net_rss_init_itbl(nn);
1533}
1534
1535static int nfp_net_dp_swap_enable(struct nfp_net *nn, struct nfp_net_dp *dp)
1536{
1537	unsigned int r;
1538	int err;
1539
1540	nfp_net_dp_swap(nn, dp);
1541
1542	for (r = 0; r <	nn->max_r_vecs; r++)
1543		nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
1544
1545	err = netif_set_real_num_queues(nn->dp.netdev,
1546					nn->dp.num_stack_tx_rings,
1547					nn->dp.num_rx_rings);
1548	if (err)
1549		return err;
1550
1551	return nfp_net_set_config_and_enable(nn);
1552}
1553
1554struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn)
1555{
1556	struct nfp_net_dp *new;
1557
1558	new = kmalloc(sizeof(*new), GFP_KERNEL);
1559	if (!new)
1560		return NULL;
1561
1562	*new = nn->dp;
1563
1564	new->xsk_pools = kmemdup(new->xsk_pools,
1565				 array_size(nn->max_r_vecs,
1566					    sizeof(new->xsk_pools)),
1567				 GFP_KERNEL);
1568	if (!new->xsk_pools) {
1569		kfree(new);
1570		return NULL;
1571	}
1572
1573	/* Clear things which need to be recomputed */
1574	new->fl_bufsz = 0;
1575	new->tx_rings = NULL;
1576	new->rx_rings = NULL;
1577	new->num_r_vecs = 0;
1578	new->num_stack_tx_rings = 0;
1579	new->txrwb = NULL;
1580	new->txrwb_dma = 0;
1581
1582	return new;
1583}
1584
1585static void nfp_net_free_dp(struct nfp_net_dp *dp)
1586{
1587	kfree(dp->xsk_pools);
1588	kfree(dp);
1589}
1590
1591static int
1592nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp,
1593		     struct netlink_ext_ack *extack)
1594{
1595	unsigned int r, xsk_min_fl_bufsz;
1596
1597	/* XDP-enabled tests */
1598	if (!dp->xdp_prog)
1599		return 0;
1600	if (dp->fl_bufsz > PAGE_SIZE) {
1601		NL_SET_ERR_MSG_MOD(extack, "MTU too large w/ XDP enabled");
1602		return -EINVAL;
1603	}
1604	if (dp->num_tx_rings > nn->max_tx_rings) {
1605		NL_SET_ERR_MSG_MOD(extack, "Insufficient number of TX rings w/ XDP enabled");
1606		return -EINVAL;
1607	}
1608
1609	xsk_min_fl_bufsz = nfp_net_calc_fl_bufsz_xsk(dp);
1610	for (r = 0; r < nn->max_r_vecs; r++) {
1611		if (!dp->xsk_pools[r])
1612			continue;
1613
1614		if (xsk_pool_get_rx_frame_size(dp->xsk_pools[r]) < xsk_min_fl_bufsz) {
1615			NL_SET_ERR_MSG_MOD(extack,
1616					   "XSK buffer pool chunk size too small");
1617			return -EINVAL;
1618		}
1619	}
1620
1621	return 0;
1622}
1623
1624int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *dp,
1625			  struct netlink_ext_ack *extack)
1626{
1627	int r, err;
1628
1629	dp->fl_bufsz = nfp_net_calc_fl_bufsz(dp);
1630
1631	dp->num_stack_tx_rings = dp->num_tx_rings;
1632	if (dp->xdp_prog)
1633		dp->num_stack_tx_rings -= dp->num_rx_rings;
1634
1635	dp->num_r_vecs = max(dp->num_rx_rings, dp->num_stack_tx_rings);
1636
1637	err = nfp_net_check_config(nn, dp, extack);
1638	if (err)
1639		goto exit_free_dp;
1640
1641	if (!netif_running(dp->netdev)) {
1642		nfp_net_dp_swap(nn, dp);
1643		err = 0;
1644		goto exit_free_dp;
1645	}
1646
1647	/* Prepare new rings */
1648	for (r = nn->dp.num_r_vecs; r < dp->num_r_vecs; r++) {
1649		err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
1650		if (err) {
1651			dp->num_r_vecs = r;
1652			goto err_cleanup_vecs;
1653		}
1654	}
1655
1656	err = nfp_net_rx_rings_prepare(nn, dp);
1657	if (err)
1658		goto err_cleanup_vecs;
1659
1660	err = nfp_net_tx_rings_prepare(nn, dp);
1661	if (err)
1662		goto err_free_rx;
1663
1664	/* Stop device, swap in new rings, try to start the firmware */
1665	nfp_net_close_stack(nn);
1666	nfp_net_clear_config_and_disable(nn);
1667
1668	err = nfp_net_dp_swap_enable(nn, dp);
1669	if (err) {
1670		int err2;
1671
1672		nfp_net_clear_config_and_disable(nn);
1673
1674		/* Try with old configuration and old rings */
1675		err2 = nfp_net_dp_swap_enable(nn, dp);
1676		if (err2)
1677			nn_err(nn, "Can't restore ring config - FW communication failed (%d,%d)\n",
1678			       err, err2);
1679	}
1680	for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--)
1681		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
1682
1683	nfp_net_rx_rings_free(dp);
1684	nfp_net_tx_rings_free(dp);
1685
1686	nfp_net_open_stack(nn);
1687exit_free_dp:
1688	nfp_net_free_dp(dp);
1689
1690	return err;
1691
1692err_free_rx:
1693	nfp_net_rx_rings_free(dp);
1694err_cleanup_vecs:
1695	for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--)
1696		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
1697	nfp_net_free_dp(dp);
1698	return err;
1699}
1700
1701static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
1702{
1703	struct nfp_net *nn = netdev_priv(netdev);
1704	struct nfp_net_dp *dp;
1705	int err;
1706
1707	err = nfp_app_check_mtu(nn->app, netdev, new_mtu);
1708	if (err)
1709		return err;
1710
1711	dp = nfp_net_clone_dp(nn);
1712	if (!dp)
1713		return -ENOMEM;
1714
1715	dp->mtu = new_mtu;
1716
1717	return nfp_net_ring_reconfig(nn, dp, NULL);
1718}
1719
1720static int
1721nfp_net_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1722{
1723	const u32 cmd = NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD;
1724	struct nfp_net *nn = netdev_priv(netdev);
1725	int err;
1726
1727	/* Priority tagged packets with vlan id 0 are processed by the
1728	 * NFP as untagged packets
1729	 */
1730	if (!vid)
1731		return 0;
1732
1733	err = nfp_net_mbox_lock(nn, NFP_NET_CFG_VLAN_FILTER_SZ);
1734	if (err)
1735		return err;
1736
1737	nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid);
1738	nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO,
1739		  ETH_P_8021Q);
1740
1741	return nfp_net_mbox_reconfig_and_unlock(nn, cmd);
1742}
1743
1744static int
1745nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
1746{
1747	const u32 cmd = NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL;
1748	struct nfp_net *nn = netdev_priv(netdev);
1749	int err;
1750
1751	/* Priority tagged packets with vlan id 0 are processed by the
1752	 * NFP as untagged packets
1753	 */
1754	if (!vid)
1755		return 0;
1756
1757	err = nfp_net_mbox_lock(nn, NFP_NET_CFG_VLAN_FILTER_SZ);
1758	if (err)
1759		return err;
1760
1761	nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid);
1762	nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO,
1763		  ETH_P_8021Q);
1764
1765	return nfp_net_mbox_reconfig_and_unlock(nn, cmd);
1766}
1767
1768static void
1769nfp_net_fs_fill_v4(struct nfp_net *nn, struct nfp_fs_entry *entry, u32 op, u32 *addr)
1770{
1771	unsigned int i;
1772
1773	union {
1774		struct {
1775			__be16 loc;
1776			u8 k_proto, m_proto;
1777			__be32 k_sip, m_sip, k_dip, m_dip;
1778			__be16 k_sport, m_sport, k_dport, m_dport;
1779		};
1780		__be32 val[7];
1781	} v4_rule;
1782
1783	nn_writel(nn, *addr, op);
1784	*addr += sizeof(u32);
1785
1786	v4_rule.loc     = cpu_to_be16(entry->loc);
1787	v4_rule.k_proto = entry->key.l4_proto;
1788	v4_rule.m_proto = entry->msk.l4_proto;
1789	v4_rule.k_sip   = entry->key.sip4;
1790	v4_rule.m_sip   = entry->msk.sip4;
1791	v4_rule.k_dip   = entry->key.dip4;
1792	v4_rule.m_dip   = entry->msk.dip4;
1793	v4_rule.k_sport = entry->key.sport;
1794	v4_rule.m_sport = entry->msk.sport;
1795	v4_rule.k_dport = entry->key.dport;
1796	v4_rule.m_dport = entry->msk.dport;
1797
1798	for (i = 0; i < ARRAY_SIZE(v4_rule.val); i++, *addr += sizeof(__be32))
1799		nn_writel(nn, *addr, be32_to_cpu(v4_rule.val[i]));
1800}
1801
1802static void
1803nfp_net_fs_fill_v6(struct nfp_net *nn, struct nfp_fs_entry *entry, u32 op, u32 *addr)
1804{
1805	unsigned int i;
1806
1807	union {
1808		struct {
1809			__be16 loc;
1810			u8 k_proto, m_proto;
1811			__be32 k_sip[4], m_sip[4], k_dip[4], m_dip[4];
1812			__be16 k_sport, m_sport, k_dport, m_dport;
1813		};
1814		__be32 val[19];
1815	} v6_rule;
1816
1817	nn_writel(nn, *addr, op);
1818	*addr += sizeof(u32);
1819
1820	v6_rule.loc     = cpu_to_be16(entry->loc);
1821	v6_rule.k_proto = entry->key.l4_proto;
1822	v6_rule.m_proto = entry->msk.l4_proto;
1823	for (i = 0; i < 4; i++) {
1824		v6_rule.k_sip[i] = entry->key.sip6[i];
1825		v6_rule.m_sip[i] = entry->msk.sip6[i];
1826		v6_rule.k_dip[i] = entry->key.dip6[i];
1827		v6_rule.m_dip[i] = entry->msk.dip6[i];
1828	}
1829	v6_rule.k_sport = entry->key.sport;
1830	v6_rule.m_sport = entry->msk.sport;
1831	v6_rule.k_dport = entry->key.dport;
1832	v6_rule.m_dport = entry->msk.dport;
1833
1834	for (i = 0; i < ARRAY_SIZE(v6_rule.val); i++, *addr += sizeof(__be32))
1835		nn_writel(nn, *addr, be32_to_cpu(v6_rule.val[i]));
1836}
1837
1838#define NFP_FS_QUEUE_ID	GENMASK(22, 16)
1839#define NFP_FS_ACT	GENMASK(15, 0)
1840#define NFP_FS_ACT_DROP	BIT(0)
1841#define NFP_FS_ACT_Q	BIT(1)
1842static void
1843nfp_net_fs_fill_act(struct nfp_net *nn, struct nfp_fs_entry *entry, u32 addr)
1844{
1845	u32 action = 0; /* 0 means default passthrough */
1846
1847	if (entry->action == RX_CLS_FLOW_DISC)
1848		action = NFP_FS_ACT_DROP;
1849	else if (!(entry->flow_type & FLOW_RSS))
1850		action = FIELD_PREP(NFP_FS_QUEUE_ID, entry->action) | NFP_FS_ACT_Q;
1851
1852	nn_writel(nn, addr, action);
1853}
1854
1855int nfp_net_fs_add_hw(struct nfp_net *nn, struct nfp_fs_entry *entry)
1856{
1857	u32 addr = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL;
1858	int err;
1859
1860	err = nfp_net_mbox_lock(nn, NFP_NET_CFG_FS_SZ);
1861	if (err)
1862		return err;
1863
1864	switch (entry->flow_type & ~FLOW_RSS) {
1865	case TCP_V4_FLOW:
1866	case UDP_V4_FLOW:
1867	case SCTP_V4_FLOW:
1868	case IPV4_USER_FLOW:
1869		nfp_net_fs_fill_v4(nn, entry, NFP_NET_CFG_MBOX_CMD_FS_ADD_V4, &addr);
1870		break;
1871	case TCP_V6_FLOW:
1872	case UDP_V6_FLOW:
1873	case SCTP_V6_FLOW:
1874	case IPV6_USER_FLOW:
1875		nfp_net_fs_fill_v6(nn, entry, NFP_NET_CFG_MBOX_CMD_FS_ADD_V6, &addr);
1876		break;
1877	case ETHER_FLOW:
1878		nn_writel(nn, addr, NFP_NET_CFG_MBOX_CMD_FS_ADD_ETHTYPE);
1879		addr += sizeof(u32);
1880		nn_writew(nn, addr, be16_to_cpu(entry->key.l3_proto));
1881		addr += sizeof(u32);
1882		break;
1883	}
1884
1885	nfp_net_fs_fill_act(nn, entry, addr);
1886
1887	err = nfp_net_mbox_reconfig_and_unlock(nn, NFP_NET_CFG_MBOX_CMD_FLOW_STEER);
1888	if (err) {
1889		nn_err(nn, "Add new fs rule failed with %d\n", err);
1890		return -EIO;
1891	}
1892
1893	return 0;
1894}
1895
1896int nfp_net_fs_del_hw(struct nfp_net *nn, struct nfp_fs_entry *entry)
1897{
1898	u32 addr = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL;
1899	int err;
1900
1901	err = nfp_net_mbox_lock(nn, NFP_NET_CFG_FS_SZ);
1902	if (err)
1903		return err;
1904
1905	switch (entry->flow_type & ~FLOW_RSS) {
1906	case TCP_V4_FLOW:
1907	case UDP_V4_FLOW:
1908	case SCTP_V4_FLOW:
1909	case IPV4_USER_FLOW:
1910		nfp_net_fs_fill_v4(nn, entry, NFP_NET_CFG_MBOX_CMD_FS_DEL_V4, &addr);
1911		break;
1912	case TCP_V6_FLOW:
1913	case UDP_V6_FLOW:
1914	case SCTP_V6_FLOW:
1915	case IPV6_USER_FLOW:
1916		nfp_net_fs_fill_v6(nn, entry, NFP_NET_CFG_MBOX_CMD_FS_DEL_V6, &addr);
1917		break;
1918	case ETHER_FLOW:
1919		nn_writel(nn, addr, NFP_NET_CFG_MBOX_CMD_FS_DEL_ETHTYPE);
1920		addr += sizeof(u32);
1921		nn_writew(nn, addr, be16_to_cpu(entry->key.l3_proto));
1922		addr += sizeof(u32);
1923		break;
1924	}
1925
1926	nfp_net_fs_fill_act(nn, entry, addr);
1927
1928	err = nfp_net_mbox_reconfig_and_unlock(nn, NFP_NET_CFG_MBOX_CMD_FLOW_STEER);
1929	if (err) {
1930		nn_err(nn, "Delete fs rule failed with %d\n", err);
1931		return -EIO;
1932	}
1933
1934	return 0;
1935}
1936
1937static void nfp_net_fs_clean(struct nfp_net *nn)
1938{
1939	struct nfp_fs_entry *entry, *tmp;
1940
1941	list_for_each_entry_safe(entry, tmp, &nn->fs.list, node) {
1942		nfp_net_fs_del_hw(nn, entry);
1943		list_del(&entry->node);
1944		kfree(entry);
1945	}
1946}
1947
1948static void nfp_net_stat64(struct net_device *netdev,
1949			   struct rtnl_link_stats64 *stats)
1950{
1951	struct nfp_net *nn = netdev_priv(netdev);
1952	int r;
1953
1954	/* Collect software stats */
1955	for (r = 0; r < nn->max_r_vecs; r++) {
1956		struct nfp_net_r_vector *r_vec = &nn->r_vecs[r];
1957		u64 data[3];
1958		unsigned int start;
1959
1960		do {
1961			start = u64_stats_fetch_begin(&r_vec->rx_sync);
1962			data[0] = r_vec->rx_pkts;
1963			data[1] = r_vec->rx_bytes;
1964			data[2] = r_vec->rx_drops;
1965		} while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
1966		stats->rx_packets += data[0];
1967		stats->rx_bytes += data[1];
1968		stats->rx_dropped += data[2];
1969
1970		do {
1971			start = u64_stats_fetch_begin(&r_vec->tx_sync);
1972			data[0] = r_vec->tx_pkts;
1973			data[1] = r_vec->tx_bytes;
1974			data[2] = r_vec->tx_errors;
1975		} while (u64_stats_fetch_retry(&r_vec->tx_sync, start));
1976		stats->tx_packets += data[0];
1977		stats->tx_bytes += data[1];
1978		stats->tx_errors += data[2];
1979	}
1980
1981	/* Add in device stats */
1982	stats->multicast += nn_readq(nn, NFP_NET_CFG_STATS_RX_MC_FRAMES);
1983	stats->rx_dropped += nn_readq(nn, NFP_NET_CFG_STATS_RX_DISCARDS);
1984	stats->rx_errors += nn_readq(nn, NFP_NET_CFG_STATS_RX_ERRORS);
1985
1986	stats->tx_dropped += nn_readq(nn, NFP_NET_CFG_STATS_TX_DISCARDS);
1987	stats->tx_errors += nn_readq(nn, NFP_NET_CFG_STATS_TX_ERRORS);
1988}
1989
1990static int nfp_net_set_features(struct net_device *netdev,
1991				netdev_features_t features)
1992{
1993	netdev_features_t changed = netdev->features ^ features;
1994	struct nfp_net *nn = netdev_priv(netdev);
1995	u32 new_ctrl;
1996	int err;
1997
1998	/* Assume this is not called with features we have not advertised */
1999
2000	new_ctrl = nn->dp.ctrl;
2001
2002	if (changed & NETIF_F_RXCSUM) {
2003		if (features & NETIF_F_RXCSUM)
2004			new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY;
2005		else
2006			new_ctrl &= ~NFP_NET_CFG_CTRL_RXCSUM_ANY;
2007	}
2008
2009	if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
2010		if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))
2011			new_ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
2012		else
2013			new_ctrl &= ~NFP_NET_CFG_CTRL_TXCSUM;
2014	}
2015
2016	if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) {
2017		if (features & (NETIF_F_TSO | NETIF_F_TSO6))
2018			new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_LSO2 ?:
2019					      NFP_NET_CFG_CTRL_LSO;
2020		else
2021			new_ctrl &= ~NFP_NET_CFG_CTRL_LSO_ANY;
2022	}
2023
2024	if (changed & NETIF_F_HW_VLAN_CTAG_RX) {
2025		if (features & NETIF_F_HW_VLAN_CTAG_RX)
2026			new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXVLAN_V2 ?:
2027				    NFP_NET_CFG_CTRL_RXVLAN;
2028		else
2029			new_ctrl &= ~NFP_NET_CFG_CTRL_RXVLAN_ANY;
2030	}
2031
2032	if (changed & NETIF_F_HW_VLAN_CTAG_TX) {
2033		if (features & NETIF_F_HW_VLAN_CTAG_TX)
2034			new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_TXVLAN_V2 ?:
2035				    NFP_NET_CFG_CTRL_TXVLAN;
2036		else
2037			new_ctrl &= ~NFP_NET_CFG_CTRL_TXVLAN_ANY;
2038	}
2039
2040	if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) {
2041		if (features & NETIF_F_HW_VLAN_CTAG_FILTER)
2042			new_ctrl |= NFP_NET_CFG_CTRL_CTAG_FILTER;
2043		else
2044			new_ctrl &= ~NFP_NET_CFG_CTRL_CTAG_FILTER;
2045	}
2046
2047	if (changed & NETIF_F_HW_VLAN_STAG_RX) {
2048		if (features & NETIF_F_HW_VLAN_STAG_RX)
2049			new_ctrl |= NFP_NET_CFG_CTRL_RXQINQ;
2050		else
2051			new_ctrl &= ~NFP_NET_CFG_CTRL_RXQINQ;
2052	}
2053
2054	if (changed & NETIF_F_SG) {
2055		if (features & NETIF_F_SG)
2056			new_ctrl |= NFP_NET_CFG_CTRL_GATHER;
2057		else
2058			new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
2059	}
2060
2061	err = nfp_port_set_features(netdev, features);
2062	if (err)
2063		return err;
2064
2065	nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
2066	       netdev->features, features, changed);
2067
2068	if (new_ctrl == nn->dp.ctrl)
2069		return 0;
2070
2071	nn_dbg(nn, "NIC ctrl: 0x%x -> 0x%x\n", nn->dp.ctrl, new_ctrl);
2072	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
2073	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
2074	if (err)
2075		return err;
2076
2077	nn->dp.ctrl = new_ctrl;
2078
2079	return 0;
2080}
2081
2082static netdev_features_t
2083nfp_net_fix_features(struct net_device *netdev,
2084		     netdev_features_t features)
2085{
2086	if ((features & NETIF_F_HW_VLAN_CTAG_RX) &&
2087	    (features & NETIF_F_HW_VLAN_STAG_RX)) {
2088		if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
2089			features &= ~NETIF_F_HW_VLAN_CTAG_RX;
2090			netdev->wanted_features &= ~NETIF_F_HW_VLAN_CTAG_RX;
2091			netdev_warn(netdev,
2092				    "S-tag and C-tag stripping can't be enabled at the same time. Enabling S-tag stripping and disabling C-tag stripping\n");
2093		} else if (netdev->features & NETIF_F_HW_VLAN_STAG_RX) {
2094			features &= ~NETIF_F_HW_VLAN_STAG_RX;
2095			netdev->wanted_features &= ~NETIF_F_HW_VLAN_STAG_RX;
2096			netdev_warn(netdev,
2097				    "S-tag and C-tag stripping can't be enabled at the same time. Enabling C-tag stripping and disabling S-tag stripping\n");
2098		}
2099	}
2100	return features;
2101}
2102
2103static netdev_features_t
2104nfp_net_features_check(struct sk_buff *skb, struct net_device *dev,
2105		       netdev_features_t features)
2106{
2107	u8 l4_hdr;
2108
2109	/* We can't do TSO over double tagged packets (802.1AD) */
2110	features &= vlan_features_check(skb, features);
2111
2112	if (!skb->encapsulation)
2113		return features;
2114
2115	/* Ensure that inner L4 header offset fits into TX descriptor field */
2116	if (skb_is_gso(skb)) {
2117		u32 hdrlen;
2118
2119		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
2120			hdrlen = skb_inner_transport_offset(skb) + sizeof(struct udphdr);
2121		else
2122			hdrlen = skb_inner_tcp_all_headers(skb);
2123
2124		/* Assume worst case scenario of having longest possible
2125		 * metadata prepend - 8B
2126		 */
2127		if (unlikely(hdrlen > NFP_NET_LSO_MAX_HDR_SZ - 8))
2128			features &= ~NETIF_F_GSO_MASK;
2129	}
2130
2131	if (xfrm_offload(skb))
2132		return features;
2133
2134	/* VXLAN/GRE check */
2135	switch (vlan_get_protocol(skb)) {
2136	case htons(ETH_P_IP):
2137		l4_hdr = ip_hdr(skb)->protocol;
2138		break;
2139	case htons(ETH_P_IPV6):
2140		l4_hdr = ipv6_hdr(skb)->nexthdr;
2141		break;
2142	default:
2143		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
2144	}
2145
2146	if (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
2147	    skb->inner_protocol != htons(ETH_P_TEB) ||
2148	    (l4_hdr != IPPROTO_UDP && l4_hdr != IPPROTO_GRE) ||
2149	    (l4_hdr == IPPROTO_UDP &&
2150	     (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
2151	      sizeof(struct udphdr) + sizeof(struct vxlanhdr))))
2152		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
2153
2154	return features;
2155}
2156
2157static int
2158nfp_net_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
2159{
2160	struct nfp_net *nn = netdev_priv(netdev);
2161	int n;
2162
2163	/* If port is defined, devlink_port is registered and devlink core
2164	 * is taking care of name formatting.
2165	 */
2166	if (nn->port)
2167		return -EOPNOTSUPP;
2168
2169	if (nn->dp.is_vf || nn->vnic_no_name)
2170		return -EOPNOTSUPP;
2171
2172	n = snprintf(name, len, "n%d", nn->id);
2173	if (n >= len)
2174		return -EINVAL;
2175
2176	return 0;
2177}
2178
2179static int nfp_net_xdp_setup_drv(struct nfp_net *nn, struct netdev_bpf *bpf)
2180{
2181	struct bpf_prog *prog = bpf->prog;
2182	struct nfp_net_dp *dp;
2183	int err;
2184
2185	if (!prog == !nn->dp.xdp_prog) {
2186		WRITE_ONCE(nn->dp.xdp_prog, prog);
2187		xdp_attachment_setup(&nn->xdp, bpf);
2188		return 0;
2189	}
2190
2191	dp = nfp_net_clone_dp(nn);
2192	if (!dp)
2193		return -ENOMEM;
2194
2195	dp->xdp_prog = prog;
2196	dp->num_tx_rings += prog ? nn->dp.num_rx_rings : -nn->dp.num_rx_rings;
2197	dp->rx_dma_dir = prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
2198	dp->rx_dma_off = prog ? XDP_PACKET_HEADROOM - nn->dp.rx_offset : 0;
2199
2200	/* We need RX reconfig to remap the buffers (BIDIR vs FROM_DEV) */
2201	err = nfp_net_ring_reconfig(nn, dp, bpf->extack);
2202	if (err)
2203		return err;
2204
2205	xdp_attachment_setup(&nn->xdp, bpf);
2206	return 0;
2207}
2208
2209static int nfp_net_xdp_setup_hw(struct nfp_net *nn, struct netdev_bpf *bpf)
2210{
2211	int err;
2212
2213	err = nfp_app_xdp_offload(nn->app, nn, bpf->prog, bpf->extack);
2214	if (err)
2215		return err;
2216
2217	xdp_attachment_setup(&nn->xdp_hw, bpf);
2218	return 0;
2219}
2220
2221static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
2222{
2223	struct nfp_net *nn = netdev_priv(netdev);
2224
2225	switch (xdp->command) {
2226	case XDP_SETUP_PROG:
2227		return nfp_net_xdp_setup_drv(nn, xdp);
2228	case XDP_SETUP_PROG_HW:
2229		return nfp_net_xdp_setup_hw(nn, xdp);
2230	case XDP_SETUP_XSK_POOL:
2231		return nfp_net_xsk_setup_pool(netdev, xdp->xsk.pool,
2232					      xdp->xsk.queue_id);
2233	default:
2234		return nfp_app_bpf(nn->app, nn, xdp);
2235	}
2236}
2237
2238static int nfp_net_set_mac_address(struct net_device *netdev, void *addr)
2239{
2240	struct nfp_net *nn = netdev_priv(netdev);
2241	struct sockaddr *saddr = addr;
2242	int err;
2243
2244	err = eth_prepare_mac_addr_change(netdev, addr);
2245	if (err)
2246		return err;
2247
2248	nfp_net_write_mac_addr(nn, saddr->sa_data);
2249
2250	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MACADDR);
2251	if (err)
2252		return err;
2253
2254	eth_commit_mac_addr_change(netdev, addr);
2255
2256	return 0;
2257}
2258
2259static int nfp_net_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
2260				  struct net_device *dev, u32 filter_mask,
2261				  int nlflags)
2262{
2263	struct nfp_net *nn = netdev_priv(dev);
2264	u16 mode;
2265
2266	if (!(nn->cap & NFP_NET_CFG_CTRL_VEPA))
2267		return -EOPNOTSUPP;
2268
2269	mode = (nn->dp.ctrl & NFP_NET_CFG_CTRL_VEPA) ?
2270	       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB;
2271
2272	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0,
2273				       nlflags, filter_mask, NULL);
2274}
2275
2276static int nfp_net_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
2277				  u16 flags, struct netlink_ext_ack *extack)
2278{
2279	struct nfp_net *nn = netdev_priv(dev);
2280	struct nlattr *attr, *br_spec;
2281	int rem, err;
2282	u32 new_ctrl;
2283	u16 mode;
2284
2285	if (!(nn->cap & NFP_NET_CFG_CTRL_VEPA))
2286		return -EOPNOTSUPP;
2287
2288	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
2289	if (!br_spec)
2290		return -EINVAL;
2291
2292	nla_for_each_nested(attr, br_spec, rem) {
2293		if (nla_type(attr) != IFLA_BRIDGE_MODE)
2294			continue;
2295
2296		new_ctrl = nn->dp.ctrl;
2297		mode = nla_get_u16(attr);
2298		if (mode == BRIDGE_MODE_VEPA)
2299			new_ctrl |= NFP_NET_CFG_CTRL_VEPA;
2300		else if (mode == BRIDGE_MODE_VEB)
2301			new_ctrl &= ~NFP_NET_CFG_CTRL_VEPA;
2302		else
2303			return -EOPNOTSUPP;
2304
2305		if (new_ctrl == nn->dp.ctrl)
2306			return 0;
2307
2308		nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
2309		err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
2310		if (!err)
2311			nn->dp.ctrl = new_ctrl;
2312
2313		return err;
2314	}
2315
2316	return -EINVAL;
2317}
2318
2319const struct net_device_ops nfp_nfd3_netdev_ops = {
2320	.ndo_init		= nfp_app_ndo_init,
2321	.ndo_uninit		= nfp_app_ndo_uninit,
2322	.ndo_open		= nfp_net_netdev_open,
2323	.ndo_stop		= nfp_net_netdev_close,
2324	.ndo_start_xmit		= nfp_net_tx,
2325	.ndo_get_stats64	= nfp_net_stat64,
2326	.ndo_vlan_rx_add_vid	= nfp_net_vlan_rx_add_vid,
2327	.ndo_vlan_rx_kill_vid	= nfp_net_vlan_rx_kill_vid,
2328	.ndo_set_vf_mac         = nfp_app_set_vf_mac,
2329	.ndo_set_vf_vlan        = nfp_app_set_vf_vlan,
2330	.ndo_set_vf_rate	= nfp_app_set_vf_rate,
2331	.ndo_set_vf_spoofchk    = nfp_app_set_vf_spoofchk,
2332	.ndo_set_vf_trust	= nfp_app_set_vf_trust,
2333	.ndo_get_vf_config	= nfp_app_get_vf_config,
2334	.ndo_set_vf_link_state  = nfp_app_set_vf_link_state,
2335	.ndo_setup_tc		= nfp_port_setup_tc,
2336	.ndo_tx_timeout		= nfp_net_tx_timeout,
2337	.ndo_set_rx_mode	= nfp_net_set_rx_mode,
2338	.ndo_change_mtu		= nfp_net_change_mtu,
2339	.ndo_set_mac_address	= nfp_net_set_mac_address,
2340	.ndo_set_features	= nfp_net_set_features,
2341	.ndo_fix_features	= nfp_net_fix_features,
2342	.ndo_features_check	= nfp_net_features_check,
2343	.ndo_get_phys_port_name	= nfp_net_get_phys_port_name,
2344	.ndo_bpf		= nfp_net_xdp,
2345	.ndo_xsk_wakeup		= nfp_net_xsk_wakeup,
2346	.ndo_bridge_getlink     = nfp_net_bridge_getlink,
2347	.ndo_bridge_setlink     = nfp_net_bridge_setlink,
2348};
2349
2350const struct net_device_ops nfp_nfdk_netdev_ops = {
2351	.ndo_init		= nfp_app_ndo_init,
2352	.ndo_uninit		= nfp_app_ndo_uninit,
2353	.ndo_open		= nfp_net_netdev_open,
2354	.ndo_stop		= nfp_net_netdev_close,
2355	.ndo_start_xmit		= nfp_net_tx,
2356	.ndo_get_stats64	= nfp_net_stat64,
2357	.ndo_vlan_rx_add_vid	= nfp_net_vlan_rx_add_vid,
2358	.ndo_vlan_rx_kill_vid	= nfp_net_vlan_rx_kill_vid,
2359	.ndo_set_vf_mac         = nfp_app_set_vf_mac,
2360	.ndo_set_vf_vlan        = nfp_app_set_vf_vlan,
2361	.ndo_set_vf_rate	= nfp_app_set_vf_rate,
2362	.ndo_set_vf_spoofchk    = nfp_app_set_vf_spoofchk,
2363	.ndo_set_vf_trust	= nfp_app_set_vf_trust,
2364	.ndo_get_vf_config	= nfp_app_get_vf_config,
2365	.ndo_set_vf_link_state  = nfp_app_set_vf_link_state,
2366	.ndo_setup_tc		= nfp_port_setup_tc,
2367	.ndo_tx_timeout		= nfp_net_tx_timeout,
2368	.ndo_set_rx_mode	= nfp_net_set_rx_mode,
2369	.ndo_change_mtu		= nfp_net_change_mtu,
2370	.ndo_set_mac_address	= nfp_net_set_mac_address,
2371	.ndo_set_features	= nfp_net_set_features,
2372	.ndo_fix_features	= nfp_net_fix_features,
2373	.ndo_features_check	= nfp_net_features_check,
2374	.ndo_get_phys_port_name	= nfp_net_get_phys_port_name,
2375	.ndo_bpf		= nfp_net_xdp,
2376	.ndo_bridge_getlink     = nfp_net_bridge_getlink,
2377	.ndo_bridge_setlink     = nfp_net_bridge_setlink,
2378};
2379
2380static int nfp_udp_tunnel_sync(struct net_device *netdev, unsigned int table)
2381{
2382	struct nfp_net *nn = netdev_priv(netdev);
2383	int i;
2384
2385	BUILD_BUG_ON(NFP_NET_N_VXLAN_PORTS & 1);
2386	for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i += 2) {
2387		struct udp_tunnel_info ti0, ti1;
2388
2389		udp_tunnel_nic_get_port(netdev, table, i, &ti0);
2390		udp_tunnel_nic_get_port(netdev, table, i + 1, &ti1);
2391
2392		nn_writel(nn, NFP_NET_CFG_VXLAN_PORT + i * sizeof(ti0.port),
2393			  be16_to_cpu(ti1.port) << 16 | be16_to_cpu(ti0.port));
2394	}
2395
2396	return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_VXLAN);
2397}
2398
2399static const struct udp_tunnel_nic_info nfp_udp_tunnels = {
2400	.sync_table     = nfp_udp_tunnel_sync,
2401	.flags          = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
2402			  UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
2403	.tables         = {
2404		{
2405			.n_entries      = NFP_NET_N_VXLAN_PORTS,
2406			.tunnel_types   = UDP_TUNNEL_TYPE_VXLAN,
2407		},
2408	},
2409};
2410
2411/**
2412 * nfp_net_info() - Print general info about the NIC
2413 * @nn:      NFP Net device to print info about
2414 */
2415void nfp_net_info(struct nfp_net *nn)
2416{
2417	nn_info(nn, "NFP-6xxx %sNetdev: TxQs=%d/%d RxQs=%d/%d\n",
2418		nn->dp.is_vf ? "VF " : "",
2419		nn->dp.num_tx_rings, nn->max_tx_rings,
2420		nn->dp.num_rx_rings, nn->max_rx_rings);
2421	nn_info(nn, "VER: %d.%d.%d.%d, Maximum supported MTU: %d\n",
2422		nn->fw_ver.extend, nn->fw_ver.class,
2423		nn->fw_ver.major, nn->fw_ver.minor,
2424		nn->max_mtu);
2425	nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2426		nn->cap,
2427		nn->cap & NFP_NET_CFG_CTRL_PROMISC  ? "PROMISC "  : "",
2428		nn->cap & NFP_NET_CFG_CTRL_L2BC     ? "L2BCFILT " : "",
2429		nn->cap & NFP_NET_CFG_CTRL_L2MC     ? "L2MCFILT " : "",
2430		nn->cap & NFP_NET_CFG_CTRL_RXCSUM   ? "RXCSUM "   : "",
2431		nn->cap & NFP_NET_CFG_CTRL_TXCSUM   ? "TXCSUM "   : "",
2432		nn->cap & NFP_NET_CFG_CTRL_RXVLAN   ? "RXVLAN "   : "",
2433		nn->cap & NFP_NET_CFG_CTRL_TXVLAN   ? "TXVLAN "   : "",
2434		nn->cap & NFP_NET_CFG_CTRL_RXQINQ   ? "RXQINQ "   : "",
2435		nn->cap & NFP_NET_CFG_CTRL_RXVLAN_V2 ? "RXVLANv2 "   : "",
2436		nn->cap & NFP_NET_CFG_CTRL_TXVLAN_V2   ? "TXVLANv2 "   : "",
2437		nn->cap & NFP_NET_CFG_CTRL_SCATTER  ? "SCATTER "  : "",
2438		nn->cap & NFP_NET_CFG_CTRL_GATHER   ? "GATHER "   : "",
2439		nn->cap & NFP_NET_CFG_CTRL_LSO      ? "TSO1 "     : "",
2440		nn->cap & NFP_NET_CFG_CTRL_LSO2     ? "TSO2 "     : "",
2441		nn->cap & NFP_NET_CFG_CTRL_RSS      ? "RSS1 "     : "",
2442		nn->cap & NFP_NET_CFG_CTRL_RSS2     ? "RSS2 "     : "",
2443		nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER ? "CTAG_FILTER " : "",
2444		nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
2445		nn->cap & NFP_NET_CFG_CTRL_IRQMOD   ? "IRQMOD "   : "",
2446		nn->cap & NFP_NET_CFG_CTRL_TXRWB    ? "TXRWB "    : "",
2447		nn->cap & NFP_NET_CFG_CTRL_VEPA     ? "VEPA "     : "",
2448		nn->cap & NFP_NET_CFG_CTRL_VXLAN    ? "VXLAN "    : "",
2449		nn->cap & NFP_NET_CFG_CTRL_NVGRE    ? "NVGRE "	  : "",
2450		nn->cap & NFP_NET_CFG_CTRL_CSUM_COMPLETE ?
2451						      "RXCSUM_COMPLETE " : "",
2452		nn->cap & NFP_NET_CFG_CTRL_LIVE_ADDR ? "LIVE_ADDR " : "",
2453		nn->cap_w1 & NFP_NET_CFG_CTRL_MCAST_FILTER ? "MULTICAST_FILTER " : "",
2454		nn->cap_w1 & NFP_NET_CFG_CTRL_USO ? "USO " : "",
2455		nfp_app_extra_cap(nn->app, nn));
2456}
2457
2458/**
2459 * nfp_net_alloc() - Allocate netdev and related structure
2460 * @pdev:         PCI device
2461 * @dev_info:     NFP ASIC params
2462 * @ctrl_bar:     PCI IOMEM with vNIC config memory
2463 * @needs_netdev: Whether to allocate a netdev for this vNIC
2464 * @max_tx_rings: Maximum number of TX rings supported by device
2465 * @max_rx_rings: Maximum number of RX rings supported by device
2466 *
2467 * This function allocates a netdev device and fills in the initial
2468 * part of the &struct nfp_net structure.  For control vNICs the
2469 * nfp_net structure is allocated without a netdev.
2470 *
2471 * Return: NFP Net device structure, or ERR_PTR on error.
2472 */
2473struct nfp_net *
2474nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info,
2475	      void __iomem *ctrl_bar, bool needs_netdev,
2476	      unsigned int max_tx_rings, unsigned int max_rx_rings)
2477{
2478	u64 dma_mask = dma_get_mask(&pdev->dev);
2479	struct nfp_net *nn;
2480	int err;
2481
2482	if (needs_netdev) {
2483		struct net_device *netdev;
2484
2485		netdev = alloc_etherdev_mqs(sizeof(struct nfp_net),
2486					    max_tx_rings, max_rx_rings);
2487		if (!netdev)
2488			return ERR_PTR(-ENOMEM);
2489
2490		SET_NETDEV_DEV(netdev, &pdev->dev);
2491		nn = netdev_priv(netdev);
2492		nn->dp.netdev = netdev;
2493	} else {
2494		nn = vzalloc(sizeof(*nn));
2495		if (!nn)
2496			return ERR_PTR(-ENOMEM);
2497	}
2498
2499	nn->dp.dev = &pdev->dev;
2500	nn->dp.ctrl_bar = ctrl_bar;
2501	nn->dev_info = dev_info;
2502	nn->pdev = pdev;
2503	nfp_net_get_fw_version(&nn->fw_ver, ctrl_bar);
2504
2505	switch (FIELD_GET(NFP_NET_CFG_VERSION_DP_MASK, nn->fw_ver.extend)) {
2506	case NFP_NET_CFG_VERSION_DP_NFD3:
2507		nn->dp.ops = &nfp_nfd3_ops;
2508		break;
2509	case NFP_NET_CFG_VERSION_DP_NFDK:
2510		if (nn->fw_ver.major < 5) {
2511			dev_err(&pdev->dev,
2512				"NFDK must use ABI 5 or newer, found: %d\n",
2513				nn->fw_ver.major);
2514			err = -EINVAL;
2515			goto err_free_nn;
2516		}
2517		nn->dp.ops = &nfp_nfdk_ops;
2518		break;
2519	default:
2520		err = -EINVAL;
2521		goto err_free_nn;
2522	}
2523
2524	if ((dma_mask & nn->dp.ops->dma_mask) != dma_mask) {
2525		dev_err(&pdev->dev,
2526			"DMA mask of loaded firmware: %llx, required DMA mask: %llx\n",
2527			nn->dp.ops->dma_mask, dma_mask);
2528		err = -EINVAL;
2529		goto err_free_nn;
2530	}
2531
2532	nn->max_tx_rings = max_tx_rings;
2533	nn->max_rx_rings = max_rx_rings;
2534
2535	nn->dp.num_tx_rings = min_t(unsigned int,
2536				    max_tx_rings, num_online_cpus());
2537	nn->dp.num_rx_rings = min_t(unsigned int, max_rx_rings,
2538				 netif_get_num_default_rss_queues());
2539
2540	nn->dp.num_r_vecs = max(nn->dp.num_tx_rings, nn->dp.num_rx_rings);
2541	nn->dp.num_r_vecs = min_t(unsigned int,
2542				  nn->dp.num_r_vecs, num_online_cpus());
2543	nn->max_r_vecs = nn->dp.num_r_vecs;
2544
2545	nn->dp.xsk_pools = kcalloc(nn->max_r_vecs, sizeof(nn->dp.xsk_pools),
2546				   GFP_KERNEL);
2547	if (!nn->dp.xsk_pools) {
2548		err = -ENOMEM;
2549		goto err_free_nn;
2550	}
2551
2552	nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
2553	nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
2554
2555	sema_init(&nn->bar_lock, 1);
2556
2557	spin_lock_init(&nn->reconfig_lock);
2558	spin_lock_init(&nn->link_status_lock);
2559
2560	timer_setup(&nn->reconfig_timer, nfp_net_reconfig_timer, 0);
2561
2562	err = nfp_net_tlv_caps_parse(&nn->pdev->dev, nn->dp.ctrl_bar,
2563				     &nn->tlv_caps);
2564	if (err)
2565		goto err_free_nn;
2566
2567	err = nfp_ccm_mbox_alloc(nn);
2568	if (err)
2569		goto err_free_nn;
2570
2571	return nn;
2572
2573err_free_nn:
2574	if (nn->dp.netdev)
2575		free_netdev(nn->dp.netdev);
2576	else
2577		vfree(nn);
2578	return ERR_PTR(err);
2579}
2580
2581/**
2582 * nfp_net_free() - Undo what nfp_net_alloc() did
2583 * @nn:      NFP Net device to free
2584 */
2585void nfp_net_free(struct nfp_net *nn)
2586{
2587	WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted);
2588	nfp_ccm_mbox_free(nn);
2589
2590	kfree(nn->dp.xsk_pools);
2591	if (nn->dp.netdev)
2592		free_netdev(nn->dp.netdev);
2593	else
2594		vfree(nn);
2595}
2596
2597/**
2598 * nfp_net_rss_key_sz() - Get current size of the RSS key
2599 * @nn:		NFP Net device instance
2600 *
2601 * Return: size of the RSS key for currently selected hash function.
2602 */
2603unsigned int nfp_net_rss_key_sz(struct nfp_net *nn)
2604{
2605	switch (nn->rss_hfunc) {
2606	case ETH_RSS_HASH_TOP:
2607		return NFP_NET_CFG_RSS_KEY_SZ;
2608	case ETH_RSS_HASH_XOR:
2609		return 0;
2610	case ETH_RSS_HASH_CRC32:
2611		return 4;
2612	}
2613
2614	nn_warn(nn, "Unknown hash function: %u\n", nn->rss_hfunc);
2615	return 0;
2616}
2617
2618/**
2619 * nfp_net_rss_init() - Set the initial RSS parameters
2620 * @nn:	     NFP Net device to configure RSS for
2621 */
2622static void nfp_net_rss_init(struct nfp_net *nn)
2623{
2624	unsigned long func_bit, rss_cap_hfunc;
2625	u32 reg;
2626
2627	/* Read the RSS function capability and select first supported func */
2628	reg = nn_readl(nn, NFP_NET_CFG_RSS_CAP);
2629	rss_cap_hfunc =	FIELD_GET(NFP_NET_CFG_RSS_CAP_HFUNC, reg);
2630	if (!rss_cap_hfunc)
2631		rss_cap_hfunc =	FIELD_GET(NFP_NET_CFG_RSS_CAP_HFUNC,
2632					  NFP_NET_CFG_RSS_TOEPLITZ);
2633
2634	func_bit = find_first_bit(&rss_cap_hfunc, NFP_NET_CFG_RSS_HFUNCS);
2635	if (func_bit == NFP_NET_CFG_RSS_HFUNCS) {
2636		dev_warn(nn->dp.dev,
2637			 "Bad RSS config, defaulting to Toeplitz hash\n");
2638		func_bit = ETH_RSS_HASH_TOP_BIT;
2639	}
2640	nn->rss_hfunc = 1 << func_bit;
2641
2642	netdev_rss_key_fill(nn->rss_key, nfp_net_rss_key_sz(nn));
2643
2644	nfp_net_rss_init_itbl(nn);
2645
2646	/* Enable IPv4/IPv6 TCP by default */
2647	nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP |
2648		      NFP_NET_CFG_RSS_IPV6_TCP |
2649		      NFP_NET_CFG_RSS_IPV4_UDP |
2650		      NFP_NET_CFG_RSS_IPV6_UDP |
2651		      FIELD_PREP(NFP_NET_CFG_RSS_HFUNC, nn->rss_hfunc) |
2652		      NFP_NET_CFG_RSS_MASK;
2653}
2654
2655/**
2656 * nfp_net_irqmod_init() - Set the initial IRQ moderation parameters
2657 * @nn:	     NFP Net device to reconfigure
2658 */
2659static void nfp_net_irqmod_init(struct nfp_net *nn)
2660{
2661	nn->rx_coalesce_usecs      = 50;
2662	nn->rx_coalesce_max_frames = 64;
2663	nn->tx_coalesce_usecs      = 50;
2664	nn->tx_coalesce_max_frames = 64;
2665
2666	nn->rx_coalesce_adapt_on   = true;
2667	nn->tx_coalesce_adapt_on   = true;
2668}
2669
2670static void nfp_net_netdev_init(struct nfp_net *nn)
2671{
2672	struct net_device *netdev = nn->dp.netdev;
2673
2674	nfp_net_write_mac_addr(nn, nn->dp.netdev->dev_addr);
2675
2676	netdev->mtu = nn->dp.mtu;
2677
2678	/* Advertise/enable offloads based on capabilities
2679	 *
2680	 * Note: netdev->features shows the currently enabled features
2681	 * and netdev->hw_features advertises which features are
2682	 * supported.  By default we enable most features.
2683	 */
2684	if (nn->cap & NFP_NET_CFG_CTRL_LIVE_ADDR)
2685		netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
2686
2687	netdev->hw_features = NETIF_F_HIGHDMA;
2688	if (nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY) {
2689		netdev->hw_features |= NETIF_F_RXCSUM;
2690		nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY;
2691	}
2692	if (nn->cap & NFP_NET_CFG_CTRL_TXCSUM) {
2693		netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
2694		nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
2695	}
2696	if (nn->cap & NFP_NET_CFG_CTRL_GATHER) {
2697		netdev->hw_features |= NETIF_F_SG;
2698		nn->dp.ctrl |= NFP_NET_CFG_CTRL_GATHER;
2699	}
2700	if ((nn->cap & NFP_NET_CFG_CTRL_LSO && nn->fw_ver.major > 2) ||
2701	    nn->cap & NFP_NET_CFG_CTRL_LSO2) {
2702		netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
2703		if (nn->cap_w1 & NFP_NET_CFG_CTRL_USO)
2704			netdev->hw_features |= NETIF_F_GSO_UDP_L4;
2705		nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_LSO2 ?:
2706					 NFP_NET_CFG_CTRL_LSO;
2707	}
2708	if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY)
2709		netdev->hw_features |= NETIF_F_RXHASH;
2710
2711#ifdef CONFIG_NFP_NET_IPSEC
2712	if (nn->cap_w1 & NFP_NET_CFG_CTRL_IPSEC)
2713		netdev->hw_features |= NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM;
2714#endif
2715
2716	if (nn->cap & NFP_NET_CFG_CTRL_VXLAN) {
2717		if (nn->cap & NFP_NET_CFG_CTRL_LSO) {
2718			netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
2719					       NETIF_F_GSO_UDP_TUNNEL_CSUM |
2720					       NETIF_F_GSO_PARTIAL;
2721			netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
2722		}
2723		netdev->udp_tunnel_nic_info = &nfp_udp_tunnels;
2724		nn->dp.ctrl |= NFP_NET_CFG_CTRL_VXLAN;
2725	}
2726	if (nn->cap & NFP_NET_CFG_CTRL_NVGRE) {
2727		if (nn->cap & NFP_NET_CFG_CTRL_LSO)
2728			netdev->hw_features |= NETIF_F_GSO_GRE;
2729		nn->dp.ctrl |= NFP_NET_CFG_CTRL_NVGRE;
2730	}
2731	if (nn->cap & (NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE))
2732		netdev->hw_enc_features = netdev->hw_features;
2733
2734	netdev->vlan_features = netdev->hw_features;
2735
2736	if (nn->cap & NFP_NET_CFG_CTRL_RXVLAN_ANY) {
2737		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
2738		nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXVLAN_V2 ?:
2739			       NFP_NET_CFG_CTRL_RXVLAN;
2740	}
2741	if (nn->cap & NFP_NET_CFG_CTRL_TXVLAN_ANY) {
2742		if (nn->cap & NFP_NET_CFG_CTRL_LSO2) {
2743			nn_warn(nn, "Device advertises both TSO2 and TXVLAN. Refusing to enable TXVLAN.\n");
2744		} else {
2745			netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
2746			nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_TXVLAN_V2 ?:
2747				       NFP_NET_CFG_CTRL_TXVLAN;
2748		}
2749	}
2750	if (nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER) {
2751		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
2752		nn->dp.ctrl |= NFP_NET_CFG_CTRL_CTAG_FILTER;
2753	}
2754	if (nn->cap & NFP_NET_CFG_CTRL_RXQINQ) {
2755		netdev->hw_features |= NETIF_F_HW_VLAN_STAG_RX;
2756		nn->dp.ctrl |= NFP_NET_CFG_CTRL_RXQINQ;
2757	}
2758
2759	netdev->features = netdev->hw_features;
2760
2761	if (nfp_app_has_tc(nn->app) && nn->port)
2762		netdev->hw_features |= NETIF_F_HW_TC;
2763
2764	/* C-Tag strip and S-Tag strip can't be supported simultaneously,
2765	 * so enable C-Tag strip and disable S-Tag strip by default.
2766	 */
2767	netdev->features &= ~NETIF_F_HW_VLAN_STAG_RX;
2768	nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_RXQINQ;
2769
2770	netdev->xdp_features = NETDEV_XDP_ACT_BASIC;
2771	if (nn->app && nn->app->type->id == NFP_APP_BPF_NIC)
2772		netdev->xdp_features |= NETDEV_XDP_ACT_HW_OFFLOAD;
2773
2774	/* Finalise the netdev setup */
2775	switch (nn->dp.ops->version) {
2776	case NFP_NFD_VER_NFD3:
2777		netdev->netdev_ops = &nfp_nfd3_netdev_ops;
2778		netdev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
2779		netdev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
2780		break;
2781	case NFP_NFD_VER_NFDK:
2782		netdev->netdev_ops = &nfp_nfdk_netdev_ops;
2783		break;
2784	}
2785
2786	netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000);
2787
2788	/* MTU range: 68 - hw-specific max */
2789	netdev->min_mtu = ETH_MIN_MTU;
2790	netdev->max_mtu = nn->max_mtu;
2791
2792	netif_set_tso_max_segs(netdev, NFP_NET_LSO_MAX_SEGS);
2793
2794	netif_carrier_off(netdev);
2795
2796	nfp_net_set_ethtool_ops(netdev);
2797}
2798
2799static int nfp_net_read_caps(struct nfp_net *nn)
2800{
2801	/* Get some of the read-only fields from the BAR */
2802	nn->cap = nn_readl(nn, NFP_NET_CFG_CAP);
2803	nn->cap_w1 = nn_readl(nn, NFP_NET_CFG_CAP_WORD1);
2804	nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU);
2805
2806	/* ABI 4.x and ctrl vNIC always use chained metadata, in other cases
2807	 * we allow use of non-chained metadata if RSS(v1) is the only
2808	 * advertised capability requiring metadata.
2809	 */
2810	nn->dp.chained_metadata_format = nn->fw_ver.major == 4 ||
2811					 !nn->dp.netdev ||
2812					 !(nn->cap & NFP_NET_CFG_CTRL_RSS) ||
2813					 nn->cap & NFP_NET_CFG_CTRL_CHAIN_META;
2814	/* RSS(v1) uses non-chained metadata format, except in ABI 4.x where
2815	 * it has the same meaning as RSSv2.
2816	 */
2817	if (nn->dp.chained_metadata_format && nn->fw_ver.major != 4)
2818		nn->cap &= ~NFP_NET_CFG_CTRL_RSS;
2819
2820	/* Determine RX packet/metadata boundary offset */
2821	if (nn->fw_ver.major >= 2) {
2822		u32 reg;
2823
2824		reg = nn_readl(nn, NFP_NET_CFG_RX_OFFSET);
2825		if (reg > NFP_NET_MAX_PREPEND) {
2826			nn_err(nn, "Invalid rx offset: %d\n", reg);
2827			return -EINVAL;
2828		}
2829		nn->dp.rx_offset = reg;
2830	} else {
2831		nn->dp.rx_offset = NFP_NET_RX_OFFSET;
2832	}
2833
2834	/* Mask out NFD-version-specific features */
2835	nn->cap &= nn->dp.ops->cap_mask;
2836
2837	/* For control vNICs mask out the capabilities app doesn't want. */
2838	if (!nn->dp.netdev)
2839		nn->cap &= nn->app->type->ctrl_cap_mask;
2840
2841	return 0;
2842}
2843
2844/**
2845 * nfp_net_init() - Initialise/finalise the nfp_net structure
2846 * @nn:		NFP Net device structure
2847 *
2848 * Return: 0 on success or negative errno on error.
2849 */
2850int nfp_net_init(struct nfp_net *nn)
2851{
2852	int err;
2853
2854	nn->dp.rx_dma_dir = DMA_FROM_DEVICE;
2855
2856	err = nfp_net_read_caps(nn);
2857	if (err)
2858		return err;
2859
2860	/* Set default MTU and Freelist buffer size */
2861	if (!nfp_net_is_data_vnic(nn) && nn->app->ctrl_mtu) {
2862		nn->dp.mtu = min(nn->app->ctrl_mtu, nn->max_mtu);
2863	} else if (nn->max_mtu < NFP_NET_DEFAULT_MTU) {
2864		nn->dp.mtu = nn->max_mtu;
2865	} else {
2866		nn->dp.mtu = NFP_NET_DEFAULT_MTU;
2867	}
2868	nn->dp.fl_bufsz = nfp_net_calc_fl_bufsz(&nn->dp);
2869
2870	if (nfp_app_ctrl_uses_data_vnics(nn->app))
2871		nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_CMSG_DATA;
2872
2873	if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY) {
2874		nfp_net_rss_init(nn);
2875		nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RSS2 ?:
2876					 NFP_NET_CFG_CTRL_RSS;
2877	}
2878
2879	/* Allow L2 Broadcast and Multicast through by default, if supported */
2880	if (nn->cap & NFP_NET_CFG_CTRL_L2BC)
2881		nn->dp.ctrl |= NFP_NET_CFG_CTRL_L2BC;
2882
2883	/* Allow IRQ moderation, if supported */
2884	if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) {
2885		nfp_net_irqmod_init(nn);
2886		nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
2887	}
2888
2889	/* Enable TX pointer writeback, if supported */
2890	if (nn->cap & NFP_NET_CFG_CTRL_TXRWB)
2891		nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXRWB;
2892
2893	if (nn->cap_w1 & NFP_NET_CFG_CTRL_MCAST_FILTER)
2894		nn->dp.ctrl_w1 |= NFP_NET_CFG_CTRL_MCAST_FILTER;
2895
2896	/* Stash the re-configuration queue away.  First odd queue in TX Bar */
2897	nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
2898
2899	/* Make sure the FW knows the netdev is supposed to be disabled here */
2900	nn_writel(nn, NFP_NET_CFG_CTRL, 0);
2901	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
2902	nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
2903	nn_writel(nn, NFP_NET_CFG_CTRL_WORD1, 0);
2904	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RING |
2905				   NFP_NET_CFG_UPDATE_GEN);
2906	if (err)
2907		return err;
2908
2909	if (nn->dp.netdev) {
2910		nfp_net_netdev_init(nn);
2911
2912		err = nfp_ccm_mbox_init(nn);
2913		if (err)
2914			return err;
2915
2916		err = nfp_net_tls_init(nn);
2917		if (err)
2918			goto err_clean_mbox;
2919
2920		nfp_net_ipsec_init(nn);
2921	}
2922
2923	nfp_net_vecs_init(nn);
2924
2925	if (!nn->dp.netdev)
2926		return 0;
2927
2928	spin_lock_init(&nn->mbox_amsg.lock);
2929	INIT_LIST_HEAD(&nn->mbox_amsg.list);
2930	INIT_WORK(&nn->mbox_amsg.work, nfp_net_mbox_amsg_work);
2931
2932	INIT_LIST_HEAD(&nn->fs.list);
2933
2934	return register_netdev(nn->dp.netdev);
2935
2936err_clean_mbox:
2937	nfp_ccm_mbox_clean(nn);
2938	return err;
2939}
2940
2941/**
2942 * nfp_net_clean() - Undo what nfp_net_init() did.
2943 * @nn:		NFP Net device structure
2944 */
2945void nfp_net_clean(struct nfp_net *nn)
2946{
2947	if (!nn->dp.netdev)
2948		return;
2949
2950	unregister_netdev(nn->dp.netdev);
2951	nfp_net_ipsec_clean(nn);
2952	nfp_ccm_mbox_clean(nn);
2953	nfp_net_fs_clean(nn);
2954	flush_work(&nn->mbox_amsg.work);
2955	nfp_net_reconfig_wait_posted(nn);
2956}
 121
 122static bool nfp_net_reconfig_check_done(struct nfp_net *nn, bool last_check)
 123{
 124	u32 reg;
 125
 126	reg = nn_readl(nn, NFP_NET_CFG_UPDATE);
 127	if (reg == 0)
 128		return true;
 129	if (reg & NFP_NET_CFG_UPDATE_ERR) {
 130		nn_err(nn, "Reconfig error (status: 0x%08x update: 0x%08x ctrl: 0x%08x)\n",
 131		       reg, nn->reconfig_in_progress_update,
 132		       nn_readl(nn, NFP_NET_CFG_CTRL));
 133		return true;
 134	} else if (last_check) {
 135		nn_err(nn, "Reconfig timeout (status: 0x%08x update: 0x%08x ctrl: 0x%08x)\n",
 136		       reg, nn->reconfig_in_progress_update,
 137		       nn_readl(nn, NFP_NET_CFG_CTRL));
 138		return true;
 139	}
 140
 141	return false;
 142}
 143
 144static bool __nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
 145{
 146	bool timed_out = false;
 147	int i;
 148
 149	/* Poll update field, waiting for NFP to ack the config.
 150	 * Do an opportunistic wait-busy loop, afterward sleep.
 151	 */
 152	for (i = 0; i < 50; i++) {
 153		if (nfp_net_reconfig_check_done(nn, false))
 154			return false;
 155		udelay(4);
 156	}
 157
 158	while (!nfp_net_reconfig_check_done(nn, timed_out)) {
 159		usleep_range(250, 500);
 160		timed_out = time_is_before_eq_jiffies(deadline);
 161	}
 162
 163	return timed_out;
 164}
 165
 166static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
 167{
 168	if (__nfp_net_reconfig_wait(nn, deadline))
 169		return -EIO;
 170
 171	if (nn_readl(nn, NFP_NET_CFG_UPDATE) & NFP_NET_CFG_UPDATE_ERR)
 172		return -EIO;
 173
 174	return 0;
 175}
 176
 177static void nfp_net_reconfig_timer(struct timer_list *t)
 178{
 179	struct nfp_net *nn = from_timer(nn, t, reconfig_timer);
 180
 181	spin_lock_bh(&nn->reconfig_lock);
 182
 183	nn->reconfig_timer_active = false;
 184
 185	/* If sync caller is present it will take over from us */
 186	if (nn->reconfig_sync_present)
 187		goto done;
 188
 189	/* Read reconfig status and report errors */
 190	nfp_net_reconfig_check_done(nn, true);
 191
 192	if (nn->reconfig_posted)
 193		nfp_net_reconfig_start_async(nn, 0);
 194done:
 195	spin_unlock_bh(&nn->reconfig_lock);
 196}
 197
 198/**
 199 * nfp_net_reconfig_post() - Post async reconfig request
 200 * @nn:      NFP Net device to reconfigure
 201 * @update:  The value for the update field in the BAR config
 202 *
 203 * Record FW reconfiguration request.  Reconfiguration will be kicked off
 204 * whenever reconfiguration machinery is idle.  Multiple requests can be
 205 * merged together!
 206 */
 207static void nfp_net_reconfig_post(struct nfp_net *nn, u32 update)
 208{
 209	spin_lock_bh(&nn->reconfig_lock);
 210
 211	/* Sync caller will kick off async reconf when it's done, just post */
 212	if (nn->reconfig_sync_present) {
 213		nn->reconfig_posted |= update;
 214		goto done;
 215	}
 216
 217	/* Opportunistically check if the previous command is done */
 218	if (!nn->reconfig_timer_active ||
 219	    nfp_net_reconfig_check_done(nn, false))
 220		nfp_net_reconfig_start_async(nn, update);
 221	else
 222		nn->reconfig_posted |= update;
 223done:
 224	spin_unlock_bh(&nn->reconfig_lock);
 225}
 226
 227static void nfp_net_reconfig_sync_enter(struct nfp_net *nn)
 228{
 229	bool cancelled_timer = false;
 230	u32 pre_posted_requests;
 231
 232	spin_lock_bh(&nn->reconfig_lock);
 233
 234	WARN_ON(nn->reconfig_sync_present);
 235	nn->reconfig_sync_present = true;
 236
 237	if (nn->reconfig_timer_active) {
 238		nn->reconfig_timer_active = false;
 239		cancelled_timer = true;
 240	}
 241	pre_posted_requests = nn->reconfig_posted;
 242	nn->reconfig_posted = 0;
 243
 244	spin_unlock_bh(&nn->reconfig_lock);
 245
 246	if (cancelled_timer) {
 247		del_timer_sync(&nn->reconfig_timer);
 248		nfp_net_reconfig_wait(nn, nn->reconfig_timer.expires);
 249	}
 250
 251	/* Run the posted reconfigs which were issued before we started */
 252	if (pre_posted_requests) {
 253		nfp_net_reconfig_start(nn, pre_posted_requests);
 254		nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT);
 255	}
 256}
 257
 258static void nfp_net_reconfig_wait_posted(struct nfp_net *nn)
 259{
 260	nfp_net_reconfig_sync_enter(nn);
 261
 262	spin_lock_bh(&nn->reconfig_lock);
 263	nn->reconfig_sync_present = false;
 264	spin_unlock_bh(&nn->reconfig_lock);
 265}
 266
 267/**
 268 * __nfp_net_reconfig() - Reconfigure the firmware
 269 * @nn:      NFP Net device to reconfigure
 270 * @update:  The value for the update field in the BAR config
 271 *
 272 * Write the update word to the BAR and ping the reconfig queue.  The
 273 * poll until the firmware has acknowledged the update by zeroing the
 274 * update word.
 275 *
 276 * Return: Negative errno on error, 0 on success
 277 */
 278int __nfp_net_reconfig(struct nfp_net *nn, u32 update)
 279{
 280	int ret;
 281
 282	nfp_net_reconfig_sync_enter(nn);
 283
 284	nfp_net_reconfig_start(nn, update);
 285	ret = nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT);
 286
 287	spin_lock_bh(&nn->reconfig_lock);
 288
 289	if (nn->reconfig_posted)
 290		nfp_net_reconfig_start_async(nn, 0);
 291
 292	nn->reconfig_sync_present = false;
 293
 294	spin_unlock_bh(&nn->reconfig_lock);
 295
 296	return ret;
 297}
 298
 299int nfp_net_reconfig(struct nfp_net *nn, u32 update)
 300{
 301	int ret;
 302
 303	nn_ctrl_bar_lock(nn);
 304	ret = __nfp_net_reconfig(nn, update);
 305	nn_ctrl_bar_unlock(nn);
 306
 307	return ret;
 308}
 309
 310int nfp_net_mbox_lock(struct nfp_net *nn, unsigned int data_size)
 311{
 312	if (nn->tlv_caps.mbox_len < NFP_NET_CFG_MBOX_SIMPLE_VAL + data_size) {
 313		nn_err(nn, "mailbox too small for %u of data (%u)\n",
 314		       data_size, nn->tlv_caps.mbox_len);
 315		return -EIO;
 316	}
 317
 318	nn_ctrl_bar_lock(nn);
 319	return 0;
 320}
 321
 322/**
 323 * nfp_net_mbox_reconfig() - Reconfigure the firmware via the mailbox
 324 * @nn:        NFP Net device to reconfigure
 325 * @mbox_cmd:  The value for the mailbox command
 326 *
 327 * Helper function for mailbox updates
 328 *
 329 * Return: Negative errno on error, 0 on success
 330 */
 331int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd)
 332{
 333	u32 mbox = nn->tlv_caps.mbox_off;
 334	int ret;
 335
 336	nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd);
 337
 338	ret = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MBOX);
 339	if (ret) {
 340		nn_err(nn, "Mailbox update error\n");
 341		return ret;
 342	}
 343
 344	return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET);
 345}
 346
 347void nfp_net_mbox_reconfig_post(struct nfp_net *nn, u32 mbox_cmd)
 348{
 349	u32 mbox = nn->tlv_caps.mbox_off;
 350
 351	nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd);
 352
 353	nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_MBOX);
 354}
 355
 356int nfp_net_mbox_reconfig_wait_posted(struct nfp_net *nn)
 357{
 358	u32 mbox = nn->tlv_caps.mbox_off;
 359
 360	nfp_net_reconfig_wait_posted(nn);
 361
 362	return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET);
 363}
 364
 365int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd)
 366{
 367	int ret;
 368
 369	ret = nfp_net_mbox_reconfig(nn, mbox_cmd);
 370	nn_ctrl_bar_unlock(nn);
 371	return ret;
 372}
 373
 374/* Interrupt configuration and handling
 375 */
 376
 377/**
 378 * nfp_net_irq_unmask() - Unmask automasked interrupt
 379 * @nn:       NFP Network structure
 380 * @entry_nr: MSI-X table entry
 381 *
 382 * Clear the ICR for the IRQ entry.
 383 */
 384static void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr)
 385{
 386	nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED);
 387	nn_pci_flush(nn);
 388}
 389
 390/**
 391 * nfp_net_irqs_alloc() - allocates MSI-X irqs
 392 * @pdev:        PCI device structure
 393 * @irq_entries: Array to be initialized and used to hold the irq entries
 394 * @min_irqs:    Minimal acceptable number of interrupts
 395 * @wanted_irqs: Target number of interrupts to allocate
 396 *
 397 * Return: Number of irqs obtained or 0 on error.
 398 */
 399unsigned int
 400nfp_net_irqs_alloc(struct pci_dev *pdev, struct msix_entry *irq_entries,
 401		   unsigned int min_irqs, unsigned int wanted_irqs)
 402{
 403	unsigned int i;
 404	int got_irqs;
 405
 406	for (i = 0; i < wanted_irqs; i++)
 407		irq_entries[i].entry = i;
 408
 409	got_irqs = pci_enable_msix_range(pdev, irq_entries,
 410					 min_irqs, wanted_irqs);
 411	if (got_irqs < 0) {
 412		dev_err(&pdev->dev, "Failed to enable %d-%d MSI-X (err=%d)\n",
 413			min_irqs, wanted_irqs, got_irqs);
 414		return 0;
 415	}
 416
 417	if (got_irqs < wanted_irqs)
 418		dev_warn(&pdev->dev, "Unable to allocate %d IRQs got only %d\n",
 419			 wanted_irqs, got_irqs);
 420
 421	return got_irqs;
 422}
 423
 424/**
 425 * nfp_net_irqs_assign() - Assign interrupts allocated externally to netdev
 426 * @nn:		 NFP Network structure
 427 * @irq_entries: Table of allocated interrupts
 428 * @n:		 Size of @irq_entries (number of entries to grab)
 429 *
 430 * After interrupts are allocated with nfp_net_irqs_alloc() this function
 431 * should be called to assign them to a specific netdev (port).
 432 */
 433void
 434nfp_net_irqs_assign(struct nfp_net *nn, struct msix_entry *irq_entries,
 435		    unsigned int n)
 436{
 437	struct nfp_net_dp *dp = &nn->dp;
 438
 439	nn->max_r_vecs = n - NFP_NET_NON_Q_VECTORS;
 440	dp->num_r_vecs = nn->max_r_vecs;
 441
 442	memcpy(nn->irq_entries, irq_entries, sizeof(*irq_entries) * n);
 443
 444	if (dp->num_rx_rings > dp->num_r_vecs ||
 445	    dp->num_tx_rings > dp->num_r_vecs)
 446		dev_warn(nn->dp.dev, "More rings (%d,%d) than vectors (%d).\n",
 447			 dp->num_rx_rings, dp->num_tx_rings,
 448			 dp->num_r_vecs);
 449
 450	dp->num_rx_rings = min(dp->num_r_vecs, dp->num_rx_rings);
 451	dp->num_tx_rings = min(dp->num_r_vecs, dp->num_tx_rings);
 452	dp->num_stack_tx_rings = dp->num_tx_rings;
 453}
 454
 455/**
 456 * nfp_net_irqs_disable() - Disable interrupts
 457 * @pdev:        PCI device structure
 458 *
 459 * Undoes what @nfp_net_irqs_alloc() does.
 460 */
 461void nfp_net_irqs_disable(struct pci_dev *pdev)
 462{
 463	pci_disable_msix(pdev);
 464}
 465
 466/**
 467 * nfp_net_irq_rxtx() - Interrupt service routine for RX/TX rings.
 468 * @irq:      Interrupt
 469 * @data:     Opaque data structure
 470 *
 471 * Return: Indicate if the interrupt has been handled.
 472 */
 473static irqreturn_t nfp_net_irq_rxtx(int irq, void *data)
 474{
 475	struct nfp_net_r_vector *r_vec = data;
 476
 
 
 
 
 
 
 477	napi_schedule_irqoff(&r_vec->napi);
 478
 479	/* The FW auto-masks any interrupt, either via the MASK bit in
 480	 * the MSI-X table or via the per entry ICR field.  So there
 481	 * is no need to disable interrupts here.
 482	 */
 483	return IRQ_HANDLED;
 484}
 485
 486static irqreturn_t nfp_ctrl_irq_rxtx(int irq, void *data)
 487{
 488	struct nfp_net_r_vector *r_vec = data;
 489
 490	tasklet_schedule(&r_vec->tasklet);
 491
 492	return IRQ_HANDLED;
 493}
 494
 495/**
 496 * nfp_net_read_link_status() - Reread link status from control BAR
 497 * @nn:       NFP Network structure
 498 */
 499static void nfp_net_read_link_status(struct nfp_net *nn)
 500{
 501	unsigned long flags;
 502	bool link_up;
 503	u32 sts;
 504
 505	spin_lock_irqsave(&nn->link_status_lock, flags);
 506
 507	sts = nn_readl(nn, NFP_NET_CFG_STS);
 508	link_up = !!(sts & NFP_NET_CFG_STS_LINK);
 509
 510	if (nn->link_up == link_up)
 511		goto out;
 512
 513	nn->link_up = link_up;
 514	if (nn->port)
 515		set_bit(NFP_PORT_CHANGED, &nn->port->flags);
 
 
 
 516
 517	if (nn->link_up) {
 518		netif_carrier_on(nn->dp.netdev);
 519		netdev_info(nn->dp.netdev, "NIC Link is Up\n");
 520	} else {
 521		netif_carrier_off(nn->dp.netdev);
 522		netdev_info(nn->dp.netdev, "NIC Link is Down\n");
 523	}
 524out:
 525	spin_unlock_irqrestore(&nn->link_status_lock, flags);
 526}
 527
 528/**
 529 * nfp_net_irq_lsc() - Interrupt service routine for link state changes
 530 * @irq:      Interrupt
 531 * @data:     Opaque data structure
 532 *
 533 * Return: Indicate if the interrupt has been handled.
 534 */
 535static irqreturn_t nfp_net_irq_lsc(int irq, void *data)
 536{
 537	struct nfp_net *nn = data;
 538	struct msix_entry *entry;
 539
 540	entry = &nn->irq_entries[NFP_NET_IRQ_LSC_IDX];
 541
 542	nfp_net_read_link_status(nn);
 543
 544	nfp_net_irq_unmask(nn, entry->entry);
 545
 546	return IRQ_HANDLED;
 547}
 548
 549/**
 550 * nfp_net_irq_exn() - Interrupt service routine for exceptions
 551 * @irq:      Interrupt
 552 * @data:     Opaque data structure
 553 *
 554 * Return: Indicate if the interrupt has been handled.
 555 */
 556static irqreturn_t nfp_net_irq_exn(int irq, void *data)
 557{
 558	struct nfp_net *nn = data;
 559
 560	nn_err(nn, "%s: UNIMPLEMENTED.\n", __func__);
 561	/* XXX TO BE IMPLEMENTED */
 562	return IRQ_HANDLED;
 563}
 564
 565/**
 566 * nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring
 567 * @tx_ring:  TX ring structure
 568 * @r_vec:    IRQ vector servicing this ring
 569 * @idx:      Ring index
 570 * @is_xdp:   Is this an XDP TX ring?
 571 */
 572static void
 573nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring,
 574		     struct nfp_net_r_vector *r_vec, unsigned int idx,
 575		     bool is_xdp)
 576{
 577	struct nfp_net *nn = r_vec->nfp_net;
 578
 579	tx_ring->idx = idx;
 580	tx_ring->r_vec = r_vec;
 581	tx_ring->is_xdp = is_xdp;
 582	u64_stats_init(&tx_ring->r_vec->tx_sync);
 583
 584	tx_ring->qcidx = tx_ring->idx * nn->stride_tx;
 585	tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx);
 586}
 587
 588/**
 589 * nfp_net_rx_ring_init() - Fill in the boilerplate for a RX ring
 590 * @rx_ring:  RX ring structure
 591 * @r_vec:    IRQ vector servicing this ring
 592 * @idx:      Ring index
 593 */
 594static void
 595nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring,
 596		     struct nfp_net_r_vector *r_vec, unsigned int idx)
 597{
 598	struct nfp_net *nn = r_vec->nfp_net;
 599
 600	rx_ring->idx = idx;
 601	rx_ring->r_vec = r_vec;
 602	u64_stats_init(&rx_ring->r_vec->rx_sync);
 603
 604	rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx;
 605	rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx);
 606}
 607
 608/**
 609 * nfp_net_aux_irq_request() - Request an auxiliary interrupt (LSC or EXN)
 610 * @nn:		NFP Network structure
 611 * @ctrl_offset: Control BAR offset where IRQ configuration should be written
 612 * @format:	printf-style format to construct the interrupt name
 613 * @name:	Pointer to allocated space for interrupt name
 614 * @name_sz:	Size of space for interrupt name
 615 * @vector_idx:	Index of MSI-X vector used for this interrupt
 616 * @handler:	IRQ handler to register for this interrupt
 617 */
 618static int
 619nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset,
 620			const char *format, char *name, size_t name_sz,
 621			unsigned int vector_idx, irq_handler_t handler)
 622{
 623	struct msix_entry *entry;
 624	int err;
 625
 626	entry = &nn->irq_entries[vector_idx];
 627
 628	snprintf(name, name_sz, format, nfp_net_name(nn));
 629	err = request_irq(entry->vector, handler, 0, name, nn);
 630	if (err) {
 631		nn_err(nn, "Failed to request IRQ %d (err=%d).\n",
 632		       entry->vector, err);
 633		return err;
 634	}
 635	nn_writeb(nn, ctrl_offset, entry->entry);
 636	nfp_net_irq_unmask(nn, entry->entry);
 637
 638	return 0;
 639}
 640
 641/**
 642 * nfp_net_aux_irq_free() - Free an auxiliary interrupt (LSC or EXN)
 643 * @nn:		NFP Network structure
 644 * @ctrl_offset: Control BAR offset where IRQ configuration should be written
 645 * @vector_idx:	Index of MSI-X vector used for this interrupt
 646 */
 647static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset,
 648				 unsigned int vector_idx)
 649{
 650	nn_writeb(nn, ctrl_offset, 0xff);
 651	nn_pci_flush(nn);
 652	free_irq(nn->irq_entries[vector_idx].vector, nn);
 653}
 654
 655/* Transmit
 656 *
 657 * One queue controller peripheral queue is used for transmit.  The
 658 * driver enqueues packets for transmit by advancing the write
 659 * pointer.  The device indicates that packets have been transmitted by
 660 * advancing the read pointer.  The driver maintains a local copy of
 661 * the read and write pointer in @struct nfp_net_tx_ring.  The driver
 662 * keeps @wr_p in sync with the queue controller write pointer and can
 663 * determine how many packets have been transmitted by comparing its
 664 * copy of the read pointer @rd_p with the read pointer maintained by
 665 * the queue controller peripheral.
 666 */
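
/*
 * A minimal standalone sketch (userspace C, not driver code) of the
 * free-running pointer scheme described above, assuming a power-of-two
 * ring size as relied upon by D_IDX(): occupancy is wr_p - rd_p, which
 * stays correct across u32 wraparound, and the descriptor slot is the
 * pointer masked with cnt - 1.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t cnt = 1024;            /* ring size, a power of two */
        uint32_t rd_p = 0xfffffff0;     /* both pointers about to wrap */
        uint32_t wr_p = rd_p + 5;

        printf("in flight: %u\n", wr_p - rd_p);         /* 5 */
        printf("slot:      %u\n", wr_p & (cnt - 1));    /* wr_p modulo cnt */
        return 0;
}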
 667
 668/**
 669 * nfp_net_tx_full() - Check if the TX ring is full
 670 * @tx_ring: TX ring to check
 671 * @dcnt:    Number of descriptors that need to be enqueued (must be >= 1)
 672 *
 673 * This function checks, based on the *host copy* of the read/write
 674 * pointers, whether a given TX ring is full.  The real TX queue may
 675 * have some newly freed slots available.
 676 *
 677 * Return: True if the ring is full.
 678 */
 679static int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt)
 680{
 681	return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt);
 682}
 683
 684/* Wrappers for deciding when to stop and restart TX queues */
 685static int nfp_net_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring)
 686{
 687	return !nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS * 4);
 688}
 689
 690static int nfp_net_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring)
 691{
 692	return nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS + 1);
 693}
 694
 695/**
 696 * nfp_net_tx_ring_stop() - stop tx ring
 697 * @nd_q:    netdev queue
 698 * @tx_ring: driver tx queue structure
 699 *
 700 * Safely stop TX ring.  Remember that while we are running .start_xmit()
 701 * someone else may be cleaning up TX ring completions, so we need to be
 702 * extra careful here.
 703 */
 704static void nfp_net_tx_ring_stop(struct netdev_queue *nd_q,
 705				 struct nfp_net_tx_ring *tx_ring)
 706{
 707	netif_tx_stop_queue(nd_q);
 708
 709	/* We can race with the TX completion out of NAPI so recheck */
 710	smp_mb();
 711	if (unlikely(nfp_net_tx_ring_should_wake(tx_ring)))
 712		netif_tx_start_queue(nd_q);
 713}
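
/*
 * A sketch of the stop/recheck pattern used above, written with C11
 * atomics as a userspace analogue of netif_tx_stop_queue()/smp_mb()
 * (illustrative only, not driver code).  The producer must re-check for
 * free space after publishing the stopped state; otherwise a completion
 * that freed slots in between could leave the queue stopped forever.
 */
#include <stdatomic.h>
#include <stdbool.h>

struct toy_ring {
        atomic_uint rd_p;       /* advanced by the completion path */
        unsigned int wr_p;      /* advanced by the producer only */
        unsigned int cnt;
        atomic_bool stopped;
};

static bool toy_ring_full(struct toy_ring *r, unsigned int need)
{
        return r->wr_p - atomic_load(&r->rd_p) >= r->cnt - need;
}

static void toy_ring_stop(struct toy_ring *r, unsigned int need)
{
        atomic_store(&r->stopped, true);
        atomic_thread_fence(memory_order_seq_cst);      /* like smp_mb() */
        if (!toy_ring_full(r, need))                    /* recheck */
                atomic_store(&r->stopped, false);
}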
 714
 715/**
 716 * nfp_net_tx_tso() - Set up Tx descriptor for LSO
 717 * @r_vec: per-ring structure
 718 * @txbuf: Pointer to driver soft TX descriptor
 719 * @txd: Pointer to HW TX descriptor
 720 * @skb: Pointer to SKB
 721 * @md_bytes: Prepend length
 722 *
 723 * Set up the Tx descriptor for LSO; do nothing for non-LSO skbs.
 724 * Packet headers must fit within the maximum supported LSO header size; this is not checked here.
 725 */
 726static void nfp_net_tx_tso(struct nfp_net_r_vector *r_vec,
 727			   struct nfp_net_tx_buf *txbuf,
 728			   struct nfp_net_tx_desc *txd, struct sk_buff *skb,
 729			   u32 md_bytes)
 730{
 731	u32 l3_offset, l4_offset, hdrlen;
 732	u16 mss;
 733
 734	if (!skb_is_gso(skb))
 735		return;
 736
 737	if (!skb->encapsulation) {
 738		l3_offset = skb_network_offset(skb);
 739		l4_offset = skb_transport_offset(skb);
 740		hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
 741	} else {
 742		l3_offset = skb_inner_network_offset(skb);
 743		l4_offset = skb_inner_transport_offset(skb);
 744		hdrlen = skb_inner_transport_header(skb) - skb->data +
 745			inner_tcp_hdrlen(skb);
 746	}
 747
 748	txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs;
 749	txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1);
 750
 751	mss = skb_shinfo(skb)->gso_size & PCIE_DESC_TX_MSS_MASK;
 752	txd->l3_offset = l3_offset - md_bytes;
 753	txd->l4_offset = l4_offset - md_bytes;
 754	txd->lso_hdrlen = hdrlen - md_bytes;
 755	txd->mss = cpu_to_le16(mss);
 756	txd->flags |= PCIE_DESC_TX_LSO;
 757
 758	u64_stats_update_begin(&r_vec->tx_sync);
 759	r_vec->tx_lso++;
 760	u64_stats_update_end(&r_vec->tx_sync);
 761}
 762
 763/**
 764 * nfp_net_tx_csum() - Set TX CSUM offload flags in TX descriptor
 765 * @dp:  NFP Net data path struct
 766 * @r_vec: per-ring structure
 767 * @txbuf: Pointer to driver soft TX descriptor
 768 * @txd: Pointer to TX descriptor
 769 * @skb: Pointer to SKB
 770 *
 771 * This function sets the TX checksum flags in the TX descriptor based
 772 * on the configuration and the protocol of the packet to be transmitted.
 773 */
 774static void nfp_net_tx_csum(struct nfp_net_dp *dp,
 775			    struct nfp_net_r_vector *r_vec,
 776			    struct nfp_net_tx_buf *txbuf,
 777			    struct nfp_net_tx_desc *txd, struct sk_buff *skb)
 778{
 779	struct ipv6hdr *ipv6h;
 780	struct iphdr *iph;
 781	u8 l4_hdr;
 782
 783	if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM))
 784		return;
 785
 786	if (skb->ip_summed != CHECKSUM_PARTIAL)
 787		return;
 788
 789	txd->flags |= PCIE_DESC_TX_CSUM;
 790	if (skb->encapsulation)
 791		txd->flags |= PCIE_DESC_TX_ENCAP;
 792
 793	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
 794	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
 795
 796	if (iph->version == 4) {
 797		txd->flags |= PCIE_DESC_TX_IP4_CSUM;
 798		l4_hdr = iph->protocol;
 799	} else if (ipv6h->version == 6) {
 800		l4_hdr = ipv6h->nexthdr;
 801	} else {
 802		nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version);
 803		return;
 804	}
 805
 806	switch (l4_hdr) {
 807	case IPPROTO_TCP:
 808		txd->flags |= PCIE_DESC_TX_TCP_CSUM;
 809		break;
 810	case IPPROTO_UDP:
 811		txd->flags |= PCIE_DESC_TX_UDP_CSUM;
 812		break;
 813	default:
 814		nn_dp_warn(dp, "partial checksum but l4 proto=%x!\n", l4_hdr);
 815		return;
 816	}
 817
 818	u64_stats_update_begin(&r_vec->tx_sync);
 819	if (skb->encapsulation)
 820		r_vec->hw_csum_tx_inner += txbuf->pkt_cnt;
 821	else
 822		r_vec->hw_csum_tx += txbuf->pkt_cnt;
 823	u64_stats_update_end(&r_vec->tx_sync);
 824}
 825
 826static struct sk_buff *
 827nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
 828	       struct sk_buff *skb, u64 *tls_handle, int *nr_frags)
 829{
 830#ifdef CONFIG_TLS_DEVICE
 831	struct nfp_net_tls_offload_ctx *ntls;
 832	struct sk_buff *nskb;
 833	bool resync_pending;
 834	u32 datalen, seq;
 835
 836	if (likely(!dp->ktls_tx))
 837		return skb;
 838	if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
 839		return skb;
 840
 841	datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
 842	seq = ntohl(tcp_hdr(skb)->seq);
 843	ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
 844	resync_pending = tls_offload_tx_resync_pending(skb->sk);
 845	if (unlikely(resync_pending || ntls->next_seq != seq)) {
 846		/* Pure ACK out of order already */
 847		if (!datalen)
 848			return skb;
 849
 850		u64_stats_update_begin(&r_vec->tx_sync);
 851		r_vec->tls_tx_fallback++;
 852		u64_stats_update_end(&r_vec->tx_sync);
 853
 854		nskb = tls_encrypt_skb(skb);
 855		if (!nskb) {
 856			u64_stats_update_begin(&r_vec->tx_sync);
 857			r_vec->tls_tx_no_fallback++;
 858			u64_stats_update_end(&r_vec->tx_sync);
 859			return NULL;
 860		}
 861		/* encryption wasn't necessary */
 862		if (nskb == skb)
 863			return skb;
 864		/* we don't re-check ring space */
 865		if (unlikely(skb_is_nonlinear(nskb))) {
 866			nn_dp_warn(dp, "tls_encrypt_skb() produced fragmented frame\n");
 867			u64_stats_update_begin(&r_vec->tx_sync);
 868			r_vec->tx_errors++;
 869			u64_stats_update_end(&r_vec->tx_sync);
 870			dev_kfree_skb_any(nskb);
 871			return NULL;
 872		}
 873
 874		/* jump forward, a TX may have gotten lost, need to sync TX */
 875		if (!resync_pending && seq - ntls->next_seq < U32_MAX / 4)
 876			tls_offload_tx_resync_request(nskb->sk, seq,
 877						      ntls->next_seq);
 878
 879		*nr_frags = 0;
 880		return nskb;
 881	}
 882
 883	if (datalen) {
 884		u64_stats_update_begin(&r_vec->tx_sync);
 885		if (!skb_is_gso(skb))
 886			r_vec->hw_tls_tx++;
 887		else
 888			r_vec->hw_tls_tx += skb_shinfo(skb)->gso_segs;
 889		u64_stats_update_end(&r_vec->tx_sync);
 890	}
 891
 892	memcpy(tls_handle, ntls->fw_handle, sizeof(ntls->fw_handle));
 893	ntls->next_seq += datalen;
 894#endif
 895	return skb;
 896}
 897
 898static void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle)
 899{
 900#ifdef CONFIG_TLS_DEVICE
 901	struct nfp_net_tls_offload_ctx *ntls;
 902	u32 datalen, seq;
 903
 904	if (!tls_handle)
 905		return;
 906	if (WARN_ON_ONCE(!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)))
 907		return;
 908
 909	datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
 910	seq = ntohl(tcp_hdr(skb)->seq);
 911
 912	ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
 913	if (ntls->next_seq == seq + datalen)
 914		ntls->next_seq = seq;
 915	else
 916		WARN_ON_ONCE(1);
 917#endif
 918}
 919
 920static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring)
 921{
 922	wmb();
 923	nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add);
 924	tx_ring->wr_ptr_add = 0;
 925}
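
/*
 * An illustrative sketch (not driver code) of the doorbell batching that
 * nfp_net_tx_xmit_more_flush() enables: descriptors are queued while the
 * stack signals xmit_more, and the device write pointer is only advanced
 * once per batch.  toy_ring_doorbell() stands in for nfp_qcp_wr_ptr_add().
 */
#include <stdbool.h>
#include <stdio.h>

static void toy_ring_doorbell(unsigned int add)
{
        printf("doorbell +%u\n", add);
}

static void toy_tx_one(unsigned int *wr_ptr_add, bool xmit_more)
{
        (*wr_ptr_add)++;                        /* descriptor queued */
        if (!xmit_more && *wr_ptr_add) {        /* end of batch: flush */
                toy_ring_doorbell(*wr_ptr_add);
                *wr_ptr_add = 0;
        }
}

int main(void)
{
        unsigned int pending = 0;

        toy_tx_one(&pending, true);     /* batched, no doorbell yet */
        toy_tx_one(&pending, true);
        toy_tx_one(&pending, false);    /* prints "doorbell +3" */
        return 0;
}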
 926
 927static int nfp_net_prep_tx_meta(struct sk_buff *skb, u64 tls_handle)
 928{
 929	struct metadata_dst *md_dst = skb_metadata_dst(skb);
 930	unsigned char *data;
 931	u32 meta_id = 0;
 932	int md_bytes;
 933
 934	if (likely(!md_dst && !tls_handle))
 935		return 0;
 936	if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX)) {
 937		if (!tls_handle)
 938			return 0;
 939		md_dst = NULL;
 940	}
 941
 942	md_bytes = 4 + !!md_dst * 4 + !!tls_handle * 8;
 943
 944	if (unlikely(skb_cow_head(skb, md_bytes)))
 945		return -ENOMEM;
 946
 947	meta_id = 0;
 948	data = skb_push(skb, md_bytes) + md_bytes;
 949	if (md_dst) {
 950		data -= 4;
 951		put_unaligned_be32(md_dst->u.port_info.port_id, data);
 952		meta_id = NFP_NET_META_PORTID;
 953	}
 954	if (tls_handle) {
 955		/* conn handle is opaque, we just use u64 to be able to quickly
 956		 * compare it to zero
 957		 */
 958		data -= 8;
 959		memcpy(data, &tls_handle, sizeof(tls_handle));
 960		meta_id <<= NFP_NET_META_FIELD_SIZE;
 961		meta_id |= NFP_NET_META_CONN_HANDLE;
 962	}
 963
 964	data -= 4;
 965	put_unaligned_be32(meta_id, data);
 966
 967	return md_bytes;
 968}
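
/*
 * A standalone sketch (not driver code) of the TX metadata prepend built
 * above.  Values are written back-to-front in front of the packet and a
 * leading big-endian 32-bit word packs one 4-bit field ID per value, the
 * least significant nibble describing the value that immediately follows
 * the word.  The field ID values below are placeholders assumed purely
 * for illustration; the real NFP_NET_META_* constants are defined
 * elsewhere in the driver.
 */
#include <stdint.h>
#include <string.h>

#define TOY_META_FIELD_SIZE     4
#define TOY_META_PORTID         5       /* placeholder field ID */
#define TOY_META_CONN_HANDLE    6       /* placeholder field ID */

static void toy_put_be32(uint8_t *p, uint32_t v)
{
        p[0] = v >> 24; p[1] = v >> 16; p[2] = v >> 8; p[3] = v;
}

/* Build [meta_id word][tls handle, 8B][port id, 4B] in front of the packet;
 * the caller must provide at least 16 bytes of headroom before pkt_start.
 */
static int toy_prep_tx_meta(uint8_t *pkt_start, uint32_t port_id,
                            uint64_t tls_handle)
{
        uint8_t *data = pkt_start;      /* walk backwards from the packet */
        uint32_t meta_id = 0;

        data -= 4;
        toy_put_be32(data, port_id);
        meta_id = TOY_META_PORTID;

        data -= 8;
        memcpy(data, &tls_handle, sizeof(tls_handle));  /* opaque handle */
        meta_id = (meta_id << TOY_META_FIELD_SIZE) | TOY_META_CONN_HANDLE;

        data -= 4;
        toy_put_be32(data, meta_id);
        return 4 + 4 + 8;
}

int main(void)
{
        uint8_t buf[16 + 64];   /* 16 bytes of headroom, then the packet */

        return toy_prep_tx_meta(buf + 16, 0xabcd, 0x1122334455667788ULL) == 16
                ? 0 : 1;
}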
 969
 970/**
 971 * nfp_net_tx() - Main transmit entry point
 972 * @skb:    SKB to transmit
 973 * @netdev: netdev structure
 974 *
 975 * Return: NETDEV_TX_OK on success.
 976 */
 977static netdev_tx_t nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
 978{
 979	struct nfp_net *nn = netdev_priv(netdev);
 980	const skb_frag_t *frag;
 981	int f, nr_frags, wr_idx, md_bytes;
 982	struct nfp_net_tx_ring *tx_ring;
 983	struct nfp_net_r_vector *r_vec;
 984	struct nfp_net_tx_buf *txbuf;
 985	struct nfp_net_tx_desc *txd;
 986	struct netdev_queue *nd_q;
 987	struct nfp_net_dp *dp;
 988	dma_addr_t dma_addr;
 989	unsigned int fsize;
 990	u64 tls_handle = 0;
 991	u16 qidx;
 992
 993	dp = &nn->dp;
 994	qidx = skb_get_queue_mapping(skb);
 995	tx_ring = &dp->tx_rings[qidx];
 996	r_vec = tx_ring->r_vec;
 997
 998	nr_frags = skb_shinfo(skb)->nr_frags;
 999
1000	if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) {
1001		nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
1002			   qidx, tx_ring->wr_p, tx_ring->rd_p);
1003		nd_q = netdev_get_tx_queue(dp->netdev, qidx);
1004		netif_tx_stop_queue(nd_q);
1005		nfp_net_tx_xmit_more_flush(tx_ring);
1006		u64_stats_update_begin(&r_vec->tx_sync);
1007		r_vec->tx_busy++;
1008		u64_stats_update_end(&r_vec->tx_sync);
1009		return NETDEV_TX_BUSY;
1010	}
1011
1012	skb = nfp_net_tls_tx(dp, r_vec, skb, &tls_handle, &nr_frags);
1013	if (unlikely(!skb)) {
1014		nfp_net_tx_xmit_more_flush(tx_ring);
1015		return NETDEV_TX_OK;
1016	}
1017
1018	md_bytes = nfp_net_prep_tx_meta(skb, tls_handle);
1019	if (unlikely(md_bytes < 0))
1020		goto err_flush;
1021
1022	/* Start with the head skbuf */
1023	dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb),
1024				  DMA_TO_DEVICE);
1025	if (dma_mapping_error(dp->dev, dma_addr))
1026		goto err_dma_err;
1027
1028	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
1029
1030	/* Stash the soft descriptor of the head then initialize it */
1031	txbuf = &tx_ring->txbufs[wr_idx];
1032	txbuf->skb = skb;
1033	txbuf->dma_addr = dma_addr;
1034	txbuf->fidx = -1;
1035	txbuf->pkt_cnt = 1;
1036	txbuf->real_len = skb->len;
1037
1038	/* Build TX descriptor */
1039	txd = &tx_ring->txds[wr_idx];
1040	txd->offset_eop = (nr_frags ? 0 : PCIE_DESC_TX_EOP) | md_bytes;
1041	txd->dma_len = cpu_to_le16(skb_headlen(skb));
1042	nfp_desc_set_dma_addr(txd, dma_addr);
1043	txd->data_len = cpu_to_le16(skb->len);
1044
1045	txd->flags = 0;
1046	txd->mss = 0;
1047	txd->lso_hdrlen = 0;
1048
1049	/* Do not reorder - tso may adjust pkt cnt, vlan may override fields */
1050	nfp_net_tx_tso(r_vec, txbuf, txd, skb, md_bytes);
1051	nfp_net_tx_csum(dp, r_vec, txbuf, txd, skb);
1052	if (skb_vlan_tag_present(skb) && dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN) {
1053		txd->flags |= PCIE_DESC_TX_VLAN;
1054		txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb));
1055	}
1056
1057	/* Gather DMA */
1058	if (nr_frags > 0) {
1059		__le64 second_half;
1060
1061		/* all descs must match except for in addr, length and eop */
1062		second_half = txd->vals8[1];
1063
1064		for (f = 0; f < nr_frags; f++) {
1065			frag = &skb_shinfo(skb)->frags[f];
1066			fsize = skb_frag_size(frag);
1067
1068			dma_addr = skb_frag_dma_map(dp->dev, frag, 0,
1069						    fsize, DMA_TO_DEVICE);
1070			if (dma_mapping_error(dp->dev, dma_addr))
1071				goto err_unmap;
1072
1073			wr_idx = D_IDX(tx_ring, wr_idx + 1);
1074			tx_ring->txbufs[wr_idx].skb = skb;
1075			tx_ring->txbufs[wr_idx].dma_addr = dma_addr;
1076			tx_ring->txbufs[wr_idx].fidx = f;
1077
1078			txd = &tx_ring->txds[wr_idx];
1079			txd->dma_len = cpu_to_le16(fsize);
1080			nfp_desc_set_dma_addr(txd, dma_addr);
1081			txd->offset_eop = md_bytes |
1082				((f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0);
1083			txd->vals8[1] = second_half;
1084		}
1085
1086		u64_stats_update_begin(&r_vec->tx_sync);
1087		r_vec->tx_gather++;
1088		u64_stats_update_end(&r_vec->tx_sync);
1089	}
1090
1091	skb_tx_timestamp(skb);
1092
1093	nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
1094
1095	tx_ring->wr_p += nr_frags + 1;
1096	if (nfp_net_tx_ring_should_stop(tx_ring))
1097		nfp_net_tx_ring_stop(nd_q, tx_ring);
1098
1099	tx_ring->wr_ptr_add += nr_frags + 1;
1100	if (__netdev_tx_sent_queue(nd_q, txbuf->real_len, netdev_xmit_more()))
1101		nfp_net_tx_xmit_more_flush(tx_ring);
1102
1103	return NETDEV_TX_OK;
1104
1105err_unmap:
1106	while (--f >= 0) {
1107		frag = &skb_shinfo(skb)->frags[f];
1108		dma_unmap_page(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
1109			       skb_frag_size(frag), DMA_TO_DEVICE);
1110		tx_ring->txbufs[wr_idx].skb = NULL;
1111		tx_ring->txbufs[wr_idx].dma_addr = 0;
1112		tx_ring->txbufs[wr_idx].fidx = -2;
1113		wr_idx = wr_idx - 1;
1114		if (wr_idx < 0)
1115			wr_idx += tx_ring->cnt;
1116	}
1117	dma_unmap_single(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
1118			 skb_headlen(skb), DMA_TO_DEVICE);
1119	tx_ring->txbufs[wr_idx].skb = NULL;
1120	tx_ring->txbufs[wr_idx].dma_addr = 0;
1121	tx_ring->txbufs[wr_idx].fidx = -2;
1122err_dma_err:
1123	nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
1124err_flush:
1125	nfp_net_tx_xmit_more_flush(tx_ring);
1126	u64_stats_update_begin(&r_vec->tx_sync);
1127	r_vec->tx_errors++;
1128	u64_stats_update_end(&r_vec->tx_sync);
1129	nfp_net_tls_tx_undo(skb, tls_handle);
1130	dev_kfree_skb_any(skb);
1131	return NETDEV_TX_OK;
1132}
1133
1134/**
 1135 * nfp_net_tx_complete() - Handle completed TX packets
1136 * @tx_ring:	TX ring structure
1137 * @budget:	NAPI budget (only used as bool to determine if in NAPI context)
1138 */
1139static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
1140{
1141	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
1142	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
1143	struct netdev_queue *nd_q;
1144	u32 done_pkts = 0, done_bytes = 0;
1145	u32 qcp_rd_p;
1146	int todo;
1147
1148	if (tx_ring->wr_p == tx_ring->rd_p)
1149		return;
1150
1151	/* Work out how many descriptors have been transmitted */
1152	qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
1153
1154	if (qcp_rd_p == tx_ring->qcp_rd_p)
1155		return;
1156
1157	todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
1158
1159	while (todo--) {
1160		const skb_frag_t *frag;
1161		struct nfp_net_tx_buf *tx_buf;
1162		struct sk_buff *skb;
1163		int fidx, nr_frags;
1164		int idx;
1165
1166		idx = D_IDX(tx_ring, tx_ring->rd_p++);
1167		tx_buf = &tx_ring->txbufs[idx];
1168
1169		skb = tx_buf->skb;
1170		if (!skb)
1171			continue;
1172
1173		nr_frags = skb_shinfo(skb)->nr_frags;
1174		fidx = tx_buf->fidx;
1175
1176		if (fidx == -1) {
1177			/* unmap head */
1178			dma_unmap_single(dp->dev, tx_buf->dma_addr,
1179					 skb_headlen(skb), DMA_TO_DEVICE);
1180
1181			done_pkts += tx_buf->pkt_cnt;
1182			done_bytes += tx_buf->real_len;
1183		} else {
1184			/* unmap fragment */
1185			frag = &skb_shinfo(skb)->frags[fidx];
1186			dma_unmap_page(dp->dev, tx_buf->dma_addr,
1187				       skb_frag_size(frag), DMA_TO_DEVICE);
1188		}
1189
1190		/* check for last gather fragment */
1191		if (fidx == nr_frags - 1)
1192			napi_consume_skb(skb, budget);
1193
1194		tx_buf->dma_addr = 0;
1195		tx_buf->skb = NULL;
1196		tx_buf->fidx = -2;
1197	}
1198
1199	tx_ring->qcp_rd_p = qcp_rd_p;
1200
1201	u64_stats_update_begin(&r_vec->tx_sync);
1202	r_vec->tx_bytes += done_bytes;
1203	r_vec->tx_pkts += done_pkts;
1204	u64_stats_update_end(&r_vec->tx_sync);
1205
1206	if (!dp->netdev)
1207		return;
1208
1209	nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
1210	netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
1211	if (nfp_net_tx_ring_should_wake(tx_ring)) {
1212		/* Make sure TX thread will see updated tx_ring->rd_p */
1213		smp_mb();
1214
1215		if (unlikely(netif_tx_queue_stopped(nd_q)))
1216			netif_tx_wake_queue(nd_q);
1217	}
1218
1219	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
1220		  "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
1221		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
1222}
1223
1224static bool nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring)
1225{
1226	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
1227	u32 done_pkts = 0, done_bytes = 0;
1228	bool done_all;
1229	int idx, todo;
1230	u32 qcp_rd_p;
1231
1232	/* Work out how many descriptors have been transmitted */
1233	qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
1234
1235	if (qcp_rd_p == tx_ring->qcp_rd_p)
1236		return true;
1237
1238	todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
1239
1240	done_all = todo <= NFP_NET_XDP_MAX_COMPLETE;
1241	todo = min(todo, NFP_NET_XDP_MAX_COMPLETE);
1242
1243	tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo);
1244
1245	done_pkts = todo;
1246	while (todo--) {
1247		idx = D_IDX(tx_ring, tx_ring->rd_p);
1248		tx_ring->rd_p++;
1249
1250		done_bytes += tx_ring->txbufs[idx].real_len;
1251	}
1252
1253	u64_stats_update_begin(&r_vec->tx_sync);
1254	r_vec->tx_bytes += done_bytes;
1255	r_vec->tx_pkts += done_pkts;
1256	u64_stats_update_end(&r_vec->tx_sync);
1257
1258	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
1259		  "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
1260		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
1261
1262	return done_all;
1263}
1264
1265/**
1266 * nfp_net_tx_ring_reset() - Free any untransmitted buffers and reset pointers
1267 * @dp:		NFP Net data path struct
1268 * @tx_ring:	TX ring structure
1269 *
1270 * Assumes that the device is stopped, must be idempotent.
1271 */
1272static void
1273nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
1274{
1275	const skb_frag_t *frag;
1276	struct netdev_queue *nd_q;
1277
1278	while (!tx_ring->is_xdp && tx_ring->rd_p != tx_ring->wr_p) {
1279		struct nfp_net_tx_buf *tx_buf;
1280		struct sk_buff *skb;
1281		int idx, nr_frags;
1282
1283		idx = D_IDX(tx_ring, tx_ring->rd_p);
1284		tx_buf = &tx_ring->txbufs[idx];
1285
1286		skb = tx_ring->txbufs[idx].skb;
1287		nr_frags = skb_shinfo(skb)->nr_frags;
1288
1289		if (tx_buf->fidx == -1) {
1290			/* unmap head */
1291			dma_unmap_single(dp->dev, tx_buf->dma_addr,
1292					 skb_headlen(skb), DMA_TO_DEVICE);
1293		} else {
1294			/* unmap fragment */
1295			frag = &skb_shinfo(skb)->frags[tx_buf->fidx];
1296			dma_unmap_page(dp->dev, tx_buf->dma_addr,
1297				       skb_frag_size(frag), DMA_TO_DEVICE);
1298		}
1299
1300		/* check for last gather fragment */
1301		if (tx_buf->fidx == nr_frags - 1)
1302			dev_kfree_skb_any(skb);
1303
1304		tx_buf->dma_addr = 0;
1305		tx_buf->skb = NULL;
1306		tx_buf->fidx = -2;
1307
1308		tx_ring->qcp_rd_p++;
1309		tx_ring->rd_p++;
1310	}
1311
1312	memset(tx_ring->txds, 0, tx_ring->size);
1313	tx_ring->wr_p = 0;
1314	tx_ring->rd_p = 0;
1315	tx_ring->qcp_rd_p = 0;
1316	tx_ring->wr_ptr_add = 0;
1317
1318	if (tx_ring->is_xdp || !dp->netdev)
1319		return;
1320
1321	nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
1322	netdev_tx_reset_queue(nd_q);
1323}
1324
1325static void nfp_net_tx_timeout(struct net_device *netdev, unsigned int txqueue)
1326{
1327	struct nfp_net *nn = netdev_priv(netdev);
1328
1329	nn_warn(nn, "TX watchdog timeout on ring: %u\n", txqueue);
1330}
1331
1332/* Receive processing
1333 */
1334static unsigned int
1335nfp_net_calc_fl_bufsz(struct nfp_net_dp *dp)
1336{
1337	unsigned int fl_bufsz;
1338
1339	fl_bufsz = NFP_NET_RX_BUF_HEADROOM;
1340	fl_bufsz += dp->rx_dma_off;
1341	if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
1342		fl_bufsz += NFP_NET_MAX_PREPEND;
1343	else
1344		fl_bufsz += dp->rx_offset;
1345	fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + dp->mtu;
1346
1347	fl_bufsz = SKB_DATA_ALIGN(fl_bufsz);
1348	fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1349
1350	return fl_bufsz;
1351}
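
/*
 * A standalone worked example (not driver code) of the freelist buffer
 * size calculation above for a 1500 byte MTU with the dynamic RX offset.
 * NFP_NET_RX_BUF_HEADROOM, NFP_NET_MAX_PREPEND and the skb_shared_info
 * size are not visible in this file, so representative values are assumed
 * purely for illustration.
 */
#include <stdio.h>

#define TOY_ALIGN(x, a)         (((x) + (a) - 1) & ~((a) - 1))
#define TOY_CACHELINE           64      /* stand-in for SKB_DATA_ALIGN */
#define TOY_RX_BUF_HEADROOM     64      /* assumed */
#define TOY_MAX_PREPEND         64      /* assumed */
#define TOY_SHINFO_SZ           320     /* assumed sizeof(skb_shared_info) */

int main(void)
{
        unsigned int mtu = 1500, rx_dma_off = 0, fl_bufsz;

        fl_bufsz = TOY_RX_BUF_HEADROOM + rx_dma_off;
        fl_bufsz += TOY_MAX_PREPEND;            /* dynamic RX offset case */
        fl_bufsz += 14 + 4 * 2 + mtu;           /* ETH_HLEN + 2 VLAN tags */

        fl_bufsz = TOY_ALIGN(fl_bufsz, TOY_CACHELINE);
        fl_bufsz += TOY_ALIGN(TOY_SHINFO_SZ, TOY_CACHELINE);

        printf("fl_bufsz = %u bytes per RX buffer\n", fl_bufsz);
        return 0;
}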
1352
1353static void
1354nfp_net_free_frag(void *frag, bool xdp)
1355{
1356	if (!xdp)
1357		skb_free_frag(frag);
1358	else
1359		__free_page(virt_to_page(frag));
1360}
1361
1362/**
1363 * nfp_net_rx_alloc_one() - Allocate and map page frag for RX
1364 * @dp:		NFP Net data path struct
1365 * @dma_addr:	Pointer to storage for DMA address (output param)
1366 *
 1367 * This function allocates a new page frag and maps it for DMA.
1368 *
1369 * Return: allocated page frag or NULL on failure.
1370 */
1371static void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
1372{
1373	void *frag;
1374
1375	if (!dp->xdp_prog) {
1376		frag = netdev_alloc_frag(dp->fl_bufsz);
1377	} else {
1378		struct page *page;
1379
1380		page = alloc_page(GFP_KERNEL);
1381		frag = page ? page_address(page) : NULL;
1382	}
1383	if (!frag) {
1384		nn_dp_warn(dp, "Failed to alloc receive page frag\n");
1385		return NULL;
1386	}
1387
1388	*dma_addr = nfp_net_dma_map_rx(dp, frag);
1389	if (dma_mapping_error(dp->dev, *dma_addr)) {
1390		nfp_net_free_frag(frag, dp->xdp_prog);
1391		nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
1392		return NULL;
1393	}
1394
1395	return frag;
1396}
1397
1398static void *nfp_net_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
1399{
1400	void *frag;
1401
1402	if (!dp->xdp_prog) {
1403		frag = napi_alloc_frag(dp->fl_bufsz);
1404		if (unlikely(!frag))
1405			return NULL;
1406	} else {
1407		struct page *page;
1408
1409		page = dev_alloc_page();
1410		if (unlikely(!page))
1411			return NULL;
1412		frag = page_address(page);
1413	}
1414
1415	*dma_addr = nfp_net_dma_map_rx(dp, frag);
1416	if (dma_mapping_error(dp->dev, *dma_addr)) {
1417		nfp_net_free_frag(frag, dp->xdp_prog);
1418		nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
1419		return NULL;
1420	}
1421
1422	return frag;
1423}
1424
1425/**
 1426 * nfp_net_rx_give_one() - Put a mapped buffer on the software and hardware rings
1427 * @dp:		NFP Net data path struct
1428 * @rx_ring:	RX ring structure
1429 * @frag:	page fragment buffer
 1430 * @dma_addr:	DMA address of the buffer mapping
1431 */
1432static void nfp_net_rx_give_one(const struct nfp_net_dp *dp,
1433				struct nfp_net_rx_ring *rx_ring,
1434				void *frag, dma_addr_t dma_addr)
1435{
1436	unsigned int wr_idx;
1437
1438	wr_idx = D_IDX(rx_ring, rx_ring->wr_p);
1439
1440	nfp_net_dma_sync_dev_rx(dp, dma_addr);
1441
 1442	/* Stash the buffer and DMA address away */
1443	rx_ring->rxbufs[wr_idx].frag = frag;
1444	rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;
1445
1446	/* Fill freelist descriptor */
1447	rx_ring->rxds[wr_idx].fld.reserved = 0;
1448	rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
1449	nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld,
1450			      dma_addr + dp->rx_dma_off);
1451
1452	rx_ring->wr_p++;
1453	if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) {
1454		/* Update write pointer of the freelist queue. Make
1455		 * sure all writes are flushed before telling the hardware.
1456		 */
1457		wmb();
1458		nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH);
1459	}
1460}
1461
1462/**
1463 * nfp_net_rx_ring_reset() - Reflect in SW state of freelist after disable
1464 * @rx_ring:	RX ring structure
1465 *
1466 * Assumes that the device is stopped, must be idempotent.
1467 */
1468static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
1469{
1470	unsigned int wr_idx, last_idx;
1471
1472	/* wr_p == rd_p means ring was never fed FL bufs.  RX rings are always
1473	 * kept at cnt - 1 FL bufs.
1474	 */
1475	if (rx_ring->wr_p == 0 && rx_ring->rd_p == 0)
1476		return;
1477
1478	/* Move the empty entry to the end of the list */
1479	wr_idx = D_IDX(rx_ring, rx_ring->wr_p);
1480	last_idx = rx_ring->cnt - 1;
1481	rx_ring->rxbufs[wr_idx].dma_addr = rx_ring->rxbufs[last_idx].dma_addr;
1482	rx_ring->rxbufs[wr_idx].frag = rx_ring->rxbufs[last_idx].frag;
1483	rx_ring->rxbufs[last_idx].dma_addr = 0;
1484	rx_ring->rxbufs[last_idx].frag = NULL;
1485
1486	memset(rx_ring->rxds, 0, rx_ring->size);
1487	rx_ring->wr_p = 0;
1488	rx_ring->rd_p = 0;
1489}
1490
1491/**
1492 * nfp_net_rx_ring_bufs_free() - Free any buffers currently on the RX ring
1493 * @dp:		NFP Net data path struct
1494 * @rx_ring:	RX ring to remove buffers from
1495 *
 1496 * Assumes that the device is stopped and that buffers occupy entries
 1497 * [0, ring->cnt - 1).  After the device is disabled nfp_net_rx_ring_reset()
 1498 * must be called to restore the required ring geometry.
1499 */
1500static void
1501nfp_net_rx_ring_bufs_free(struct nfp_net_dp *dp,
1502			  struct nfp_net_rx_ring *rx_ring)
1503{
1504	unsigned int i;
1505
1506	for (i = 0; i < rx_ring->cnt - 1; i++) {
 1507		/* A NULL frag can only happen when the initial filling of the ring
 1508		 * fails to allocate enough buffers and this is called to free the
 1509		 * already allocated ones.
1510		 */
1511		if (!rx_ring->rxbufs[i].frag)
1512			continue;
1513
1514		nfp_net_dma_unmap_rx(dp, rx_ring->rxbufs[i].dma_addr);
1515		nfp_net_free_frag(rx_ring->rxbufs[i].frag, dp->xdp_prog);
1516		rx_ring->rxbufs[i].dma_addr = 0;
1517		rx_ring->rxbufs[i].frag = NULL;
1518	}
1519}
1520
1521/**
1522 * nfp_net_rx_ring_bufs_alloc() - Fill RX ring with buffers (don't give to FW)
1523 * @dp:		NFP Net data path struct
 1524 * @rx_ring:  RX ring to allocate buffers for
1525 */
1526static int
1527nfp_net_rx_ring_bufs_alloc(struct nfp_net_dp *dp,
1528			   struct nfp_net_rx_ring *rx_ring)
1529{
1530	struct nfp_net_rx_buf *rxbufs;
1531	unsigned int i;
1532
1533	rxbufs = rx_ring->rxbufs;
1534
1535	for (i = 0; i < rx_ring->cnt - 1; i++) {
1536		rxbufs[i].frag = nfp_net_rx_alloc_one(dp, &rxbufs[i].dma_addr);
1537		if (!rxbufs[i].frag) {
1538			nfp_net_rx_ring_bufs_free(dp, rx_ring);
1539			return -ENOMEM;
1540		}
1541	}
1542
1543	return 0;
1544}
1545
1546/**
1547 * nfp_net_rx_ring_fill_freelist() - Give buffers from the ring to FW
1548 * @dp:	     NFP Net data path struct
1549 * @rx_ring: RX ring to fill
1550 */
1551static void
1552nfp_net_rx_ring_fill_freelist(struct nfp_net_dp *dp,
1553			      struct nfp_net_rx_ring *rx_ring)
1554{
1555	unsigned int i;
1556
1557	for (i = 0; i < rx_ring->cnt - 1; i++)
1558		nfp_net_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
1559				    rx_ring->rxbufs[i].dma_addr);
1560}
1561
1562/**
 1563 * nfp_net_rx_csum_has_errors() - check whether the RX descriptor flags report any csum errors
1564 * @flags: RX descriptor flags field in CPU byte order
1565 */
1566static int nfp_net_rx_csum_has_errors(u16 flags)
1567{
1568	u16 csum_all_checked, csum_all_ok;
1569
1570	csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL;
1571	csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK;
1572
1573	return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT);
1574}
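
/*
 * A standalone sketch (not driver code) of the flag trick above.  The bit
 * layout is assumed purely for illustration: each "checksum checked" flag
 * sits one bit above its "checksum OK" flag, so shifting the OK bits left
 * by the OK shift lines them up with the checked bits, and any protocol
 * that was checked but not OK makes the comparison fail.
 */
#include <stdint.h>
#include <stdio.h>

#define TOY_TCP_OK              (1u << 0)
#define TOY_TCP_CHECKED         (1u << 1)
#define TOY_UDP_OK              (1u << 2)
#define TOY_UDP_CHECKED         (1u << 3)
#define TOY_ALL_OK              (TOY_TCP_OK | TOY_UDP_OK)
#define TOY_ALL_CHECKED         (TOY_TCP_CHECKED | TOY_UDP_CHECKED)
#define TOY_OK_SHIFT            1

static int toy_csum_has_errors(uint16_t flags)
{
        uint16_t checked = flags & TOY_ALL_CHECKED;
        uint16_t ok = flags & TOY_ALL_OK;

        return checked != (uint16_t)(ok << TOY_OK_SHIFT);
}

int main(void)
{
        /* TCP checked and OK: no error */
        printf("%d\n", toy_csum_has_errors(TOY_TCP_CHECKED | TOY_TCP_OK));
        /* UDP checked but not OK: error */
        printf("%d\n", toy_csum_has_errors(TOY_UDP_CHECKED));
        return 0;
}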
1575
1576/**
1577 * nfp_net_rx_csum() - set SKB checksum field based on RX descriptor flags
1578 * @dp:  NFP Net data path struct
1579 * @r_vec: per-ring structure
1580 * @rxd: Pointer to RX descriptor
1581 * @meta: Parsed metadata prepend
1582 * @skb: Pointer to SKB
1583 */
1584static void nfp_net_rx_csum(struct nfp_net_dp *dp,
1585			    struct nfp_net_r_vector *r_vec,
1586			    struct nfp_net_rx_desc *rxd,
1587			    struct nfp_meta_parsed *meta, struct sk_buff *skb)
1588{
1589	skb_checksum_none_assert(skb);
1590
1591	if (!(dp->netdev->features & NETIF_F_RXCSUM))
1592		return;
1593
1594	if (meta->csum_type) {
1595		skb->ip_summed = meta->csum_type;
1596		skb->csum = meta->csum;
1597		u64_stats_update_begin(&r_vec->rx_sync);
1598		r_vec->hw_csum_rx_complete++;
1599		u64_stats_update_end(&r_vec->rx_sync);
1600		return;
1601	}
1602
1603	if (nfp_net_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
1604		u64_stats_update_begin(&r_vec->rx_sync);
1605		r_vec->hw_csum_rx_error++;
1606		u64_stats_update_end(&r_vec->rx_sync);
1607		return;
1608	}
1609
1610	/* Assume that the firmware will never report inner CSUM_OK unless outer
1611	 * L4 headers were successfully parsed. FW will always report zero UDP
1612	 * checksum as CSUM_OK.
1613	 */
1614	if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK ||
1615	    rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) {
1616		__skb_incr_checksum_unnecessary(skb);
1617		u64_stats_update_begin(&r_vec->rx_sync);
1618		r_vec->hw_csum_rx_ok++;
1619		u64_stats_update_end(&r_vec->rx_sync);
1620	}
1621
1622	if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK ||
1623	    rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) {
1624		__skb_incr_checksum_unnecessary(skb);
1625		u64_stats_update_begin(&r_vec->rx_sync);
1626		r_vec->hw_csum_rx_inner_ok++;
1627		u64_stats_update_end(&r_vec->rx_sync);
1628	}
1629}
1630
1631static void
1632nfp_net_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta,
1633		 unsigned int type, __be32 *hash)
1634{
1635	if (!(netdev->features & NETIF_F_RXHASH))
1636		return;
1637
1638	switch (type) {
1639	case NFP_NET_RSS_IPV4:
1640	case NFP_NET_RSS_IPV6:
1641	case NFP_NET_RSS_IPV6_EX:
1642		meta->hash_type = PKT_HASH_TYPE_L3;
1643		break;
1644	default:
1645		meta->hash_type = PKT_HASH_TYPE_L4;
1646		break;
1647	}
1648
1649	meta->hash = get_unaligned_be32(hash);
1650}
1651
1652static void
1653nfp_net_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta,
1654		      void *data, struct nfp_net_rx_desc *rxd)
1655{
1656	struct nfp_net_rx_hash *rx_hash = data;
1657
1658	if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
1659		return;
1660
1661	nfp_net_set_hash(netdev, meta, get_unaligned_be32(&rx_hash->hash_type),
1662			 &rx_hash->hash);
1663}
1664
1665static bool
1666nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
1667		   void *data, void *pkt, unsigned int pkt_len, int meta_len)
1668{
1669	u32 meta_info;
1670
1671	meta_info = get_unaligned_be32(data);
1672	data += 4;
1673
1674	while (meta_info) {
1675		switch (meta_info & NFP_NET_META_FIELD_MASK) {
1676		case NFP_NET_META_HASH:
1677			meta_info >>= NFP_NET_META_FIELD_SIZE;
1678			nfp_net_set_hash(netdev, meta,
1679					 meta_info & NFP_NET_META_FIELD_MASK,
1680					 (__be32 *)data);
1681			data += 4;
1682			break;
1683		case NFP_NET_META_MARK:
1684			meta->mark = get_unaligned_be32(data);
1685			data += 4;
1686			break;
1687		case NFP_NET_META_PORTID:
1688			meta->portid = get_unaligned_be32(data);
1689			data += 4;
1690			break;
1691		case NFP_NET_META_CSUM:
1692			meta->csum_type = CHECKSUM_COMPLETE;
1693			meta->csum =
1694				(__force __wsum)__get_unaligned_cpu32(data);
1695			data += 4;
1696			break;
1697		case NFP_NET_META_RESYNC_INFO:
1698			if (nfp_net_tls_rx_resync_req(netdev, data, pkt,
1699						      pkt_len))
1700				return false;
1701			data += sizeof(struct nfp_net_tls_resync_req);
1702			break;
1703		default:
1704			return true;
1705		}
1706
1707		meta_info >>= NFP_NET_META_FIELD_SIZE;
1708	}
1709
1710	return data != pkt;
1711}
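
/*
 * A standalone sketch (not driver code) of the chained metadata walk
 * above: a leading big-endian 32-bit word is consumed four bits at a
 * time starting from the least significant nibble, and each nibble names
 * the type of the next 32-bit value in the stream.  The field IDs are
 * placeholders assumed for illustration, and per-type quirks such as the
 * extra hash-type field are left out.
 */
#include <stdint.h>
#include <stdio.h>

#define TOY_META_FIELD_SIZE     4
#define TOY_META_FIELD_MASK     0xf
#define TOY_META_MARK           2       /* placeholder field ID */
#define TOY_META_PORTID         5       /* placeholder field ID */

static uint32_t toy_get_be32(const uint8_t *p)
{
        return (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16 |
               (uint32_t)p[2] << 8 | p[3];
}

static int toy_parse_meta(const uint8_t *data)
{
        uint32_t info = toy_get_be32(data);

        data += 4;
        while (info) {
                switch (info & TOY_META_FIELD_MASK) {
                case TOY_META_MARK:
                        printf("mark   = %u\n", toy_get_be32(data));
                        break;
                case TOY_META_PORTID:
                        printf("portid = %u\n", toy_get_be32(data));
                        break;
                default:
                        return -1;      /* unknown field, give up */
                }
                data += 4;
                info >>= TOY_META_FIELD_SIZE;
        }
        return 0;
}

int main(void)
{
        /* info = (PORTID << 4) | MARK: a mark then a port id follow */
        uint8_t meta[12] = { 0x00, 0x00, 0x00, 0x52,
                             0x00, 0x00, 0x00, 0x2a,   /* mark   = 42 */
                             0x00, 0x00, 0x00, 0x07 }; /* portid = 7 */

        return toy_parse_meta(meta);
}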
1712
1713static void
1714nfp_net_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
1715		struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf,
1716		struct sk_buff *skb)
1717{
1718	u64_stats_update_begin(&r_vec->rx_sync);
1719	r_vec->rx_drops++;
1720	/* If we have both skb and rxbuf the replacement buffer allocation
1721	 * must have failed, count this as an alloc failure.
1722	 */
1723	if (skb && rxbuf)
1724		r_vec->rx_replace_buf_alloc_fail++;
1725	u64_stats_update_end(&r_vec->rx_sync);
1726
 1727	/* The skb is built on top of the frag, so freeing the skb would free
 1728	 * the frag as well; take an extra ref to be able to reuse the frag.
1729	 */
1730	if (skb && rxbuf && skb->head == rxbuf->frag)
1731		page_ref_inc(virt_to_head_page(rxbuf->frag));
1732	if (rxbuf)
1733		nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr);
1734	if (skb)
1735		dev_kfree_skb_any(skb);
1736}
1737
1738static bool
1739nfp_net_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
1740		   struct nfp_net_tx_ring *tx_ring,
1741		   struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
1742		   unsigned int pkt_len, bool *completed)
1743{
1744	unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA;
1745	struct nfp_net_tx_buf *txbuf;
1746	struct nfp_net_tx_desc *txd;
1747	int wr_idx;
1748
 1749	/* Reject if xdp_adjust_tail grew the packet beyond the DMA-mapped area */
1750	if (pkt_len + dma_off > dma_map_sz)
1751		return false;
1752
1753	if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
1754		if (!*completed) {
1755			nfp_net_xdp_complete(tx_ring);
1756			*completed = true;
1757		}
1758
1759		if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
1760			nfp_net_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf,
1761					NULL);
1762			return false;
1763		}
1764	}
1765
1766	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
1767
1768	/* Stash the soft descriptor of the head then initialize it */
1769	txbuf = &tx_ring->txbufs[wr_idx];
1770
1771	nfp_net_rx_give_one(dp, rx_ring, txbuf->frag, txbuf->dma_addr);
1772
1773	txbuf->frag = rxbuf->frag;
1774	txbuf->dma_addr = rxbuf->dma_addr;
1775	txbuf->fidx = -1;
1776	txbuf->pkt_cnt = 1;
1777	txbuf->real_len = pkt_len;
1778
1779	dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off,
1780				   pkt_len, DMA_BIDIRECTIONAL);
1781
1782	/* Build TX descriptor */
1783	txd = &tx_ring->txds[wr_idx];
1784	txd->offset_eop = PCIE_DESC_TX_EOP;
1785	txd->dma_len = cpu_to_le16(pkt_len);
1786	nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + dma_off);
1787	txd->data_len = cpu_to_le16(pkt_len);
1788
1789	txd->flags = 0;
1790	txd->mss = 0;
1791	txd->lso_hdrlen = 0;
1792
1793	tx_ring->wr_p++;
1794	tx_ring->wr_ptr_add++;
1795	return true;
1796}
1797
1798/**
1799 * nfp_net_rx() - receive up to @budget packets on @rx_ring
1800 * @rx_ring:   RX ring to receive from
1801 * @budget:    NAPI budget
1802 *
 1803 * Note: this function is split out of the NAPI poll function to keep the
 1804 * packet receive code cleanly separated from the other bookkeeping work
 1805 * performed in the poll function.
1806 *
1807 * Return: Number of packets received.
1808 */
1809static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
1810{
1811	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
1812	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
1813	struct nfp_net_tx_ring *tx_ring;
1814	struct bpf_prog *xdp_prog;
1815	bool xdp_tx_cmpl = false;
1816	unsigned int true_bufsz;
1817	struct sk_buff *skb;
1818	int pkts_polled = 0;
1819	struct xdp_buff xdp;
1820	int idx;
1821
1822	xdp_prog = READ_ONCE(dp->xdp_prog);
1823	true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
1824	xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM,
1825		      &rx_ring->xdp_rxq);
1826	tx_ring = r_vec->xdp_ring;
1827
1828	while (pkts_polled < budget) {
1829		unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
1830		struct nfp_net_rx_buf *rxbuf;
1831		struct nfp_net_rx_desc *rxd;
1832		struct nfp_meta_parsed meta;
1833		bool redir_egress = false;
1834		struct net_device *netdev;
1835		dma_addr_t new_dma_addr;
1836		u32 meta_len_xdp = 0;
1837		void *new_frag;
1838
1839		idx = D_IDX(rx_ring, rx_ring->rd_p);
1840
1841		rxd = &rx_ring->rxds[idx];
1842		if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
1843			break;
1844
1845		/* Memory barrier to ensure that we won't do other reads
1846		 * before the DD bit.
1847		 */
1848		dma_rmb();
1849
1850		memset(&meta, 0, sizeof(meta));
1851
1852		rx_ring->rd_p++;
1853		pkts_polled++;
1854
1855		rxbuf =	&rx_ring->rxbufs[idx];
1856		/*         < meta_len >
1857		 *  <-- [rx_offset] -->
1858		 *  ---------------------------------------------------------
1859		 * | [XX] |  metadata  |             packet           | XXXX |
1860		 *  ---------------------------------------------------------
1861		 *         <---------------- data_len --------------->
1862		 *
1863		 * The rx_offset is fixed for all packets, the meta_len can vary
1864		 * on a packet by packet basis. If rx_offset is set to zero
1865		 * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the
1866		 * buffer and is immediately followed by the packet (no [XX]).
1867		 */
1868		meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
1869		data_len = le16_to_cpu(rxd->rxd.data_len);
1870		pkt_len = data_len - meta_len;
1871
1872		pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
1873		if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
1874			pkt_off += meta_len;
1875		else
1876			pkt_off += dp->rx_offset;
1877		meta_off = pkt_off - meta_len;
1878
1879		/* Stats update */
1880		u64_stats_update_begin(&r_vec->rx_sync);
1881		r_vec->rx_pkts++;
1882		r_vec->rx_bytes += pkt_len;
1883		u64_stats_update_end(&r_vec->rx_sync);
1884
1885		if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
1886			     (dp->rx_offset && meta_len > dp->rx_offset))) {
1887			nn_dp_warn(dp, "oversized RX packet metadata %u\n",
1888				   meta_len);
1889			nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
1890			continue;
1891		}
1892
1893		nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,
1894					data_len);
1895
1896		if (!dp->chained_metadata_format) {
1897			nfp_net_set_hash_desc(dp->netdev, &meta,
1898					      rxbuf->frag + meta_off, rxd);
1899		} else if (meta_len) {
1900			if (unlikely(nfp_net_parse_meta(dp->netdev, &meta,
1901							rxbuf->frag + meta_off,
1902							rxbuf->frag + pkt_off,
1903							pkt_len, meta_len))) {
1904				nn_dp_warn(dp, "invalid RX packet metadata\n");
1905				nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf,
1906						NULL);
1907				continue;
1908			}
1909		}
1910
1911		if (xdp_prog && !meta.portid) {
1912			void *orig_data = rxbuf->frag + pkt_off;
1913			unsigned int dma_off;
1914			int act;
1915
1916			xdp_prepare_buff(&xdp,
1917					 rxbuf->frag + NFP_NET_RX_BUF_HEADROOM,
1918					 pkt_off - NFP_NET_RX_BUF_HEADROOM,
1919					 pkt_len, true);
1920
1921			act = bpf_prog_run_xdp(xdp_prog, &xdp);
1922
1923			pkt_len = xdp.data_end - xdp.data;
1924			pkt_off += xdp.data - orig_data;
1925
1926			switch (act) {
1927			case XDP_PASS:
1928				meta_len_xdp = xdp.data - xdp.data_meta;
1929				break;
1930			case XDP_TX:
1931				dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
1932				if (unlikely(!nfp_net_tx_xdp_buf(dp, rx_ring,
1933								 tx_ring, rxbuf,
1934								 dma_off,
1935								 pkt_len,
1936								 &xdp_tx_cmpl)))
1937					trace_xdp_exception(dp->netdev,
1938							    xdp_prog, act);
1939				continue;
1940			default:
1941				bpf_warn_invalid_xdp_action(act);
1942				fallthrough;
1943			case XDP_ABORTED:
1944				trace_xdp_exception(dp->netdev, xdp_prog, act);
1945				fallthrough;
1946			case XDP_DROP:
1947				nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag,
1948						    rxbuf->dma_addr);
1949				continue;
1950			}
1951		}
1952
1953		if (likely(!meta.portid)) {
1954			netdev = dp->netdev;
1955		} else if (meta.portid == NFP_META_PORT_ID_CTRL) {
1956			struct nfp_net *nn = netdev_priv(dp->netdev);
1957
1958			nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
1959					    pkt_len);
1960			nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag,
1961					    rxbuf->dma_addr);
1962			continue;
1963		} else {
1964			struct nfp_net *nn;
1965
1966			nn = netdev_priv(dp->netdev);
1967			netdev = nfp_app_dev_get(nn->app, meta.portid,
1968						 &redir_egress);
1969			if (unlikely(!netdev)) {
1970				nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf,
1971						NULL);
1972				continue;
1973			}
1974
1975			if (nfp_netdev_is_nfp_repr(netdev))
1976				nfp_repr_inc_rx_stats(netdev, pkt_len);
1977		}
1978
1979		skb = build_skb(rxbuf->frag, true_bufsz);
1980		if (unlikely(!skb)) {
1981			nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
1982			continue;
1983		}
1984		new_frag = nfp_net_napi_alloc_one(dp, &new_dma_addr);
1985		if (unlikely(!new_frag)) {
1986			nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
1987			continue;
1988		}
1989
1990		nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);
1991
1992		nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
1993
1994		skb_reserve(skb, pkt_off);
1995		skb_put(skb, pkt_len);
1996
1997		skb->mark = meta.mark;
1998		skb_set_hash(skb, meta.hash, meta.hash_type);
1999
2000		skb_record_rx_queue(skb, rx_ring->idx);
2001		skb->protocol = eth_type_trans(skb, netdev);
2002
2003		nfp_net_rx_csum(dp, r_vec, rxd, &meta, skb);
2004
2005#ifdef CONFIG_TLS_DEVICE
2006		if (rxd->rxd.flags & PCIE_DESC_RX_DECRYPTED) {
2007			skb->decrypted = true;
2008			u64_stats_update_begin(&r_vec->rx_sync);
2009			r_vec->hw_tls_rx++;
2010			u64_stats_update_end(&r_vec->rx_sync);
2011		}
2012#endif
2013
2014		if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
2015			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
2016					       le16_to_cpu(rxd->rxd.vlan));
2017		if (meta_len_xdp)
2018			skb_metadata_set(skb, meta_len_xdp);
2019
2020		if (likely(!redir_egress)) {
2021			napi_gro_receive(&rx_ring->r_vec->napi, skb);
2022		} else {
2023			skb->dev = netdev;
2024			skb_reset_network_header(skb);
2025			__skb_push(skb, ETH_HLEN);
2026			dev_queue_xmit(skb);
2027		}
2028	}
2029
2030	if (xdp_prog) {
2031		if (tx_ring->wr_ptr_add)
2032			nfp_net_tx_xmit_more_flush(tx_ring);
2033		else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) &&
2034			 !xdp_tx_cmpl)
2035			if (!nfp_net_xdp_complete(tx_ring))
2036				pkts_polled = budget;
2037	}
2038
2039	return pkts_polled;
2040}
2041
2042/**
2043 * nfp_net_poll() - napi poll function
2044 * @napi:    NAPI structure
2045 * @budget:  NAPI budget
2046 *
2047 * Return: number of packets polled.
2048 */
2049static int nfp_net_poll(struct napi_struct *napi, int budget)
2050{
2051	struct nfp_net_r_vector *r_vec =
2052		container_of(napi, struct nfp_net_r_vector, napi);
2053	unsigned int pkts_polled = 0;
2054
2055	if (r_vec->tx_ring)
2056		nfp_net_tx_complete(r_vec->tx_ring, budget);
2057	if (r_vec->rx_ring)
2058		pkts_polled = nfp_net_rx(r_vec->rx_ring, budget);
2059
2060	if (pkts_polled < budget)
2061		if (napi_complete_done(napi, pkts_polled))
2062			nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
2063
2064	return pkts_polled;
2065}
2066
2067/* Control device data path
2068 */
2069
2070static bool
2071nfp_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
2072		struct sk_buff *skb, bool old)
2073{
2074	unsigned int real_len = skb->len, meta_len = 0;
2075	struct nfp_net_tx_ring *tx_ring;
2076	struct nfp_net_tx_buf *txbuf;
2077	struct nfp_net_tx_desc *txd;
2078	struct nfp_net_dp *dp;
2079	dma_addr_t dma_addr;
2080	int wr_idx;
2081
2082	dp = &r_vec->nfp_net->dp;
2083	tx_ring = r_vec->tx_ring;
2084
2085	if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) {
2086		nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n");
2087		goto err_free;
2088	}
2089
2090	if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
2091		u64_stats_update_begin(&r_vec->tx_sync);
2092		r_vec->tx_busy++;
2093		u64_stats_update_end(&r_vec->tx_sync);
2094		if (!old)
2095			__skb_queue_tail(&r_vec->queue, skb);
2096		else
2097			__skb_queue_head(&r_vec->queue, skb);
2098		return true;
2099	}
2100
2101	if (nfp_app_ctrl_has_meta(nn->app)) {
2102		if (unlikely(skb_headroom(skb) < 8)) {
2103			nn_dp_warn(dp, "CTRL TX on skb without headroom\n");
2104			goto err_free;
2105		}
2106		meta_len = 8;
2107		put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4));
2108		put_unaligned_be32(NFP_NET_META_PORTID, skb_push(skb, 4));
2109	}
2110
2111	/* Start with the head skbuf */
2112	dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb),
2113				  DMA_TO_DEVICE);
2114	if (dma_mapping_error(dp->dev, dma_addr))
2115		goto err_dma_warn;
2116
2117	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
2118
2119	/* Stash the soft descriptor of the head then initialize it */
2120	txbuf = &tx_ring->txbufs[wr_idx];
2121	txbuf->skb = skb;
2122	txbuf->dma_addr = dma_addr;
2123	txbuf->fidx = -1;
2124	txbuf->pkt_cnt = 1;
2125	txbuf->real_len = real_len;
2126
2127	/* Build TX descriptor */
2128	txd = &tx_ring->txds[wr_idx];
2129	txd->offset_eop = meta_len | PCIE_DESC_TX_EOP;
2130	txd->dma_len = cpu_to_le16(skb_headlen(skb));
2131	nfp_desc_set_dma_addr(txd, dma_addr);
2132	txd->data_len = cpu_to_le16(skb->len);
2133
2134	txd->flags = 0;
2135	txd->mss = 0;
2136	txd->lso_hdrlen = 0;
2137
2138	tx_ring->wr_p++;
2139	tx_ring->wr_ptr_add++;
2140	nfp_net_tx_xmit_more_flush(tx_ring);
2141
2142	return false;
2143
2144err_dma_warn:
2145	nn_dp_warn(dp, "Failed to DMA map TX CTRL buffer\n");
2146err_free:
2147	u64_stats_update_begin(&r_vec->tx_sync);
2148	r_vec->tx_errors++;
2149	u64_stats_update_end(&r_vec->tx_sync);
2150	dev_kfree_skb_any(skb);
2151	return false;
2152}
2153
2154bool __nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb)
2155{
2156	struct nfp_net_r_vector *r_vec = &nn->r_vecs[0];
2157
2158	return nfp_ctrl_tx_one(nn, r_vec, skb, false);
2159}
2160
2161bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb)
2162{
2163	struct nfp_net_r_vector *r_vec = &nn->r_vecs[0];
2164	bool ret;
2165
2166	spin_lock_bh(&r_vec->lock);
2167	ret = nfp_ctrl_tx_one(nn, r_vec, skb, false);
2168	spin_unlock_bh(&r_vec->lock);
2169
2170	return ret;
2171}
2172
2173static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec)
2174{
2175	struct sk_buff *skb;
2176
2177	while ((skb = __skb_dequeue(&r_vec->queue)))
2178		if (nfp_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true))
2179			return;
2180}
2181
2182static bool
2183nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len)
2184{
2185	u32 meta_type, meta_tag;
2186
2187	if (!nfp_app_ctrl_has_meta(nn->app))
2188		return !meta_len;
2189
2190	if (meta_len != 8)
2191		return false;
2192
2193	meta_type = get_unaligned_be32(data);
2194	meta_tag = get_unaligned_be32(data + 4);
2195
2196	return (meta_type == NFP_NET_META_PORTID &&
2197		meta_tag == NFP_META_PORT_ID_CTRL);
2198}
2199
2200static bool
2201nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp,
2202		struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring)
2203{
2204	unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
2205	struct nfp_net_rx_buf *rxbuf;
2206	struct nfp_net_rx_desc *rxd;
2207	dma_addr_t new_dma_addr;
2208	struct sk_buff *skb;
2209	void *new_frag;
2210	int idx;
2211
2212	idx = D_IDX(rx_ring, rx_ring->rd_p);
2213
2214	rxd = &rx_ring->rxds[idx];
2215	if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
2216		return false;
2217
2218	/* Memory barrier to ensure that we won't do other reads
2219	 * before the DD bit.
2220	 */
2221	dma_rmb();
2222
2223	rx_ring->rd_p++;
2224
2225	rxbuf =	&rx_ring->rxbufs[idx];
2226	meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
2227	data_len = le16_to_cpu(rxd->rxd.data_len);
2228	pkt_len = data_len - meta_len;
2229
2230	pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
2231	if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
2232		pkt_off += meta_len;
2233	else
2234		pkt_off += dp->rx_offset;
2235	meta_off = pkt_off - meta_len;
2236
2237	/* Stats update */
2238	u64_stats_update_begin(&r_vec->rx_sync);
2239	r_vec->rx_pkts++;
2240	r_vec->rx_bytes += pkt_len;
2241	u64_stats_update_end(&r_vec->rx_sync);
2242
2243	nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,	data_len);
2244
2245	if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) {
2246		nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n",
2247			   meta_len);
2248		nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
2249		return true;
2250	}
2251
2252	skb = build_skb(rxbuf->frag, dp->fl_bufsz);
2253	if (unlikely(!skb)) {
2254		nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
2255		return true;
2256	}
2257	new_frag = nfp_net_napi_alloc_one(dp, &new_dma_addr);
2258	if (unlikely(!new_frag)) {
2259		nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
2260		return true;
2261	}
2262
2263	nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);
2264
2265	nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
2266
2267	skb_reserve(skb, pkt_off);
2268	skb_put(skb, pkt_len);
2269
2270	nfp_app_ctrl_rx(nn->app, skb);
2271
2272	return true;
2273}
2274
2275static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec)
2276{
2277	struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring;
2278	struct nfp_net *nn = r_vec->nfp_net;
2279	struct nfp_net_dp *dp = &nn->dp;
2280	unsigned int budget = 512;
2281
2282	while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--)
2283		continue;
2284
2285	return budget;
2286}
2287
2288static void nfp_ctrl_poll(struct tasklet_struct *t)
2289{
2290	struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet);
2291
2292	spin_lock(&r_vec->lock);
2293	nfp_net_tx_complete(r_vec->tx_ring, 0);
2294	__nfp_ctrl_tx_queued(r_vec);
2295	spin_unlock(&r_vec->lock);
2296
2297	if (nfp_ctrl_rx(r_vec)) {
2298		nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
2299	} else {
2300		tasklet_schedule(&r_vec->tasklet);
2301		nn_dp_warn(&r_vec->nfp_net->dp,
2302			   "control message budget exceeded!\n");
2303	}
2304}
2305
2306/* Setup and Configuration
2307 */
2308
2309/**
 2310 * nfp_net_vecs_init() - Assign IRQs and set up ring vectors.
2311 * @nn:		NFP Network structure
2312 */
2313static void nfp_net_vecs_init(struct nfp_net *nn)
2314{
2315	struct nfp_net_r_vector *r_vec;
2316	int r;
2317
2318	nn->lsc_handler = nfp_net_irq_lsc;
2319	nn->exn_handler = nfp_net_irq_exn;
2320
2321	for (r = 0; r < nn->max_r_vecs; r++) {
2322		struct msix_entry *entry;
2323
2324		entry = &nn->irq_entries[NFP_NET_NON_Q_VECTORS + r];
2325
2326		r_vec = &nn->r_vecs[r];
2327		r_vec->nfp_net = nn;
2328		r_vec->irq_entry = entry->entry;
2329		r_vec->irq_vector = entry->vector;
2330
2331		if (nn->dp.netdev) {
2332			r_vec->handler = nfp_net_irq_rxtx;
2333		} else {
2334			r_vec->handler = nfp_ctrl_irq_rxtx;
2335
2336			__skb_queue_head_init(&r_vec->queue);
2337			spin_lock_init(&r_vec->lock);
2338			tasklet_setup(&r_vec->tasklet, nfp_ctrl_poll);
2339			tasklet_disable(&r_vec->tasklet);
2340		}
2341
2342		cpumask_set_cpu(r, &r_vec->affinity_mask);
2343	}
2344}
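
/*
 * A small sketch (not driver code) of the MSI-X entry layout set up
 * above: the auxiliary LSC and EXN vectors occupy the first
 * NFP_NET_NON_Q_VECTORS entries and ring vector r uses entry
 * NFP_NET_NON_Q_VECTORS + r.  The value 2 below is assumed purely for
 * illustration.
 */
#include <stdio.h>

#define TOY_NON_Q_VECTORS       2       /* assumed: LSC + EXN */

int main(void)
{
        unsigned int r, max_r_vecs = 4;

        printf("entry 0: LSC, entry 1: EXN\n");
        for (r = 0; r < max_r_vecs; r++)
                printf("r_vec %u -> msix entry %u\n",
                       r, TOY_NON_Q_VECTORS + r);
        return 0;
}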
2345
2346/**
2347 * nfp_net_tx_ring_free() - Free resources allocated to a TX ring
2348 * @tx_ring:   TX ring to free
2349 */
2350static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
2351{
2352	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
2353	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
2354
2355	kvfree(tx_ring->txbufs);
2356
2357	if (tx_ring->txds)
2358		dma_free_coherent(dp->dev, tx_ring->size,
2359				  tx_ring->txds, tx_ring->dma);
2360
2361	tx_ring->cnt = 0;
2362	tx_ring->txbufs = NULL;
2363	tx_ring->txds = NULL;
2364	tx_ring->dma = 0;
2365	tx_ring->size = 0;
2366}
2367
2368/**
2369 * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring
2370 * @dp:        NFP Net data path struct
2371 * @tx_ring:   TX Ring structure to allocate
2372 *
2373 * Return: 0 on success, negative errno otherwise.
2374 */
2375static int
2376nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
2377{
2378	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
2379
2380	tx_ring->cnt = dp->txd_cnt;
2381
2382	tx_ring->size = array_size(tx_ring->cnt, sizeof(*tx_ring->txds));
2383	tx_ring->txds = dma_alloc_coherent(dp->dev, tx_ring->size,
2384					   &tx_ring->dma,
2385					   GFP_KERNEL | __GFP_NOWARN);
2386	if (!tx_ring->txds) {
2387		netdev_warn(dp->netdev, "failed to allocate TX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n",
2388			    tx_ring->cnt);
2389		goto err_alloc;
2390	}
2391
2392	tx_ring->txbufs = kvcalloc(tx_ring->cnt, sizeof(*tx_ring->txbufs),
2393				   GFP_KERNEL);
2394	if (!tx_ring->txbufs)
2395		goto err_alloc;
2396
2397	if (!tx_ring->is_xdp && dp->netdev)
2398		netif_set_xps_queue(dp->netdev, &r_vec->affinity_mask,
2399				    tx_ring->idx);
2400
2401	return 0;
2402
2403err_alloc:
2404	nfp_net_tx_ring_free(tx_ring);
2405	return -ENOMEM;
2406}
2407
2408static void
2409nfp_net_tx_ring_bufs_free(struct nfp_net_dp *dp,
2410			  struct nfp_net_tx_ring *tx_ring)
2411{
2412	unsigned int i;
2413
2414	if (!tx_ring->is_xdp)
2415		return;
2416
2417	for (i = 0; i < tx_ring->cnt; i++) {
2418		if (!tx_ring->txbufs[i].frag)
2419			return;
2420
2421		nfp_net_dma_unmap_rx(dp, tx_ring->txbufs[i].dma_addr);
2422		__free_page(virt_to_page(tx_ring->txbufs[i].frag));
2423	}
2424}
2425
2426static int
2427nfp_net_tx_ring_bufs_alloc(struct nfp_net_dp *dp,
2428			   struct nfp_net_tx_ring *tx_ring)
2429{
2430	struct nfp_net_tx_buf *txbufs = tx_ring->txbufs;
2431	unsigned int i;
2432
2433	if (!tx_ring->is_xdp)
2434		return 0;
2435
2436	for (i = 0; i < tx_ring->cnt; i++) {
2437		txbufs[i].frag = nfp_net_rx_alloc_one(dp, &txbufs[i].dma_addr);
2438		if (!txbufs[i].frag) {
2439			nfp_net_tx_ring_bufs_free(dp, tx_ring);
2440			return -ENOMEM;
2441		}
2442	}
2443
2444	return 0;
2445}
2446
2447static int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
2448{
2449	unsigned int r;
2450
2451	dp->tx_rings = kcalloc(dp->num_tx_rings, sizeof(*dp->tx_rings),
2452			       GFP_KERNEL);
2453	if (!dp->tx_rings)
2454		return -ENOMEM;
2455
2456	for (r = 0; r < dp->num_tx_rings; r++) {
2457		int bias = 0;
2458
2459		if (r >= dp->num_stack_tx_rings)
2460			bias = dp->num_stack_tx_rings;
2461
2462		nfp_net_tx_ring_init(&dp->tx_rings[r], &nn->r_vecs[r - bias],
2463				     r, bias);
2464
2465		if (nfp_net_tx_ring_alloc(dp, &dp->tx_rings[r]))
2466			goto err_free_prev;
2467
2468		if (nfp_net_tx_ring_bufs_alloc(dp, &dp->tx_rings[r]))
2469			goto err_free_ring;
2470	}
2471
2472	return 0;
2473
2474err_free_prev:
2475	while (r--) {
2476		nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]);
2477err_free_ring:
2478		nfp_net_tx_ring_free(&dp->tx_rings[r]);
2479	}
2480	kfree(dp->tx_rings);
2481	return -ENOMEM;
2482}
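
/*
 * A standalone sketch (not driver code) of the ring-to-vector mapping
 * produced by the bias above: stack TX ring r uses r_vec[r], while XDP
 * TX ring r (r >= num_stack) reuses r_vec[r - num_stack], so each vector
 * serves one stack ring and, when XDP is enabled, one XDP ring.
 */
#include <stdio.h>

int main(void)
{
        unsigned int num_stack = 4, num_tx = 8, r;

        for (r = 0; r < num_tx; r++) {
                unsigned int bias = r >= num_stack ? num_stack : 0;

                printf("tx ring %u -> r_vec %u (%s)\n",
                       r, r - bias, bias ? "xdp" : "stack");
        }
        return 0;
}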
2483
2484static void nfp_net_tx_rings_free(struct nfp_net_dp *dp)
2485{
2486	unsigned int r;
2487
2488	for (r = 0; r < dp->num_tx_rings; r++) {
2489		nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]);
2490		nfp_net_tx_ring_free(&dp->tx_rings[r]);
2491	}
2492
2493	kfree(dp->tx_rings);
2494}
2495
2496/**
2497 * nfp_net_rx_ring_free() - Free resources allocated to a RX ring
2498 * @rx_ring:  RX ring to free
2499 */
2500static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
2501{
2502	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
2503	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
2504
2505	if (dp->netdev)
2506		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
2507	kvfree(rx_ring->rxbufs);
2508
2509	if (rx_ring->rxds)
2510		dma_free_coherent(dp->dev, rx_ring->size,
2511				  rx_ring->rxds, rx_ring->dma);
2512
2513	rx_ring->cnt = 0;
2514	rx_ring->rxbufs = NULL;
2515	rx_ring->rxds = NULL;
2516	rx_ring->dma = 0;
2517	rx_ring->size = 0;
2518}
2519
2520/**
2521 * nfp_net_rx_ring_alloc() - Allocate resource for a RX ring
2522 * @dp:	      NFP Net data path struct
2523 * @rx_ring:  RX ring to allocate
2524 *
2525 * Return: 0 on success, negative errno otherwise.
2526 */
2527static int
2528nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
2529{
2530	int err;
2531
2532	if (dp->netdev) {
2533		err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev,
2534				       rx_ring->idx, rx_ring->r_vec->napi.napi_id);
2535		if (err < 0)
2536			return err;
2537	}
2538
2539	rx_ring->cnt = dp->rxd_cnt;
2540	rx_ring->size = array_size(rx_ring->cnt, sizeof(*rx_ring->rxds));
2541	rx_ring->rxds = dma_alloc_coherent(dp->dev, rx_ring->size,
2542					   &rx_ring->dma,
2543					   GFP_KERNEL | __GFP_NOWARN);
2544	if (!rx_ring->rxds) {
2545		netdev_warn(dp->netdev, "failed to allocate RX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n",
2546			    rx_ring->cnt);
2547		goto err_alloc;
2548	}
2549
2550	rx_ring->rxbufs = kvcalloc(rx_ring->cnt, sizeof(*rx_ring->rxbufs),
2551				   GFP_KERNEL);
2552	if (!rx_ring->rxbufs)
2553		goto err_alloc;
2554
2555	return 0;
2556
2557err_alloc:
2558	nfp_net_rx_ring_free(rx_ring);
2559	return -ENOMEM;
2560}
2561
2562static int nfp_net_rx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
2563{
2564	unsigned int r;
2565
2566	dp->rx_rings = kcalloc(dp->num_rx_rings, sizeof(*dp->rx_rings),
2567			       GFP_KERNEL);
2568	if (!dp->rx_rings)
2569		return -ENOMEM;
2570
2571	for (r = 0; r < dp->num_rx_rings; r++) {
2572		nfp_net_rx_ring_init(&dp->rx_rings[r], &nn->r_vecs[r], r);
2573
2574		if (nfp_net_rx_ring_alloc(dp, &dp->rx_rings[r]))
2575			goto err_free_prev;
2576
2577		if (nfp_net_rx_ring_bufs_alloc(dp, &dp->rx_rings[r]))
2578			goto err_free_ring;
2579	}
2580
2581	return 0;
2582
2583err_free_prev:
2584	while (r--) {
2585		nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]);
2586err_free_ring:
2587		nfp_net_rx_ring_free(&dp->rx_rings[r]);
2588	}
2589	kfree(dp->rx_rings);
2590	return -ENOMEM;
2591}
2592
2593static void nfp_net_rx_rings_free(struct nfp_net_dp *dp)
2594{
2595	unsigned int r;
2596
2597	for (r = 0; r < dp->num_rx_rings; r++) {
2598		nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]);
2599		nfp_net_rx_ring_free(&dp->rx_rings[r]);
2600	}
2601
2602	kfree(dp->rx_rings);
2603}
2604
2605static void
2606nfp_net_vector_assign_rings(struct nfp_net_dp *dp,
2607			    struct nfp_net_r_vector *r_vec, int idx)
2608{
2609	r_vec->rx_ring = idx < dp->num_rx_rings ? &dp->rx_rings[idx] : NULL;
2610	r_vec->tx_ring =
2611		idx < dp->num_stack_tx_rings ? &dp->tx_rings[idx] : NULL;
2612
2613	r_vec->xdp_ring = idx < dp->num_tx_rings - dp->num_stack_tx_rings ?
2614		&dp->tx_rings[dp->num_stack_tx_rings + idx] : NULL;
2615}
2616
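/* Data vNICs drive their ring vectors through NAPI; control vNICs (which
 * have no netdev) use the per-vector tasklet instead.
 */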
2617static int
2618nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
2619		       int idx)
2620{
2621	int err;
2622
2623	/* Setup NAPI */
2624	if (nn->dp.netdev)
2625		netif_napi_add(nn->dp.netdev, &r_vec->napi,
2626			       nfp_net_poll, NAPI_POLL_WEIGHT);
2627	else
2628		tasklet_enable(&r_vec->tasklet);
2629
2630	snprintf(r_vec->name, sizeof(r_vec->name),
2631		 "%s-rxtx-%d", nfp_net_name(nn), idx);
2632	err = request_irq(r_vec->irq_vector, r_vec->handler, 0, r_vec->name,
2633			  r_vec);
2634	if (err) {
2635		if (nn->dp.netdev)
2636			netif_napi_del(&r_vec->napi);
2637		else
2638			tasklet_disable(&r_vec->tasklet);
2639
2640		nn_err(nn, "Error requesting IRQ %d\n", r_vec->irq_vector);
2641		return err;
2642	}
2643	disable_irq(r_vec->irq_vector);
2644
2645	irq_set_affinity_hint(r_vec->irq_vector, &r_vec->affinity_mask);
2646
2647	nn_dbg(nn, "RV%02d: irq=%03d/%03d\n", idx, r_vec->irq_vector,
2648	       r_vec->irq_entry);
2649
2650	return 0;
2651}
2652
2653static void
2654nfp_net_cleanup_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec)
2655{
2656	irq_set_affinity_hint(r_vec->irq_vector, NULL);
2657	if (nn->dp.netdev)
2658		netif_napi_del(&r_vec->napi);
2659	else
2660		tasklet_disable(&r_vec->tasklet);
2661
2662	free_irq(r_vec->irq_vector, r_vec);
2663}
2664
2665/**
2666 * nfp_net_rss_write_itbl() - Write RSS indirection table to device
2667 * @nn:      NFP Net device to reconfigure
2668 */
2669void nfp_net_rss_write_itbl(struct nfp_net *nn)
2670{
2671	int i;
2672
2673	for (i = 0; i < NFP_NET_CFG_RSS_ITBL_SZ; i += 4)
2674		nn_writel(nn, NFP_NET_CFG_RSS_ITBL + i,
2675			  get_unaligned_le32(nn->rss_itbl + i));
2676}
2677
2678/**
2679 * nfp_net_rss_write_key() - Write RSS hash key to device
2680 * @nn:      NFP Net device to reconfigure
2681 */
2682void nfp_net_rss_write_key(struct nfp_net *nn)
2683{
2684	int i;
2685
2686	for (i = 0; i < nfp_net_rss_key_sz(nn); i += 4)
2687		nn_writel(nn, NFP_NET_CFG_RSS_KEY + i,
2688			  get_unaligned_le32(nn->rss_key + i));
2689}
2690
2691/**
2692 * nfp_net_coalesce_write_cfg() - Write irq coalescence configuration to HW
2693 * @nn:      NFP Net device to reconfigure
2694 */
2695void nfp_net_coalesce_write_cfg(struct nfp_net *nn)
2696{
2697	u8 i;
2698	u32 factor;
2699	u32 value;
2700
2701	/* Compute factor used to convert coalesce '_usecs' parameters to
2702	 * ME timestamp ticks.  There are 16 ME clock cycles for each timestamp
2703	 * count.
2704	 */
2705	factor = nn->tlv_caps.me_freq_mhz / 16;
2706
2707	/* copy RX interrupt coalesce parameters */
2708	value = (nn->rx_coalesce_max_frames << 16) |
2709		(factor * nn->rx_coalesce_usecs);
2710	for (i = 0; i < nn->dp.num_rx_rings; i++)
2711		nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(i), value);
2712
2713	/* copy TX interrupt coalesce parameters */
2714	value = (nn->tx_coalesce_max_frames << 16) |
2715		(factor * nn->tx_coalesce_usecs);
2716	for (i = 0; i < nn->dp.num_tx_rings; i++)
2717		nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(i), value);
2718}
2719
2720/**
2721 * nfp_net_write_mac_addr() - Write mac address to the device control BAR
2722 * @nn:      NFP Net device to reconfigure
2723 * @addr:    MAC address to write
2724 *
2725 * Writes the MAC address from the netdev to the device control BAR.  Does not
2726 * perform the required reconfig.  We do a bit of a byte-swapping dance because
2727 * the firmware is little-endian.
2728 */
2729static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *addr)
2730{
2731	nn_writel(nn, NFP_NET_CFG_MACADDR + 0, get_unaligned_be32(addr));
2732	nn_writew(nn, NFP_NET_CFG_MACADDR + 6, get_unaligned_be16(addr + 4));
2733}
2734
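/* Zero a ring's address, size and MSI-X vector entries in the control BAR
 * so the FW no longer references rings which are being torn down.
 */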
2735static void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx)
2736{
2737	nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), 0);
2738	nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), 0);
2739	nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), 0);
2740
2741	nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), 0);
2742	nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), 0);
2743	nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), 0);
2744}
2745
2746/**
2747 * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP
2748 * @nn:      NFP Net device to reconfigure
2749 *
2750 * Warning: must be fully idempotent.
2751 */
2752static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
2753{
2754	u32 new_ctrl, update;
2755	unsigned int r;
2756	int err;
2757
2758	new_ctrl = nn->dp.ctrl;
2759	new_ctrl &= ~NFP_NET_CFG_CTRL_ENABLE;
2760	update = NFP_NET_CFG_UPDATE_GEN;
2761	update |= NFP_NET_CFG_UPDATE_MSIX;
2762	update |= NFP_NET_CFG_UPDATE_RING;
2763
2764	if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
2765		new_ctrl &= ~NFP_NET_CFG_CTRL_RINGCFG;
2766
2767	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
2768	nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
2769
2770	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
2771	err = nfp_net_reconfig(nn, update);
2772	if (err)
2773		nn_err(nn, "Could not disable device: %d\n", err);
2774
2775	for (r = 0; r < nn->dp.num_rx_rings; r++)
2776		nfp_net_rx_ring_reset(&nn->dp.rx_rings[r]);
2777	for (r = 0; r < nn->dp.num_tx_rings; r++)
2778		nfp_net_tx_ring_reset(&nn->dp, &nn->dp.tx_rings[r]);
2779	for (r = 0; r < nn->dp.num_r_vecs; r++)
2780		nfp_net_vec_clear_ring_data(nn, r);
2781
2782	nn->dp.ctrl = new_ctrl;
2783}
2784
2785static void
2786nfp_net_rx_ring_hw_cfg_write(struct nfp_net *nn,
2787			     struct nfp_net_rx_ring *rx_ring, unsigned int idx)
2788{
2789	/* Write the DMA address, size and MSI-X info to the device */
2790	nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), rx_ring->dma);
2791	nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(rx_ring->cnt));
2792	nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), rx_ring->r_vec->irq_entry);
2793}
2794
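/* TX counterpart of the above: point the FW at the ring's DMA address,
 * log2 size and interrupt vector.
 */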
2795static void
2796nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn,
2797			     struct nfp_net_tx_ring *tx_ring, unsigned int idx)
2798{
2799	nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), tx_ring->dma);
2800	nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(tx_ring->cnt));
2801	nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_entry);
2802}
2803
2804/**
2805 * nfp_net_set_config_and_enable() - Write control BAR and enable NFP
2806 * @nn:      NFP Net device to reconfigure
2807 */
2808static int nfp_net_set_config_and_enable(struct nfp_net *nn)
2809{
2810	u32 bufsz, new_ctrl, update = 0;
2811	unsigned int r;
2812	int err;
2813
2814	new_ctrl = nn->dp.ctrl;
2815
2816	if (nn->dp.ctrl & NFP_NET_CFG_CTRL_RSS_ANY) {
2817		nfp_net_rss_write_key(nn);
2818		nfp_net_rss_write_itbl(nn);
2819		nn_writel(nn, NFP_NET_CFG_RSS_CTRL, nn->rss_cfg);
2820		update |= NFP_NET_CFG_UPDATE_RSS;
2821	}
2822
2823	if (nn->dp.ctrl & NFP_NET_CFG_CTRL_IRQMOD) {
2824		nfp_net_coalesce_write_cfg(nn);
2825		update |= NFP_NET_CFG_UPDATE_IRQMOD;
2826	}
2827
2828	for (r = 0; r < nn->dp.num_tx_rings; r++)
2829		nfp_net_tx_ring_hw_cfg_write(nn, &nn->dp.tx_rings[r], r);
2830	for (r = 0; r < nn->dp.num_rx_rings; r++)
2831		nfp_net_rx_ring_hw_cfg_write(nn, &nn->dp.rx_rings[r], r);
2832
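	/* Enable rings 0..num_rings-1; 64 rings is special-cased so we never
	 * shift a 64-bit value by 64.
	 */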
2833	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->dp.num_tx_rings == 64 ?
2834		  0xffffffffffffffffULL : ((u64)1 << nn->dp.num_tx_rings) - 1);
2835
2836	nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->dp.num_rx_rings == 64 ?
2837		  0xffffffffffffffffULL : ((u64)1 << nn->dp.num_rx_rings) - 1);
2838
2839	if (nn->dp.netdev)
2840		nfp_net_write_mac_addr(nn, nn->dp.netdev->dev_addr);
2841
2842	nn_writel(nn, NFP_NET_CFG_MTU, nn->dp.mtu);
2843
2844	bufsz = nn->dp.fl_bufsz - nn->dp.rx_dma_off - NFP_NET_RX_BUF_NON_DATA;
2845	nn_writel(nn, NFP_NET_CFG_FLBUFSZ, bufsz);
2846
2847	/* Enable device */
2848	new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
2849	update |= NFP_NET_CFG_UPDATE_GEN;
2850	update |= NFP_NET_CFG_UPDATE_MSIX;
2851	update |= NFP_NET_CFG_UPDATE_RING;
2852	if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
2853		new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;
2854
2855	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
2856	err = nfp_net_reconfig(nn, update);
2857	if (err) {
2858		nfp_net_clear_config_and_disable(nn);
2859		return err;
2860	}
2861
2862	nn->dp.ctrl = new_ctrl;
2863
2864	for (r = 0; r < nn->dp.num_rx_rings; r++)
2865		nfp_net_rx_ring_fill_freelist(&nn->dp, &nn->dp.rx_rings[r]);
2866
2867	return 0;
2868}
2869
2870/**
2871 * nfp_net_close_stack() - Quiesce the stack (part of close)
2872 * @nn:	     NFP Net device to reconfigure
2873 */
2874static void nfp_net_close_stack(struct nfp_net *nn)
2875{
2876	unsigned int r;
2877
2878	disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
2879	netif_carrier_off(nn->dp.netdev);
2880	nn->link_up = false;
2881
2882	for (r = 0; r < nn->dp.num_r_vecs; r++) {
2883		disable_irq(nn->r_vecs[r].irq_vector);
2884		napi_disable(&nn->r_vecs[r].napi);
2885	}
2886
2887	netif_tx_disable(nn->dp.netdev);
2888}
2889
2890/**
2891 * nfp_net_close_free_all() - Free all runtime resources
2892 * @nn:      NFP Net device to reconfigure
2893 */
2894static void nfp_net_close_free_all(struct nfp_net *nn)
2895{
2896	unsigned int r;
2897
2898	nfp_net_tx_rings_free(&nn->dp);
2899	nfp_net_rx_rings_free(&nn->dp);
2900
2901	for (r = 0; r < nn->dp.num_r_vecs; r++)
2902		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
2903
2904	nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
2905	nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
2906}
2907
2908/**
2909 * nfp_net_netdev_close() - Called when the device is downed
2910 * @netdev:      netdev structure
2911 */
2912static int nfp_net_netdev_close(struct net_device *netdev)
2913{
2914	struct nfp_net *nn = netdev_priv(netdev);
2915
2916	/* Step 1: Disable RX and TX rings from the Linux kernel perspective
2917	 */
2918	nfp_net_close_stack(nn);
2919
2920	/* Step 2: Tell NFP
2921	 */
2922	nfp_net_clear_config_and_disable(nn);
2923	nfp_port_configure(netdev, false);
2924
2925	/* Step 3: Free resources
2926	 */
2927	nfp_net_close_free_all(nn);
2928
2929	nn_dbg(nn, "%s down", netdev->name);
2930	return 0;
2931}
2932
2933void nfp_ctrl_close(struct nfp_net *nn)
2934{
2935	int r;
2936
2937	rtnl_lock();
2938
2939	for (r = 0; r < nn->dp.num_r_vecs; r++) {
2940		disable_irq(nn->r_vecs[r].irq_vector);
2941		tasklet_disable(&nn->r_vecs[r].tasklet);
2942	}
2943
2944	nfp_net_clear_config_and_disable(nn);
2945
2946	nfp_net_close_free_all(nn);
2947
2948	rtnl_unlock();
2949}
2950
2951/**
2952 * nfp_net_open_stack() - Start the device from stack's perspective
2953 * @nn:      NFP Net device to reconfigure
2954 */
2955static void nfp_net_open_stack(struct nfp_net *nn)
2956{
2957	unsigned int r;
2958
2959	for (r = 0; r < nn->dp.num_r_vecs; r++) {
2960		napi_enable(&nn->r_vecs[r].napi);
2961		enable_irq(nn->r_vecs[r].irq_vector);
2962	}
2963
2964	netif_tx_wake_all_queues(nn->dp.netdev);
2965
2966	enable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
2967	nfp_net_read_link_status(nn);
2968}
2969
2970static int nfp_net_open_alloc_all(struct nfp_net *nn)
2971{
2972	int err, r;
2973
2974	err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_EXN, "%s-exn",
2975				      nn->exn_name, sizeof(nn->exn_name),
2976				      NFP_NET_IRQ_EXN_IDX, nn->exn_handler);
2977	if (err)
2978		return err;
2979	err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_LSC, "%s-lsc",
2980				      nn->lsc_name, sizeof(nn->lsc_name),
2981				      NFP_NET_IRQ_LSC_IDX, nn->lsc_handler);
2982	if (err)
2983		goto err_free_exn;
2984	disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
2985
2986	for (r = 0; r < nn->dp.num_r_vecs; r++) {
2987		err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
2988		if (err)
2989			goto err_cleanup_vec_p;
2990	}
2991
2992	err = nfp_net_rx_rings_prepare(nn, &nn->dp);
2993	if (err)
2994		goto err_cleanup_vec;
2995
2996	err = nfp_net_tx_rings_prepare(nn, &nn->dp);
2997	if (err)
2998		goto err_free_rx_rings;
2999
3000	for (r = 0; r < nn->max_r_vecs; r++)
3001		nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
3002
3003	return 0;
3004
3005err_free_rx_rings:
3006	nfp_net_rx_rings_free(&nn->dp);
3007err_cleanup_vec:
3008	r = nn->dp.num_r_vecs;
3009err_cleanup_vec_p:
3010	while (r--)
3011		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
3012	nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
3013err_free_exn:
3014	nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
3015	return err;
3016}
3017
3018static int nfp_net_netdev_open(struct net_device *netdev)
3019{
3020	struct nfp_net *nn = netdev_priv(netdev);
3021	int err;
3022
3023	/* Step 1: Allocate resources for rings and the like
3024	 * - Request interrupts
3025	 * - Allocate RX and TX ring resources
3026	 * - Setup initial RSS table
3027	 */
3028	err = nfp_net_open_alloc_all(nn);
3029	if (err)
3030		return err;
3031
3032	err = netif_set_real_num_tx_queues(netdev, nn->dp.num_stack_tx_rings);
3033	if (err)
3034		goto err_free_all;
3035
3036	err = netif_set_real_num_rx_queues(netdev, nn->dp.num_rx_rings);
3037	if (err)
3038		goto err_free_all;
3039
3040	/* Step 2: Configure the NFP
3041	 * - Ifup the physical interface if it exists
3042	 * - Enable rings from 0 to tx_rings/rx_rings - 1.
3043	 * - Write MAC address (in case it changed)
3044	 * - Set the MTU
3045	 * - Set the Freelist buffer size
3046	 * - Enable the FW
3047	 */
3048	err = nfp_port_configure(netdev, true);
3049	if (err)
3050		goto err_free_all;
3051
3052	err = nfp_net_set_config_and_enable(nn);
3053	if (err)
3054		goto err_port_disable;
3055
3056	/* Step 3: Enable for kernel
3057	 * - put some freelist descriptors on each RX ring
3058	 * - enable NAPI on each ring
3059	 * - enable all TX queues
3060	 * - set link state
3061	 */
3062	nfp_net_open_stack(nn);
3063
3064	return 0;
3065
3066err_port_disable:
3067	nfp_port_configure(netdev, false);
3068err_free_all:
3069	nfp_net_close_free_all(nn);
3070	return err;
3071}
3072
3073int nfp_ctrl_open(struct nfp_net *nn)
3074{
3075	int err, r;
3076
3077	/* ring dumping depends on vNICs being opened/closed under rtnl */
3078	rtnl_lock();
3079
3080	err = nfp_net_open_alloc_all(nn);
3081	if (err)
3082		goto err_unlock;
3083
3084	err = nfp_net_set_config_and_enable(nn);
3085	if (err)
3086		goto err_free_all;
3087
3088	for (r = 0; r < nn->dp.num_r_vecs; r++)
3089		enable_irq(nn->r_vecs[r].irq_vector);
3090
3091	rtnl_unlock();
3092
3093	return 0;
3094
3095err_free_all:
3096	nfp_net_close_free_all(nn);
3097err_unlock:
3098	rtnl_unlock();
3099	return err;
3100}
3101
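/* Fold the netdev RX mode flags (multicast, promiscuous) into the vNIC
 * control word; this runs in atomic context, hence the posted (asynchronous)
 * reconfig rather than the synchronous one.
 */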
3102static void nfp_net_set_rx_mode(struct net_device *netdev)
3103{
3104	struct nfp_net *nn = netdev_priv(netdev);
3105	u32 new_ctrl;
3106
3107	new_ctrl = nn->dp.ctrl;
3108
3109	if (!netdev_mc_empty(netdev) || netdev->flags & IFF_ALLMULTI)
3110		new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_L2MC;
3111	else
3112		new_ctrl &= ~NFP_NET_CFG_CTRL_L2MC;
3113
3114	if (netdev->flags & IFF_PROMISC) {
3115		if (nn->cap & NFP_NET_CFG_CTRL_PROMISC)
3116			new_ctrl |= NFP_NET_CFG_CTRL_PROMISC;
3117		else
3118			nn_warn(nn, "FW does not support promiscuous mode\n");
3119	} else {
3120		new_ctrl &= ~NFP_NET_CFG_CTRL_PROMISC;
3121	}
3122
3123	if (new_ctrl == nn->dp.ctrl)
3124		return;
3125
3126	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
3127	nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_GEN);
3128
3129	nn->dp.ctrl = new_ctrl;
3130}
3131
3132static void nfp_net_rss_init_itbl(struct nfp_net *nn)
3133{
3134	int i;
3135
3136	for (i = 0; i < sizeof(nn->rss_itbl); i++)
3137		nn->rss_itbl[i] =
3138			ethtool_rxfh_indir_default(i, nn->dp.num_rx_rings);
3139}
3140
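/* Swap the candidate data path in @dp with the currently active one; the
 * previous settings are left in @dp so the caller can free or restore them.
 */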
3141static void nfp_net_dp_swap(struct nfp_net *nn, struct nfp_net_dp *dp)
3142{
3143	struct nfp_net_dp new_dp = *dp;
3144
3145	*dp = nn->dp;
3146	nn->dp = new_dp;
3147
3148	nn->dp.netdev->mtu = new_dp.mtu;
3149
3150	if (!netif_is_rxfh_configured(nn->dp.netdev))
3151		nfp_net_rss_init_itbl(nn);
3152}
3153
3154static int nfp_net_dp_swap_enable(struct nfp_net *nn, struct nfp_net_dp *dp)
3155{
3156	unsigned int r;
3157	int err;
3158
3159	nfp_net_dp_swap(nn, dp);
3160
3161	for (r = 0; r < nn->max_r_vecs; r++)
3162		nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
3163
3164	err = netif_set_real_num_rx_queues(nn->dp.netdev, nn->dp.num_rx_rings);
3165	if (err)
3166		return err;
3167
3168	if (nn->dp.netdev->real_num_tx_queues != nn->dp.num_stack_tx_rings) {
3169		err = netif_set_real_num_tx_queues(nn->dp.netdev,
3170						   nn->dp.num_stack_tx_rings);
3171		if (err)
3172			return err;
3173	}
3174
3175	return nfp_net_set_config_and_enable(nn);
3176}
3177
3178struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn)
3179{
3180	struct nfp_net_dp *new;
3181
3182	new = kmalloc(sizeof(*new), GFP_KERNEL);
3183	if (!new)
3184		return NULL;
3185
3186	*new = nn->dp;
3187
3188	/* Clear things which need to be recomputed */
3189	new->fl_bufsz = 0;
3190	new->tx_rings = NULL;
3191	new->rx_rings = NULL;
3192	new->num_r_vecs = 0;
3193	new->num_stack_tx_rings = 0;
3194
3195	return new;
3196}
3197
3198static int
3199nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp,
3200		     struct netlink_ext_ack *extack)
3201{
3202	/* XDP-enabled tests */
3203	if (!dp->xdp_prog)
3204		return 0;
3205	if (dp->fl_bufsz > PAGE_SIZE) {
3206		NL_SET_ERR_MSG_MOD(extack, "MTU too large w/ XDP enabled");
3207		return -EINVAL;
3208	}
3209	if (dp->num_tx_rings > nn->max_tx_rings) {
3210		NL_SET_ERR_MSG_MOD(extack, "Insufficient number of TX rings w/ XDP enabled");
3211		return -EINVAL;
3212	}
3213
3214	return 0;
3215}
3216
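/* Validate a cloned data path and, if the device is up, swap it in with a
 * full stop/start of the rings.  @dp is consumed (kfree()d) on all paths.
 */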
3217int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *dp,
3218			  struct netlink_ext_ack *extack)
3219{
3220	int r, err;
3221
3222	dp->fl_bufsz = nfp_net_calc_fl_bufsz(dp);
3223
3224	dp->num_stack_tx_rings = dp->num_tx_rings;
3225	if (dp->xdp_prog)
3226		dp->num_stack_tx_rings -= dp->num_rx_rings;
3227
3228	dp->num_r_vecs = max(dp->num_rx_rings, dp->num_stack_tx_rings);
3229
3230	err = nfp_net_check_config(nn, dp, extack);
3231	if (err)
3232		goto exit_free_dp;
3233
3234	if (!netif_running(dp->netdev)) {
3235		nfp_net_dp_swap(nn, dp);
3236		err = 0;
3237		goto exit_free_dp;
3238	}
3239
3240	/* Prepare new rings */
3241	for (r = nn->dp.num_r_vecs; r < dp->num_r_vecs; r++) {
3242		err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
3243		if (err) {
3244			dp->num_r_vecs = r;
3245			goto err_cleanup_vecs;
3246		}
3247	}
3248
3249	err = nfp_net_rx_rings_prepare(nn, dp);
3250	if (err)
3251		goto err_cleanup_vecs;
3252
3253	err = nfp_net_tx_rings_prepare(nn, dp);
3254	if (err)
3255		goto err_free_rx;
3256
3257	/* Stop device, swap in new rings, try to start the firmware */
3258	nfp_net_close_stack(nn);
3259	nfp_net_clear_config_and_disable(nn);
3260
3261	err = nfp_net_dp_swap_enable(nn, dp);
3262	if (err) {
3263		int err2;
3264
3265		nfp_net_clear_config_and_disable(nn);
3266
3267		/* Try with old configuration and old rings */
3268		err2 = nfp_net_dp_swap_enable(nn, dp);
3269		if (err2)
3270			nn_err(nn, "Can't restore ring config - FW communication failed (%d,%d)\n",
3271			       err, err2);
3272	}
3273	for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--)
3274		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
3275
3276	nfp_net_rx_rings_free(dp);
3277	nfp_net_tx_rings_free(dp);
3278
3279	nfp_net_open_stack(nn);
3280exit_free_dp:
3281	kfree(dp);
3282
3283	return err;
3284
3285err_free_rx:
3286	nfp_net_rx_rings_free(dp);
3287err_cleanup_vecs:
3288	for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--)
3289		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
3290	kfree(dp);
3291	return err;
3292}
3293
3294static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
3295{
3296	struct nfp_net *nn = netdev_priv(netdev);
3297	struct nfp_net_dp *dp;
3298	int err;
3299
3300	err = nfp_app_check_mtu(nn->app, netdev, new_mtu);
3301	if (err)
3302		return err;
3303
3304	dp = nfp_net_clone_dp(nn);
3305	if (!dp)
3306		return -ENOMEM;
3307
3308	dp->mtu = new_mtu;
3309
3310	return nfp_net_ring_reconfig(nn, dp, NULL);
3311}
3312
3313static int
3314nfp_net_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
3315{
3316	const u32 cmd = NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD;
3317	struct nfp_net *nn = netdev_priv(netdev);
3318	int err;
3319
3320	/* Priority tagged packets with vlan id 0 are processed by the
3321	 * NFP as untagged packets
3322	 */
3323	if (!vid)
3324		return 0;
3325
3326	err = nfp_net_mbox_lock(nn, NFP_NET_CFG_VLAN_FILTER_SZ);
3327	if (err)
3328		return err;
3329
3330	nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid);
3331	nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO,
3332		  ETH_P_8021Q);
3333
3334	return nfp_net_mbox_reconfig_and_unlock(nn, cmd);
3335}
3336
3337static int
3338nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
3339{
3340	const u32 cmd = NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL;
3341	struct nfp_net *nn = netdev_priv(netdev);
3342	int err;
3343
3344	/* Priority tagged packets with vlan id 0 are processed by the
3345	 * NFP as untagged packets
3346	 */
3347	if (!vid)
3348		return 0;
3349
3350	err = nfp_net_mbox_lock(nn, NFP_NET_CFG_VLAN_FILTER_SZ);
3351	if (err)
3352		return err;
3353
3354	nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid);
3355	nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO,
3356		  ETH_P_8021Q);
3357
3358	return nfp_net_mbox_reconfig_and_unlock(nn, cmd);
3359}
3360
3361static void nfp_net_stat64(struct net_device *netdev,
3362			   struct rtnl_link_stats64 *stats)
3363{
3364	struct nfp_net *nn = netdev_priv(netdev);
3365	int r;
3366
3367	/* Collect software stats */
3368	for (r = 0; r < nn->max_r_vecs; r++) {
3369		struct nfp_net_r_vector *r_vec = &nn->r_vecs[r];
3370		u64 data[3];
3371		unsigned int start;
3372
3373		do {
3374			start = u64_stats_fetch_begin(&r_vec->rx_sync);
3375			data[0] = r_vec->rx_pkts;
3376			data[1] = r_vec->rx_bytes;
3377			data[2] = r_vec->rx_drops;
3378		} while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
3379		stats->rx_packets += data[0];
3380		stats->rx_bytes += data[1];
3381		stats->rx_dropped += data[2];
3382
3383		do {
3384			start = u64_stats_fetch_begin(&r_vec->tx_sync);
3385			data[0] = r_vec->tx_pkts;
3386			data[1] = r_vec->tx_bytes;
3387			data[2] = r_vec->tx_errors;
3388		} while (u64_stats_fetch_retry(&r_vec->tx_sync, start));
3389		stats->tx_packets += data[0];
3390		stats->tx_bytes += data[1];
3391		stats->tx_errors += data[2];
3392	}
3393
3394	/* Add in device stats */
3395	stats->multicast += nn_readq(nn, NFP_NET_CFG_STATS_RX_MC_FRAMES);
3396	stats->rx_dropped += nn_readq(nn, NFP_NET_CFG_STATS_RX_DISCARDS);
3397	stats->rx_errors += nn_readq(nn, NFP_NET_CFG_STATS_RX_ERRORS);
3398
3399	stats->tx_dropped += nn_readq(nn, NFP_NET_CFG_STATS_TX_DISCARDS);
3400	stats->tx_errors += nn_readq(nn, NFP_NET_CFG_STATS_TX_ERRORS);
3401}
3402
3403static int nfp_net_set_features(struct net_device *netdev,
3404				netdev_features_t features)
3405{
3406	netdev_features_t changed = netdev->features ^ features;
3407	struct nfp_net *nn = netdev_priv(netdev);
3408	u32 new_ctrl;
3409	int err;
3410
3411	/* Assume this is not called with features we have not advertised */
3412
3413	new_ctrl = nn->dp.ctrl;
3414
3415	if (changed & NETIF_F_RXCSUM) {
3416		if (features & NETIF_F_RXCSUM)
3417			new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY;
3418		else
3419			new_ctrl &= ~NFP_NET_CFG_CTRL_RXCSUM_ANY;
3420	}
3421
3422	if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
3423		if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))
3424			new_ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
3425		else
3426			new_ctrl &= ~NFP_NET_CFG_CTRL_TXCSUM;
3427	}
3428
3429	if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) {
3430		if (features & (NETIF_F_TSO | NETIF_F_TSO6))
3431			new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_LSO2 ?:
3432					      NFP_NET_CFG_CTRL_LSO;
3433		else
3434			new_ctrl &= ~NFP_NET_CFG_CTRL_LSO_ANY;
3435	}
3436
3437	if (changed & NETIF_F_HW_VLAN_CTAG_RX) {
3438		if (features & NETIF_F_HW_VLAN_CTAG_RX)
3439			new_ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
3440		else
3441			new_ctrl &= ~NFP_NET_CFG_CTRL_RXVLAN;
3442	}
3443
3444	if (changed & NETIF_F_HW_VLAN_CTAG_TX) {
3445		if (features & NETIF_F_HW_VLAN_CTAG_TX)
3446			new_ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
3447		else
3448			new_ctrl &= ~NFP_NET_CFG_CTRL_TXVLAN;
3449	}
3450
3451	if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) {
3452		if (features & NETIF_F_HW_VLAN_CTAG_FILTER)
3453			new_ctrl |= NFP_NET_CFG_CTRL_CTAG_FILTER;
3454		else
3455			new_ctrl &= ~NFP_NET_CFG_CTRL_CTAG_FILTER;
3456	}
3457
3458	if (changed & NETIF_F_SG) {
3459		if (features & NETIF_F_SG)
3460			new_ctrl |= NFP_NET_CFG_CTRL_GATHER;
3461		else
3462			new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
3463	}
3464
3465	err = nfp_port_set_features(netdev, features);
3466	if (err)
3467		return err;
3468
3469	nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
3470	       netdev->features, features, changed);
3471
3472	if (new_ctrl == nn->dp.ctrl)
3473		return 0;
3474
3475	nn_dbg(nn, "NIC ctrl: 0x%x -> 0x%x\n", nn->dp.ctrl, new_ctrl);
3476	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
3477	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
3478	if (err)
3479		return err;
3480
3481	nn->dp.ctrl = new_ctrl;
3482
3483	return 0;
3484}
3485
3486static netdev_features_t
3487nfp_net_features_check(struct sk_buff *skb, struct net_device *dev,
3488		       netdev_features_t features)
3489{
3490	u8 l4_hdr;
3491
3492	/* We can't do TSO over double tagged packets (802.1AD) */
3493	features &= vlan_features_check(skb, features);
3494
3495	if (!skb->encapsulation)
3496		return features;
3497
3498	/* Ensure that inner L4 header offset fits into TX descriptor field */
3499	if (skb_is_gso(skb)) {
3500		u32 hdrlen;
3501
3502		hdrlen = skb_inner_transport_header(skb) - skb->data +
3503			inner_tcp_hdrlen(skb);
3504
3505		/* Assume worst case scenario of having longest possible
3506		 * metadata prepend - 8B
3507		 */
3508		if (unlikely(hdrlen > NFP_NET_LSO_MAX_HDR_SZ - 8))
3509			features &= ~NETIF_F_GSO_MASK;
3510	}
3511
3512	/* VXLAN/GRE check */
3513	switch (vlan_get_protocol(skb)) {
3514	case htons(ETH_P_IP):
3515		l4_hdr = ip_hdr(skb)->protocol;
3516		break;
3517	case htons(ETH_P_IPV6):
3518		l4_hdr = ipv6_hdr(skb)->nexthdr;
3519		break;
3520	default:
3521		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
3522	}
3523
3524	if (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
3525	    skb->inner_protocol != htons(ETH_P_TEB) ||
3526	    (l4_hdr != IPPROTO_UDP && l4_hdr != IPPROTO_GRE) ||
3527	    (l4_hdr == IPPROTO_UDP &&
3528	     (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
3529	      sizeof(struct udphdr) + sizeof(struct vxlanhdr))))
3530		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
3531
3532	return features;
3533}
3534
3535static int
3536nfp_net_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
3537{
3538	struct nfp_net *nn = netdev_priv(netdev);
3539	int n;
3540
3541	/* If a port is defined, the devlink_port is registered and the devlink
3542	 * core takes care of name formatting.
3543	 */
3544	if (nn->port)
3545		return -EOPNOTSUPP;
3546
3547	if (nn->dp.is_vf || nn->vnic_no_name)
3548		return -EOPNOTSUPP;
3549
3550	n = snprintf(name, len, "n%d", nn->id);
3551	if (n >= len)
3552		return -EINVAL;
3553
3554	return 0;
3555}
3556
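/* Install or remove a driver-space XDP program.  Turning XDP on or off
 * needs a full ring reconfig: dedicated TX rings are added or removed and
 * RX buffers must be re-mapped bidirectionally.
 */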
3557static int nfp_net_xdp_setup_drv(struct nfp_net *nn, struct netdev_bpf *bpf)
3558{
3559	struct bpf_prog *prog = bpf->prog;
3560	struct nfp_net_dp *dp;
3561	int err;
3562
3563	if (!prog == !nn->dp.xdp_prog) {
3564		WRITE_ONCE(nn->dp.xdp_prog, prog);
3565		xdp_attachment_setup(&nn->xdp, bpf);
3566		return 0;
3567	}
3568
3569	dp = nfp_net_clone_dp(nn);
3570	if (!dp)
3571		return -ENOMEM;
3572
3573	dp->xdp_prog = prog;
3574	dp->num_tx_rings += prog ? nn->dp.num_rx_rings : -nn->dp.num_rx_rings;
3575	dp->rx_dma_dir = prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
3576	dp->rx_dma_off = prog ? XDP_PACKET_HEADROOM - nn->dp.rx_offset : 0;
3577
3578	/* We need RX reconfig to remap the buffers (BIDIR vs FROM_DEV) */
3579	err = nfp_net_ring_reconfig(nn, dp, bpf->extack);
3580	if (err)
3581		return err;
3582
3583	xdp_attachment_setup(&nn->xdp, bpf);
3584	return 0;
3585}
3586
3587static int nfp_net_xdp_setup_hw(struct nfp_net *nn, struct netdev_bpf *bpf)
3588{
3589	int err;
3590
3591	err = nfp_app_xdp_offload(nn->app, nn, bpf->prog, bpf->extack);
3592	if (err)
3593		return err;
3594
3595	xdp_attachment_setup(&nn->xdp_hw, bpf);
3596	return 0;
3597}
3598
3599static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
3600{
3601	struct nfp_net *nn = netdev_priv(netdev);
3602
3603	switch (xdp->command) {
3604	case XDP_SETUP_PROG:
3605		return nfp_net_xdp_setup_drv(nn, xdp);
3606	case XDP_SETUP_PROG_HW:
3607		return nfp_net_xdp_setup_hw(nn, xdp);
3608	default:
3609		return nfp_app_bpf(nn->app, nn, xdp);
3610	}
3611}
3612
3613static int nfp_net_set_mac_address(struct net_device *netdev, void *addr)
3614{
3615	struct nfp_net *nn = netdev_priv(netdev);
3616	struct sockaddr *saddr = addr;
3617	int err;
3618
3619	err = eth_prepare_mac_addr_change(netdev, addr);
3620	if (err)
3621		return err;
3622
3623	nfp_net_write_mac_addr(nn, saddr->sa_data);
3624
3625	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MACADDR);
3626	if (err)
3627		return err;
3628
3629	eth_commit_mac_addr_change(netdev, addr);
3630
3631	return 0;
3632}
3633
3634const struct net_device_ops nfp_net_netdev_ops = {
3635	.ndo_init		= nfp_app_ndo_init,
3636	.ndo_uninit		= nfp_app_ndo_uninit,
3637	.ndo_open		= nfp_net_netdev_open,
3638	.ndo_stop		= nfp_net_netdev_close,
3639	.ndo_start_xmit		= nfp_net_tx,
3640	.ndo_get_stats64	= nfp_net_stat64,
3641	.ndo_vlan_rx_add_vid	= nfp_net_vlan_rx_add_vid,
3642	.ndo_vlan_rx_kill_vid	= nfp_net_vlan_rx_kill_vid,
3643	.ndo_set_vf_mac         = nfp_app_set_vf_mac,
3644	.ndo_set_vf_vlan        = nfp_app_set_vf_vlan,
3645	.ndo_set_vf_spoofchk    = nfp_app_set_vf_spoofchk,
3646	.ndo_set_vf_trust	= nfp_app_set_vf_trust,
3647	.ndo_get_vf_config	= nfp_app_get_vf_config,
3648	.ndo_set_vf_link_state  = nfp_app_set_vf_link_state,
3649	.ndo_setup_tc		= nfp_port_setup_tc,
3650	.ndo_tx_timeout		= nfp_net_tx_timeout,
3651	.ndo_set_rx_mode	= nfp_net_set_rx_mode,
3652	.ndo_change_mtu		= nfp_net_change_mtu,
3653	.ndo_set_mac_address	= nfp_net_set_mac_address,
3654	.ndo_set_features	= nfp_net_set_features,
3655	.ndo_features_check	= nfp_net_features_check,
3656	.ndo_get_phys_port_name	= nfp_net_get_phys_port_name,
3657	.ndo_bpf		= nfp_net_xdp,
3658	.ndo_get_devlink_port	= nfp_devlink_get_devlink_port,
3659};
3660
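/* VXLAN ports are packed two 16-bit values per 32-bit config word, hence
 * the table is walked in pairs.
 */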
3661static int nfp_udp_tunnel_sync(struct net_device *netdev, unsigned int table)
3662{
3663	struct nfp_net *nn = netdev_priv(netdev);
3664	int i;
3665
3666	BUILD_BUG_ON(NFP_NET_N_VXLAN_PORTS & 1);
3667	for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i += 2) {
3668		struct udp_tunnel_info ti0, ti1;
3669
3670		udp_tunnel_nic_get_port(netdev, table, i, &ti0);
3671		udp_tunnel_nic_get_port(netdev, table, i + 1, &ti1);
3672
3673		nn_writel(nn, NFP_NET_CFG_VXLAN_PORT + i * sizeof(ti0.port),
3674			  be16_to_cpu(ti1.port) << 16 | be16_to_cpu(ti0.port));
3675	}
3676
3677	return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_VXLAN);
3678}
3679
3680static const struct udp_tunnel_nic_info nfp_udp_tunnels = {
3681	.sync_table     = nfp_udp_tunnel_sync,
3682	.flags          = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
3683			  UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
3684	.tables         = {
3685		{
3686			.n_entries      = NFP_NET_N_VXLAN_PORTS,
3687			.tunnel_types   = UDP_TUNNEL_TYPE_VXLAN,
3688		},
3689	},
3690};
3691
3692/**
3693 * nfp_net_info() - Print general info about the NIC
3694 * @nn:      NFP Net device to print info about
3695 */
3696void nfp_net_info(struct nfp_net *nn)
3697{
3698	nn_info(nn, "Netronome NFP-6xxx %sNetdev: TxQs=%d/%d RxQs=%d/%d\n",
3699		nn->dp.is_vf ? "VF " : "",
3700		nn->dp.num_tx_rings, nn->max_tx_rings,
3701		nn->dp.num_rx_rings, nn->max_rx_rings);
3702	nn_info(nn, "VER: %d.%d.%d.%d, Maximum supported MTU: %d\n",
3703		nn->fw_ver.resv, nn->fw_ver.class,
3704		nn->fw_ver.major, nn->fw_ver.minor,
3705		nn->max_mtu);
3706	nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3707		nn->cap,
3708		nn->cap & NFP_NET_CFG_CTRL_PROMISC  ? "PROMISC "  : "",
3709		nn->cap & NFP_NET_CFG_CTRL_L2BC     ? "L2BCFILT " : "",
3710		nn->cap & NFP_NET_CFG_CTRL_L2MC     ? "L2MCFILT " : "",
3711		nn->cap & NFP_NET_CFG_CTRL_RXCSUM   ? "RXCSUM "   : "",
3712		nn->cap & NFP_NET_CFG_CTRL_TXCSUM   ? "TXCSUM "   : "",
3713		nn->cap & NFP_NET_CFG_CTRL_RXVLAN   ? "RXVLAN "   : "",
3714		nn->cap & NFP_NET_CFG_CTRL_TXVLAN   ? "TXVLAN "   : "",
3715		nn->cap & NFP_NET_CFG_CTRL_SCATTER  ? "SCATTER "  : "",
3716		nn->cap & NFP_NET_CFG_CTRL_GATHER   ? "GATHER "   : "",
3717		nn->cap & NFP_NET_CFG_CTRL_LSO      ? "TSO1 "     : "",
3718		nn->cap & NFP_NET_CFG_CTRL_LSO2     ? "TSO2 "     : "",
3719		nn->cap & NFP_NET_CFG_CTRL_RSS      ? "RSS1 "     : "",
3720		nn->cap & NFP_NET_CFG_CTRL_RSS2     ? "RSS2 "     : "",
3721		nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER ? "CTAG_FILTER " : "",
3722		nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
3723		nn->cap & NFP_NET_CFG_CTRL_IRQMOD   ? "IRQMOD "   : "",
3724		nn->cap & NFP_NET_CFG_CTRL_VXLAN    ? "VXLAN "    : "",
3725		nn->cap & NFP_NET_CFG_CTRL_NVGRE    ? "NVGRE "	  : "",
3726		nn->cap & NFP_NET_CFG_CTRL_CSUM_COMPLETE ?
3727						      "RXCSUM_COMPLETE " : "",
3728		nn->cap & NFP_NET_CFG_CTRL_LIVE_ADDR ? "LIVE_ADDR " : "",
3729		nfp_app_extra_cap(nn->app, nn));
3730}
3731
3732/**
3733 * nfp_net_alloc() - Allocate netdev and related structure
3734 * @pdev:         PCI device
3735 * @ctrl_bar:     PCI IOMEM with vNIC config memory
3736 * @needs_netdev: Whether to allocate a netdev for this vNIC
3737 * @max_tx_rings: Maximum number of TX rings supported by device
3738 * @max_rx_rings: Maximum number of RX rings supported by device
3739 *
3740 * This function allocates a netdev device and fills in the initial
3741 * part of the @struct nfp_net structure.  In the case of a control device
3742 * the nfp_net structure is allocated without a netdev.
3743 *
3744 * Return: NFP Net device structure, or ERR_PTR on error.
3745 */
3746struct nfp_net *
3747nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev,
3748	      unsigned int max_tx_rings, unsigned int max_rx_rings)
3749{
3750	struct nfp_net *nn;
3751	int err;
3752
3753	if (needs_netdev) {
3754		struct net_device *netdev;
3755
3756		netdev = alloc_etherdev_mqs(sizeof(struct nfp_net),
3757					    max_tx_rings, max_rx_rings);
3758		if (!netdev)
3759			return ERR_PTR(-ENOMEM);
3760
3761		SET_NETDEV_DEV(netdev, &pdev->dev);
3762		nn = netdev_priv(netdev);
3763		nn->dp.netdev = netdev;
3764	} else {
3765		nn = vzalloc(sizeof(*nn));
3766		if (!nn)
3767			return ERR_PTR(-ENOMEM);
3768	}
3769
3770	nn->dp.dev = &pdev->dev;
3771	nn->dp.ctrl_bar = ctrl_bar;
3772	nn->pdev = pdev;
3773
3774	nn->max_tx_rings = max_tx_rings;
3775	nn->max_rx_rings = max_rx_rings;
3776
3777	nn->dp.num_tx_rings = min_t(unsigned int,
3778				    max_tx_rings, num_online_cpus());
3779	nn->dp.num_rx_rings = min_t(unsigned int, max_rx_rings,
3780				 netif_get_num_default_rss_queues());
3781
3782	nn->dp.num_r_vecs = max(nn->dp.num_tx_rings, nn->dp.num_rx_rings);
3783	nn->dp.num_r_vecs = min_t(unsigned int,
3784				  nn->dp.num_r_vecs, num_online_cpus());
3785
3786	nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
3787	nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
3788
3789	sema_init(&nn->bar_lock, 1);
3790
3791	spin_lock_init(&nn->reconfig_lock);
3792	spin_lock_init(&nn->link_status_lock);
3793
3794	timer_setup(&nn->reconfig_timer, nfp_net_reconfig_timer, 0);
3795
3796	err = nfp_net_tlv_caps_parse(&nn->pdev->dev, nn->dp.ctrl_bar,
3797				     &nn->tlv_caps);
3798	if (err)
3799		goto err_free_nn;
3800
3801	err = nfp_ccm_mbox_alloc(nn);
3802	if (err)
3803		goto err_free_nn;
3804
3805	return nn;
3806
3807err_free_nn:
3808	if (nn->dp.netdev)
3809		free_netdev(nn->dp.netdev);
3810	else
3811		vfree(nn);
3812	return ERR_PTR(err);
3813}
3814
3815/**
3816 * nfp_net_free() - Undo what @nfp_net_alloc() did
3817 * @nn:      NFP Net device to free
3818 */
3819void nfp_net_free(struct nfp_net *nn)
3820{
3821	WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted);
3822	nfp_ccm_mbox_free(nn);
3823
3824	if (nn->dp.netdev)
3825		free_netdev(nn->dp.netdev);
3826	else
3827		vfree(nn);
3828}
3829
3830/**
3831 * nfp_net_rss_key_sz() - Get current size of the RSS key
3832 * @nn:		NFP Net device instance
3833 *
3834 * Return: size of the RSS key for currently selected hash function.
3835 */
3836unsigned int nfp_net_rss_key_sz(struct nfp_net *nn)
3837{
3838	switch (nn->rss_hfunc) {
3839	case ETH_RSS_HASH_TOP:
3840		return NFP_NET_CFG_RSS_KEY_SZ;
3841	case ETH_RSS_HASH_XOR:
3842		return 0;
3843	case ETH_RSS_HASH_CRC32:
3844		return 4;
3845	}
3846
3847	nn_warn(nn, "Unknown hash function: %u\n", nn->rss_hfunc);
3848	return 0;
3849}
3850
3851/**
3852 * nfp_net_rss_init() - Set the initial RSS parameters
3853 * @nn:	     NFP Net device to reconfigure
3854 */
3855static void nfp_net_rss_init(struct nfp_net *nn)
3856{
3857	unsigned long func_bit, rss_cap_hfunc;
3858	u32 reg;
3859
3860	/* Read the RSS function capability and select first supported func */
3861	reg = nn_readl(nn, NFP_NET_CFG_RSS_CAP);
3862	rss_cap_hfunc =	FIELD_GET(NFP_NET_CFG_RSS_CAP_HFUNC, reg);
3863	if (!rss_cap_hfunc)
3864		rss_cap_hfunc =	FIELD_GET(NFP_NET_CFG_RSS_CAP_HFUNC,
3865					  NFP_NET_CFG_RSS_TOEPLITZ);
3866
3867	func_bit = find_first_bit(&rss_cap_hfunc, NFP_NET_CFG_RSS_HFUNCS);
3868	if (func_bit == NFP_NET_CFG_RSS_HFUNCS) {
3869		dev_warn(nn->dp.dev,
3870			 "Bad RSS config, defaulting to Toeplitz hash\n");
3871		func_bit = ETH_RSS_HASH_TOP_BIT;
3872	}
3873	nn->rss_hfunc = 1 << func_bit;
3874
3875	netdev_rss_key_fill(nn->rss_key, nfp_net_rss_key_sz(nn));
3876
3877	nfp_net_rss_init_itbl(nn);
3878
3879	/* Enable IPv4/IPv6 TCP by default */
3880	nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP |
3881		      NFP_NET_CFG_RSS_IPV6_TCP |
3882		      FIELD_PREP(NFP_NET_CFG_RSS_HFUNC, nn->rss_hfunc) |
3883		      NFP_NET_CFG_RSS_MASK;
3884}
3885
3886/**
3887 * nfp_net_irqmod_init() - Set the initial IRQ moderation parameters
3888 * @nn:	     NFP Net device to reconfigure
3889 */
3890static void nfp_net_irqmod_init(struct nfp_net *nn)
3891{
3892	nn->rx_coalesce_usecs      = 50;
3893	nn->rx_coalesce_max_frames = 64;
3894	nn->tx_coalesce_usecs      = 50;
3895	nn->tx_coalesce_max_frames = 64;
3896}
3897
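/* Translate device capabilities into netdev feature flags and choose the
 * default set of enabled features.
 */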
3898static void nfp_net_netdev_init(struct nfp_net *nn)
3899{
3900	struct net_device *netdev = nn->dp.netdev;
3901
3902	nfp_net_write_mac_addr(nn, nn->dp.netdev->dev_addr);
3903
3904	netdev->mtu = nn->dp.mtu;
3905
3906	/* Advertise/enable offloads based on capabilities
3907	 *
3908	 * Note: netdev->features shows the currently enabled features
3909	 * and netdev->hw_features advertises which features are
3910	 * supported.  By default we enable most features.
3911	 */
3912	if (nn->cap & NFP_NET_CFG_CTRL_LIVE_ADDR)
3913		netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
3914
3915	netdev->hw_features = NETIF_F_HIGHDMA;
3916	if (nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY) {
3917		netdev->hw_features |= NETIF_F_RXCSUM;
3918		nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY;
3919	}
3920	if (nn->cap & NFP_NET_CFG_CTRL_TXCSUM) {
3921		netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
3922		nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
3923	}
3924	if (nn->cap & NFP_NET_CFG_CTRL_GATHER) {
3925		netdev->hw_features |= NETIF_F_SG;
3926		nn->dp.ctrl |= NFP_NET_CFG_CTRL_GATHER;
3927	}
3928	if ((nn->cap & NFP_NET_CFG_CTRL_LSO && nn->fw_ver.major > 2) ||
3929	    nn->cap & NFP_NET_CFG_CTRL_LSO2) {
3930		netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
3931		nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_LSO2 ?:
3932					 NFP_NET_CFG_CTRL_LSO;
3933	}
3934	if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY)
3935		netdev->hw_features |= NETIF_F_RXHASH;
3936	if (nn->cap & NFP_NET_CFG_CTRL_VXLAN) {
3937		if (nn->cap & NFP_NET_CFG_CTRL_LSO)
3938			netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
3939		netdev->udp_tunnel_nic_info = &nfp_udp_tunnels;
3940		nn->dp.ctrl |= NFP_NET_CFG_CTRL_VXLAN;
3941	}
3942	if (nn->cap & NFP_NET_CFG_CTRL_NVGRE) {
3943		if (nn->cap & NFP_NET_CFG_CTRL_LSO)
3944			netdev->hw_features |= NETIF_F_GSO_GRE;
3945		nn->dp.ctrl |= NFP_NET_CFG_CTRL_NVGRE;
3946	}
3947	if (nn->cap & (NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE))
3948		netdev->hw_enc_features = netdev->hw_features;
3949
3950	netdev->vlan_features = netdev->hw_features;
3951
3952	if (nn->cap & NFP_NET_CFG_CTRL_RXVLAN) {
3953		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
3954		nn->dp.ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
3955	}
3956	if (nn->cap & NFP_NET_CFG_CTRL_TXVLAN) {
3957		if (nn->cap & NFP_NET_CFG_CTRL_LSO2) {
3958			nn_warn(nn, "Device advertises both TSO2 and TXVLAN. Refusing to enable TXVLAN.\n");
3959		} else {
3960			netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
3961			nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
3962		}
3963	}
3964	if (nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER) {
3965		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
3966		nn->dp.ctrl |= NFP_NET_CFG_CTRL_CTAG_FILTER;
3967	}
3968
3969	netdev->features = netdev->hw_features;
3970
3971	if (nfp_app_has_tc(nn->app) && nn->port)
3972		netdev->hw_features |= NETIF_F_HW_TC;
3973
3974	/* Advertise but disable TSO by default. */
3975	netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
3976	nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_LSO_ANY;
3977
3978	/* Finalise the netdev setup */
3979	netdev->netdev_ops = &nfp_net_netdev_ops;
3980	netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000);
3981
3982	/* MTU range: 68 - hw-specific max */
3983	netdev->min_mtu = ETH_MIN_MTU;
3984	netdev->max_mtu = nn->max_mtu;
3985
3986	netdev->gso_max_segs = NFP_NET_LSO_MAX_SEGS;
3987
3988	netif_carrier_off(netdev);
3989
3990	nfp_net_set_ethtool_ops(netdev);
3991}
3992
3993static int nfp_net_read_caps(struct nfp_net *nn)
3994{
3995	/* Get some of the read-only fields from the BAR */
3996	nn->cap = nn_readl(nn, NFP_NET_CFG_CAP);
3997	nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU);
3998
3999	/* ABI 4.x and ctrl vNICs always use chained metadata; in other cases
4000	 * we allow use of non-chained metadata if RSS(v1) is the only
4001	 * advertised capability requiring metadata.
4002	 */
4003	nn->dp.chained_metadata_format = nn->fw_ver.major == 4 ||
4004					 !nn->dp.netdev ||
4005					 !(nn->cap & NFP_NET_CFG_CTRL_RSS) ||
4006					 nn->cap & NFP_NET_CFG_CTRL_CHAIN_META;
4007	/* RSS(v1) uses non-chained metadata format, except in ABI 4.x where
4008	 * it has the same meaning as RSSv2.
4009	 */
4010	if (nn->dp.chained_metadata_format && nn->fw_ver.major != 4)
4011		nn->cap &= ~NFP_NET_CFG_CTRL_RSS;
4012
4013	/* Determine RX packet/metadata boundary offset */
4014	if (nn->fw_ver.major >= 2) {
4015		u32 reg;
4016
4017		reg = nn_readl(nn, NFP_NET_CFG_RX_OFFSET);
4018		if (reg > NFP_NET_MAX_PREPEND) {
4019			nn_err(nn, "Invalid rx offset: %d\n", reg);
4020			return -EINVAL;
4021		}
4022		nn->dp.rx_offset = reg;
4023	} else {
4024		nn->dp.rx_offset = NFP_NET_RX_OFFSET;
4025	}
4026
4027	/* For control vNICs mask out the capabilities app doesn't want. */
4028	if (!nn->dp.netdev)
4029		nn->cap &= nn->app->type->ctrl_cap_mask;
4030
4031	return 0;
4032}
4033
4034/**
4035 * nfp_net_init() - Initialise/finalise the nfp_net structure
4036 * @nn:		NFP Net device structure
4037 *
4038 * Return: 0 on success or negative errno on error.
4039 */
4040int nfp_net_init(struct nfp_net *nn)
4041{
4042	int err;
4043
4044	nn->dp.rx_dma_dir = DMA_FROM_DEVICE;
4045
4046	err = nfp_net_read_caps(nn);
4047	if (err)
4048		return err;
4049
4050	/* Set default MTU and Freelist buffer size */
4051	if (!nfp_net_is_data_vnic(nn) && nn->app->ctrl_mtu) {
4052		nn->dp.mtu = min(nn->app->ctrl_mtu, nn->max_mtu);
4053	} else if (nn->max_mtu < NFP_NET_DEFAULT_MTU) {
4054		nn->dp.mtu = nn->max_mtu;
4055	} else {
4056		nn->dp.mtu = NFP_NET_DEFAULT_MTU;
4057	}
4058	nn->dp.fl_bufsz = nfp_net_calc_fl_bufsz(&nn->dp);
4059
4060	if (nfp_app_ctrl_uses_data_vnics(nn->app))
4061		nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_CMSG_DATA;
4062
4063	if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY) {
4064		nfp_net_rss_init(nn);
4065		nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RSS2 ?:
4066					 NFP_NET_CFG_CTRL_RSS;
4067	}
4068
4069	/* Allow L2 Broadcast and Multicast through by default, if supported */
4070	if (nn->cap & NFP_NET_CFG_CTRL_L2BC)
4071		nn->dp.ctrl |= NFP_NET_CFG_CTRL_L2BC;
4072
4073	/* Allow IRQ moderation, if supported */
4074	if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) {
4075		nfp_net_irqmod_init(nn);
4076		nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
4077	}
4078
4079	/* Stash the re-configuration queue away.  First odd queue in TX Bar */
4080	nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
4081
4082	/* Make sure the FW knows the netdev is supposed to be disabled here */
4083	nn_writel(nn, NFP_NET_CFG_CTRL, 0);
4084	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
4085	nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
4086	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RING |
4087				   NFP_NET_CFG_UPDATE_GEN);
4088	if (err)
4089		return err;
4090
4091	if (nn->dp.netdev) {
4092		nfp_net_netdev_init(nn);
4093
4094		err = nfp_ccm_mbox_init(nn);
4095		if (err)
4096			return err;
4097
4098		err = nfp_net_tls_init(nn);
4099		if (err)
4100			goto err_clean_mbox;
4101	}
4102
4103	nfp_net_vecs_init(nn);
4104
4105	if (!nn->dp.netdev)
4106		return 0;
4107	return register_netdev(nn->dp.netdev);
4108
4109err_clean_mbox:
4110	nfp_ccm_mbox_clean(nn);
4111	return err;
4112}
4113
4114/**
4115 * nfp_net_clean() - Undo what nfp_net_init() did.
4116 * @nn:		NFP Net device structure
4117 */
4118void nfp_net_clean(struct nfp_net *nn)
4119{
4120	if (!nn->dp.netdev)
4121		return;
4122
4123	unregister_netdev(nn->dp.netdev);
4124	nfp_ccm_mbox_clean(nn);
4125	nfp_net_reconfig_wait_posted(nn);
4126}