   1// SPDX-License-Identifier: GPL-2.0-only
   2/****************************************************************************
   3 * Driver for Solarflare network controllers and boards
   4 * Copyright 2018 Solarflare Communications Inc.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License version 2 as published
   8 * by the Free Software Foundation, incorporated herein by reference.
   9 */
  10
  11#include "net_driver.h"
  12#include <linux/module.h>
  13#include <linux/iommu.h>
  14#include <net/rps.h>
  15#include "efx.h"
  16#include "nic.h"
  17#include "rx_common.h"
  18
  19/* This is the percentage fill level below which new RX descriptors
  20 * will be added to the RX descriptor ring.
  21 */
  22static unsigned int rx_refill_threshold;
  23module_param(rx_refill_threshold, uint, 0444);
  24MODULE_PARM_DESC(rx_refill_threshold,
  25		 "RX descriptor ring refill threshold (%)");
  26
  27/* RX maximum head room required.
  28 *
  29 * This must be at least 1 to prevent overflow, plus one packet-worth
  30 * to allow pipelined receives.
  31 */
  32#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS)
  33
  34static void efx_unmap_rx_buffer(struct efx_nic *efx,
  35				struct efx_rx_buffer *rx_buf);
  36
  37/* Check the RX page recycle ring for a page that can be reused. */
  38static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
  39{
  40	struct efx_nic *efx = rx_queue->efx;
  41	struct efx_rx_page_state *state;
  42	unsigned int index;
  43	struct page *page;
  44
  45	if (unlikely(!rx_queue->page_ring))
  46		return NULL;
  47	index = rx_queue->page_remove & rx_queue->page_ptr_mask;
  48	page = rx_queue->page_ring[index];
  49	if (page == NULL)
  50		return NULL;
  51
  52	rx_queue->page_ring[index] = NULL;
  53	/* page_remove cannot exceed page_add. */
  54	if (rx_queue->page_remove != rx_queue->page_add)
  55		++rx_queue->page_remove;
  56
  57	/* If page_count is 1 then we hold the only reference to this page. */
  58	if (page_count(page) == 1) {
  59		++rx_queue->page_recycle_count;
  60		return page;
  61	} else {
  62		state = page_address(page);
  63		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
  64			       PAGE_SIZE << efx->rx_buffer_order,
  65			       DMA_FROM_DEVICE);
  66		put_page(page);
  67		++rx_queue->page_recycle_failed;
  68	}
  69
  70	return NULL;
  71}
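
/*
 * Illustrative note for efx_reuse_page(): page_remove and page_add are
 * free-running counters, and masking with page_ptr_mask selects the ring
 * slot.  Assuming a four-slot ring (page_ptr_mask == 3):
 *
 *	index = page_remove & page_ptr_mask;	(e.g. 6 & 3 == 2)
 *
 * A page is only reusable when page_count() == 1, i.e. the recycle ring
 * holds the sole remaining reference; otherwise the stack still owns part
 * of the page, so it is unmapped and released instead.
 */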
  72
  73/* Attempt to recycle the page if there is an RX recycle ring; the page can
  74 * only be added if this is the final RX buffer, to prevent pages being used in
  75 * the descriptor ring and appearing in the recycle ring simultaneously.
  76 */
  77static void efx_recycle_rx_page(struct efx_channel *channel,
  78				struct efx_rx_buffer *rx_buf)
  79{
  80	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
  81	struct efx_nic *efx = rx_queue->efx;
  82	struct page *page = rx_buf->page;
  83	unsigned int index;
  84
  85	/* Only recycle the page after processing the final buffer. */
  86	if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE))
  87		return;
  88
  89	index = rx_queue->page_add & rx_queue->page_ptr_mask;
  90	if (rx_queue->page_ring[index] == NULL) {
  91		unsigned int read_index = rx_queue->page_remove &
  92			rx_queue->page_ptr_mask;
  93
  94		/* The next slot in the recycle ring is available, but
  95		 * increment page_remove if the read pointer currently
  96		 * points here.
  97		 */
  98		if (read_index == index)
  99			++rx_queue->page_remove;
 100		rx_queue->page_ring[index] = page;
 101		++rx_queue->page_add;
 102		return;
 103	}
 104	++rx_queue->page_recycle_full;
 105	efx_unmap_rx_buffer(efx, rx_buf);
 106	put_page(rx_buf->page);
 107}
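
/*
 * Note on efx_recycle_rx_page(): page_add starts one full ring ahead of
 * page_remove (see efx_siena_init_rx_queue()), so a NULL slot means the
 * ring is not yet full.  If the slot already holds a page, the recycle
 * attempt is abandoned, page_recycle_full is counted, and the buffer's
 * page is unmapped and released instead.
 */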
 108
 109/* Recycle the pages that are used by buffers that have just been received. */
 110void efx_siena_recycle_rx_pages(struct efx_channel *channel,
 111				struct efx_rx_buffer *rx_buf,
 112				unsigned int n_frags)
 113{
 114	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
 115
 116	if (unlikely(!rx_queue->page_ring))
 117		return;
 118
 119	do {
 120		efx_recycle_rx_page(channel, rx_buf);
 121		rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
 122	} while (--n_frags);
 123}
 124
 125void efx_siena_discard_rx_packet(struct efx_channel *channel,
 126				 struct efx_rx_buffer *rx_buf,
 127				 unsigned int n_frags)
 128{
 129	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
 130
 131	efx_siena_recycle_rx_pages(channel, rx_buf, n_frags);
 132
 133	efx_siena_free_rx_buffers(rx_queue, rx_buf, n_frags);
 134}
 135
 136static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue)
 137{
 138	unsigned int bufs_in_recycle_ring, page_ring_size;
 139	struct efx_nic *efx = rx_queue->efx;
 140
 141	bufs_in_recycle_ring = efx_rx_recycle_ring_size(efx);
 142	page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
 143					    efx->rx_bufs_per_page);
 144	rx_queue->page_ring = kcalloc(page_ring_size,
 145				      sizeof(*rx_queue->page_ring), GFP_KERNEL);
 146	if (!rx_queue->page_ring)
 147		rx_queue->page_ptr_mask = 0;
 148	else
 149		rx_queue->page_ptr_mask = page_ring_size - 1;
 150}
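
/*
 * Sizing sketch for the ring allocated above, assuming (illustratively)
 * that efx_rx_recycle_ring_size() returns 4096 and rx_bufs_per_page == 2:
 *
 *	page_ring_size = roundup_pow_of_two(4096 / 2);	-> 2048
 *	page_ptr_mask  = page_ring_size - 1;		-> 0x7ff
 *
 * If the kcalloc() fails, page_ptr_mask is left at 0 and page_ring at
 * NULL; the recycle paths above then fall back to fresh page allocation.
 */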
 151
 152static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue)
 153{
 154	struct efx_nic *efx = rx_queue->efx;
 155	int i;
 156
 157	if (unlikely(!rx_queue->page_ring))
 158		return;
 159
 160	/* Unmap and release the pages in the recycle ring. Remove the ring. */
 161	for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
 162		struct page *page = rx_queue->page_ring[i];
 163		struct efx_rx_page_state *state;
 164
 165		if (page == NULL)
 166			continue;
 167
 168		state = page_address(page);
 169		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
 170			       PAGE_SIZE << efx->rx_buffer_order,
 171			       DMA_FROM_DEVICE);
 172		put_page(page);
 173	}
 174	kfree(rx_queue->page_ring);
 175	rx_queue->page_ring = NULL;
 176}
 177
 178static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
 179			       struct efx_rx_buffer *rx_buf)
 180{
 181	/* Release the page reference we hold for the buffer. */
 182	if (rx_buf->page)
 183		put_page(rx_buf->page);
 184
 185	/* If this is the last buffer in a page, unmap and free it. */
 186	if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) {
 187		efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
 188		efx_siena_free_rx_buffers(rx_queue, rx_buf, 1);
 189	}
 190	rx_buf->page = NULL;
 191}
 192
 193int efx_siena_probe_rx_queue(struct efx_rx_queue *rx_queue)
 194{
 195	struct efx_nic *efx = rx_queue->efx;
 196	unsigned int entries;
 197	int rc;
 198
 199	/* Create the smallest power-of-two aligned ring */
 200	entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE);
 201	EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
 202	rx_queue->ptr_mask = entries - 1;
 203
 204	netif_dbg(efx, probe, efx->net_dev,
 205		  "creating RX queue %d size %#x mask %#x\n",
 206		  efx_rx_queue_index(rx_queue), efx->rxq_entries,
 207		  rx_queue->ptr_mask);
 208
 209	/* Allocate RX buffers */
 210	rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer),
 211				   GFP_KERNEL);
 212	if (!rx_queue->buffer)
 213		return -ENOMEM;
 214
 215	rc = efx_nic_probe_rx(rx_queue);
 216	if (rc) {
 217		kfree(rx_queue->buffer);
 218		rx_queue->buffer = NULL;
 219	}
 220
 221	return rc;
 222}
 223
 224void efx_siena_init_rx_queue(struct efx_rx_queue *rx_queue)
 225{
 226	unsigned int max_fill, trigger, max_trigger;
 227	struct efx_nic *efx = rx_queue->efx;
 228	int rc = 0;
 229
 230	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
 231		  "initialising RX queue %d\n", efx_rx_queue_index(rx_queue));
 232
 233	/* Initialise ptr fields */
 234	rx_queue->added_count = 0;
 235	rx_queue->notified_count = 0;
 236	rx_queue->removed_count = 0;
 237	rx_queue->min_fill = -1U;
 238	efx_init_rx_recycle_ring(rx_queue);
 239
 240	rx_queue->page_remove = 0;
 241	rx_queue->page_add = rx_queue->page_ptr_mask + 1;
 242	rx_queue->page_recycle_count = 0;
 243	rx_queue->page_recycle_failed = 0;
 244	rx_queue->page_recycle_full = 0;
 245
 246	/* Initialise limit fields */
 247	max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM;
 248	max_trigger =
 249		max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page;
 250	if (rx_refill_threshold != 0) {
 251		trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
 252		if (trigger > max_trigger)
 253			trigger = max_trigger;
 254	} else {
 255		trigger = max_trigger;
 256	}
 257
 258	rx_queue->max_fill = max_fill;
 259	rx_queue->fast_fill_trigger = trigger;
 260	rx_queue->refill_enabled = true;
 261
 262	/* Initialise XDP queue information */
 263	rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev,
 264			      rx_queue->core_index, 0);
 265
 266	if (rc) {
 267		netif_err(efx, rx_err, efx->net_dev,
 268			  "Failure to initialise XDP queue information rc=%d\n",
 269			  rc);
 270		efx->xdp_rxq_info_failed = true;
 271	} else {
 272		rx_queue->xdp_rxq_info_valid = true;
 273	}
 274
 275	/* Set up RX descriptor ring */
 276	efx_nic_init_rx(rx_queue);
 277}
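
/*
 * Trigger arithmetic sketch for the limits set above, with illustrative
 * values: rxq_entries == 1024, EFX_RXD_HEAD_ROOM == 5,
 * rx_pages_per_batch == 4, rx_bufs_per_page == 2 and the module parameter
 * rx_refill_threshold == 90:
 *
 *	max_fill    = 1024 - 5;				-> 1019
 *	max_trigger = 1019 - 4 * 2;			-> 1011
 *	trigger     = min(1019 * 90 / 100, 1011);	-> 917
 *
 * With rx_refill_threshold left at its default of 0, trigger equals
 * max_trigger, i.e. refill as soon as one full page batch fits.
 */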
 278
 279void efx_siena_fini_rx_queue(struct efx_rx_queue *rx_queue)
 280{
 281	struct efx_rx_buffer *rx_buf;
 282	int i;
 283
 284	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
 285		  "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue));
 286
 287	del_timer_sync(&rx_queue->slow_fill);
 288
 289	/* Release RX buffers from the current read ptr to the write ptr */
 290	if (rx_queue->buffer) {
 291		for (i = rx_queue->removed_count; i < rx_queue->added_count;
 292		     i++) {
 293			unsigned int index = i & rx_queue->ptr_mask;
 294
 295			rx_buf = efx_rx_buffer(rx_queue, index);
 296			efx_fini_rx_buffer(rx_queue, rx_buf);
 297		}
 298	}
 299
 300	efx_fini_rx_recycle_ring(rx_queue);
 301
 302	if (rx_queue->xdp_rxq_info_valid)
 303		xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info);
 304
 305	rx_queue->xdp_rxq_info_valid = false;
 306}
 307
 308void efx_siena_remove_rx_queue(struct efx_rx_queue *rx_queue)
 309{
 310	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
 311		  "destroying RX queue %d\n", efx_rx_queue_index(rx_queue));
 312
 313	efx_nic_remove_rx(rx_queue);
 314
 315	kfree(rx_queue->buffer);
 316	rx_queue->buffer = NULL;
 317}
 318
 319/* Unmap a DMA-mapped page.  This function is only called for the final RX
 320 * buffer in a page.
 321 */
 322static void efx_unmap_rx_buffer(struct efx_nic *efx,
 323				struct efx_rx_buffer *rx_buf)
 324{
 325	struct page *page = rx_buf->page;
 326
 327	if (page) {
 328		struct efx_rx_page_state *state = page_address(page);
 329
 330		dma_unmap_page(&efx->pci_dev->dev,
 331			       state->dma_addr,
 332			       PAGE_SIZE << efx->rx_buffer_order,
 333			       DMA_FROM_DEVICE);
 334	}
 335}
 336
 337void efx_siena_free_rx_buffers(struct efx_rx_queue *rx_queue,
 338			       struct efx_rx_buffer *rx_buf,
 339			       unsigned int num_bufs)
 340{
 341	do {
 342		if (rx_buf->page) {
 343			put_page(rx_buf->page);
 344			rx_buf->page = NULL;
 345		}
 346		rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
 347	} while (--num_bufs);
 348}
 349
 350void efx_siena_rx_slow_fill(struct timer_list *t)
 351{
 352	struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill);
 353
 354	/* Post an event to cause NAPI to run and refill the queue */
 355	efx_nic_generate_fill_event(rx_queue);
 356	++rx_queue->slow_fill_count;
 357}
 358
 359static void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
 360{
 361	mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10));
 362}
 363
  364/* efx_init_rx_buffers - create a batch of page-based RX buffers

 365 *
 366 * @rx_queue:		Efx RX queue
 367 *
 368 * This allocates a batch of pages, maps them for DMA, and populates
 369 * struct efx_rx_buffers for each one. Return a negative error code or
 370 * 0 on success. If a single page can be used for multiple buffers,
 371 * then the page will either be inserted fully, or not at all.
 372 */
 373static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
 374{
 375	unsigned int page_offset, index, count;
 376	struct efx_nic *efx = rx_queue->efx;
 377	struct efx_rx_page_state *state;
 378	struct efx_rx_buffer *rx_buf;
 379	dma_addr_t dma_addr;
 380	struct page *page;
 381
 382	count = 0;
 383	do {
 384		page = efx_reuse_page(rx_queue);
 385		if (page == NULL) {
 386			page = alloc_pages(__GFP_COMP |
 387					   (atomic ? GFP_ATOMIC : GFP_KERNEL),
 388					   efx->rx_buffer_order);
 389			if (unlikely(page == NULL))
 390				return -ENOMEM;
 391			dma_addr =
 392				dma_map_page(&efx->pci_dev->dev, page, 0,
 393					     PAGE_SIZE << efx->rx_buffer_order,
 394					     DMA_FROM_DEVICE);
 395			if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
 396						       dma_addr))) {
 397				__free_pages(page, efx->rx_buffer_order);
 398				return -EIO;
 399			}
 400			state = page_address(page);
 401			state->dma_addr = dma_addr;
 402		} else {
 403			state = page_address(page);
 404			dma_addr = state->dma_addr;
 405		}
 406
 407		dma_addr += sizeof(struct efx_rx_page_state);
 408		page_offset = sizeof(struct efx_rx_page_state);
 409
 410		do {
 411			index = rx_queue->added_count & rx_queue->ptr_mask;
 412			rx_buf = efx_rx_buffer(rx_queue, index);
 413			rx_buf->dma_addr = dma_addr + efx->rx_ip_align +
 414					   EFX_XDP_HEADROOM;
 415			rx_buf->page = page;
 416			rx_buf->page_offset = page_offset + efx->rx_ip_align +
 417					      EFX_XDP_HEADROOM;
 418			rx_buf->len = efx->rx_dma_len;
 419			rx_buf->flags = 0;
 420			++rx_queue->added_count;
 421			get_page(page);
 422			dma_addr += efx->rx_page_buf_step;
 423			page_offset += efx->rx_page_buf_step;
 424		} while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);
 425
 426		rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE;
 427	} while (++count < efx->rx_pages_per_batch);
 428
 429	return 0;
 430}
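
/*
 * Resulting page layout for the inner loop above, with (say) two buffers
 * per page:
 *
 *	| efx_rx_page_state | headroom | buf 0 data | headroom | buf 1 data | spare |
 *
 * Each buffer advances dma_addr and page_offset by rx_page_buf_step, and
 * only the last descriptor filled from the page carries
 * EFX_RX_BUF_LAST_IN_PAGE, so the page is later recycled or unmapped
 * exactly once.
 */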
 431
 432void efx_siena_rx_config_page_split(struct efx_nic *efx)
 433{
 434	efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align +
 435				      EFX_XDP_HEADROOM + EFX_XDP_TAILROOM,
 436				      EFX_RX_BUF_ALIGNMENT);
 437	efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
 438		((PAGE_SIZE - sizeof(struct efx_rx_page_state)) /
 439		efx->rx_page_buf_step);
 440	efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
 441		efx->rx_bufs_per_page;
 442	efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH,
 443					       efx->rx_bufs_per_page);
 444}
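
/*
 * Page-split sketch, with purely illustrative values: PAGE_SIZE == 4096,
 * rx_buffer_order == 0, rx_dma_len == 1536, rx_ip_align == 2, no XDP
 * head/tailroom and EFX_RX_BUF_ALIGNMENT == 4:
 *
 *	rx_page_buf_step   = ALIGN(1536 + 2, 4);		-> 1540
 *	rx_bufs_per_page   = (4096 - sizeof(struct efx_rx_page_state))
 *			     / 1540;				-> 2
 *	rx_buffer_truesize = 4096 / 2;				-> 2048
 *	rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH, 2);
 *
 * Real values depend on the MTU and on whether XDP headroom is configured.
 */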
 445
 446/* efx_siena_fast_push_rx_descriptors - push new RX descriptors quickly
 447 * @rx_queue:		RX descriptor queue
 448 *
 449 * This will aim to fill the RX descriptor queue up to
  450 * @rx_queue->max_fill. If there is insufficient atomic
 451 * memory to do so, a slow fill will be scheduled.
 452 *
  453 * The caller must provide serialisation (none is used here). In practice,
 454 * this means this function must run from the NAPI handler, or be called
 455 * when NAPI is disabled.
 456 */
 457void efx_siena_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue,
 458					bool atomic)
 459{
 460	struct efx_nic *efx = rx_queue->efx;
 461	unsigned int fill_level, batch_size;
 462	int space, rc = 0;
 463
 464	if (!rx_queue->refill_enabled)
 465		return;
 466
 467	/* Calculate current fill level, and exit if we don't need to fill */
 468	fill_level = (rx_queue->added_count - rx_queue->removed_count);
 469	EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries);
 470	if (fill_level >= rx_queue->fast_fill_trigger)
 471		goto out;
 472
 473	/* Record minimum fill level */
 474	if (unlikely(fill_level < rx_queue->min_fill)) {
 475		if (fill_level)
 476			rx_queue->min_fill = fill_level;
 477	}
 478
 479	batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
 480	space = rx_queue->max_fill - fill_level;
 481	EFX_WARN_ON_ONCE_PARANOID(space < batch_size);
 482
 483	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
 484		   "RX queue %d fast-filling descriptor ring from"
 485		   " level %d to level %d\n",
 486		   efx_rx_queue_index(rx_queue), fill_level,
 487		   rx_queue->max_fill);
 488
 489	do {
 490		rc = efx_init_rx_buffers(rx_queue, atomic);
 491		if (unlikely(rc)) {
 492			/* Ensure that we don't leave the rx queue empty */
 493			efx_schedule_slow_fill(rx_queue);
 494			goto out;
 495		}
 496	} while ((space -= batch_size) >= batch_size);
 497
 498	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
 499		   "RX queue %d fast-filled descriptor ring "
 500		   "to level %d\n", efx_rx_queue_index(rx_queue),
 501		   rx_queue->added_count - rx_queue->removed_count);
 502
 503 out:
 504	if (rx_queue->notified_count != rx_queue->added_count)
 505		efx_nic_notify_rx_desc(rx_queue);
 506}
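
/*
 * Refill sketch for the fast-fill loop above, assuming max_fill == 1019,
 * fill_level == 800 and batch_size == 8 (4 pages x 2 buffers per page):
 *
 *	space = 1019 - 800;	-> 219, so 27 batches of 8 are pushed,
 *				   leaving space == 3 (< batch_size)
 *
 * If efx_init_rx_buffers() fails part-way (e.g. a GFP_ATOMIC allocation
 * failure), the 10ms slow-fill timer is armed instead of retrying, and any
 * descriptors already added are still notified to the NIC at "out:".
 */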
 507
 508/* Pass a received packet up through GRO.  GRO can handle pages
 509 * regardless of checksum state and skbs with a good checksum.
 510 */
 511void
 512efx_siena_rx_packet_gro(struct efx_channel *channel,
 513			struct efx_rx_buffer *rx_buf,
 514			unsigned int n_frags, u8 *eh, __wsum csum)
 515{
 516	struct napi_struct *napi = &channel->napi_str;
 517	struct efx_nic *efx = channel->efx;
 518	struct sk_buff *skb;
 519
 520	skb = napi_get_frags(napi);
 521	if (unlikely(!skb)) {
 522		struct efx_rx_queue *rx_queue;
 523
 524		rx_queue = efx_channel_get_rx_queue(channel);
 525		efx_siena_free_rx_buffers(rx_queue, rx_buf, n_frags);
 526		return;
 527	}
 528
 529	if (efx->net_dev->features & NETIF_F_RXHASH)
 530		skb_set_hash(skb, efx_rx_buf_hash(efx, eh),
 531			     PKT_HASH_TYPE_L3);
 532	if (csum) {
 533		skb->csum = csum;
 534		skb->ip_summed = CHECKSUM_COMPLETE;
 535	} else {
 536		skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
 537				  CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
 538	}
 539	skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);
 540
 541	for (;;) {
 542		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
 543				   rx_buf->page, rx_buf->page_offset,
 544				   rx_buf->len);
 545		rx_buf->page = NULL;
 546		skb->len += rx_buf->len;
 547		if (skb_shinfo(skb)->nr_frags == n_frags)
 548			break;
 549
 550		rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
 551	}
 552
 553	skb->data_len = skb->len;
 554	skb->truesize += n_frags * efx->rx_buffer_truesize;
 555
 556	skb_record_rx_queue(skb, channel->rx_queue.core_index);
 557
 558	napi_gro_frags(napi);
 559}
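
/*
 * Checksum note for the GRO path above: when the NIC supplies a full
 * packet checksum it is passed up as CHECKSUM_COMPLETE; otherwise
 * EFX_RX_PKT_CSUMMED only permits CHECKSUM_UNNECESSARY, and packets with
 * neither flag are handed up as CHECKSUM_NONE for software verification.
 */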
 560
 561/* RSS contexts.  We're using linked lists and crappy O(n) algorithms, because
 562 * (a) this is an infrequent control-plane operation and (b) n is small (max 64)
 563 */
 564struct efx_rss_context *efx_siena_alloc_rss_context_entry(struct efx_nic *efx)
 565{
 566	struct list_head *head = &efx->rss_context.list;
 567	struct efx_rss_context *ctx, *new;
 568	u32 id = 1; /* Don't use zero, that refers to the master RSS context */
 569
 570	WARN_ON(!mutex_is_locked(&efx->rss_lock));
 571
 572	/* Search for first gap in the numbering */
 573	list_for_each_entry(ctx, head, list) {
 574		if (ctx->user_id != id)
 575			break;
 576		id++;
 577		/* Check for wrap.  If this happens, we have nearly 2^32
 578		 * allocated RSS contexts, which seems unlikely.
 579		 */
 580		if (WARN_ON_ONCE(!id))
 581			return NULL;
 582	}
 583
 584	/* Create the new entry */
 585	new = kmalloc(sizeof(*new), GFP_KERNEL);
 586	if (!new)
 587		return NULL;
 588	new->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
 589	new->rx_hash_udp_4tuple = false;
 590
 591	/* Insert the new entry into the gap */
 592	new->user_id = id;
 593	list_add_tail(&new->list, &ctx->list);
 594	return new;
 595}
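
/*
 * Allocation sketch for the gap search above: with existing contexts
 * holding user_id 1, 2 and 4, the walk stops at the entry with user_id 4
 * (since 4 != 3), so the new context is given user_id 3 and linked just
 * before that entry, keeping the list sorted by user_id.
 */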
 596
 597struct efx_rss_context *efx_siena_find_rss_context_entry(struct efx_nic *efx,
 598							 u32 id)
 599{
 600	struct list_head *head = &efx->rss_context.list;
 601	struct efx_rss_context *ctx;
 602
 603	WARN_ON(!mutex_is_locked(&efx->rss_lock));
 604
 605	list_for_each_entry(ctx, head, list)
 606		if (ctx->user_id == id)
 607			return ctx;
 608	return NULL;
 609}
 610
 611void efx_siena_free_rss_context_entry(struct efx_rss_context *ctx)
 612{
 613	list_del(&ctx->list);
 614	kfree(ctx);
 615}
 616
 617void efx_siena_set_default_rx_indir_table(struct efx_nic *efx,
 618					  struct efx_rss_context *ctx)
 619{
 620	size_t i;
 621
 622	for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
 623		ctx->rx_indir_table[i] =
 624			ethtool_rxfh_indir_default(i, efx->rss_spread);
 625}
 626
 627/**
 628 * efx_siena_filter_is_mc_recipient - test whether spec is a multicast recipient
 629 * @spec: Specification to test
 630 *
 631 * Return: %true if the specification is a non-drop RX filter that
 632 * matches a local MAC address I/G bit value of 1 or matches a local
 633 * IPv4 or IPv6 address value in the respective multicast address
 634 * range.  Otherwise %false.
 635 */
 636bool efx_siena_filter_is_mc_recipient(const struct efx_filter_spec *spec)
 637{
 638	if (!(spec->flags & EFX_FILTER_FLAG_RX) ||
 639	    spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP)
 640		return false;
 641
 642	if (spec->match_flags &
 643	    (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) &&
 644	    is_multicast_ether_addr(spec->loc_mac))
 645		return true;
 646
 647	if ((spec->match_flags &
 648	     (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) ==
 649	    (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) {
 650		if (spec->ether_type == htons(ETH_P_IP) &&
 651		    ipv4_is_multicast(spec->loc_host[0]))
 652			return true;
 653		if (spec->ether_type == htons(ETH_P_IPV6) &&
 654		    ((const u8 *)spec->loc_host)[0] == 0xff)
 655			return true;
 656	}
 657
 658	return false;
 659}
 660
 661bool efx_siena_filter_spec_equal(const struct efx_filter_spec *left,
 662				 const struct efx_filter_spec *right)
 663{
 664	if ((left->match_flags ^ right->match_flags) |
 665	    ((left->flags ^ right->flags) &
 666	     (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)))
 667		return false;
 668
 669	return memcmp(&left->outer_vid, &right->outer_vid,
 670		      sizeof(struct efx_filter_spec) -
 671		      offsetof(struct efx_filter_spec, outer_vid)) == 0;
 672}
 673
 674u32 efx_siena_filter_spec_hash(const struct efx_filter_spec *spec)
 675{
 676	BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3);
 677	return jhash2((const u32 *)&spec->outer_vid,
 678		      (sizeof(struct efx_filter_spec) -
 679		       offsetof(struct efx_filter_spec, outer_vid)) / 4,
 680		      0);
 681}
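
/*
 * efx_siena_filter_spec_equal() and efx_siena_filter_spec_hash() both
 * cover the spec from outer_vid to the end of the structure, so equal
 * specs always hash identically.  This is what lets the ARFS code below
 * bucket specs by hash and then confirm candidates with the cheaper
 * memcmp()-based comparison.
 */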
 682
 683#ifdef CONFIG_RFS_ACCEL
 684bool efx_siena_rps_check_rule(struct efx_arfs_rule *rule,
 685			      unsigned int filter_idx, bool *force)
 686{
 687	if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) {
 688		/* ARFS is currently updating this entry, leave it */
 689		return false;
 690	}
 691	if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) {
 692		/* ARFS tried and failed to update this, so it's probably out
 693		 * of date.  Remove the filter and the ARFS rule entry.
 694		 */
 695		rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
 696		*force = true;
 697		return true;
 698	} else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */
 699		/* ARFS has moved on, so old filter is not needed.  Since we did
 700		 * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will
 701		 * not be removed by efx_siena_rps_hash_del() subsequently.
 702		 */
 703		*force = true;
 704		return true;
 705	}
 706	/* Remove it iff ARFS wants to. */
 707	return true;
 708}
 709
 710static
 711struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx,
 712				       const struct efx_filter_spec *spec)
 713{
 714	u32 hash = efx_siena_filter_spec_hash(spec);
 715
 716	lockdep_assert_held(&efx->rps_hash_lock);
 717	if (!efx->rps_hash_table)
 718		return NULL;
 719	return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE];
 720}
 721
 722struct efx_arfs_rule *efx_siena_rps_hash_find(struct efx_nic *efx,
 723					const struct efx_filter_spec *spec)
 724{
 725	struct efx_arfs_rule *rule;
 726	struct hlist_head *head;
 727	struct hlist_node *node;
 728
 729	head = efx_rps_hash_bucket(efx, spec);
 730	if (!head)
 731		return NULL;
 732	hlist_for_each(node, head) {
 733		rule = container_of(node, struct efx_arfs_rule, node);
 734		if (efx_siena_filter_spec_equal(spec, &rule->spec))
 735			return rule;
 736	}
 737	return NULL;
 738}
 739
 740static struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
 741					const struct efx_filter_spec *spec,
 742					bool *new)
 743{
 744	struct efx_arfs_rule *rule;
 745	struct hlist_head *head;
 746	struct hlist_node *node;
 747
 748	head = efx_rps_hash_bucket(efx, spec);
 749	if (!head)
 750		return NULL;
 751	hlist_for_each(node, head) {
 752		rule = container_of(node, struct efx_arfs_rule, node);
 753		if (efx_siena_filter_spec_equal(spec, &rule->spec)) {
 754			*new = false;
 755			return rule;
 756		}
 757	}
 758	rule = kmalloc(sizeof(*rule), GFP_ATOMIC);
 759	*new = true;
 760	if (rule) {
 761		memcpy(&rule->spec, spec, sizeof(rule->spec));
 762		hlist_add_head(&rule->node, head);
 763	}
 764	return rule;
 765}
 766
 767void efx_siena_rps_hash_del(struct efx_nic *efx,
 768			    const struct efx_filter_spec *spec)
 769{
 770	struct efx_arfs_rule *rule;
 771	struct hlist_head *head;
 772	struct hlist_node *node;
 773
 774	head = efx_rps_hash_bucket(efx, spec);
 775	if (WARN_ON(!head))
 776		return;
 777	hlist_for_each(node, head) {
 778		rule = container_of(node, struct efx_arfs_rule, node);
 779		if (efx_siena_filter_spec_equal(spec, &rule->spec)) {
 780			/* Someone already reused the entry.  We know that if
 781			 * this check doesn't fire (i.e. filter_id == REMOVING)
 782			 * then the REMOVING mark was put there by our caller,
  783			 * because the caller is holding a lock on the filter
  784			 * table and only holders of that lock set REMOVING.
 785			 */
 786			if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING)
 787				return;
 788			hlist_del(node);
 789			kfree(rule);
 790			return;
 791		}
 792	}
 793	/* We didn't find it. */
 794	WARN_ON(1);
 795}
 796#endif
 797
 798int efx_siena_probe_filters(struct efx_nic *efx)
 799{
 800	int rc;
 801
 802	mutex_lock(&efx->mac_lock);
 803	down_write(&efx->filter_sem);
 804	rc = efx->type->filter_table_probe(efx);
 805	if (rc)
 806		goto out_unlock;
 807
 808#ifdef CONFIG_RFS_ACCEL
 809	if (efx->type->offload_features & NETIF_F_NTUPLE) {
 810		struct efx_channel *channel;
 811		int i, success = 1;
 812
 813		efx_for_each_channel(channel, efx) {
 814			channel->rps_flow_id =
 815				kcalloc(efx->type->max_rx_ip_filters,
 816					sizeof(*channel->rps_flow_id),
 817					GFP_KERNEL);
 818			if (!channel->rps_flow_id)
 819				success = 0;
 820			else
 821				for (i = 0;
 822				     i < efx->type->max_rx_ip_filters;
 823				     ++i)
 824					channel->rps_flow_id[i] =
 825						RPS_FLOW_ID_INVALID;
 826			channel->rfs_expire_index = 0;
 827			channel->rfs_filter_count = 0;
 828		}
 829
 830		if (!success) {
 831			efx_for_each_channel(channel, efx)
 832				kfree(channel->rps_flow_id);
 833			efx->type->filter_table_remove(efx);
 834			rc = -ENOMEM;
 835			goto out_unlock;
 836		}
 837	}
 838#endif
 839out_unlock:
 840	up_write(&efx->filter_sem);
 841	mutex_unlock(&efx->mac_lock);
 842	return rc;
 843}
 844
 845void efx_siena_remove_filters(struct efx_nic *efx)
 846{
 847#ifdef CONFIG_RFS_ACCEL
 848	struct efx_channel *channel;
 849
 850	efx_for_each_channel(channel, efx) {
 851		cancel_delayed_work_sync(&channel->filter_work);
 852		kfree(channel->rps_flow_id);
 853		channel->rps_flow_id = NULL;
 854	}
 855#endif
 856	down_write(&efx->filter_sem);
 857	efx->type->filter_table_remove(efx);
 858	up_write(&efx->filter_sem);
 859}
 860
 861#ifdef CONFIG_RFS_ACCEL
 862
 863static void efx_filter_rfs_work(struct work_struct *data)
 864{
 865	struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion,
 866							      work);
 867	struct efx_nic *efx = netdev_priv(req->net_dev);
 868	struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
 869	int slot_idx = req - efx->rps_slot;
 870	struct efx_arfs_rule *rule;
 871	u16 arfs_id = 0;
 872	int rc;
 873
 874	rc = efx->type->filter_insert(efx, &req->spec, true);
 875	if (rc >= 0)
 876		/* Discard 'priority' part of EF10+ filter ID (mcdi_filters) */
 877		rc %= efx->type->max_rx_ip_filters;
 878	if (efx->rps_hash_table) {
 879		spin_lock_bh(&efx->rps_hash_lock);
 880		rule = efx_siena_rps_hash_find(efx, &req->spec);
 881		/* The rule might have already gone, if someone else's request
 882		 * for the same spec was already worked and then expired before
 883		 * we got around to our work.  In that case we have nothing
 884		 * tying us to an arfs_id, meaning that as soon as the filter
 885		 * is considered for expiry it will be removed.
 886		 */
 887		if (rule) {
 888			if (rc < 0)
 889				rule->filter_id = EFX_ARFS_FILTER_ID_ERROR;
 890			else
 891				rule->filter_id = rc;
 892			arfs_id = rule->arfs_id;
 893		}
 894		spin_unlock_bh(&efx->rps_hash_lock);
 895	}
 896	if (rc >= 0) {
 897		/* Remember this so we can check whether to expire the filter
 898		 * later.
 899		 */
 900		mutex_lock(&efx->rps_mutex);
 901		if (channel->rps_flow_id[rc] == RPS_FLOW_ID_INVALID)
 902			channel->rfs_filter_count++;
 903		channel->rps_flow_id[rc] = req->flow_id;
 904		mutex_unlock(&efx->rps_mutex);
 905
 906		if (req->spec.ether_type == htons(ETH_P_IP))
 907			netif_info(efx, rx_status, efx->net_dev,
 908				   "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n",
 909				   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
 910				   req->spec.rem_host, ntohs(req->spec.rem_port),
 911				   req->spec.loc_host, ntohs(req->spec.loc_port),
 912				   req->rxq_index, req->flow_id, rc, arfs_id);
 913		else
 914			netif_info(efx, rx_status, efx->net_dev,
 915				   "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n",
 916				   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
 917				   req->spec.rem_host, ntohs(req->spec.rem_port),
 918				   req->spec.loc_host, ntohs(req->spec.loc_port),
 919				   req->rxq_index, req->flow_id, rc, arfs_id);
 920		channel->n_rfs_succeeded++;
 921	} else {
 922		if (req->spec.ether_type == htons(ETH_P_IP))
 923			netif_dbg(efx, rx_status, efx->net_dev,
 924				  "failed to steer %s %pI4:%u:%pI4:%u to queue %u [flow %u rc %d id %u]\n",
 925				  (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
 926				  req->spec.rem_host, ntohs(req->spec.rem_port),
 927				  req->spec.loc_host, ntohs(req->spec.loc_port),
 928				  req->rxq_index, req->flow_id, rc, arfs_id);
 929		else
 930			netif_dbg(efx, rx_status, efx->net_dev,
 931				  "failed to steer %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u rc %d id %u]\n",
 932				  (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
 933				  req->spec.rem_host, ntohs(req->spec.rem_port),
 934				  req->spec.loc_host, ntohs(req->spec.loc_port),
 935				  req->rxq_index, req->flow_id, rc, arfs_id);
 936		channel->n_rfs_failed++;
 937		/* We're overloading the NIC's filter tables, so let's do a
 938		 * chunk of extra expiry work.
 939		 */
 940		__efx_siena_filter_rfs_expire(channel,
 941					      min(channel->rfs_filter_count,
 942						  100u));
 943	}
 944
 945	/* Release references */
 946	clear_bit(slot_idx, &efx->rps_slot_map);
 947	dev_put(req->net_dev);
 948}
 949
 950int efx_siena_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
 951			 u16 rxq_index, u32 flow_id)
 952{
 953	struct efx_nic *efx = netdev_priv(net_dev);
 954	struct efx_async_filter_insertion *req;
 955	struct efx_arfs_rule *rule;
 956	struct flow_keys fk;
 957	int slot_idx;
 958	bool new;
 959	int rc;
 960
 961	/* find a free slot */
 962	for (slot_idx = 0; slot_idx < EFX_RPS_MAX_IN_FLIGHT; slot_idx++)
 963		if (!test_and_set_bit(slot_idx, &efx->rps_slot_map))
 964			break;
 965	if (slot_idx >= EFX_RPS_MAX_IN_FLIGHT)
 966		return -EBUSY;
 967
 968	if (flow_id == RPS_FLOW_ID_INVALID) {
 969		rc = -EINVAL;
 970		goto out_clear;
 971	}
 972
 973	if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) {
 974		rc = -EPROTONOSUPPORT;
 975		goto out_clear;
 976	}
 977
 978	if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) {
 979		rc = -EPROTONOSUPPORT;
 980		goto out_clear;
 981	}
 982	if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) {
 983		rc = -EPROTONOSUPPORT;
 984		goto out_clear;
 985	}
 986
 987	req = efx->rps_slot + slot_idx;
 988	efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT,
 989			   efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
 990			   rxq_index);
 991	req->spec.match_flags =
 992		EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
 993		EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
 994		EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT;
 995	req->spec.ether_type = fk.basic.n_proto;
 996	req->spec.ip_proto = fk.basic.ip_proto;
 997
 998	if (fk.basic.n_proto == htons(ETH_P_IP)) {
 999		req->spec.rem_host[0] = fk.addrs.v4addrs.src;
1000		req->spec.loc_host[0] = fk.addrs.v4addrs.dst;
1001	} else {
1002		memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src,
1003		       sizeof(struct in6_addr));
1004		memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst,
1005		       sizeof(struct in6_addr));
1006	}
1007
1008	req->spec.rem_port = fk.ports.src;
1009	req->spec.loc_port = fk.ports.dst;
1010
1011	if (efx->rps_hash_table) {
1012		/* Add it to ARFS hash table */
1013		spin_lock(&efx->rps_hash_lock);
1014		rule = efx_rps_hash_add(efx, &req->spec, &new);
1015		if (!rule) {
1016			rc = -ENOMEM;
1017			goto out_unlock;
1018		}
1019		if (new)
1020			rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER;
1021		rc = rule->arfs_id;
1022		/* Skip if existing or pending filter already does the right thing */
1023		if (!new && rule->rxq_index == rxq_index &&
1024		    rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING)
1025			goto out_unlock;
1026		rule->rxq_index = rxq_index;
1027		rule->filter_id = EFX_ARFS_FILTER_ID_PENDING;
1028		spin_unlock(&efx->rps_hash_lock);
1029	} else {
1030		/* Without an ARFS hash table, we just use arfs_id 0 for all
1031		 * filters.  This means if multiple flows hash to the same
1032		 * flow_id, all but the most recently touched will be eligible
1033		 * for expiry.
1034		 */
1035		rc = 0;
1036	}
1037
1038	/* Queue the request */
1039	dev_hold(req->net_dev = net_dev);
1040	INIT_WORK(&req->work, efx_filter_rfs_work);
1041	req->rxq_index = rxq_index;
1042	req->flow_id = flow_id;
1043	schedule_work(&req->work);
1044	return rc;
1045out_unlock:
1046	spin_unlock(&efx->rps_hash_lock);
1047out_clear:
1048	clear_bit(slot_idx, &efx->rps_slot_map);
1049	return rc;
1050}
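
/*
 * Hook-up sketch: efx_siena_filter_rfs() has the signature expected of the
 * aRFS callback in struct net_device_ops, and is wired up elsewhere in the
 * driver (not in this file), roughly:
 *
 *	.ndo_rx_flow_steer = efx_siena_filter_rfs,
 *
 * The RFS core then calls it whenever it decides an active flow should be
 * steered to a different RX queue, and the work item queued above performs
 * the actual filter insertion outside of the RX softirq context.
 */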
1051
1052bool __efx_siena_filter_rfs_expire(struct efx_channel *channel,
1053				   unsigned int quota)
1054{
1055	bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index);
1056	struct efx_nic *efx = channel->efx;
1057	unsigned int index, size, start;
1058	u32 flow_id;
1059
1060	if (!mutex_trylock(&efx->rps_mutex))
1061		return false;
1062	expire_one = efx->type->filter_rfs_expire_one;
1063	index = channel->rfs_expire_index;
1064	start = index;
1065	size = efx->type->max_rx_ip_filters;
1066	while (quota) {
1067		flow_id = channel->rps_flow_id[index];
1068
1069		if (flow_id != RPS_FLOW_ID_INVALID) {
1070			quota--;
1071			if (expire_one(efx, flow_id, index)) {
1072				netif_info(efx, rx_status, efx->net_dev,
1073					   "expired filter %d [channel %u flow %u]\n",
1074					   index, channel->channel, flow_id);
1075				channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID;
1076				channel->rfs_filter_count--;
1077			}
1078		}
1079		if (++index == size)
1080			index = 0;
1081		/* If we were called with a quota that exceeds the total number
1082		 * of filters in the table (which shouldn't happen, but could
1083		 * if two callers race), ensure that we don't loop forever -
1084		 * stop when we've examined every row of the table.
1085		 */
1086		if (index == start)
1087			break;
1088	}
1089
1090	channel->rfs_expire_index = index;
1091	mutex_unlock(&efx->rps_mutex);
1092	return true;
1093}
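
/*
 * Scan sketch for the expiry loop above, assuming max_rx_ip_filters == 8,
 * rfs_expire_index == 5 and quota == 4: slots 5, 6, 7, 0, 1, ... are
 * examined in order; quota is only consumed by slots holding a valid
 * flow_id, and the walk stops early if index wraps all the way back to 5,
 * so an oversized quota cannot loop over the table forever.
 */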
1094
1095#endif /* CONFIG_RFS_ACCEL */