v6.8
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright(c) 2017 - 2020 Intel Corporation.
 */

/*
 * This file contains HFI1 support for VNIC functionality
 */

#include <linux/io.h>
#include <linux/if_vlan.h>

#include "vnic.h"
#include "netdev.h"

#define HFI_TX_TIMEOUT_MS 1000

#define HFI1_VNIC_RCV_Q_SIZE   1024

#define HFI1_VNIC_UP 0

static DEFINE_SPINLOCK(vport_cntr_lock);

#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {            \
		u64 *src64, *dst64;                            \
		for (src64 = &qstats->x_grp.unicast,           \
			dst64 = &stats->x_grp.unicast;         \
			dst64 <= &stats->x_grp.s_1519_max;) {  \
			*dst64++ += *src64++;                  \
		}                                              \
	} while (0)

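/*
 * Sketch of what SUM_GRP_COUNTERS relies on (an assumption about struct
 * layout, not stated in this file): the group counters in struct
 * opa_vnic_grp_stats are consecutive u64 fields, ->unicast first and
 * ->s_1519_max last, so the macro can walk them as one flat array:
 *
 *	u64 *src64 = &qstats->tx_grp.unicast;
 *	u64 *dst64 = &stats->tx_grp.unicast;
 *
 *	while (dst64 <= &stats->tx_grp.s_1519_max)
 *		*dst64++ += *src64++;
 *
 * Reordering or inserting fields in that struct would silently change what
 * gets summed.
 */
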
#define VNIC_MASK (0xFF)
#define VNIC_ID(val) ((1ull << 24) | ((val) & VNIC_MASK))

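/*
 * Presumably (not stated in this file) the 1 << 24 tag keeps VNIC entries
 * in a range of the hfi1 netdev data map that cannot collide with other
 * ids, while VNIC_MASK limits the device to 256 distinct vESW ids. For
 * example, VNIC_ID(0x12) == 0x1000012.
 */
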
/* hfi1_vnic_update_stats - update statistics */
static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
				   struct opa_vnic_stats *stats)
{
	struct net_device *netdev = vinfo->netdev;
	u8 i;

	/* add tx counters on different queues */
	for (i = 0; i < vinfo->num_tx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
		stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
		stats->tx_drop_state += qstats->tx_drop_state;
		stats->tx_dlid_zero += qstats->tx_dlid_zero;

		SUM_GRP_COUNTERS(stats, qstats, tx_grp);
		stats->netstats.tx_packets += qnstats->tx_packets;
		stats->netstats.tx_bytes += qnstats->tx_bytes;
	}

	/* add rx counters on different queues */
	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
		stats->netstats.rx_nohandler += qnstats->rx_nohandler;
		stats->rx_drop_state += qstats->rx_drop_state;
		stats->rx_oversize += qstats->rx_oversize;
		stats->rx_runt += qstats->rx_runt;

		SUM_GRP_COUNTERS(stats, qstats, rx_grp);
		stats->netstats.rx_packets += qnstats->rx_packets;
		stats->netstats.rx_bytes += qnstats->rx_bytes;
	}

	stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
				    stats->netstats.tx_carrier_errors +
				    stats->tx_drop_state + stats->tx_dlid_zero;
	stats->netstats.tx_dropped = stats->netstats.tx_errors;

	stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
				    stats->netstats.rx_nohandler +
				    stats->rx_drop_state + stats->rx_oversize +
				    stats->rx_runt;
	stats->netstats.rx_dropped = stats->netstats.rx_errors;

	netdev->stats.tx_packets = stats->netstats.tx_packets;
	netdev->stats.tx_bytes = stats->netstats.tx_bytes;
	netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
	netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
	netdev->stats.tx_errors = stats->netstats.tx_errors;
	netdev->stats.tx_dropped = stats->netstats.tx_dropped;

	netdev->stats.rx_packets = stats->netstats.rx_packets;
	netdev->stats.rx_bytes = stats->netstats.rx_bytes;
	netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
	netdev->stats.multicast = stats->rx_grp.mcastbcast;
	netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
	netdev->stats.rx_errors = stats->netstats.rx_errors;
	netdev->stats.rx_dropped = stats->netstats.rx_dropped;
}

/* update_len_counters - update pkt's len histogram counters */
static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
				       int len)
{
	/* account for 4 byte FCS */
	if (len >= 1515)
		grp->s_1519_max++;
	else if (len >= 1020)
		grp->s_1024_1518++;
	else if (len >= 508)
		grp->s_512_1023++;
	else if (len >= 252)
		grp->s_256_511++;
	else if (len >= 124)
		grp->s_128_255++;
	else if (len >= 61)
		grp->s_65_127++;
	else
		grp->s_64++;
}

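/*
 * Worked example of the FCS adjustment above: each threshold is the
 * histogram bucket's lower bound minus the 4-byte FCS (1519 - 4 = 1515,
 * 1024 - 4 = 1020, ..., 65 - 4 = 61), since len here is the frame length
 * without FCS. A 1514-byte frame is 1518 bytes on the wire, so it lands in
 * s_1024_1518.
 */
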
/* hfi1_vnic_update_tx_counters - update transmit counters */
static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
	u16 vlan_tci;

	stats->netstats.tx_packets++;
	stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(tx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		tx_grp->mcastbcast++;
	else
		tx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		tx_grp->vlan++;
	else
		tx_grp->untagged++;
}

/* hfi1_vnic_update_rx_counters - update receive counters */
static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
	u16 vlan_tci;

	stats->netstats.rx_packets++;
	stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(rx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		rx_grp->mcastbcast++;
	else
		rx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		rx_grp->vlan++;
	else
		rx_grp->untagged++;
}

/* This function is overloaded for opa_vnic specific implementation */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
				  struct rtnl_link_stats64 *stats)
{
	struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_update_stats(vinfo, vstats);
}

static u64 create_bypass_pbc(u32 vl, u32 dw_len)
{
	u64 pbc;

	pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
		| PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
		| PBC_PACKET_BYPASS
		| ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
		| (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;

	return pbc;
}

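/*
 * Rough reading of the PBC (Per Buffer Control) word built above, going by
 * the flag names rather than the hardware spec: mark the packet as a bypass
 * packet, insert no HCRC but a bypass ICRC, request a credit return, and
 * encode the VL and the total length in dwords. E.g. create_bypass_pbc(0, 18)
 * would describe an 18-dword, VL0 bypass packet.
 */
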
/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
				    u8 q_idx)
{
	netif_stop_subqueue(vinfo->netdev, q_idx);
	if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
		return;

	netif_start_subqueue(vinfo->netdev, q_idx);
}

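/*
 * Note the stop-first ordering above: the queue is stopped before
 * re-checking for SDMA descriptor space, so if a completion frees space
 * between the failed send and the stop, the final check catches it and the
 * queue is restarted immediately instead of stalling until the watchdog.
 */
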
static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
					  struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	u8 pad_len, q_idx = skb->queue_mapping;
	struct hfi1_devdata *dd = vinfo->dd;
	struct opa_vnic_skb_mdata *mdata;
	u32 pkt_len, total_len;
	int err = -EINVAL;
	u64 pbc;

	v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
	if (unlikely(!netif_oper_up(netdev))) {
		vinfo->stats[q_idx].tx_drop_state++;
		goto tx_finish;
	}

	/* take out meta data */
	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	skb_pull(skb, sizeof(*mdata));
	if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
		vinfo->stats[q_idx].tx_dlid_zero++;
		goto tx_finish;
	}

	/* add tail padding (for 8 bytes size alignment) and icrc */
	pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
	pad_len += OPA_VNIC_ICRC_TAIL_LEN;

	/*
	 * pkt_len is how much data we have to write, in dwords; it includes
	 * the header and data. total_len additionally counts the PBC. Neither
	 * includes the CRC.
	 */
	pkt_len = (skb->len + pad_len) >> 2;
	total_len = pkt_len + 2; /* PBC + packet */

	pbc = create_bypass_pbc(mdata->vl, total_len);

	skb_get(skb);
	v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
	err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
	if (unlikely(err)) {
		if (err == -ENOMEM)
			vinfo->stats[q_idx].netstats.tx_fifo_errors++;
		else if (err != -EBUSY)
			vinfo->stats[q_idx].netstats.tx_carrier_errors++;
	}
	/* remove the header before updating tx counters */
	skb_pull(skb, OPA_VNIC_HDR_LEN);

	if (unlikely(err == -EBUSY)) {
		hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
		dev_kfree_skb_any(skb);
		return NETDEV_TX_BUSY;
	}

tx_finish:
	/* update tx counters */
	hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

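/*
 * Padding arithmetic, for example: if skb->len + OPA_VNIC_ICRC_TAIL_LEN is
 * 69 bytes, then -(69) & 0x7 = 3, and 69 + 3 = 72 is 8-byte aligned.
 * pad_len then covers those 3 pad bytes plus the ICRC/tail area that
 * process_vnic_dma_send() is expected to append after the payload.
 */
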
static u16 hfi1_vnic_select_queue(struct net_device *netdev,
				  struct sk_buff *skb,
				  struct net_device *sb_dev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	struct opa_vnic_skb_mdata *mdata;
	struct sdma_engine *sde;

	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
	return sde->this_idx;
}

/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
				      struct sk_buff *skb)
{
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
	int rc = -EFAULT;

	skb_pull(skb, OPA_VNIC_HDR_LEN);

	/* Validate Packet length */
	if (unlikely(skb->len > max_len))
		vinfo->stats[rxq->idx].rx_oversize++;
	else if (unlikely(skb->len < ETH_ZLEN))
		vinfo->stats[rxq->idx].rx_runt++;
	else
		rc = 0;
	return rc;
}

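/*
 * max_len above is the MTU plus the Ethernet header and one VLAN tag
 * (VLAN_ETH_HLEN), so tagged frames at full MTU still pass the oversize
 * check.
 */
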
static struct hfi1_vnic_vport_info *get_vnic_port(struct hfi1_devdata *dd,
						  int vesw_id)
{
	int vnic_id = VNIC_ID(vesw_id);

	return hfi1_netdev_get_data(dd, vnic_id);
}

static struct hfi1_vnic_vport_info *get_first_vnic_port(struct hfi1_devdata *dd)
{
	struct hfi1_vnic_vport_info *vinfo;
	int next_id = VNIC_ID(0);

	vinfo = hfi1_netdev_get_first_data(dd, &next_id);

	if (next_id > VNIC_ID(VNIC_MASK))
		return NULL;

	return vinfo;
}

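/*
 * Lookup sketch: vports register themselves under VNIC_ID(vesw_id) (see
 * hfi1_vnic_up() below), so get_vnic_port() is a direct map lookup, and
 * get_first_vnic_port() scans upward from VNIC_ID(0); an id past
 * VNIC_ID(VNIC_MASK) means the first entry found was not a VNIC entry.
 */
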
void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
{
	struct hfi1_devdata *dd = packet->rcd->dd;
	struct hfi1_vnic_vport_info *vinfo = NULL;
	struct hfi1_vnic_rx_queue *rxq;
	struct sk_buff *skb;
	int l4_type, vesw_id = -1, rc;
	u8 q_idx;
	unsigned char *pad_info;

	l4_type = hfi1_16B_get_l4(packet->ebuf);
	if (likely(l4_type == OPA_16B_L4_ETHR)) {
		vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
		vinfo = get_vnic_port(dd, vesw_id);

		/*
		 * In case of invalid vesw id, count the error on
		 * the first available vport.
		 */
		if (unlikely(!vinfo)) {
			struct hfi1_vnic_vport_info *vinfo_tmp;

			vinfo_tmp = get_first_vnic_port(dd);
			if (vinfo_tmp) {
				spin_lock(&vport_cntr_lock);
				vinfo_tmp->stats[0].netstats.rx_nohandler++;
				spin_unlock(&vport_cntr_lock);
			}
		}
	}

	if (unlikely(!vinfo)) {
		dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
			    l4_type, vesw_id, packet->rcd->ctxt);
		return;
	}

	q_idx = packet->rcd->vnic_q_idx;
	rxq = &vinfo->rxq[q_idx];
	if (unlikely(!netif_oper_up(vinfo->netdev))) {
		vinfo->stats[q_idx].rx_drop_state++;
		return;
	}

	skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
	if (unlikely(!skb)) {
		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
		return;
	}

	memcpy(skb->data, packet->ebuf, packet->tlen);
	skb_put(skb, packet->tlen);

	pad_info = skb->data + skb->len - 1;
	skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
		       ((*pad_info) & 0x7)));

	rc = hfi1_vnic_decap_skb(rxq, skb);

	/* update rx counters */
	hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
	if (unlikely(rc)) {
		dev_kfree_skb_any(skb);
		return;
	}

	skb_checksum_none_assert(skb);
	skb->protocol = eth_type_trans(skb, rxq->netdev);

	napi_gro_receive(&rxq->napi, skb);
}

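/*
 * Receive-side counterpart of the transmit padding: the last byte of the
 * packet apparently records the pad count in its low 3 bits, so skb_trim()
 * drops OPA_VNIC_ICRC_TAIL_LEN bytes plus that many pad bytes to recover
 * the original Ethernet frame before it is decapsulated and handed to GRO.
 */
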
static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	struct net_device *netdev = vinfo->netdev;
	int rc;

	/* ensure virtual eth switch id is valid */
	if (!vinfo->vesw_id)
		return -EINVAL;

	rc = hfi1_netdev_add_data(dd, VNIC_ID(vinfo->vesw_id), vinfo);
	if (rc < 0)
		return rc;

	rc = hfi1_netdev_rx_init(dd);
	if (rc)
		goto err_remove;

	netif_carrier_on(netdev);
	netif_tx_start_all_queues(netdev);
	set_bit(HFI1_VNIC_UP, &vinfo->flags);

	return 0;

err_remove:
	hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
	return rc;
}

static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;

	clear_bit(HFI1_VNIC_UP, &vinfo->flags);
	netif_carrier_off(vinfo->netdev);
	netif_tx_disable(vinfo->netdev);
	hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));

	hfi1_netdev_rx_destroy(dd);
}

static int hfi1_netdev_open(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	int rc;

	mutex_lock(&vinfo->lock);
	rc = hfi1_vnic_up(vinfo);
	mutex_unlock(&vinfo->lock);
	return rc;
}

static int hfi1_netdev_close(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	mutex_lock(&vinfo->lock);
	if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
		hfi1_vnic_down(vinfo);
	mutex_unlock(&vinfo->lock);
	return 0;
}

static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	int rc = 0;

	mutex_lock(&hfi1_mutex);
	if (!dd->vnic_num_vports) {
		rc = hfi1_vnic_txreq_init(dd);
		if (rc)
			goto txreq_fail;
	}

	rc = hfi1_netdev_rx_init(dd);
	if (rc) {
		dd_dev_err(dd, "Unable to initialize netdev contexts\n");
		goto alloc_fail;
	}

	hfi1_init_vnic_rsm(dd);

	dd->vnic_num_vports++;
	hfi1_vnic_sdma_init(vinfo);

alloc_fail:
	if (!dd->vnic_num_vports)
		hfi1_vnic_txreq_deinit(dd);
txreq_fail:
	mutex_unlock(&hfi1_mutex);
	return rc;
}

static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;

	mutex_lock(&hfi1_mutex);
	if (--dd->vnic_num_vports == 0) {
		hfi1_deinit_vnic_rsm(dd);
		hfi1_vnic_txreq_deinit(dd);
	}
	mutex_unlock(&hfi1_mutex);
	hfi1_netdev_rx_destroy(dd);
}

static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	bool reopen = false;

	/*
	 * If the vesw_id is being changed and the vnic port is up,
	 * reset the vnic port to ensure the new vesw_id gets picked up
	 */
	if (id != vinfo->vesw_id) {
		mutex_lock(&vinfo->lock);
		if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
			hfi1_vnic_down(vinfo);
			reopen = true;
		}

		vinfo->vesw_id = id;
		if (reopen)
			hfi1_vnic_up(vinfo);

		mutex_unlock(&vinfo->lock);
	}
}

/* netdev ops */
static const struct net_device_ops hfi1_netdev_ops = {
	.ndo_open = hfi1_netdev_open,
	.ndo_stop = hfi1_netdev_close,
	.ndo_start_xmit = hfi1_netdev_start_xmit,
	.ndo_select_queue = hfi1_vnic_select_queue,
	.ndo_get_stats64 = hfi1_vnic_get_stats64,
};

static void hfi1_vnic_free_rn(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_deinit(vinfo);
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
}

struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
				      u32 port_num,
				      enum rdma_netdev_t type,
				      const char *name,
				      unsigned char name_assign_type,
				      void (*setup)(struct net_device *))
{
	struct hfi1_devdata *dd = dd_from_ibdev(device);
	struct hfi1_vnic_vport_info *vinfo;
	struct net_device *netdev;
	struct rdma_netdev *rn;
	int i, size, rc;

	if (!dd->num_netdev_contexts)
		return ERR_PTR(-ENOMEM);

	if (!port_num || (port_num > dd->num_pports))
		return ERR_PTR(-EINVAL);

	if (type != RDMA_NETDEV_OPA_VNIC)
		return ERR_PTR(-EOPNOTSUPP);

	size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
	netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
				  chip_sdma_engines(dd),
				  dd->num_netdev_contexts);
	if (!netdev)
		return ERR_PTR(-ENOMEM);

	rn = netdev_priv(netdev);
	vinfo = opa_vnic_dev_priv(netdev);
	vinfo->dd = dd;
	vinfo->num_tx_q = chip_sdma_engines(dd);
	vinfo->num_rx_q = dd->num_netdev_contexts;
	vinfo->netdev = netdev;
	rn->free_rdma_netdev = hfi1_vnic_free_rn;
	rn->set_id = hfi1_vnic_set_vesw_id;

	netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
	netdev->hw_features = netdev->features;
	netdev->vlan_features = netdev->features;
	netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
	netdev->netdev_ops = &hfi1_netdev_ops;
	mutex_init(&vinfo->lock);

	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		rxq->idx = i;
		rxq->vinfo = vinfo;
		rxq->netdev = netdev;
	}

	rc = hfi1_vnic_init(vinfo);
	if (rc)
		goto init_fail;

	return netdev;
init_fail:
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
	return ERR_PTR(rc);
}
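
/*
 * Entry point note: hfi1_vnic_alloc_rn() is presumably installed as the
 * ib_device's rdma_netdev allocator for RDMA_NETDEV_OPA_VNIC, so the
 * opa_vnic ULP obtains one of these net_devices per virtual Ethernet switch
 * port; rn->set_id lets it assign the vesw_id after creation, which is why
 * hfi1_vnic_up() rejects a zero vesw_id.
 */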
v5.4
 
/*
 * Copyright(c) 2017 - 2018 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * This file contains HFI1 support for VNIC functionality
 */

#include <linux/io.h>
#include <linux/if_vlan.h>

#include "vnic.h"

#define HFI_TX_TIMEOUT_MS 1000

#define HFI1_VNIC_RCV_Q_SIZE   1024

#define HFI1_VNIC_UP 0

static DEFINE_SPINLOCK(vport_cntr_lock);

static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
{
	unsigned int rcvctrl_ops = 0;
	int ret;

	uctxt->do_interrupt = &handle_receive_interrupt;

	/* Now allocate the RcvHdr queue and eager buffers. */
	ret = hfi1_create_rcvhdrq(dd, uctxt);
	if (ret)
		goto done;

	ret = hfi1_setup_eagerbufs(uctxt);
	if (ret)
		goto done;

	if (uctxt->rcvhdrtail_kvaddr)
		clear_rcvhdrtail(uctxt);

	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
	rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;

	if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;

	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
done:
	return ret;
}

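/*
 * Reading of the flag mapping above, going by the names: the context is
 * enabled with "interrupt available" signalling, and the capability bits
 * chosen in allocate_vnic_ctxt() translate into RcvCtxtCtrl options such
 * as not dropping packets when the eager buffers or the receive header
 * queue fill up, and DMA'ing the tail pointer to memory (DMA_RTAIL).
 */
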
static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
			      struct hfi1_ctxtdata **vnic_ctxt)
{
	struct hfi1_ctxtdata *uctxt;
	int ret;

	if (dd->flags & HFI1_FROZEN)
		return -EIO;

	ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
	if (ret < 0) {
		dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
		return -ENOMEM;
	}

	uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
			HFI1_CAP_KGET(NODROP_RHQ_FULL) |
			HFI1_CAP_KGET(NODROP_EGR_FULL) |
			HFI1_CAP_KGET(DMA_RTAIL);
	uctxt->seq_cnt = 1;
	uctxt->is_vnic = true;

	msix_request_rcd_irq(uctxt);

	hfi1_stats.sps_ctxts++;
	dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
	*vnic_ctxt = uctxt;

	return 0;
}

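/*
 * Version note, visible from the two listings: in this v5.4 code each
 * device allocates dedicated VNIC receive contexts (uctxt->is_vnic), while
 * the v6.8 listing above has no allocate/deallocate_vnic_ctxt() and instead
 * uses the shared hfi1_netdev_rx_init()/hfi1_netdev_rx_destroy() contexts.
 */
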
static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
				 struct hfi1_ctxtdata *uctxt)
{
	dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
	flush_wc();

	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
		     HFI1_RCVCTRL_TIDFLOW_DIS |
		     HFI1_RCVCTRL_INTRAVAIL_DIS |
		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);

	/* msix_intr will always be > 0, only clean up if this is true */
	if (uctxt->msix_intr)
		msix_free_irq(dd, uctxt->msix_intr);

	uctxt->event_flags = 0;

	hfi1_clear_tids(uctxt);
	hfi1_clear_ctxt_pkey(dd, uctxt);

	hfi1_stats.sps_ctxts--;

	hfi1_free_ctxt(uctxt);
}

void hfi1_vnic_setup(struct hfi1_devdata *dd)
{
	xa_init(&dd->vnic.vesws);
}

void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
{
	WARN_ON(!xa_empty(&dd->vnic.vesws));
}

#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {            \
		u64 *src64, *dst64;                            \
		for (src64 = &qstats->x_grp.unicast,           \
			dst64 = &stats->x_grp.unicast;         \
			dst64 <= &stats->x_grp.s_1519_max;) {  \
			*dst64++ += *src64++;                  \
		}                                              \
	} while (0)

/* hfi1_vnic_update_stats - update statistics */
static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
				   struct opa_vnic_stats *stats)
{
	struct net_device *netdev = vinfo->netdev;
	u8 i;

	/* add tx counters on different queues */
	for (i = 0; i < vinfo->num_tx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
		stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
		stats->tx_drop_state += qstats->tx_drop_state;
		stats->tx_dlid_zero += qstats->tx_dlid_zero;

		SUM_GRP_COUNTERS(stats, qstats, tx_grp);
		stats->netstats.tx_packets += qnstats->tx_packets;
		stats->netstats.tx_bytes += qnstats->tx_bytes;
	}

	/* add rx counters on different queues */
	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
		stats->netstats.rx_nohandler += qnstats->rx_nohandler;
		stats->rx_drop_state += qstats->rx_drop_state;
		stats->rx_oversize += qstats->rx_oversize;
		stats->rx_runt += qstats->rx_runt;

		SUM_GRP_COUNTERS(stats, qstats, rx_grp);
		stats->netstats.rx_packets += qnstats->rx_packets;
		stats->netstats.rx_bytes += qnstats->rx_bytes;
	}

	stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
				    stats->netstats.tx_carrier_errors +
				    stats->tx_drop_state + stats->tx_dlid_zero;
	stats->netstats.tx_dropped = stats->netstats.tx_errors;

	stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
				    stats->netstats.rx_nohandler +
				    stats->rx_drop_state + stats->rx_oversize +
				    stats->rx_runt;
	stats->netstats.rx_dropped = stats->netstats.rx_errors;

	netdev->stats.tx_packets = stats->netstats.tx_packets;
	netdev->stats.tx_bytes = stats->netstats.tx_bytes;
	netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
	netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
	netdev->stats.tx_errors = stats->netstats.tx_errors;
	netdev->stats.tx_dropped = stats->netstats.tx_dropped;

	netdev->stats.rx_packets = stats->netstats.rx_packets;
	netdev->stats.rx_bytes = stats->netstats.rx_bytes;
	netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
	netdev->stats.multicast = stats->rx_grp.mcastbcast;
	netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
	netdev->stats.rx_errors = stats->netstats.rx_errors;
	netdev->stats.rx_dropped = stats->netstats.rx_dropped;
}

/* update_len_counters - update pkt's len histogram counters */
static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
				       int len)
{
	/* account for 4 byte FCS */
	if (len >= 1515)
		grp->s_1519_max++;
	else if (len >= 1020)
		grp->s_1024_1518++;
	else if (len >= 508)
		grp->s_512_1023++;
	else if (len >= 252)
		grp->s_256_511++;
	else if (len >= 124)
		grp->s_128_255++;
	else if (len >= 61)
		grp->s_65_127++;
	else
		grp->s_64++;
}

/* hfi1_vnic_update_tx_counters - update transmit counters */
static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
	u16 vlan_tci;

	stats->netstats.tx_packets++;
	stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(tx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		tx_grp->mcastbcast++;
	else
		tx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		tx_grp->vlan++;
	else
		tx_grp->untagged++;
}

/* hfi1_vnic_update_rx_counters - update receive counters */
static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
	u16 vlan_tci;

	stats->netstats.rx_packets++;
	stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(rx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		rx_grp->mcastbcast++;
	else
		rx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		rx_grp->vlan++;
	else
		rx_grp->untagged++;
}

/* This function is overloaded for opa_vnic specific implementation */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
				  struct rtnl_link_stats64 *stats)
{
	struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_update_stats(vinfo, vstats);
}

static u64 create_bypass_pbc(u32 vl, u32 dw_len)
{
	u64 pbc;

	pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
		| PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
		| PBC_PACKET_BYPASS
		| ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
		| (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;

	return pbc;
}

/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
				    u8 q_idx)
{
	netif_stop_subqueue(vinfo->netdev, q_idx);
	if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
		return;

	netif_start_subqueue(vinfo->netdev, q_idx);
}

static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
					  struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	u8 pad_len, q_idx = skb->queue_mapping;
	struct hfi1_devdata *dd = vinfo->dd;
	struct opa_vnic_skb_mdata *mdata;
	u32 pkt_len, total_len;
	int err = -EINVAL;
	u64 pbc;

	v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
	if (unlikely(!netif_oper_up(netdev))) {
		vinfo->stats[q_idx].tx_drop_state++;
		goto tx_finish;
	}

	/* take out meta data */
	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	skb_pull(skb, sizeof(*mdata));
	if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
		vinfo->stats[q_idx].tx_dlid_zero++;
		goto tx_finish;
	}

	/* add tail padding (for 8 bytes size alignment) and icrc */
	pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
	pad_len += OPA_VNIC_ICRC_TAIL_LEN;

	/*
	 * pkt_len is how much data we have to write, in dwords; it includes
	 * the header and data. total_len additionally counts the PBC. Neither
	 * includes the CRC.
	 */
	pkt_len = (skb->len + pad_len) >> 2;
	total_len = pkt_len + 2; /* PBC + packet */

	pbc = create_bypass_pbc(mdata->vl, total_len);

	skb_get(skb);
	v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
	err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
	if (unlikely(err)) {
		if (err == -ENOMEM)
			vinfo->stats[q_idx].netstats.tx_fifo_errors++;
		else if (err != -EBUSY)
			vinfo->stats[q_idx].netstats.tx_carrier_errors++;
	}
	/* remove the header before updating tx counters */
	skb_pull(skb, OPA_VNIC_HDR_LEN);

	if (unlikely(err == -EBUSY)) {
		hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
		dev_kfree_skb_any(skb);
		return NETDEV_TX_BUSY;
	}

tx_finish:
	/* update tx counters */
	hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

static u16 hfi1_vnic_select_queue(struct net_device *netdev,
				  struct sk_buff *skb,
				  struct net_device *sb_dev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	struct opa_vnic_skb_mdata *mdata;
	struct sdma_engine *sde;

	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
	return sde->this_idx;
}

/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
				      struct sk_buff *skb)
{
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
	int rc = -EFAULT;

	skb_pull(skb, OPA_VNIC_HDR_LEN);

	/* Validate Packet length */
	if (unlikely(skb->len > max_len))
		vinfo->stats[rxq->idx].rx_oversize++;
	else if (unlikely(skb->len < ETH_ZLEN))
		vinfo->stats[rxq->idx].rx_runt++;
	else
		rc = 0;
	return rc;
}

static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
{
	unsigned char *pad_info;
	struct sk_buff *skb;

	skb = skb_dequeue(&rxq->skbq);
	if (unlikely(!skb))
		return NULL;

	/* remove tail padding and icrc */
	pad_info = skb->data + skb->len - 1;
	skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
		       ((*pad_info) & 0x7)));

	return skb;
}

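/*
 * Version note: in this v5.4 code the tail padding and ICRC are stripped
 * here, when the skb is dequeued from the per-queue skbq for NAPI
 * processing; the v6.8 listing above performs the same trim inline in
 * hfi1_vnic_bypass_rcv() and no longer maintains a per-queue skbq.
 */
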
/* hfi1_vnic_handle_rx - handle skb receive */
static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
				int *work_done, int work_to_do)
{
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	struct sk_buff *skb;
	int rc;

	while (1) {
		if (*work_done >= work_to_do)
			break;

		skb = hfi1_vnic_get_skb(rxq);
		if (unlikely(!skb))
			break;

		rc = hfi1_vnic_decap_skb(rxq, skb);
		/* update rx counters */
		hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
		if (unlikely(rc)) {
			dev_kfree_skb_any(skb);
			continue;
		}

		skb_checksum_none_assert(skb);
		skb->protocol = eth_type_trans(skb, rxq->netdev);

		napi_gro_receive(&rxq->napi, skb);
		(*work_done)++;
	}
}

/* hfi1_vnic_napi - napi receive polling callback function */
static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
{
	struct hfi1_vnic_rx_queue *rxq = container_of(napi,
					      struct hfi1_vnic_rx_queue, napi);
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	int work_done = 0;

	v_dbg("napi %d budget %d\n", rxq->idx, budget);
	hfi1_vnic_handle_rx(rxq, &work_done, budget);

	v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
	if (work_done < budget)
		napi_complete(napi);

	return work_done;
}

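/*
 * Standard NAPI contract, for reference: the poll callback consumes at most
 * `budget` packets and returns the number processed; calling napi_complete()
 * only when work_done < budget is what re-enables interrupt-driven
 * scheduling, otherwise the core keeps polling.
 */
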
void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
{
	struct hfi1_devdata *dd = packet->rcd->dd;
	struct hfi1_vnic_vport_info *vinfo = NULL;
	struct hfi1_vnic_rx_queue *rxq;
	struct sk_buff *skb;
	int l4_type, vesw_id = -1;
	u8 q_idx;

	l4_type = hfi1_16B_get_l4(packet->ebuf);
	if (likely(l4_type == OPA_16B_L4_ETHR)) {
		vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
		vinfo = xa_load(&dd->vnic.vesws, vesw_id);

		/*
		 * In case of invalid vesw id, count the error on
		 * the first available vport.
		 */
		if (unlikely(!vinfo)) {
			struct hfi1_vnic_vport_info *vinfo_tmp;
			unsigned long index = 0;

			vinfo_tmp = xa_find(&dd->vnic.vesws, &index, ULONG_MAX,
					XA_PRESENT);
			if (vinfo_tmp) {
				spin_lock(&vport_cntr_lock);
				vinfo_tmp->stats[0].netstats.rx_nohandler++;
				spin_unlock(&vport_cntr_lock);
			}
		}
	}

	if (unlikely(!vinfo)) {
		dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
			    l4_type, vesw_id, packet->rcd->ctxt);
		return;
	}

	q_idx = packet->rcd->vnic_q_idx;
	rxq = &vinfo->rxq[q_idx];
	if (unlikely(!netif_oper_up(vinfo->netdev))) {
		vinfo->stats[q_idx].rx_drop_state++;
		skb_queue_purge(&rxq->skbq);
		return;
	}

	if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
		return;
	}

	skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
	if (unlikely(!skb)) {
		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
		return;
	}

	memcpy(skb->data, packet->ebuf, packet->tlen);
	skb_put(skb, packet->tlen);
	skb_queue_tail(&rxq->skbq, skb);

	if (napi_schedule_prep(&rxq->napi)) {
		v_dbg("napi %d scheduling\n", q_idx);
		__napi_schedule(&rxq->napi);
	}
}

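/*
 * Backpressure in this version: the per-queue skbq is capped at
 * HFI1_VNIC_RCV_Q_SIZE (1024); beyond that, packets are dropped and counted
 * as rx_fifo_errors, and NAPI is only scheduled when napi_schedule_prep()
 * confirms it is not already running.
 */
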
static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	struct net_device *netdev = vinfo->netdev;
	int i, rc;

	/* ensure virtual eth switch id is valid */
	if (!vinfo->vesw_id)
		return -EINVAL;

	rc = xa_insert(&dd->vnic.vesws, vinfo->vesw_id, vinfo, GFP_KERNEL);
	if (rc < 0)
		return rc;

	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		skb_queue_head_init(&rxq->skbq);
		napi_enable(&rxq->napi);
	}

	netif_carrier_on(netdev);
	netif_tx_start_all_queues(netdev);
	set_bit(HFI1_VNIC_UP, &vinfo->flags);

	return 0;
}

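/*
 * Here the vport registers itself directly in the dd->vnic.vesws xarray,
 * keyed by vesw_id, which is what makes the xa_load() in
 * hfi1_vnic_bypass_rcv() a plain id-to-vport lookup; the v6.8 code performs
 * the same registration through hfi1_netdev_add_data() with VNIC_ID().
 */
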
static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	u8 i;

	clear_bit(HFI1_VNIC_UP, &vinfo->flags);
	netif_carrier_off(vinfo->netdev);
	netif_tx_disable(vinfo->netdev);
	xa_erase(&dd->vnic.vesws, vinfo->vesw_id);

	/* ensure irqs see the change */
	msix_vnic_synchronize_irq(dd);

	/* remove unread skbs */
	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		napi_disable(&rxq->napi);
		skb_queue_purge(&rxq->skbq);
	}
}

static int hfi1_netdev_open(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	int rc;

	mutex_lock(&vinfo->lock);
	rc = hfi1_vnic_up(vinfo);
	mutex_unlock(&vinfo->lock);
	return rc;
}

static int hfi1_netdev_close(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	mutex_lock(&vinfo->lock);
	if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
		hfi1_vnic_down(vinfo);
	mutex_unlock(&vinfo->lock);
	return 0;
}

static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
				struct hfi1_ctxtdata **vnic_ctxt)
{
	int rc;

	rc = allocate_vnic_ctxt(dd, vnic_ctxt);
	if (rc) {
		dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
		return rc;
	}

	rc = setup_vnic_ctxt(dd, *vnic_ctxt);
	if (rc) {
		dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
		deallocate_vnic_ctxt(dd, *vnic_ctxt);
		*vnic_ctxt = NULL;
	}

	return rc;
}

static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	int i, rc = 0;

	mutex_lock(&hfi1_mutex);
	if (!dd->vnic.num_vports) {
		rc = hfi1_vnic_txreq_init(dd);
		if (rc)
			goto txreq_fail;
	}

	for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
		rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
		if (rc)
			break;
		hfi1_rcd_get(dd->vnic.ctxt[i]);
		dd->vnic.ctxt[i]->vnic_q_idx = i;
	}

	if (i < vinfo->num_rx_q) {
		/*
		 * If the required number of contexts could not all be
		 * allocated, release the ones that were.
		 */
		while (i-- > dd->vnic.num_ctxt) {
			deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
			hfi1_rcd_put(dd->vnic.ctxt[i]);
			dd->vnic.ctxt[i] = NULL;
		}
		goto alloc_fail;
	}

	if (dd->vnic.num_ctxt != i) {
		dd->vnic.num_ctxt = i;
		hfi1_init_vnic_rsm(dd);
	}

	dd->vnic.num_vports++;
	hfi1_vnic_sdma_init(vinfo);
alloc_fail:
	if (!dd->vnic.num_vports)
		hfi1_vnic_txreq_deinit(dd);
txreq_fail:
	mutex_unlock(&hfi1_mutex);
	return rc;
}

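/*
 * Context accounting sketch: dd->vnic.num_ctxt tracks contexts shared by
 * all vports, so the loop above only allocates the ones beyond what earlier
 * vports already created; hfi1_init_vnic_rsm() is re-applied whenever the
 * context count actually grows, which presumably reprograms the receive
 * side mapping to steer VNIC traffic to those contexts.
 */
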
static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	int i;

	mutex_lock(&hfi1_mutex);
	if (--dd->vnic.num_vports == 0) {
		for (i = 0; i < dd->vnic.num_ctxt; i++) {
			deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
			hfi1_rcd_put(dd->vnic.ctxt[i]);
			dd->vnic.ctxt[i] = NULL;
		}
		hfi1_deinit_vnic_rsm(dd);
		dd->vnic.num_ctxt = 0;
		hfi1_vnic_txreq_deinit(dd);
	}
	mutex_unlock(&hfi1_mutex);
}

static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	bool reopen = false;

	/*
	 * If the vesw_id is being changed and the vnic port is up,
	 * reset the vnic port to ensure the new vesw_id gets picked up
	 */
	if (id != vinfo->vesw_id) {
		mutex_lock(&vinfo->lock);
		if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
			hfi1_vnic_down(vinfo);
			reopen = true;
		}

		vinfo->vesw_id = id;
		if (reopen)
			hfi1_vnic_up(vinfo);

		mutex_unlock(&vinfo->lock);
	}
}

/* netdev ops */
static const struct net_device_ops hfi1_netdev_ops = {
	.ndo_open = hfi1_netdev_open,
	.ndo_stop = hfi1_netdev_close,
	.ndo_start_xmit = hfi1_netdev_start_xmit,
	.ndo_select_queue = hfi1_vnic_select_queue,
	.ndo_get_stats64 = hfi1_vnic_get_stats64,
};

static void hfi1_vnic_free_rn(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_deinit(vinfo);
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
}

struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
				      u8 port_num,
				      enum rdma_netdev_t type,
				      const char *name,
				      unsigned char name_assign_type,
				      void (*setup)(struct net_device *))
{
	struct hfi1_devdata *dd = dd_from_ibdev(device);
	struct hfi1_vnic_vport_info *vinfo;
	struct net_device *netdev;
	struct rdma_netdev *rn;
	int i, size, rc;

	if (!dd->num_vnic_contexts)
		return ERR_PTR(-ENOMEM);

	if (!port_num || (port_num > dd->num_pports))
		return ERR_PTR(-EINVAL);

	if (type != RDMA_NETDEV_OPA_VNIC)
		return ERR_PTR(-EOPNOTSUPP);

	size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
	netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
				  dd->num_sdma, dd->num_vnic_contexts);
	if (!netdev)
		return ERR_PTR(-ENOMEM);

	rn = netdev_priv(netdev);
	vinfo = opa_vnic_dev_priv(netdev);
	vinfo->dd = dd;
	vinfo->num_tx_q = dd->num_sdma;
	vinfo->num_rx_q = dd->num_vnic_contexts;
	vinfo->netdev = netdev;
	rn->free_rdma_netdev = hfi1_vnic_free_rn;
	rn->set_id = hfi1_vnic_set_vesw_id;

	netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
	netdev->hw_features = netdev->features;
	netdev->vlan_features = netdev->features;
	netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
	netdev->netdev_ops = &hfi1_netdev_ops;
	mutex_init(&vinfo->lock);

	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		rxq->idx = i;
		rxq->vinfo = vinfo;
		rxq->netdev = netdev;
		netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
	}

	rc = hfi1_vnic_init(vinfo);
	if (rc)
		goto init_fail;

	return netdev;
init_fail:
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
	return ERR_PTR(rc);
}