Loading...
1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2/*
3 * Copyright(c) 2017 - 2020 Intel Corporation.
4 */
5
6/*
7 * This file contains HFI1 support for VNIC functionality
8 */
9
10#include <linux/io.h>
11#include <linux/if_vlan.h>
12
13#include "vnic.h"
14#include "netdev.h"
15
16#define HFI_TX_TIMEOUT_MS 1000
17
18#define HFI1_VNIC_RCV_Q_SIZE 1024
19
20#define HFI1_VNIC_UP 0
21
22static DEFINE_SPINLOCK(vport_cntr_lock);
23
/*
 * SUM_GRP_COUNTERS - add one per-queue counter group onto a running total
 * @stats:  pointer to the destination stats structure
 * @qstats: pointer to the per-queue source stats structure
 * @x_grp:  name of the group member to sum (tx_grp or rx_grp)
 *
 * Walks the u64 counters from x_grp.unicast up to and including
 * x_grp.s_1519_max and adds every source counter to the matching
 * destination counter.  This assumes those members are laid out as
 * consecutive u64 fields.  Both pointer arguments are parenthesized so
 * that arbitrary pointer expressions can be passed safely.
 */
#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {			\
		u64 *src64, *dst64;					\
		for (src64 = &(qstats)->x_grp.unicast,			\
			dst64 = &(stats)->x_grp.unicast;		\
			dst64 <= &(stats)->x_grp.s_1519_max;) {		\
			*dst64++ += *src64++;				\
		}							\
	} while (0)
32
/* only the low 8 bits of a value take part in a vnic id */
#define VNIC_MASK	(0xFF)
/* build a vnic id: the masked value with bit 24 set */
#define VNIC_ID(val)	(((val) & VNIC_MASK) | (1ull << 24))
35
36/* hfi1_vnic_update_stats - update statistics */
37static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
38 struct opa_vnic_stats *stats)
39{
40 struct net_device *netdev = vinfo->netdev;
41 u8 i;
42
43 /* add tx counters on different queues */
44 for (i = 0; i < vinfo->num_tx_q; i++) {
45 struct opa_vnic_stats *qstats = &vinfo->stats[i];
46 struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
47
48 stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
49 stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
50 stats->tx_drop_state += qstats->tx_drop_state;
51 stats->tx_dlid_zero += qstats->tx_dlid_zero;
52
53 SUM_GRP_COUNTERS(stats, qstats, tx_grp);
54 stats->netstats.tx_packets += qnstats->tx_packets;
55 stats->netstats.tx_bytes += qnstats->tx_bytes;
56 }
57
58 /* add rx counters on different queues */
59 for (i = 0; i < vinfo->num_rx_q; i++) {
60 struct opa_vnic_stats *qstats = &vinfo->stats[i];
61 struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
62
63 stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
64 stats->netstats.rx_nohandler += qnstats->rx_nohandler;
65 stats->rx_drop_state += qstats->rx_drop_state;
66 stats->rx_oversize += qstats->rx_oversize;
67 stats->rx_runt += qstats->rx_runt;
68
69 SUM_GRP_COUNTERS(stats, qstats, rx_grp);
70 stats->netstats.rx_packets += qnstats->rx_packets;
71 stats->netstats.rx_bytes += qnstats->rx_bytes;
72 }
73
74 stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
75 stats->netstats.tx_carrier_errors +
76 stats->tx_drop_state + stats->tx_dlid_zero;
77 stats->netstats.tx_dropped = stats->netstats.tx_errors;
78
79 stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
80 stats->netstats.rx_nohandler +
81 stats->rx_drop_state + stats->rx_oversize +
82 stats->rx_runt;
83 stats->netstats.rx_dropped = stats->netstats.rx_errors;
84
85 netdev->stats.tx_packets = stats->netstats.tx_packets;
86 netdev->stats.tx_bytes = stats->netstats.tx_bytes;
87 netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
88 netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
89 netdev->stats.tx_errors = stats->netstats.tx_errors;
90 netdev->stats.tx_dropped = stats->netstats.tx_dropped;
91
92 netdev->stats.rx_packets = stats->netstats.rx_packets;
93 netdev->stats.rx_bytes = stats->netstats.rx_bytes;
94 netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
95 netdev->stats.multicast = stats->rx_grp.mcastbcast;
96 netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
97 netdev->stats.rx_errors = stats->netstats.rx_errors;
98 netdev->stats.rx_dropped = stats->netstats.rx_dropped;
99}
100
101/* update_len_counters - update pkt's len histogram counters */
102static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
103 int len)
104{
105 /* account for 4 byte FCS */
106 if (len >= 1515)
107 grp->s_1519_max++;
108 else if (len >= 1020)
109 grp->s_1024_1518++;
110 else if (len >= 508)
111 grp->s_512_1023++;
112 else if (len >= 252)
113 grp->s_256_511++;
114 else if (len >= 124)
115 grp->s_128_255++;
116 else if (len >= 61)
117 grp->s_65_127++;
118 else
119 grp->s_64++;
120}
121
122/* hfi1_vnic_update_tx_counters - update transmit counters */
123static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
124 u8 q_idx, struct sk_buff *skb, int err)
125{
126 struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
127 struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
128 struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
129 u16 vlan_tci;
130
131 stats->netstats.tx_packets++;
132 stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;
133
134 update_len_counters(tx_grp, skb->len);
135
136 /* rest of the counts are for good packets only */
137 if (unlikely(err))
138 return;
139
140 if (is_multicast_ether_addr(mac_hdr->h_dest))
141 tx_grp->mcastbcast++;
142 else
143 tx_grp->unicast++;
144
145 if (!__vlan_get_tag(skb, &vlan_tci))
146 tx_grp->vlan++;
147 else
148 tx_grp->untagged++;
149}
150
151/* hfi1_vnic_update_rx_counters - update receive counters */
152static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
153 u8 q_idx, struct sk_buff *skb, int err)
154{
155 struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
156 struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
157 struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
158 u16 vlan_tci;
159
160 stats->netstats.rx_packets++;
161 stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;
162
163 update_len_counters(rx_grp, skb->len);
164
165 /* rest of the counts are for good packets only */
166 if (unlikely(err))
167 return;
168
169 if (is_multicast_ether_addr(mac_hdr->h_dest))
170 rx_grp->mcastbcast++;
171 else
172 rx_grp->unicast++;
173
174 if (!__vlan_get_tag(skb, &vlan_tci))
175 rx_grp->vlan++;
176 else
177 rx_grp->untagged++;
178}
179
/*
 * hfi1_vnic_get_stats64 - stats callback, overloaded for opa_vnic
 * @netdev: vnic net device
 * @stats:  buffer to fill; it is accessed as a struct opa_vnic_stats, so
 *          the caller must supply storage of that type
 */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
				  struct rtnl_link_stats64 *stats)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_update_stats(vinfo, (struct opa_vnic_stats *)stats);
}
189
190static u64 create_bypass_pbc(u32 vl, u32 dw_len)
191{
192 u64 pbc;
193
194 pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
195 | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
196 | PBC_PACKET_BYPASS
197 | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
198 | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;
199
200 return pbc;
201}
202
203/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
204static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
205 u8 q_idx)
206{
207 netif_stop_subqueue(vinfo->netdev, q_idx);
208 if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
209 return;
210
211 netif_start_subqueue(vinfo->netdev, q_idx);
212}
213
214static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
215 struct net_device *netdev)
216{
217 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
218 u8 pad_len, q_idx = skb->queue_mapping;
219 struct hfi1_devdata *dd = vinfo->dd;
220 struct opa_vnic_skb_mdata *mdata;
221 u32 pkt_len, total_len;
222 int err = -EINVAL;
223 u64 pbc;
224
225 v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
226 if (unlikely(!netif_oper_up(netdev))) {
227 vinfo->stats[q_idx].tx_drop_state++;
228 goto tx_finish;
229 }
230
231 /* take out meta data */
232 mdata = (struct opa_vnic_skb_mdata *)skb->data;
233 skb_pull(skb, sizeof(*mdata));
234 if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
235 vinfo->stats[q_idx].tx_dlid_zero++;
236 goto tx_finish;
237 }
238
239 /* add tail padding (for 8 bytes size alignment) and icrc */
240 pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
241 pad_len += OPA_VNIC_ICRC_TAIL_LEN;
242
243 /*
244 * pkt_len is how much data we have to write, includes header and data.
245 * total_len is length of the packet in Dwords plus the PBC should not
246 * include the CRC.
247 */
248 pkt_len = (skb->len + pad_len) >> 2;
249 total_len = pkt_len + 2; /* PBC + packet */
250
251 pbc = create_bypass_pbc(mdata->vl, total_len);
252
253 skb_get(skb);
254 v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
255 err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
256 if (unlikely(err)) {
257 if (err == -ENOMEM)
258 vinfo->stats[q_idx].netstats.tx_fifo_errors++;
259 else if (err != -EBUSY)
260 vinfo->stats[q_idx].netstats.tx_carrier_errors++;
261 }
262 /* remove the header before updating tx counters */
263 skb_pull(skb, OPA_VNIC_HDR_LEN);
264
265 if (unlikely(err == -EBUSY)) {
266 hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
267 dev_kfree_skb_any(skb);
268 return NETDEV_TX_BUSY;
269 }
270
271tx_finish:
272 /* update tx counters */
273 hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
274 dev_kfree_skb_any(skb);
275 return NETDEV_TX_OK;
276}
277
278static u16 hfi1_vnic_select_queue(struct net_device *netdev,
279 struct sk_buff *skb,
280 struct net_device *sb_dev)
281{
282 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
283 struct opa_vnic_skb_mdata *mdata;
284 struct sdma_engine *sde;
285
286 mdata = (struct opa_vnic_skb_mdata *)skb->data;
287 sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
288 return sde->this_idx;
289}
290
291/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
292static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
293 struct sk_buff *skb)
294{
295 struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
296 int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
297 int rc = -EFAULT;
298
299 skb_pull(skb, OPA_VNIC_HDR_LEN);
300
301 /* Validate Packet length */
302 if (unlikely(skb->len > max_len))
303 vinfo->stats[rxq->idx].rx_oversize++;
304 else if (unlikely(skb->len < ETH_ZLEN))
305 vinfo->stats[rxq->idx].rx_runt++;
306 else
307 rc = 0;
308 return rc;
309}
310
/*
 * get_vnic_port - look up the vport registered for a vesw id
 * @dd:      device data
 * @vesw_id: virtual ethernet switch id; converted with VNIC_ID()
 *
 * Return: whatever hfi1_netdev_get_data() returns for that id.
 */
static struct hfi1_vnic_vport_info *get_vnic_port(struct hfi1_devdata *dd,
						  int vesw_id)
{
	int id = VNIC_ID(vesw_id);
	struct hfi1_vnic_vport_info *vinfo = hfi1_netdev_get_data(dd, id);

	return vinfo;
}
318
319static struct hfi1_vnic_vport_info *get_first_vnic_port(struct hfi1_devdata *dd)
320{
321 struct hfi1_vnic_vport_info *vinfo;
322 int next_id = VNIC_ID(0);
323
324 vinfo = hfi1_netdev_get_first_data(dd, &next_id);
325
326 if (next_id > VNIC_ID(VNIC_MASK))
327 return NULL;
328
329 return vinfo;
330}
331
332void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
333{
334 struct hfi1_devdata *dd = packet->rcd->dd;
335 struct hfi1_vnic_vport_info *vinfo = NULL;
336 struct hfi1_vnic_rx_queue *rxq;
337 struct sk_buff *skb;
338 int l4_type, vesw_id = -1, rc;
339 u8 q_idx;
340 unsigned char *pad_info;
341
342 l4_type = hfi1_16B_get_l4(packet->ebuf);
343 if (likely(l4_type == OPA_16B_L4_ETHR)) {
344 vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
345 vinfo = get_vnic_port(dd, vesw_id);
346
347 /*
348 * In case of invalid vesw id, count the error on
349 * the first available vport.
350 */
351 if (unlikely(!vinfo)) {
352 struct hfi1_vnic_vport_info *vinfo_tmp;
353
354 vinfo_tmp = get_first_vnic_port(dd);
355 if (vinfo_tmp) {
356 spin_lock(&vport_cntr_lock);
357 vinfo_tmp->stats[0].netstats.rx_nohandler++;
358 spin_unlock(&vport_cntr_lock);
359 }
360 }
361 }
362
363 if (unlikely(!vinfo)) {
364 dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
365 l4_type, vesw_id, packet->rcd->ctxt);
366 return;
367 }
368
369 q_idx = packet->rcd->vnic_q_idx;
370 rxq = &vinfo->rxq[q_idx];
371 if (unlikely(!netif_oper_up(vinfo->netdev))) {
372 vinfo->stats[q_idx].rx_drop_state++;
373 return;
374 }
375
376 skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
377 if (unlikely(!skb)) {
378 vinfo->stats[q_idx].netstats.rx_fifo_errors++;
379 return;
380 }
381
382 memcpy(skb->data, packet->ebuf, packet->tlen);
383 skb_put(skb, packet->tlen);
384
385 pad_info = skb->data + skb->len - 1;
386 skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
387 ((*pad_info) & 0x7)));
388
389 rc = hfi1_vnic_decap_skb(rxq, skb);
390
391 /* update rx counters */
392 hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
393 if (unlikely(rc)) {
394 dev_kfree_skb_any(skb);
395 return;
396 }
397
398 skb_checksum_none_assert(skb);
399 skb->protocol = eth_type_trans(skb, rxq->netdev);
400
401 napi_gro_receive(&rxq->napi, skb);
402}
403
404static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
405{
406 struct hfi1_devdata *dd = vinfo->dd;
407 struct net_device *netdev = vinfo->netdev;
408 int rc;
409
410 /* ensure virtual eth switch id is valid */
411 if (!vinfo->vesw_id)
412 return -EINVAL;
413
414 rc = hfi1_netdev_add_data(dd, VNIC_ID(vinfo->vesw_id), vinfo);
415 if (rc < 0)
416 return rc;
417
418 rc = hfi1_netdev_rx_init(dd);
419 if (rc)
420 goto err_remove;
421
422 netif_carrier_on(netdev);
423 netif_tx_start_all_queues(netdev);
424 set_bit(HFI1_VNIC_UP, &vinfo->flags);
425
426 return 0;
427
428err_remove:
429 hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
430 return rc;
431}
432
433static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
434{
435 struct hfi1_devdata *dd = vinfo->dd;
436
437 clear_bit(HFI1_VNIC_UP, &vinfo->flags);
438 netif_carrier_off(vinfo->netdev);
439 netif_tx_disable(vinfo->netdev);
440 hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
441
442 hfi1_netdev_rx_destroy(dd);
443}
444
445static int hfi1_netdev_open(struct net_device *netdev)
446{
447 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
448 int rc;
449
450 mutex_lock(&vinfo->lock);
451 rc = hfi1_vnic_up(vinfo);
452 mutex_unlock(&vinfo->lock);
453 return rc;
454}
455
456static int hfi1_netdev_close(struct net_device *netdev)
457{
458 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
459
460 mutex_lock(&vinfo->lock);
461 if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
462 hfi1_vnic_down(vinfo);
463 mutex_unlock(&vinfo->lock);
464 return 0;
465}
466
467static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
468{
469 struct hfi1_devdata *dd = vinfo->dd;
470 int rc = 0;
471
472 mutex_lock(&hfi1_mutex);
473 if (!dd->vnic_num_vports) {
474 rc = hfi1_vnic_txreq_init(dd);
475 if (rc)
476 goto txreq_fail;
477 }
478
479 rc = hfi1_netdev_rx_init(dd);
480 if (rc) {
481 dd_dev_err(dd, "Unable to initialize netdev contexts\n");
482 goto alloc_fail;
483 }
484
485 hfi1_init_vnic_rsm(dd);
486
487 dd->vnic_num_vports++;
488 hfi1_vnic_sdma_init(vinfo);
489
490alloc_fail:
491 if (!dd->vnic_num_vports)
492 hfi1_vnic_txreq_deinit(dd);
493txreq_fail:
494 mutex_unlock(&hfi1_mutex);
495 return rc;
496}
497
498static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
499{
500 struct hfi1_devdata *dd = vinfo->dd;
501
502 mutex_lock(&hfi1_mutex);
503 if (--dd->vnic_num_vports == 0) {
504 hfi1_deinit_vnic_rsm(dd);
505 hfi1_vnic_txreq_deinit(dd);
506 }
507 mutex_unlock(&hfi1_mutex);
508 hfi1_netdev_rx_destroy(dd);
509}
510
511static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
512{
513 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
514 bool reopen = false;
515
516 /*
517 * If vesw_id is being changed, and if the vnic port is up,
518 * reset the vnic port to ensure new vesw_id gets picked up
519 */
520 if (id != vinfo->vesw_id) {
521 mutex_lock(&vinfo->lock);
522 if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
523 hfi1_vnic_down(vinfo);
524 reopen = true;
525 }
526
527 vinfo->vesw_id = id;
528 if (reopen)
529 hfi1_vnic_up(vinfo);
530
531 mutex_unlock(&vinfo->lock);
532 }
533}
534
535/* netdev ops */
536static const struct net_device_ops hfi1_netdev_ops = {
537 .ndo_open = hfi1_netdev_open,
538 .ndo_stop = hfi1_netdev_close,
539 .ndo_start_xmit = hfi1_netdev_start_xmit,
540 .ndo_select_queue = hfi1_vnic_select_queue,
541 .ndo_get_stats64 = hfi1_vnic_get_stats64,
542};
543
544static void hfi1_vnic_free_rn(struct net_device *netdev)
545{
546 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
547
548 hfi1_vnic_deinit(vinfo);
549 mutex_destroy(&vinfo->lock);
550 free_netdev(netdev);
551}
552
553struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
554 u32 port_num,
555 enum rdma_netdev_t type,
556 const char *name,
557 unsigned char name_assign_type,
558 void (*setup)(struct net_device *))
559{
560 struct hfi1_devdata *dd = dd_from_ibdev(device);
561 struct hfi1_vnic_vport_info *vinfo;
562 struct net_device *netdev;
563 struct rdma_netdev *rn;
564 int i, size, rc;
565
566 if (!dd->num_netdev_contexts)
567 return ERR_PTR(-ENOMEM);
568
569 if (!port_num || (port_num > dd->num_pports))
570 return ERR_PTR(-EINVAL);
571
572 if (type != RDMA_NETDEV_OPA_VNIC)
573 return ERR_PTR(-EOPNOTSUPP);
574
575 size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
576 netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
577 chip_sdma_engines(dd),
578 dd->num_netdev_contexts);
579 if (!netdev)
580 return ERR_PTR(-ENOMEM);
581
582 rn = netdev_priv(netdev);
583 vinfo = opa_vnic_dev_priv(netdev);
584 vinfo->dd = dd;
585 vinfo->num_tx_q = chip_sdma_engines(dd);
586 vinfo->num_rx_q = dd->num_netdev_contexts;
587 vinfo->netdev = netdev;
588 rn->free_rdma_netdev = hfi1_vnic_free_rn;
589 rn->set_id = hfi1_vnic_set_vesw_id;
590
591 netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
592 netdev->hw_features = netdev->features;
593 netdev->vlan_features = netdev->features;
594 netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
595 netdev->netdev_ops = &hfi1_netdev_ops;
596 mutex_init(&vinfo->lock);
597
598 for (i = 0; i < vinfo->num_rx_q; i++) {
599 struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
600
601 rxq->idx = i;
602 rxq->vinfo = vinfo;
603 rxq->netdev = netdev;
604 }
605
606 rc = hfi1_vnic_init(vinfo);
607 if (rc)
608 goto init_fail;
609
610 return netdev;
611init_fail:
612 mutex_destroy(&vinfo->lock);
613 free_netdev(netdev);
614 return ERR_PTR(rc);
615}
1/*
2 * Copyright(c) 2017 - 2018 Intel Corporation.
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * BSD LICENSE
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions
22 * are met:
23 *
24 * - Redistributions of source code must retain the above copyright
25 * notice, this list of conditions and the following disclaimer.
26 * - Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in
28 * the documentation and/or other materials provided with the
29 * distribution.
30 * - Neither the name of Intel Corporation nor the names of its
31 * contributors may be used to endorse or promote products derived
32 * from this software without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 *
46 */
47
48/*
49 * This file contains HFI1 support for VNIC functionality
50 */
51
52#include <linux/io.h>
53#include <linux/if_vlan.h>
54
55#include "vnic.h"
56
57#define HFI_TX_TIMEOUT_MS 1000
58
59#define HFI1_VNIC_RCV_Q_SIZE 1024
60
61#define HFI1_VNIC_UP 0
62
63static DEFINE_SPINLOCK(vport_cntr_lock);
64
65static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
66{
67 unsigned int rcvctrl_ops = 0;
68 int ret;
69
70 uctxt->do_interrupt = &handle_receive_interrupt;
71
72 /* Now allocate the RcvHdr queue and eager buffers. */
73 ret = hfi1_create_rcvhdrq(dd, uctxt);
74 if (ret)
75 goto done;
76
77 ret = hfi1_setup_eagerbufs(uctxt);
78 if (ret)
79 goto done;
80
81 if (uctxt->rcvhdrtail_kvaddr)
82 clear_rcvhdrtail(uctxt);
83
84 rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
85 rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;
86
87 if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
88 rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
89 if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
90 rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
91 if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
92 rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
93 if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
94 rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
95
96 hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
97done:
98 return ret;
99}
100
101static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
102 struct hfi1_ctxtdata **vnic_ctxt)
103{
104 struct hfi1_ctxtdata *uctxt;
105 int ret;
106
107 if (dd->flags & HFI1_FROZEN)
108 return -EIO;
109
110 ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
111 if (ret < 0) {
112 dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
113 return -ENOMEM;
114 }
115
116 uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
117 HFI1_CAP_KGET(NODROP_RHQ_FULL) |
118 HFI1_CAP_KGET(NODROP_EGR_FULL) |
119 HFI1_CAP_KGET(DMA_RTAIL);
120 uctxt->seq_cnt = 1;
121 uctxt->is_vnic = true;
122
123 msix_request_rcd_irq(uctxt);
124
125 hfi1_stats.sps_ctxts++;
126 dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
127 *vnic_ctxt = uctxt;
128
129 return 0;
130}
131
132static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
133 struct hfi1_ctxtdata *uctxt)
134{
135 dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
136 flush_wc();
137
138 /*
139 * Disable receive context and interrupt available, reset all
140 * RcvCtxtCtrl bits to default values.
141 */
142 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
143 HFI1_RCVCTRL_TIDFLOW_DIS |
144 HFI1_RCVCTRL_INTRAVAIL_DIS |
145 HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
146 HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
147 HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
148
149 /* msix_intr will always be > 0, only clean up if this is true */
150 if (uctxt->msix_intr)
151 msix_free_irq(dd, uctxt->msix_intr);
152
153 uctxt->event_flags = 0;
154
155 hfi1_clear_tids(uctxt);
156 hfi1_clear_ctxt_pkey(dd, uctxt);
157
158 hfi1_stats.sps_ctxts--;
159
160 hfi1_free_ctxt(uctxt);
161}
162
163void hfi1_vnic_setup(struct hfi1_devdata *dd)
164{
165 xa_init(&dd->vnic.vesws);
166}
167
168void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
169{
170 WARN_ON(!xa_empty(&dd->vnic.vesws));
171}
172
/*
 * SUM_GRP_COUNTERS - add one per-queue counter group onto a running total
 * @stats:  pointer to the destination stats structure
 * @qstats: pointer to the per-queue source stats structure
 * @x_grp:  name of the group member to sum (tx_grp or rx_grp)
 *
 * Walks the u64 counters from x_grp.unicast up to and including
 * x_grp.s_1519_max and adds every source counter to the matching
 * destination counter.  This assumes those members are laid out as
 * consecutive u64 fields.  Both pointer arguments are parenthesized so
 * that arbitrary pointer expressions can be passed safely.
 */
#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {			\
		u64 *src64, *dst64;					\
		for (src64 = &(qstats)->x_grp.unicast,			\
			dst64 = &(stats)->x_grp.unicast;		\
			dst64 <= &(stats)->x_grp.s_1519_max;) {		\
			*dst64++ += *src64++;				\
		}							\
	} while (0)
181
182/* hfi1_vnic_update_stats - update statistics */
183static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
184 struct opa_vnic_stats *stats)
185{
186 struct net_device *netdev = vinfo->netdev;
187 u8 i;
188
189 /* add tx counters on different queues */
190 for (i = 0; i < vinfo->num_tx_q; i++) {
191 struct opa_vnic_stats *qstats = &vinfo->stats[i];
192 struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
193
194 stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
195 stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
196 stats->tx_drop_state += qstats->tx_drop_state;
197 stats->tx_dlid_zero += qstats->tx_dlid_zero;
198
199 SUM_GRP_COUNTERS(stats, qstats, tx_grp);
200 stats->netstats.tx_packets += qnstats->tx_packets;
201 stats->netstats.tx_bytes += qnstats->tx_bytes;
202 }
203
204 /* add rx counters on different queues */
205 for (i = 0; i < vinfo->num_rx_q; i++) {
206 struct opa_vnic_stats *qstats = &vinfo->stats[i];
207 struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
208
209 stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
210 stats->netstats.rx_nohandler += qnstats->rx_nohandler;
211 stats->rx_drop_state += qstats->rx_drop_state;
212 stats->rx_oversize += qstats->rx_oversize;
213 stats->rx_runt += qstats->rx_runt;
214
215 SUM_GRP_COUNTERS(stats, qstats, rx_grp);
216 stats->netstats.rx_packets += qnstats->rx_packets;
217 stats->netstats.rx_bytes += qnstats->rx_bytes;
218 }
219
220 stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
221 stats->netstats.tx_carrier_errors +
222 stats->tx_drop_state + stats->tx_dlid_zero;
223 stats->netstats.tx_dropped = stats->netstats.tx_errors;
224
225 stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
226 stats->netstats.rx_nohandler +
227 stats->rx_drop_state + stats->rx_oversize +
228 stats->rx_runt;
229 stats->netstats.rx_dropped = stats->netstats.rx_errors;
230
231 netdev->stats.tx_packets = stats->netstats.tx_packets;
232 netdev->stats.tx_bytes = stats->netstats.tx_bytes;
233 netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
234 netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
235 netdev->stats.tx_errors = stats->netstats.tx_errors;
236 netdev->stats.tx_dropped = stats->netstats.tx_dropped;
237
238 netdev->stats.rx_packets = stats->netstats.rx_packets;
239 netdev->stats.rx_bytes = stats->netstats.rx_bytes;
240 netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
241 netdev->stats.multicast = stats->rx_grp.mcastbcast;
242 netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
243 netdev->stats.rx_errors = stats->netstats.rx_errors;
244 netdev->stats.rx_dropped = stats->netstats.rx_dropped;
245}
246
247/* update_len_counters - update pkt's len histogram counters */
248static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
249 int len)
250{
251 /* account for 4 byte FCS */
252 if (len >= 1515)
253 grp->s_1519_max++;
254 else if (len >= 1020)
255 grp->s_1024_1518++;
256 else if (len >= 508)
257 grp->s_512_1023++;
258 else if (len >= 252)
259 grp->s_256_511++;
260 else if (len >= 124)
261 grp->s_128_255++;
262 else if (len >= 61)
263 grp->s_65_127++;
264 else
265 grp->s_64++;
266}
267
268/* hfi1_vnic_update_tx_counters - update transmit counters */
269static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
270 u8 q_idx, struct sk_buff *skb, int err)
271{
272 struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
273 struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
274 struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
275 u16 vlan_tci;
276
277 stats->netstats.tx_packets++;
278 stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;
279
280 update_len_counters(tx_grp, skb->len);
281
282 /* rest of the counts are for good packets only */
283 if (unlikely(err))
284 return;
285
286 if (is_multicast_ether_addr(mac_hdr->h_dest))
287 tx_grp->mcastbcast++;
288 else
289 tx_grp->unicast++;
290
291 if (!__vlan_get_tag(skb, &vlan_tci))
292 tx_grp->vlan++;
293 else
294 tx_grp->untagged++;
295}
296
297/* hfi1_vnic_update_rx_counters - update receive counters */
298static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
299 u8 q_idx, struct sk_buff *skb, int err)
300{
301 struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
302 struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
303 struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
304 u16 vlan_tci;
305
306 stats->netstats.rx_packets++;
307 stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;
308
309 update_len_counters(rx_grp, skb->len);
310
311 /* rest of the counts are for good packets only */
312 if (unlikely(err))
313 return;
314
315 if (is_multicast_ether_addr(mac_hdr->h_dest))
316 rx_grp->mcastbcast++;
317 else
318 rx_grp->unicast++;
319
320 if (!__vlan_get_tag(skb, &vlan_tci))
321 rx_grp->vlan++;
322 else
323 rx_grp->untagged++;
324}
325
/*
 * hfi1_vnic_get_stats64 - stats callback, overloaded for opa_vnic
 * @netdev: vnic net device
 * @stats:  buffer to fill; it is accessed as a struct opa_vnic_stats, so
 *          the caller must supply storage of that type
 */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
				  struct rtnl_link_stats64 *stats)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_update_stats(vinfo, (struct opa_vnic_stats *)stats);
}
335
336static u64 create_bypass_pbc(u32 vl, u32 dw_len)
337{
338 u64 pbc;
339
340 pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
341 | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
342 | PBC_PACKET_BYPASS
343 | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
344 | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;
345
346 return pbc;
347}
348
349/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
350static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
351 u8 q_idx)
352{
353 netif_stop_subqueue(vinfo->netdev, q_idx);
354 if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
355 return;
356
357 netif_start_subqueue(vinfo->netdev, q_idx);
358}
359
360static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
361 struct net_device *netdev)
362{
363 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
364 u8 pad_len, q_idx = skb->queue_mapping;
365 struct hfi1_devdata *dd = vinfo->dd;
366 struct opa_vnic_skb_mdata *mdata;
367 u32 pkt_len, total_len;
368 int err = -EINVAL;
369 u64 pbc;
370
371 v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
372 if (unlikely(!netif_oper_up(netdev))) {
373 vinfo->stats[q_idx].tx_drop_state++;
374 goto tx_finish;
375 }
376
377 /* take out meta data */
378 mdata = (struct opa_vnic_skb_mdata *)skb->data;
379 skb_pull(skb, sizeof(*mdata));
380 if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
381 vinfo->stats[q_idx].tx_dlid_zero++;
382 goto tx_finish;
383 }
384
385 /* add tail padding (for 8 bytes size alignment) and icrc */
386 pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
387 pad_len += OPA_VNIC_ICRC_TAIL_LEN;
388
389 /*
390 * pkt_len is how much data we have to write, includes header and data.
391 * total_len is length of the packet in Dwords plus the PBC should not
392 * include the CRC.
393 */
394 pkt_len = (skb->len + pad_len) >> 2;
395 total_len = pkt_len + 2; /* PBC + packet */
396
397 pbc = create_bypass_pbc(mdata->vl, total_len);
398
399 skb_get(skb);
400 v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
401 err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
402 if (unlikely(err)) {
403 if (err == -ENOMEM)
404 vinfo->stats[q_idx].netstats.tx_fifo_errors++;
405 else if (err != -EBUSY)
406 vinfo->stats[q_idx].netstats.tx_carrier_errors++;
407 }
408 /* remove the header before updating tx counters */
409 skb_pull(skb, OPA_VNIC_HDR_LEN);
410
411 if (unlikely(err == -EBUSY)) {
412 hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
413 dev_kfree_skb_any(skb);
414 return NETDEV_TX_BUSY;
415 }
416
417tx_finish:
418 /* update tx counters */
419 hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
420 dev_kfree_skb_any(skb);
421 return NETDEV_TX_OK;
422}
423
424static u16 hfi1_vnic_select_queue(struct net_device *netdev,
425 struct sk_buff *skb,
426 struct net_device *sb_dev)
427{
428 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
429 struct opa_vnic_skb_mdata *mdata;
430 struct sdma_engine *sde;
431
432 mdata = (struct opa_vnic_skb_mdata *)skb->data;
433 sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
434 return sde->this_idx;
435}
436
437/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
438static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
439 struct sk_buff *skb)
440{
441 struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
442 int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
443 int rc = -EFAULT;
444
445 skb_pull(skb, OPA_VNIC_HDR_LEN);
446
447 /* Validate Packet length */
448 if (unlikely(skb->len > max_len))
449 vinfo->stats[rxq->idx].rx_oversize++;
450 else if (unlikely(skb->len < ETH_ZLEN))
451 vinfo->stats[rxq->idx].rx_runt++;
452 else
453 rc = 0;
454 return rc;
455}
456
457static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
458{
459 unsigned char *pad_info;
460 struct sk_buff *skb;
461
462 skb = skb_dequeue(&rxq->skbq);
463 if (unlikely(!skb))
464 return NULL;
465
466 /* remove tail padding and icrc */
467 pad_info = skb->data + skb->len - 1;
468 skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
469 ((*pad_info) & 0x7)));
470
471 return skb;
472}
473
474/* hfi1_vnic_handle_rx - handle skb receive */
475static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
476 int *work_done, int work_to_do)
477{
478 struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
479 struct sk_buff *skb;
480 int rc;
481
482 while (1) {
483 if (*work_done >= work_to_do)
484 break;
485
486 skb = hfi1_vnic_get_skb(rxq);
487 if (unlikely(!skb))
488 break;
489
490 rc = hfi1_vnic_decap_skb(rxq, skb);
491 /* update rx counters */
492 hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
493 if (unlikely(rc)) {
494 dev_kfree_skb_any(skb);
495 continue;
496 }
497
498 skb_checksum_none_assert(skb);
499 skb->protocol = eth_type_trans(skb, rxq->netdev);
500
501 napi_gro_receive(&rxq->napi, skb);
502 (*work_done)++;
503 }
504}
505
506/* hfi1_vnic_napi - napi receive polling callback function */
507static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
508{
509 struct hfi1_vnic_rx_queue *rxq = container_of(napi,
510 struct hfi1_vnic_rx_queue, napi);
511 struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
512 int work_done = 0;
513
514 v_dbg("napi %d budget %d\n", rxq->idx, budget);
515 hfi1_vnic_handle_rx(rxq, &work_done, budget);
516
517 v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
518 if (work_done < budget)
519 napi_complete(napi);
520
521 return work_done;
522}
523
524void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
525{
526 struct hfi1_devdata *dd = packet->rcd->dd;
527 struct hfi1_vnic_vport_info *vinfo = NULL;
528 struct hfi1_vnic_rx_queue *rxq;
529 struct sk_buff *skb;
530 int l4_type, vesw_id = -1;
531 u8 q_idx;
532
533 l4_type = hfi1_16B_get_l4(packet->ebuf);
534 if (likely(l4_type == OPA_16B_L4_ETHR)) {
535 vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
536 vinfo = xa_load(&dd->vnic.vesws, vesw_id);
537
538 /*
539 * In case of invalid vesw id, count the error on
540 * the first available vport.
541 */
542 if (unlikely(!vinfo)) {
543 struct hfi1_vnic_vport_info *vinfo_tmp;
544 unsigned long index = 0;
545
546 vinfo_tmp = xa_find(&dd->vnic.vesws, &index, ULONG_MAX,
547 XA_PRESENT);
548 if (vinfo_tmp) {
549 spin_lock(&vport_cntr_lock);
550 vinfo_tmp->stats[0].netstats.rx_nohandler++;
551 spin_unlock(&vport_cntr_lock);
552 }
553 }
554 }
555
556 if (unlikely(!vinfo)) {
557 dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
558 l4_type, vesw_id, packet->rcd->ctxt);
559 return;
560 }
561
562 q_idx = packet->rcd->vnic_q_idx;
563 rxq = &vinfo->rxq[q_idx];
564 if (unlikely(!netif_oper_up(vinfo->netdev))) {
565 vinfo->stats[q_idx].rx_drop_state++;
566 skb_queue_purge(&rxq->skbq);
567 return;
568 }
569
570 if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
571 vinfo->stats[q_idx].netstats.rx_fifo_errors++;
572 return;
573 }
574
575 skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
576 if (unlikely(!skb)) {
577 vinfo->stats[q_idx].netstats.rx_fifo_errors++;
578 return;
579 }
580
581 memcpy(skb->data, packet->ebuf, packet->tlen);
582 skb_put(skb, packet->tlen);
583 skb_queue_tail(&rxq->skbq, skb);
584
585 if (napi_schedule_prep(&rxq->napi)) {
586 v_dbg("napi %d scheduling\n", q_idx);
587 __napi_schedule(&rxq->napi);
588 }
589}
590
591static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
592{
593 struct hfi1_devdata *dd = vinfo->dd;
594 struct net_device *netdev = vinfo->netdev;
595 int i, rc;
596
597 /* ensure virtual eth switch id is valid */
598 if (!vinfo->vesw_id)
599 return -EINVAL;
600
601 rc = xa_insert(&dd->vnic.vesws, vinfo->vesw_id, vinfo, GFP_KERNEL);
602 if (rc < 0)
603 return rc;
604
605 for (i = 0; i < vinfo->num_rx_q; i++) {
606 struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
607
608 skb_queue_head_init(&rxq->skbq);
609 napi_enable(&rxq->napi);
610 }
611
612 netif_carrier_on(netdev);
613 netif_tx_start_all_queues(netdev);
614 set_bit(HFI1_VNIC_UP, &vinfo->flags);
615
616 return 0;
617}
618
619static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
620{
621 struct hfi1_devdata *dd = vinfo->dd;
622 u8 i;
623
624 clear_bit(HFI1_VNIC_UP, &vinfo->flags);
625 netif_carrier_off(vinfo->netdev);
626 netif_tx_disable(vinfo->netdev);
627 xa_erase(&dd->vnic.vesws, vinfo->vesw_id);
628
629 /* ensure irqs see the change */
630 msix_vnic_synchronize_irq(dd);
631
632 /* remove unread skbs */
633 for (i = 0; i < vinfo->num_rx_q; i++) {
634 struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
635
636 napi_disable(&rxq->napi);
637 skb_queue_purge(&rxq->skbq);
638 }
639}
640
641static int hfi1_netdev_open(struct net_device *netdev)
642{
643 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
644 int rc;
645
646 mutex_lock(&vinfo->lock);
647 rc = hfi1_vnic_up(vinfo);
648 mutex_unlock(&vinfo->lock);
649 return rc;
650}
651
652static int hfi1_netdev_close(struct net_device *netdev)
653{
654 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
655
656 mutex_lock(&vinfo->lock);
657 if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
658 hfi1_vnic_down(vinfo);
659 mutex_unlock(&vinfo->lock);
660 return 0;
661}
662
663static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
664 struct hfi1_ctxtdata **vnic_ctxt)
665{
666 int rc;
667
668 rc = allocate_vnic_ctxt(dd, vnic_ctxt);
669 if (rc) {
670 dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
671 return rc;
672 }
673
674 rc = setup_vnic_ctxt(dd, *vnic_ctxt);
675 if (rc) {
676 dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
677 deallocate_vnic_ctxt(dd, *vnic_ctxt);
678 *vnic_ctxt = NULL;
679 }
680
681 return rc;
682}
683
684static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
685{
686 struct hfi1_devdata *dd = vinfo->dd;
687 int i, rc = 0;
688
689 mutex_lock(&hfi1_mutex);
690 if (!dd->vnic.num_vports) {
691 rc = hfi1_vnic_txreq_init(dd);
692 if (rc)
693 goto txreq_fail;
694 }
695
696 for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
697 rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
698 if (rc)
699 break;
700 hfi1_rcd_get(dd->vnic.ctxt[i]);
701 dd->vnic.ctxt[i]->vnic_q_idx = i;
702 }
703
704 if (i < vinfo->num_rx_q) {
705 /*
706 * If required amount of contexts is not
707 * allocated successfully then remaining contexts
708 * are released.
709 */
710 while (i-- > dd->vnic.num_ctxt) {
711 deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
712 hfi1_rcd_put(dd->vnic.ctxt[i]);
713 dd->vnic.ctxt[i] = NULL;
714 }
715 goto alloc_fail;
716 }
717
718 if (dd->vnic.num_ctxt != i) {
719 dd->vnic.num_ctxt = i;
720 hfi1_init_vnic_rsm(dd);
721 }
722
723 dd->vnic.num_vports++;
724 hfi1_vnic_sdma_init(vinfo);
725alloc_fail:
726 if (!dd->vnic.num_vports)
727 hfi1_vnic_txreq_deinit(dd);
728txreq_fail:
729 mutex_unlock(&hfi1_mutex);
730 return rc;
731}
732
733static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
734{
735 struct hfi1_devdata *dd = vinfo->dd;
736 int i;
737
738 mutex_lock(&hfi1_mutex);
739 if (--dd->vnic.num_vports == 0) {
740 for (i = 0; i < dd->vnic.num_ctxt; i++) {
741 deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
742 hfi1_rcd_put(dd->vnic.ctxt[i]);
743 dd->vnic.ctxt[i] = NULL;
744 }
745 hfi1_deinit_vnic_rsm(dd);
746 dd->vnic.num_ctxt = 0;
747 hfi1_vnic_txreq_deinit(dd);
748 }
749 mutex_unlock(&hfi1_mutex);
750}
751
752static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
753{
754 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
755 bool reopen = false;
756
757 /*
758 * If vesw_id is being changed, and if the vnic port is up,
759 * reset the vnic port to ensure new vesw_id gets picked up
760 */
761 if (id != vinfo->vesw_id) {
762 mutex_lock(&vinfo->lock);
763 if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
764 hfi1_vnic_down(vinfo);
765 reopen = true;
766 }
767
768 vinfo->vesw_id = id;
769 if (reopen)
770 hfi1_vnic_up(vinfo);
771
772 mutex_unlock(&vinfo->lock);
773 }
774}
775
776/* netdev ops */
777static const struct net_device_ops hfi1_netdev_ops = {
778 .ndo_open = hfi1_netdev_open,
779 .ndo_stop = hfi1_netdev_close,
780 .ndo_start_xmit = hfi1_netdev_start_xmit,
781 .ndo_select_queue = hfi1_vnic_select_queue,
782 .ndo_get_stats64 = hfi1_vnic_get_stats64,
783};
784
785static void hfi1_vnic_free_rn(struct net_device *netdev)
786{
787 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
788
789 hfi1_vnic_deinit(vinfo);
790 mutex_destroy(&vinfo->lock);
791 free_netdev(netdev);
792}
793
794struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
795 u8 port_num,
796 enum rdma_netdev_t type,
797 const char *name,
798 unsigned char name_assign_type,
799 void (*setup)(struct net_device *))
800{
801 struct hfi1_devdata *dd = dd_from_ibdev(device);
802 struct hfi1_vnic_vport_info *vinfo;
803 struct net_device *netdev;
804 struct rdma_netdev *rn;
805 int i, size, rc;
806
807 if (!dd->num_vnic_contexts)
808 return ERR_PTR(-ENOMEM);
809
810 if (!port_num || (port_num > dd->num_pports))
811 return ERR_PTR(-EINVAL);
812
813 if (type != RDMA_NETDEV_OPA_VNIC)
814 return ERR_PTR(-EOPNOTSUPP);
815
816 size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
817 netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
818 dd->num_sdma, dd->num_vnic_contexts);
819 if (!netdev)
820 return ERR_PTR(-ENOMEM);
821
822 rn = netdev_priv(netdev);
823 vinfo = opa_vnic_dev_priv(netdev);
824 vinfo->dd = dd;
825 vinfo->num_tx_q = dd->num_sdma;
826 vinfo->num_rx_q = dd->num_vnic_contexts;
827 vinfo->netdev = netdev;
828 rn->free_rdma_netdev = hfi1_vnic_free_rn;
829 rn->set_id = hfi1_vnic_set_vesw_id;
830
831 netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
832 netdev->hw_features = netdev->features;
833 netdev->vlan_features = netdev->features;
834 netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
835 netdev->netdev_ops = &hfi1_netdev_ops;
836 mutex_init(&vinfo->lock);
837
838 for (i = 0; i < vinfo->num_rx_q; i++) {
839 struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
840
841 rxq->idx = i;
842 rxq->vinfo = vinfo;
843 rxq->netdev = netdev;
844 netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
845 }
846
847 rc = hfi1_vnic_init(vinfo);
848 if (rc)
849 goto init_fail;
850
851 return netdev;
852init_fail:
853 mutex_destroy(&vinfo->lock);
854 free_netdev(netdev);
855 return ERR_PTR(rc);
856}