Loading...
1/*
2 * Copyright (c) 2021 Cornelis Networks. All rights reserved.
3 * Copyright (c) 2013 Intel Corporation. All rights reserved.
4 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
5 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#include <linux/spinlock.h>
37#include <linux/pci.h>
38#include <linux/io.h>
39#include <linux/delay.h>
40#include <linux/netdevice.h>
41#include <linux/vmalloc.h>
42#include <linux/module.h>
43#include <linux/prefetch.h>
44
45#include "qib.h"
46
47/*
48 * The size has to be longer than this string, so we can append
49 * board/chip information to it in the init code.
50 */
51const char ib_qib_version[] = QIB_DRIVER_VERSION "\n";
52
53DEFINE_MUTEX(qib_mutex); /* general driver use */
54
55unsigned qib_ibmtu;
56module_param_named(ibmtu, qib_ibmtu, uint, S_IRUGO);
57MODULE_PARM_DESC(ibmtu, "Set max IB MTU (0=2KB, 1=256, 2=512, ... 5=4096");
58
59unsigned qib_compat_ddr_negotiate = 1;
60module_param_named(compat_ddr_negotiate, qib_compat_ddr_negotiate, uint,
61 S_IWUSR | S_IRUGO);
62MODULE_PARM_DESC(compat_ddr_negotiate,
63 "Attempt pre-IBTA 1.2 DDR speed negotiation");
64
65MODULE_LICENSE("Dual BSD/GPL");
66MODULE_AUTHOR("Cornelis <support@cornelisnetworks.com>");
67MODULE_DESCRIPTION("Cornelis IB driver");
68
69/*
70 * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our
71 * PIO send buffers. This is well beyond anything currently
72 * defined in the InfiniBand spec.
73 */
74#define QIB_PIO_MAXIBHDR 128
75
76/*
77 * QIB_MAX_PKT_RCV is the max # if packets processed per receive interrupt.
78 */
79#define QIB_MAX_PKT_RECV 64
80
81struct qlogic_ib_stats qib_stats;
82
83struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi)
84{
85 struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
86 struct qib_devdata *dd = container_of(ibdev,
87 struct qib_devdata, verbs_dev);
88 return dd->pcidev;
89}
90
91/*
92 * Return count of units with at least one port ACTIVE.
93 */
94int qib_count_active_units(void)
95{
96 struct qib_devdata *dd;
97 struct qib_pportdata *ppd;
98 unsigned long index, flags;
99 int pidx, nunits_active = 0;
100
101 xa_lock_irqsave(&qib_dev_table, flags);
102 xa_for_each(&qib_dev_table, index, dd) {
103 if (!(dd->flags & QIB_PRESENT) || !dd->kregbase)
104 continue;
105 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
106 ppd = dd->pport + pidx;
107 if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT |
108 QIBL_LINKARMED | QIBL_LINKACTIVE))) {
109 nunits_active++;
110 break;
111 }
112 }
113 }
114 xa_unlock_irqrestore(&qib_dev_table, flags);
115 return nunits_active;
116}
117
118/*
119 * Return count of all units, optionally return in arguments
120 * the number of usable (present) units, and the number of
121 * ports that are up.
122 */
123int qib_count_units(int *npresentp, int *nupp)
124{
125 int nunits = 0, npresent = 0, nup = 0;
126 struct qib_devdata *dd;
127 unsigned long index, flags;
128 int pidx;
129 struct qib_pportdata *ppd;
130
131 xa_lock_irqsave(&qib_dev_table, flags);
132 xa_for_each(&qib_dev_table, index, dd) {
133 nunits++;
134 if ((dd->flags & QIB_PRESENT) && dd->kregbase)
135 npresent++;
136 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
137 ppd = dd->pport + pidx;
138 if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT |
139 QIBL_LINKARMED | QIBL_LINKACTIVE)))
140 nup++;
141 }
142 }
143 xa_unlock_irqrestore(&qib_dev_table, flags);
144
145 if (npresentp)
146 *npresentp = npresent;
147 if (nupp)
148 *nupp = nup;
149
150 return nunits;
151}
152
153/**
154 * qib_wait_linkstate - wait for an IB link state change to occur
155 * @ppd: the qlogic_ib device
156 * @state: the state to wait for
157 * @msecs: the number of milliseconds to wait
158 *
159 * wait up to msecs milliseconds for IB link state change to occur for
160 * now, take the easy polling route. Currently used only by
161 * qib_set_linkstate. Returns 0 if state reached, otherwise
162 * -ETIMEDOUT state can have multiple states set, for any of several
163 * transitions.
164 */
165int qib_wait_linkstate(struct qib_pportdata *ppd, u32 state, int msecs)
166{
167 int ret;
168 unsigned long flags;
169
170 spin_lock_irqsave(&ppd->lflags_lock, flags);
171 if (ppd->state_wanted) {
172 spin_unlock_irqrestore(&ppd->lflags_lock, flags);
173 ret = -EBUSY;
174 goto bail;
175 }
176 ppd->state_wanted = state;
177 spin_unlock_irqrestore(&ppd->lflags_lock, flags);
178 wait_event_interruptible_timeout(ppd->state_wait,
179 (ppd->lflags & state),
180 msecs_to_jiffies(msecs));
181 spin_lock_irqsave(&ppd->lflags_lock, flags);
182 ppd->state_wanted = 0;
183 spin_unlock_irqrestore(&ppd->lflags_lock, flags);
184
185 if (!(ppd->lflags & state))
186 ret = -ETIMEDOUT;
187 else
188 ret = 0;
189bail:
190 return ret;
191}
192
193int qib_set_linkstate(struct qib_pportdata *ppd, u8 newstate)
194{
195 u32 lstate;
196 int ret;
197 struct qib_devdata *dd = ppd->dd;
198 unsigned long flags;
199
200 switch (newstate) {
201 case QIB_IB_LINKDOWN_ONLY:
202 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
203 IB_LINKCMD_DOWN | IB_LINKINITCMD_NOP);
204 /* don't wait */
205 ret = 0;
206 goto bail;
207
208 case QIB_IB_LINKDOWN:
209 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
210 IB_LINKCMD_DOWN | IB_LINKINITCMD_POLL);
211 /* don't wait */
212 ret = 0;
213 goto bail;
214
215 case QIB_IB_LINKDOWN_SLEEP:
216 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
217 IB_LINKCMD_DOWN | IB_LINKINITCMD_SLEEP);
218 /* don't wait */
219 ret = 0;
220 goto bail;
221
222 case QIB_IB_LINKDOWN_DISABLE:
223 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
224 IB_LINKCMD_DOWN | IB_LINKINITCMD_DISABLE);
225 /* don't wait */
226 ret = 0;
227 goto bail;
228
229 case QIB_IB_LINKARM:
230 if (ppd->lflags & QIBL_LINKARMED) {
231 ret = 0;
232 goto bail;
233 }
234 if (!(ppd->lflags & (QIBL_LINKINIT | QIBL_LINKACTIVE))) {
235 ret = -EINVAL;
236 goto bail;
237 }
238 /*
239 * Since the port can be ACTIVE when we ask for ARMED,
240 * clear QIBL_LINKV so we can wait for a transition.
241 * If the link isn't ARMED, then something else happened
242 * and there is no point waiting for ARMED.
243 */
244 spin_lock_irqsave(&ppd->lflags_lock, flags);
245 ppd->lflags &= ~QIBL_LINKV;
246 spin_unlock_irqrestore(&ppd->lflags_lock, flags);
247 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
248 IB_LINKCMD_ARMED | IB_LINKINITCMD_NOP);
249 lstate = QIBL_LINKV;
250 break;
251
252 case QIB_IB_LINKACTIVE:
253 if (ppd->lflags & QIBL_LINKACTIVE) {
254 ret = 0;
255 goto bail;
256 }
257 if (!(ppd->lflags & QIBL_LINKARMED)) {
258 ret = -EINVAL;
259 goto bail;
260 }
261 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
262 IB_LINKCMD_ACTIVE | IB_LINKINITCMD_NOP);
263 lstate = QIBL_LINKACTIVE;
264 break;
265
266 default:
267 ret = -EINVAL;
268 goto bail;
269 }
270 ret = qib_wait_linkstate(ppd, lstate, 10);
271
272bail:
273 return ret;
274}
275
276/*
277 * Get address of eager buffer from it's index (allocated in chunks, not
278 * contiguous).
279 */
280static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail)
281{
282 const u32 chunk = etail >> rcd->rcvegrbufs_perchunk_shift;
283 const u32 idx = etail & ((u32)rcd->rcvegrbufs_perchunk - 1);
284
285 return rcd->rcvegrbuf[chunk] + (idx << rcd->dd->rcvegrbufsize_shift);
286}
287
288/*
289 * Returns 1 if error was a CRC, else 0.
290 * Needed for some chip's synthesized error counters.
291 */
292static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
293 u32 ctxt, u32 eflags, u32 l, u32 etail,
294 __le32 *rhf_addr, struct qib_message_header *rhdr)
295{
296 u32 ret = 0;
297
298 if (eflags & (QLOGIC_IB_RHF_H_ICRCERR | QLOGIC_IB_RHF_H_VCRCERR))
299 ret = 1;
300 else if (eflags == QLOGIC_IB_RHF_H_TIDERR) {
301 /* For TIDERR and RC QPs premptively schedule a NAK */
302 struct ib_header *hdr = (struct ib_header *)rhdr;
303 struct ib_other_headers *ohdr = NULL;
304 struct qib_ibport *ibp = &ppd->ibport_data;
305 struct qib_devdata *dd = ppd->dd;
306 struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
307 struct rvt_qp *qp = NULL;
308 u32 tlen = qib_hdrget_length_in_bytes(rhf_addr);
309 u16 lid = be16_to_cpu(hdr->lrh[1]);
310 int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
311 u32 qp_num;
312 u32 opcode;
313 u32 psn;
314 int diff;
315
316 /* Sanity check packet */
317 if (tlen < 24)
318 goto drop;
319
320 if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
321 lid &= ~((1 << ppd->lmc) - 1);
322 if (unlikely(lid != ppd->lid))
323 goto drop;
324 }
325
326 /* Check for GRH */
327 if (lnh == QIB_LRH_BTH)
328 ohdr = &hdr->u.oth;
329 else if (lnh == QIB_LRH_GRH) {
330 u32 vtf;
331
332 ohdr = &hdr->u.l.oth;
333 if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
334 goto drop;
335 vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
336 if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
337 goto drop;
338 } else
339 goto drop;
340
341 /* Get opcode and PSN from packet */
342 opcode = be32_to_cpu(ohdr->bth[0]);
343 opcode >>= 24;
344 psn = be32_to_cpu(ohdr->bth[2]);
345
346 /* Get the destination QP number. */
347 qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
348 if (qp_num != QIB_MULTICAST_QPN) {
349 int ruc_res;
350
351 rcu_read_lock();
352 qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
353 if (!qp) {
354 rcu_read_unlock();
355 goto drop;
356 }
357
358 /*
359 * Handle only RC QPs - for other QP types drop error
360 * packet.
361 */
362 spin_lock(&qp->r_lock);
363
364 /* Check for valid receive state. */
365 if (!(ib_rvt_state_ops[qp->state] &
366 RVT_PROCESS_RECV_OK)) {
367 ibp->rvp.n_pkt_drops++;
368 goto unlock;
369 }
370
371 switch (qp->ibqp.qp_type) {
372 case IB_QPT_RC:
373 ruc_res =
374 qib_ruc_check_hdr(
375 ibp, hdr,
376 lnh == QIB_LRH_GRH,
377 qp,
378 be32_to_cpu(ohdr->bth[0]));
379 if (ruc_res)
380 goto unlock;
381
382 /* Only deal with RDMA Writes for now */
383 if (opcode <
384 IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
385 diff = qib_cmp24(psn, qp->r_psn);
386 if (!qp->r_nak_state && diff >= 0) {
387 ibp->rvp.n_rc_seqnak++;
388 qp->r_nak_state =
389 IB_NAK_PSN_ERROR;
390 /* Use the expected PSN. */
391 qp->r_ack_psn = qp->r_psn;
392 /*
393 * Wait to send the sequence
394 * NAK until all packets
395 * in the receive queue have
396 * been processed.
397 * Otherwise, we end up
398 * propagating congestion.
399 */
400 if (list_empty(&qp->rspwait)) {
401 qp->r_flags |=
402 RVT_R_RSP_NAK;
403 rvt_get_qp(qp);
404 list_add_tail(
405 &qp->rspwait,
406 &rcd->qp_wait_list);
407 }
408 } /* Out of sequence NAK */
409 } /* QP Request NAKs */
410 break;
411 case IB_QPT_SMI:
412 case IB_QPT_GSI:
413 case IB_QPT_UD:
414 case IB_QPT_UC:
415 default:
416 /* For now don't handle any other QP types */
417 break;
418 }
419
420unlock:
421 spin_unlock(&qp->r_lock);
422 rcu_read_unlock();
423 } /* Unicast QP */
424 } /* Valid packet with TIDErr */
425
426drop:
427 return ret;
428}
429
430/*
431 * qib_kreceive - receive a packet
432 * @rcd: the qlogic_ib context
433 * @llic: gets count of good packets needed to clear lli,
434 * (used with chips that need need to track crcs for lli)
435 *
436 * called from interrupt handler for errors or receive interrupt
437 * Returns number of CRC error packets, needed by some chips for
438 * local link integrity tracking. crcs are adjusted down by following
439 * good packets, if any, and count of good packets is also tracked.
440 */
441u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
442{
443 struct qib_devdata *dd = rcd->dd;
444 struct qib_pportdata *ppd = rcd->ppd;
445 __le32 *rhf_addr;
446 void *ebuf;
447 const u32 rsize = dd->rcvhdrentsize; /* words */
448 const u32 maxcnt = dd->rcvhdrcnt * rsize; /* words */
449 u32 etail = -1, l, hdrqtail;
450 struct qib_message_header *hdr;
451 u32 eflags, etype, tlen, i = 0, updegr = 0, crcs = 0;
452 int last;
453 u64 lval;
454 struct rvt_qp *qp, *nqp;
455
456 l = rcd->head;
457 rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
458 if (dd->flags & QIB_NODMA_RTAIL) {
459 u32 seq = qib_hdrget_seq(rhf_addr);
460
461 if (seq != rcd->seq_cnt)
462 goto bail;
463 hdrqtail = 0;
464 } else {
465 hdrqtail = qib_get_rcvhdrtail(rcd);
466 if (l == hdrqtail)
467 goto bail;
468 smp_rmb(); /* prevent speculative reads of dma'ed hdrq */
469 }
470
471 for (last = 0, i = 1; !last; i += !last) {
472 hdr = dd->f_get_msgheader(dd, rhf_addr);
473 eflags = qib_hdrget_err_flags(rhf_addr);
474 etype = qib_hdrget_rcv_type(rhf_addr);
475 /* total length */
476 tlen = qib_hdrget_length_in_bytes(rhf_addr);
477 ebuf = NULL;
478 if ((dd->flags & QIB_NODMA_RTAIL) ?
479 qib_hdrget_use_egr_buf(rhf_addr) :
480 (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
481 etail = qib_hdrget_index(rhf_addr);
482 updegr = 1;
483 if (tlen > sizeof(*hdr) ||
484 etype >= RCVHQ_RCV_TYPE_NON_KD) {
485 ebuf = qib_get_egrbuf(rcd, etail);
486 prefetch_range(ebuf, tlen - sizeof(*hdr));
487 }
488 }
489 if (!eflags) {
490 u16 lrh_len = be16_to_cpu(hdr->lrh[2]) << 2;
491
492 if (lrh_len != tlen) {
493 qib_stats.sps_lenerrs++;
494 goto move_along;
495 }
496 }
497 if (etype == RCVHQ_RCV_TYPE_NON_KD && !eflags &&
498 ebuf == NULL &&
499 tlen > (dd->rcvhdrentsize - 2 + 1 -
500 qib_hdrget_offset(rhf_addr)) << 2) {
501 goto move_along;
502 }
503
504 /*
505 * Both tiderr and qibhdrerr are set for all plain IB
506 * packets; only qibhdrerr should be set.
507 */
508 if (unlikely(eflags))
509 crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l,
510 etail, rhf_addr, hdr);
511 else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
512 qib_ib_rcv(rcd, hdr, ebuf, tlen);
513 if (crcs)
514 crcs--;
515 else if (llic && *llic)
516 --*llic;
517 }
518move_along:
519 l += rsize;
520 if (l >= maxcnt)
521 l = 0;
522 if (i == QIB_MAX_PKT_RECV)
523 last = 1;
524
525 rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
526 if (dd->flags & QIB_NODMA_RTAIL) {
527 u32 seq = qib_hdrget_seq(rhf_addr);
528
529 if (++rcd->seq_cnt > 13)
530 rcd->seq_cnt = 1;
531 if (seq != rcd->seq_cnt)
532 last = 1;
533 } else if (l == hdrqtail)
534 last = 1;
535 /*
536 * Update head regs etc., every 16 packets, if not last pkt,
537 * to help prevent rcvhdrq overflows, when many packets
538 * are processed and queue is nearly full.
539 * Don't request an interrupt for intermediate updates.
540 */
541 lval = l;
542 if (!last && !(i & 0xf)) {
543 dd->f_update_usrhead(rcd, lval, updegr, etail, i);
544 updegr = 0;
545 }
546 }
547
548 rcd->head = l;
549
550 /*
551 * Iterate over all QPs waiting to respond.
552 * The list won't change since the IRQ is only run on one CPU.
553 */
554 list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
555 list_del_init(&qp->rspwait);
556 if (qp->r_flags & RVT_R_RSP_NAK) {
557 qp->r_flags &= ~RVT_R_RSP_NAK;
558 qib_send_rc_ack(qp);
559 }
560 if (qp->r_flags & RVT_R_RSP_SEND) {
561 unsigned long flags;
562
563 qp->r_flags &= ~RVT_R_RSP_SEND;
564 spin_lock_irqsave(&qp->s_lock, flags);
565 if (ib_rvt_state_ops[qp->state] &
566 RVT_PROCESS_OR_FLUSH_SEND)
567 qib_schedule_send(qp);
568 spin_unlock_irqrestore(&qp->s_lock, flags);
569 }
570 rvt_put_qp(qp);
571 }
572
573bail:
574 /* Report number of packets consumed */
575 if (npkts)
576 *npkts = i;
577
578 /*
579 * Always write head at end, and setup rcv interrupt, even
580 * if no packets were processed.
581 */
582 lval = (u64)rcd->head | dd->rhdrhead_intr_off;
583 dd->f_update_usrhead(rcd, lval, updegr, etail, i);
584 return crcs;
585}
586
587/**
588 * qib_set_mtu - set the MTU
589 * @ppd: the perport data
590 * @arg: the new MTU
591 *
592 * We can handle "any" incoming size, the issue here is whether we
593 * need to restrict our outgoing size. For now, we don't do any
594 * sanity checking on this, and we don't deal with what happens to
595 * programs that are already running when the size changes.
596 * NOTE: changing the MTU will usually cause the IBC to go back to
597 * link INIT state...
598 */
599int qib_set_mtu(struct qib_pportdata *ppd, u16 arg)
600{
601 u32 piosize;
602 int ret, chk;
603
604 if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
605 arg != 4096) {
606 ret = -EINVAL;
607 goto bail;
608 }
609 chk = ib_mtu_enum_to_int(qib_ibmtu);
610 if (chk > 0 && arg > chk) {
611 ret = -EINVAL;
612 goto bail;
613 }
614
615 piosize = ppd->ibmaxlen;
616 ppd->ibmtu = arg;
617
618 if (arg >= (piosize - QIB_PIO_MAXIBHDR)) {
619 /* Only if it's not the initial value (or reset to it) */
620 if (piosize != ppd->init_ibmaxlen) {
621 if (arg > piosize && arg <= ppd->init_ibmaxlen)
622 piosize = ppd->init_ibmaxlen - 2 * sizeof(u32);
623 ppd->ibmaxlen = piosize;
624 }
625 } else if ((arg + QIB_PIO_MAXIBHDR) != ppd->ibmaxlen) {
626 piosize = arg + QIB_PIO_MAXIBHDR - 2 * sizeof(u32);
627 ppd->ibmaxlen = piosize;
628 }
629
630 ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_MTU, 0);
631
632 ret = 0;
633
634bail:
635 return ret;
636}
637
638int qib_set_lid(struct qib_pportdata *ppd, u32 lid, u8 lmc)
639{
640 struct qib_devdata *dd = ppd->dd;
641
642 ppd->lid = lid;
643 ppd->lmc = lmc;
644
645 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LIDLMC,
646 lid | (~((1U << lmc) - 1)) << 16);
647
648 qib_devinfo(dd->pcidev, "IB%u:%u got a lid: 0x%x\n",
649 dd->unit, ppd->port, lid);
650
651 return 0;
652}
653
654/*
655 * Following deal with the "obviously simple" task of overriding the state
656 * of the LEDS, which normally indicate link physical and logical status.
657 * The complications arise in dealing with different hardware mappings
658 * and the board-dependent routine being called from interrupts.
659 * and then there's the requirement to _flash_ them.
660 */
661#define LED_OVER_FREQ_SHIFT 8
662#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
663/* Below is "non-zero" to force override, but both actual LEDs are off */
664#define LED_OVER_BOTH_OFF (8)
665
666static void qib_run_led_override(struct timer_list *t)
667{
668 struct qib_pportdata *ppd = from_timer(ppd, t,
669 led_override_timer);
670 struct qib_devdata *dd = ppd->dd;
671 int timeoff;
672 int ph_idx;
673
674 if (!(dd->flags & QIB_INITTED))
675 return;
676
677 ph_idx = ppd->led_override_phase++ & 1;
678 ppd->led_override = ppd->led_override_vals[ph_idx];
679 timeoff = ppd->led_override_timeoff;
680
681 dd->f_setextled(ppd, 1);
682 /*
683 * don't re-fire the timer if user asked for it to be off; we let
684 * it fire one more time after they turn it off to simplify
685 */
686 if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
687 mod_timer(&ppd->led_override_timer, jiffies + timeoff);
688}
689
690void qib_set_led_override(struct qib_pportdata *ppd, unsigned int val)
691{
692 struct qib_devdata *dd = ppd->dd;
693 int timeoff, freq;
694
695 if (!(dd->flags & QIB_INITTED))
696 return;
697
698 /* First check if we are blinking. If not, use 1HZ polling */
699 timeoff = HZ;
700 freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
701
702 if (freq) {
703 /* For blink, set each phase from one nybble of val */
704 ppd->led_override_vals[0] = val & 0xF;
705 ppd->led_override_vals[1] = (val >> 4) & 0xF;
706 timeoff = (HZ << 4)/freq;
707 } else {
708 /* Non-blink set both phases the same. */
709 ppd->led_override_vals[0] = val & 0xF;
710 ppd->led_override_vals[1] = val & 0xF;
711 }
712 ppd->led_override_timeoff = timeoff;
713
714 /*
715 * If the timer has not already been started, do so. Use a "quick"
716 * timeout so the function will be called soon, to look at our request.
717 */
718 if (atomic_inc_return(&ppd->led_override_timer_active) == 1) {
719 /* Need to start timer */
720 timer_setup(&ppd->led_override_timer, qib_run_led_override, 0);
721 ppd->led_override_timer.expires = jiffies + 1;
722 add_timer(&ppd->led_override_timer);
723 } else {
724 if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
725 mod_timer(&ppd->led_override_timer, jiffies + 1);
726 atomic_dec(&ppd->led_override_timer_active);
727 }
728}
729
730/**
731 * qib_reset_device - reset the chip if possible
732 * @unit: the device to reset
733 *
734 * Whether or not reset is successful, we attempt to re-initialize the chip
735 * (that is, much like a driver unload/reload). We clear the INITTED flag
736 * so that the various entry points will fail until we reinitialize. For
737 * now, we only allow this if no user contexts are open that use chip resources
738 */
739int qib_reset_device(int unit)
740{
741 int ret, i;
742 struct qib_devdata *dd = qib_lookup(unit);
743 struct qib_pportdata *ppd;
744 unsigned long flags;
745 int pidx;
746
747 if (!dd) {
748 ret = -ENODEV;
749 goto bail;
750 }
751
752 qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit);
753
754 if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) {
755 qib_devinfo(dd->pcidev,
756 "Invalid unit number %u or not initialized or not present\n",
757 unit);
758 ret = -ENXIO;
759 goto bail;
760 }
761
762 spin_lock_irqsave(&dd->uctxt_lock, flags);
763 if (dd->rcd)
764 for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) {
765 if (!dd->rcd[i] || !dd->rcd[i]->cnt)
766 continue;
767 spin_unlock_irqrestore(&dd->uctxt_lock, flags);
768 ret = -EBUSY;
769 goto bail;
770 }
771 spin_unlock_irqrestore(&dd->uctxt_lock, flags);
772
773 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
774 ppd = dd->pport + pidx;
775 if (atomic_read(&ppd->led_override_timer_active)) {
776 /* Need to stop LED timer, _then_ shut off LEDs */
777 del_timer_sync(&ppd->led_override_timer);
778 atomic_set(&ppd->led_override_timer_active, 0);
779 }
780
781 /* Shut off LEDs after we are sure timer is not running */
782 ppd->led_override = LED_OVER_BOTH_OFF;
783 dd->f_setextled(ppd, 0);
784 if (dd->flags & QIB_HAS_SEND_DMA)
785 qib_teardown_sdma(ppd);
786 }
787
788 ret = dd->f_reset(dd);
789 if (ret == 1)
790 ret = qib_init(dd, 1);
791 else
792 ret = -EAGAIN;
793 if (ret)
794 qib_dev_err(dd,
795 "Reinitialize unit %u after reset failed with %d\n",
796 unit, ret);
797 else
798 qib_devinfo(dd->pcidev,
799 "Reinitialized unit %u after resetting\n",
800 unit);
801
802bail:
803 return ret;
804}
1/*
2 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/spinlock.h>
35#include <linux/pci.h>
36#include <linux/io.h>
37#include <linux/delay.h>
38#include <linux/netdevice.h>
39#include <linux/vmalloc.h>
40#include <linux/module.h>
41#include <linux/prefetch.h>
42
43#include "qib.h"
44
45/*
46 * The size has to be longer than this string, so we can append
47 * board/chip information to it in the init code.
48 */
49const char ib_qib_version[] = QIB_IDSTR "\n";
50
51DEFINE_SPINLOCK(qib_devs_lock);
52LIST_HEAD(qib_dev_list);
53DEFINE_MUTEX(qib_mutex); /* general driver use */
54
55unsigned qib_ibmtu;
56module_param_named(ibmtu, qib_ibmtu, uint, S_IRUGO);
57MODULE_PARM_DESC(ibmtu, "Set max IB MTU (0=2KB, 1=256, 2=512, ... 5=4096");
58
59unsigned qib_compat_ddr_negotiate = 1;
60module_param_named(compat_ddr_negotiate, qib_compat_ddr_negotiate, uint,
61 S_IWUSR | S_IRUGO);
62MODULE_PARM_DESC(compat_ddr_negotiate,
63 "Attempt pre-IBTA 1.2 DDR speed negotiation");
64
65MODULE_LICENSE("Dual BSD/GPL");
66MODULE_AUTHOR("QLogic <support@qlogic.com>");
67MODULE_DESCRIPTION("QLogic IB driver");
68
69/*
70 * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our
71 * PIO send buffers. This is well beyond anything currently
72 * defined in the InfiniBand spec.
73 */
74#define QIB_PIO_MAXIBHDR 128
75
76/*
77 * QIB_MAX_PKT_RCV is the max # if packets processed per receive interrupt.
78 */
79#define QIB_MAX_PKT_RECV 64
80
81struct qlogic_ib_stats qib_stats;
82
/*
 * Format the name of a unit as "infinipath<unit>".
 *
 * The result lives in a single static 16-byte buffer: it is overwritten by
 * the next call and longer names are truncated by snprintf().
 */
const char *qib_get_unit_name(int unit)
{
	static char unit_name[16];

	snprintf(unit_name, sizeof(unit_name), "infinipath%u", unit);

	return unit_name;
}
90
91/*
92 * Return count of units with at least one port ACTIVE.
93 */
94int qib_count_active_units(void)
95{
96 struct qib_devdata *dd;
97 struct qib_pportdata *ppd;
98 unsigned long flags;
99 int pidx, nunits_active = 0;
100
101 spin_lock_irqsave(&qib_devs_lock, flags);
102 list_for_each_entry(dd, &qib_dev_list, list) {
103 if (!(dd->flags & QIB_PRESENT) || !dd->kregbase)
104 continue;
105 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
106 ppd = dd->pport + pidx;
107 if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT |
108 QIBL_LINKARMED | QIBL_LINKACTIVE))) {
109 nunits_active++;
110 break;
111 }
112 }
113 }
114 spin_unlock_irqrestore(&qib_devs_lock, flags);
115 return nunits_active;
116}
117
118/*
119 * Return count of all units, optionally return in arguments
120 * the number of usable (present) units, and the number of
121 * ports that are up.
122 */
123int qib_count_units(int *npresentp, int *nupp)
124{
125 int nunits = 0, npresent = 0, nup = 0;
126 struct qib_devdata *dd;
127 unsigned long flags;
128 int pidx;
129 struct qib_pportdata *ppd;
130
131 spin_lock_irqsave(&qib_devs_lock, flags);
132
133 list_for_each_entry(dd, &qib_dev_list, list) {
134 nunits++;
135 if ((dd->flags & QIB_PRESENT) && dd->kregbase)
136 npresent++;
137 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
138 ppd = dd->pport + pidx;
139 if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT |
140 QIBL_LINKARMED | QIBL_LINKACTIVE)))
141 nup++;
142 }
143 }
144
145 spin_unlock_irqrestore(&qib_devs_lock, flags);
146
147 if (npresentp)
148 *npresentp = npresent;
149 if (nupp)
150 *nupp = nup;
151
152 return nunits;
153}
154
155/**
156 * qib_wait_linkstate - wait for an IB link state change to occur
157 * @dd: the qlogic_ib device
158 * @state: the state to wait for
159 * @msecs: the number of milliseconds to wait
160 *
161 * wait up to msecs milliseconds for IB link state change to occur for
162 * now, take the easy polling route. Currently used only by
163 * qib_set_linkstate. Returns 0 if state reached, otherwise
164 * -ETIMEDOUT state can have multiple states set, for any of several
165 * transitions.
166 */
167int qib_wait_linkstate(struct qib_pportdata *ppd, u32 state, int msecs)
168{
169 int ret;
170 unsigned long flags;
171
172 spin_lock_irqsave(&ppd->lflags_lock, flags);
173 if (ppd->state_wanted) {
174 spin_unlock_irqrestore(&ppd->lflags_lock, flags);
175 ret = -EBUSY;
176 goto bail;
177 }
178 ppd->state_wanted = state;
179 spin_unlock_irqrestore(&ppd->lflags_lock, flags);
180 wait_event_interruptible_timeout(ppd->state_wait,
181 (ppd->lflags & state),
182 msecs_to_jiffies(msecs));
183 spin_lock_irqsave(&ppd->lflags_lock, flags);
184 ppd->state_wanted = 0;
185 spin_unlock_irqrestore(&ppd->lflags_lock, flags);
186
187 if (!(ppd->lflags & state))
188 ret = -ETIMEDOUT;
189 else
190 ret = 0;
191bail:
192 return ret;
193}
194
195int qib_set_linkstate(struct qib_pportdata *ppd, u8 newstate)
196{
197 u32 lstate;
198 int ret;
199 struct qib_devdata *dd = ppd->dd;
200 unsigned long flags;
201
202 switch (newstate) {
203 case QIB_IB_LINKDOWN_ONLY:
204 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
205 IB_LINKCMD_DOWN | IB_LINKINITCMD_NOP);
206 /* don't wait */
207 ret = 0;
208 goto bail;
209
210 case QIB_IB_LINKDOWN:
211 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
212 IB_LINKCMD_DOWN | IB_LINKINITCMD_POLL);
213 /* don't wait */
214 ret = 0;
215 goto bail;
216
217 case QIB_IB_LINKDOWN_SLEEP:
218 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
219 IB_LINKCMD_DOWN | IB_LINKINITCMD_SLEEP);
220 /* don't wait */
221 ret = 0;
222 goto bail;
223
224 case QIB_IB_LINKDOWN_DISABLE:
225 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
226 IB_LINKCMD_DOWN | IB_LINKINITCMD_DISABLE);
227 /* don't wait */
228 ret = 0;
229 goto bail;
230
231 case QIB_IB_LINKARM:
232 if (ppd->lflags & QIBL_LINKARMED) {
233 ret = 0;
234 goto bail;
235 }
236 if (!(ppd->lflags & (QIBL_LINKINIT | QIBL_LINKACTIVE))) {
237 ret = -EINVAL;
238 goto bail;
239 }
240 /*
241 * Since the port can be ACTIVE when we ask for ARMED,
242 * clear QIBL_LINKV so we can wait for a transition.
243 * If the link isn't ARMED, then something else happened
244 * and there is no point waiting for ARMED.
245 */
246 spin_lock_irqsave(&ppd->lflags_lock, flags);
247 ppd->lflags &= ~QIBL_LINKV;
248 spin_unlock_irqrestore(&ppd->lflags_lock, flags);
249 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
250 IB_LINKCMD_ARMED | IB_LINKINITCMD_NOP);
251 lstate = QIBL_LINKV;
252 break;
253
254 case QIB_IB_LINKACTIVE:
255 if (ppd->lflags & QIBL_LINKACTIVE) {
256 ret = 0;
257 goto bail;
258 }
259 if (!(ppd->lflags & QIBL_LINKARMED)) {
260 ret = -EINVAL;
261 goto bail;
262 }
263 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
264 IB_LINKCMD_ACTIVE | IB_LINKINITCMD_NOP);
265 lstate = QIBL_LINKACTIVE;
266 break;
267
268 default:
269 ret = -EINVAL;
270 goto bail;
271 }
272 ret = qib_wait_linkstate(ppd, lstate, 10);
273
274bail:
275 return ret;
276}
277
278/*
279 * Get address of eager buffer from it's index (allocated in chunks, not
280 * contiguous).
281 */
282static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail)
283{
284 const u32 chunk = etail >> rcd->rcvegrbufs_perchunk_shift;
285 const u32 idx = etail & ((u32)rcd->rcvegrbufs_perchunk - 1);
286
287 return rcd->rcvegrbuf[chunk] + (idx << rcd->dd->rcvegrbufsize_shift);
288}
289
290/*
291 * Returns 1 if error was a CRC, else 0.
292 * Needed for some chip's synthesized error counters.
293 */
294static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
295 u32 ctxt, u32 eflags, u32 l, u32 etail,
296 __le32 *rhf_addr, struct qib_message_header *rhdr)
297{
298 u32 ret = 0;
299
300 if (eflags & (QLOGIC_IB_RHF_H_ICRCERR | QLOGIC_IB_RHF_H_VCRCERR))
301 ret = 1;
302 else if (eflags == QLOGIC_IB_RHF_H_TIDERR) {
303 /* For TIDERR and RC QPs premptively schedule a NAK */
304 struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
305 struct qib_other_headers *ohdr = NULL;
306 struct qib_ibport *ibp = &ppd->ibport_data;
307 struct qib_qp *qp = NULL;
308 u32 tlen = qib_hdrget_length_in_bytes(rhf_addr);
309 u16 lid = be16_to_cpu(hdr->lrh[1]);
310 int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
311 u32 qp_num;
312 u32 opcode;
313 u32 psn;
314 int diff;
315
316 /* Sanity check packet */
317 if (tlen < 24)
318 goto drop;
319
320 if (lid < QIB_MULTICAST_LID_BASE) {
321 lid &= ~((1 << ppd->lmc) - 1);
322 if (unlikely(lid != ppd->lid))
323 goto drop;
324 }
325
326 /* Check for GRH */
327 if (lnh == QIB_LRH_BTH)
328 ohdr = &hdr->u.oth;
329 else if (lnh == QIB_LRH_GRH) {
330 u32 vtf;
331
332 ohdr = &hdr->u.l.oth;
333 if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
334 goto drop;
335 vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
336 if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
337 goto drop;
338 } else
339 goto drop;
340
341 /* Get opcode and PSN from packet */
342 opcode = be32_to_cpu(ohdr->bth[0]);
343 opcode >>= 24;
344 psn = be32_to_cpu(ohdr->bth[2]);
345
346 /* Get the destination QP number. */
347 qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
348 if (qp_num != QIB_MULTICAST_QPN) {
349 int ruc_res;
350 qp = qib_lookup_qpn(ibp, qp_num);
351 if (!qp)
352 goto drop;
353
354 /*
355 * Handle only RC QPs - for other QP types drop error
356 * packet.
357 */
358 spin_lock(&qp->r_lock);
359
360 /* Check for valid receive state. */
361 if (!(ib_qib_state_ops[qp->state] &
362 QIB_PROCESS_RECV_OK)) {
363 ibp->n_pkt_drops++;
364 goto unlock;
365 }
366
367 switch (qp->ibqp.qp_type) {
368 case IB_QPT_RC:
369 ruc_res =
370 qib_ruc_check_hdr(
371 ibp, hdr,
372 lnh == QIB_LRH_GRH,
373 qp,
374 be32_to_cpu(ohdr->bth[0]));
375 if (ruc_res)
376 goto unlock;
377
378 /* Only deal with RDMA Writes for now */
379 if (opcode <
380 IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
381 diff = qib_cmp24(psn, qp->r_psn);
382 if (!qp->r_nak_state && diff >= 0) {
383 ibp->n_rc_seqnak++;
384 qp->r_nak_state =
385 IB_NAK_PSN_ERROR;
386 /* Use the expected PSN. */
387 qp->r_ack_psn = qp->r_psn;
388 /*
389 * Wait to send the sequence
390 * NAK until all packets
391 * in the receive queue have
392 * been processed.
393 * Otherwise, we end up
394 * propagating congestion.
395 */
396 if (list_empty(&qp->rspwait)) {
397 qp->r_flags |=
398 QIB_R_RSP_NAK;
399 atomic_inc(
400 &qp->refcount);
401 list_add_tail(
402 &qp->rspwait,
403 &rcd->qp_wait_list);
404 }
405 } /* Out of sequence NAK */
406 } /* QP Request NAKs */
407 break;
408 case IB_QPT_SMI:
409 case IB_QPT_GSI:
410 case IB_QPT_UD:
411 case IB_QPT_UC:
412 default:
413 /* For now don't handle any other QP types */
414 break;
415 }
416
417unlock:
418 spin_unlock(&qp->r_lock);
419 /*
420 * Notify qib_destroy_qp() if it is waiting
421 * for us to finish.
422 */
423 if (atomic_dec_and_test(&qp->refcount))
424 wake_up(&qp->wait);
425 } /* Unicast QP */
426 } /* Valid packet with TIDErr */
427
428drop:
429 return ret;
430}
431
432/*
433 * qib_kreceive - receive a packet
434 * @rcd: the qlogic_ib context
435 * @llic: gets count of good packets needed to clear lli,
436 * (used with chips that need need to track crcs for lli)
437 *
438 * called from interrupt handler for errors or receive interrupt
439 * Returns number of CRC error packets, needed by some chips for
440 * local link integrity tracking. crcs are adjusted down by following
441 * good packets, if any, and count of good packets is also tracked.
442 */
443u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
444{
445 struct qib_devdata *dd = rcd->dd;
446 struct qib_pportdata *ppd = rcd->ppd;
447 __le32 *rhf_addr;
448 void *ebuf;
449 const u32 rsize = dd->rcvhdrentsize; /* words */
450 const u32 maxcnt = dd->rcvhdrcnt * rsize; /* words */
451 u32 etail = -1, l, hdrqtail;
452 struct qib_message_header *hdr;
453 u32 eflags, etype, tlen, i = 0, updegr = 0, crcs = 0;
454 int last;
455 u64 lval;
456 struct qib_qp *qp, *nqp;
457
458 l = rcd->head;
459 rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
460 if (dd->flags & QIB_NODMA_RTAIL) {
461 u32 seq = qib_hdrget_seq(rhf_addr);
462 if (seq != rcd->seq_cnt)
463 goto bail;
464 hdrqtail = 0;
465 } else {
466 hdrqtail = qib_get_rcvhdrtail(rcd);
467 if (l == hdrqtail)
468 goto bail;
469 smp_rmb(); /* prevent speculative reads of dma'ed hdrq */
470 }
471
472 for (last = 0, i = 1; !last; i += !last) {
473 hdr = dd->f_get_msgheader(dd, rhf_addr);
474 eflags = qib_hdrget_err_flags(rhf_addr);
475 etype = qib_hdrget_rcv_type(rhf_addr);
476 /* total length */
477 tlen = qib_hdrget_length_in_bytes(rhf_addr);
478 ebuf = NULL;
479 if ((dd->flags & QIB_NODMA_RTAIL) ?
480 qib_hdrget_use_egr_buf(rhf_addr) :
481 (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
482 etail = qib_hdrget_index(rhf_addr);
483 updegr = 1;
484 if (tlen > sizeof(*hdr) ||
485 etype >= RCVHQ_RCV_TYPE_NON_KD) {
486 ebuf = qib_get_egrbuf(rcd, etail);
487 prefetch_range(ebuf, tlen - sizeof(*hdr));
488 }
489 }
490 if (!eflags) {
491 u16 lrh_len = be16_to_cpu(hdr->lrh[2]) << 2;
492
493 if (lrh_len != tlen) {
494 qib_stats.sps_lenerrs++;
495 goto move_along;
496 }
497 }
498 if (etype == RCVHQ_RCV_TYPE_NON_KD && !eflags &&
499 ebuf == NULL &&
500 tlen > (dd->rcvhdrentsize - 2 + 1 -
501 qib_hdrget_offset(rhf_addr)) << 2) {
502 goto move_along;
503 }
504
505 /*
506 * Both tiderr and qibhdrerr are set for all plain IB
507 * packets; only qibhdrerr should be set.
508 */
509 if (unlikely(eflags))
510 crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l,
511 etail, rhf_addr, hdr);
512 else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
513 qib_ib_rcv(rcd, hdr, ebuf, tlen);
514 if (crcs)
515 crcs--;
516 else if (llic && *llic)
517 --*llic;
518 }
519move_along:
520 l += rsize;
521 if (l >= maxcnt)
522 l = 0;
523 if (i == QIB_MAX_PKT_RECV)
524 last = 1;
525
526 rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
527 if (dd->flags & QIB_NODMA_RTAIL) {
528 u32 seq = qib_hdrget_seq(rhf_addr);
529
530 if (++rcd->seq_cnt > 13)
531 rcd->seq_cnt = 1;
532 if (seq != rcd->seq_cnt)
533 last = 1;
534 } else if (l == hdrqtail)
535 last = 1;
536 /*
537 * Update head regs etc., every 16 packets, if not last pkt,
538 * to help prevent rcvhdrq overflows, when many packets
539 * are processed and queue is nearly full.
540 * Don't request an interrupt for intermediate updates.
541 */
542 lval = l;
543 if (!last && !(i & 0xf)) {
544 dd->f_update_usrhead(rcd, lval, updegr, etail, i);
545 updegr = 0;
546 }
547 }
548 /*
549 * Notify qib_destroy_qp() if it is waiting
550 * for lookaside_qp to finish.
551 */
552 if (rcd->lookaside_qp) {
553 if (atomic_dec_and_test(&rcd->lookaside_qp->refcount))
554 wake_up(&rcd->lookaside_qp->wait);
555 rcd->lookaside_qp = NULL;
556 }
557
558 rcd->head = l;
559 rcd->pkt_count += i;
560
561 /*
562 * Iterate over all QPs waiting to respond.
563 * The list won't change since the IRQ is only run on one CPU.
564 */
565 list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
566 list_del_init(&qp->rspwait);
567 if (qp->r_flags & QIB_R_RSP_NAK) {
568 qp->r_flags &= ~QIB_R_RSP_NAK;
569 qib_send_rc_ack(qp);
570 }
571 if (qp->r_flags & QIB_R_RSP_SEND) {
572 unsigned long flags;
573
574 qp->r_flags &= ~QIB_R_RSP_SEND;
575 spin_lock_irqsave(&qp->s_lock, flags);
576 if (ib_qib_state_ops[qp->state] &
577 QIB_PROCESS_OR_FLUSH_SEND)
578 qib_schedule_send(qp);
579 spin_unlock_irqrestore(&qp->s_lock, flags);
580 }
581 if (atomic_dec_and_test(&qp->refcount))
582 wake_up(&qp->wait);
583 }
584
585bail:
586 /* Report number of packets consumed */
587 if (npkts)
588 *npkts = i;
589
590 /*
591 * Always write head at end, and setup rcv interrupt, even
592 * if no packets were processed.
593 */
594 lval = (u64)rcd->head | dd->rhdrhead_intr_off;
595 dd->f_update_usrhead(rcd, lval, updegr, etail, i);
596 return crcs;
597}
598
599/**
600 * qib_set_mtu - set the MTU
601 * @ppd: the perport data
602 * @arg: the new MTU
603 *
604 * We can handle "any" incoming size, the issue here is whether we
605 * need to restrict our outgoing size. For now, we don't do any
606 * sanity checking on this, and we don't deal with what happens to
607 * programs that are already running when the size changes.
608 * NOTE: changing the MTU will usually cause the IBC to go back to
609 * link INIT state...
610 */
611int qib_set_mtu(struct qib_pportdata *ppd, u16 arg)
612{
613 u32 piosize;
614 int ret, chk;
615
616 if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
617 arg != 4096) {
618 ret = -EINVAL;
619 goto bail;
620 }
621 chk = ib_mtu_enum_to_int(qib_ibmtu);
622 if (chk > 0 && arg > chk) {
623 ret = -EINVAL;
624 goto bail;
625 }
626
627 piosize = ppd->ibmaxlen;
628 ppd->ibmtu = arg;
629
630 if (arg >= (piosize - QIB_PIO_MAXIBHDR)) {
631 /* Only if it's not the initial value (or reset to it) */
632 if (piosize != ppd->init_ibmaxlen) {
633 if (arg > piosize && arg <= ppd->init_ibmaxlen)
634 piosize = ppd->init_ibmaxlen - 2 * sizeof(u32);
635 ppd->ibmaxlen = piosize;
636 }
637 } else if ((arg + QIB_PIO_MAXIBHDR) != ppd->ibmaxlen) {
638 piosize = arg + QIB_PIO_MAXIBHDR - 2 * sizeof(u32);
639 ppd->ibmaxlen = piosize;
640 }
641
642 ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_MTU, 0);
643
644 ret = 0;
645
646bail:
647 return ret;
648}
649
650int qib_set_lid(struct qib_pportdata *ppd, u32 lid, u8 lmc)
651{
652 struct qib_devdata *dd = ppd->dd;
653 ppd->lid = lid;
654 ppd->lmc = lmc;
655
656 dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LIDLMC,
657 lid | (~((1U << lmc) - 1)) << 16);
658
659 qib_devinfo(dd->pcidev, "IB%u:%u got a lid: 0x%x\n",
660 dd->unit, ppd->port, lid);
661
662 return 0;
663}
664
665/*
666 * Following deal with the "obviously simple" task of overriding the state
667 * of the LEDS, which normally indicate link physical and logical status.
668 * The complications arise in dealing with different hardware mappings
669 * and the board-dependent routine being called from interrupts.
670 * and then there's the requirement to _flash_ them.
671 */
672#define LED_OVER_FREQ_SHIFT 8
673#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
674/* Below is "non-zero" to force override, but both actual LEDs are off */
675#define LED_OVER_BOTH_OFF (8)
676
677static void qib_run_led_override(unsigned long opaque)
678{
679 struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
680 struct qib_devdata *dd = ppd->dd;
681 int timeoff;
682 int ph_idx;
683
684 if (!(dd->flags & QIB_INITTED))
685 return;
686
687 ph_idx = ppd->led_override_phase++ & 1;
688 ppd->led_override = ppd->led_override_vals[ph_idx];
689 timeoff = ppd->led_override_timeoff;
690
691 dd->f_setextled(ppd, 1);
692 /*
693 * don't re-fire the timer if user asked for it to be off; we let
694 * it fire one more time after they turn it off to simplify
695 */
696 if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
697 mod_timer(&ppd->led_override_timer, jiffies + timeoff);
698}
699
700void qib_set_led_override(struct qib_pportdata *ppd, unsigned int val)
701{
702 struct qib_devdata *dd = ppd->dd;
703 int timeoff, freq;
704
705 if (!(dd->flags & QIB_INITTED))
706 return;
707
708 /* First check if we are blinking. If not, use 1HZ polling */
709 timeoff = HZ;
710 freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
711
712 if (freq) {
713 /* For blink, set each phase from one nybble of val */
714 ppd->led_override_vals[0] = val & 0xF;
715 ppd->led_override_vals[1] = (val >> 4) & 0xF;
716 timeoff = (HZ << 4)/freq;
717 } else {
718 /* Non-blink set both phases the same. */
719 ppd->led_override_vals[0] = val & 0xF;
720 ppd->led_override_vals[1] = val & 0xF;
721 }
722 ppd->led_override_timeoff = timeoff;
723
724 /*
725 * If the timer has not already been started, do so. Use a "quick"
726 * timeout so the function will be called soon, to look at our request.
727 */
728 if (atomic_inc_return(&ppd->led_override_timer_active) == 1) {
729 /* Need to start timer */
730 init_timer(&ppd->led_override_timer);
731 ppd->led_override_timer.function = qib_run_led_override;
732 ppd->led_override_timer.data = (unsigned long) ppd;
733 ppd->led_override_timer.expires = jiffies + 1;
734 add_timer(&ppd->led_override_timer);
735 } else {
736 if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
737 mod_timer(&ppd->led_override_timer, jiffies + 1);
738 atomic_dec(&ppd->led_override_timer_active);
739 }
740}
741
742/**
743 * qib_reset_device - reset the chip if possible
744 * @unit: the device to reset
745 *
746 * Whether or not reset is successful, we attempt to re-initialize the chip
747 * (that is, much like a driver unload/reload). We clear the INITTED flag
748 * so that the various entry points will fail until we reinitialize. For
749 * now, we only allow this if no user contexts are open that use chip resources
750 */
751int qib_reset_device(int unit)
752{
753 int ret, i;
754 struct qib_devdata *dd = qib_lookup(unit);
755 struct qib_pportdata *ppd;
756 unsigned long flags;
757 int pidx;
758
759 if (!dd) {
760 ret = -ENODEV;
761 goto bail;
762 }
763
764 qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit);
765
766 if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) {
767 qib_devinfo(dd->pcidev, "Invalid unit number %u or "
768 "not initialized or not present\n", unit);
769 ret = -ENXIO;
770 goto bail;
771 }
772
773 spin_lock_irqsave(&dd->uctxt_lock, flags);
774 if (dd->rcd)
775 for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) {
776 if (!dd->rcd[i] || !dd->rcd[i]->cnt)
777 continue;
778 spin_unlock_irqrestore(&dd->uctxt_lock, flags);
779 ret = -EBUSY;
780 goto bail;
781 }
782 spin_unlock_irqrestore(&dd->uctxt_lock, flags);
783
784 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
785 ppd = dd->pport + pidx;
786 if (atomic_read(&ppd->led_override_timer_active)) {
787 /* Need to stop LED timer, _then_ shut off LEDs */
788 del_timer_sync(&ppd->led_override_timer);
789 atomic_set(&ppd->led_override_timer_active, 0);
790 }
791
792 /* Shut off LEDs after we are sure timer is not running */
793 ppd->led_override = LED_OVER_BOTH_OFF;
794 dd->f_setextled(ppd, 0);
795 if (dd->flags & QIB_HAS_SEND_DMA)
796 qib_teardown_sdma(ppd);
797 }
798
799 ret = dd->f_reset(dd);
800 if (ret == 1)
801 ret = qib_init(dd, 1);
802 else
803 ret = -EAGAIN;
804 if (ret)
805 qib_dev_err(dd, "Reinitialize unit %u after "
806 "reset failed with %d\n", unit, ret);
807 else
808 qib_devinfo(dd->pcidev, "Reinitialized unit %u after "
809 "resetting\n", unit);
810
811bail:
812 return ret;
813}