Linux Audio

Check our new training course

Linux debugging, profiling, tracing and performance analysis training

Mar 24-27, 2025, special US time zones
Register
Loading...
v6.8
  1/*
  2 * Copyright (c) 2021 Cornelis Networks. All rights reserved.
  3 * Copyright (c) 2013 Intel Corporation. All rights reserved.
  4 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
  5 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  6 *
  7 * This software is available to you under a choice of one of two
  8 * licenses.  You may choose to be licensed under the terms of the GNU
  9 * General Public License (GPL) Version 2, available from the file
 10 * COPYING in the main directory of this source tree, or the
 11 * OpenIB.org BSD license below:
 12 *
 13 *     Redistribution and use in source and binary forms, with or
 14 *     without modification, are permitted provided that the following
 15 *     conditions are met:
 16 *
 17 *      - Redistributions of source code must retain the above
 18 *        copyright notice, this list of conditions and the following
 19 *        disclaimer.
 20 *
 21 *      - Redistributions in binary form must reproduce the above
 22 *        copyright notice, this list of conditions and the following
 23 *        disclaimer in the documentation and/or other materials
 24 *        provided with the distribution.
 25 *
 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 33 * SOFTWARE.
 34 */
 35
 36#include <linux/spinlock.h>
 37#include <linux/pci.h>
 38#include <linux/io.h>
 39#include <linux/delay.h>
 40#include <linux/netdevice.h>
 41#include <linux/vmalloc.h>
 42#include <linux/module.h>
 43#include <linux/prefetch.h>
 44
 45#include "qib.h"
 46
 47/*
 48 * The size has to be longer than this string, so we can append
 49 * board/chip information to it in the init code.
 50 */
 51const char ib_qib_version[] = QIB_DRIVER_VERSION "\n";
 52
 
 
 53DEFINE_MUTEX(qib_mutex);	/* general driver use */
 54
/*
 * "ibmtu" module parameter (mode S_IRUGO).  Holds an IB MTU enum value;
 * qib_set_mtu() converts it with ib_mtu_enum_to_int() and refuses any
 * requested MTU larger than the result when that result is positive.
 */
unsigned qib_ibmtu;
module_param_named(ibmtu, qib_ibmtu, uint, S_IRUGO);
MODULE_PARM_DESC(ibmtu, "Set max IB MTU (0=2KB, 1=256, 2=512, ... 5=4096");

/*
 * "compat_ddr_negotiate" module parameter (mode S_IWUSR | S_IRUGO),
 * enabled (1) by default.
 */
unsigned qib_compat_ddr_negotiate = 1;
module_param_named(compat_ddr_negotiate, qib_compat_ddr_negotiate, uint,
		   S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(compat_ddr_negotiate,
		 "Attempt pre-IBTA 1.2 DDR speed negotiation");
 64
 65MODULE_LICENSE("Dual BSD/GPL");
 66MODULE_AUTHOR("Cornelis <support@cornelisnetworks.com>");
 67MODULE_DESCRIPTION("Cornelis IB driver");
 68
 69/*
 70 * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our
 71 * PIO send buffers.  This is well beyond anything currently
 72 * defined in the InfiniBand spec.
 73 */
 74#define QIB_PIO_MAXIBHDR 128
 75
/*
 * QIB_MAX_PKT_RECV is the max # of packets processed per receive interrupt
 * (qib_kreceive() stops its loop once this many entries were handled).
 */
#define QIB_MAX_PKT_RECV 64
 80
 81struct qlogic_ib_stats qib_stats;
 82
 83struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi)
 84{
 85	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
 86	struct qib_devdata *dd = container_of(ibdev,
 87					      struct qib_devdata, verbs_dev);
 88	return dd->pcidev;
 89}
 90
 91/*
 92 * Return count of units with at least one port ACTIVE.
 93 */
 94int qib_count_active_units(void)
 95{
 96	struct qib_devdata *dd;
 97	struct qib_pportdata *ppd;
 98	unsigned long index, flags;
 99	int pidx, nunits_active = 0;
100
101	xa_lock_irqsave(&qib_dev_table, flags);
102	xa_for_each(&qib_dev_table, index, dd) {
103		if (!(dd->flags & QIB_PRESENT) || !dd->kregbase)
104			continue;
105		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
106			ppd = dd->pport + pidx;
107			if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT |
108					 QIBL_LINKARMED | QIBL_LINKACTIVE))) {
109				nunits_active++;
110				break;
111			}
112		}
113	}
114	xa_unlock_irqrestore(&qib_dev_table, flags);
115	return nunits_active;
116}
117
118/*
119 * Return count of all units, optionally return in arguments
120 * the number of usable (present) units, and the number of
121 * ports that are up.
122 */
123int qib_count_units(int *npresentp, int *nupp)
124{
125	int nunits = 0, npresent = 0, nup = 0;
126	struct qib_devdata *dd;
127	unsigned long index, flags;
128	int pidx;
129	struct qib_pportdata *ppd;
130
131	xa_lock_irqsave(&qib_dev_table, flags);
132	xa_for_each(&qib_dev_table, index, dd) {
 
133		nunits++;
134		if ((dd->flags & QIB_PRESENT) && dd->kregbase)
135			npresent++;
136		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
137			ppd = dd->pport + pidx;
138			if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT |
139					 QIBL_LINKARMED | QIBL_LINKACTIVE)))
140				nup++;
141		}
142	}
143	xa_unlock_irqrestore(&qib_dev_table, flags);
 
144
145	if (npresentp)
146		*npresentp = npresent;
147	if (nupp)
148		*nupp = nup;
149
150	return nunits;
151}
152
153/**
154 * qib_wait_linkstate - wait for an IB link state change to occur
155 * @ppd: the qlogic_ib device
156 * @state: the state to wait for
157 * @msecs: the number of milliseconds to wait
158 *
159 * wait up to msecs milliseconds for IB link state change to occur for
160 * now, take the easy polling route.  Currently used only by
161 * qib_set_linkstate.  Returns 0 if state reached, otherwise
162 * -ETIMEDOUT state can have multiple states set, for any of several
163 * transitions.
164 */
165int qib_wait_linkstate(struct qib_pportdata *ppd, u32 state, int msecs)
166{
167	int ret;
168	unsigned long flags;
169
170	spin_lock_irqsave(&ppd->lflags_lock, flags);
171	if (ppd->state_wanted) {
172		spin_unlock_irqrestore(&ppd->lflags_lock, flags);
173		ret = -EBUSY;
174		goto bail;
175	}
176	ppd->state_wanted = state;
177	spin_unlock_irqrestore(&ppd->lflags_lock, flags);
178	wait_event_interruptible_timeout(ppd->state_wait,
179					 (ppd->lflags & state),
180					 msecs_to_jiffies(msecs));
181	spin_lock_irqsave(&ppd->lflags_lock, flags);
182	ppd->state_wanted = 0;
183	spin_unlock_irqrestore(&ppd->lflags_lock, flags);
184
185	if (!(ppd->lflags & state))
186		ret = -ETIMEDOUT;
187	else
188		ret = 0;
189bail:
190	return ret;
191}
192
193int qib_set_linkstate(struct qib_pportdata *ppd, u8 newstate)
194{
195	u32 lstate;
196	int ret;
197	struct qib_devdata *dd = ppd->dd;
198	unsigned long flags;
199
200	switch (newstate) {
201	case QIB_IB_LINKDOWN_ONLY:
202		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
203				 IB_LINKCMD_DOWN | IB_LINKINITCMD_NOP);
204		/* don't wait */
205		ret = 0;
206		goto bail;
207
208	case QIB_IB_LINKDOWN:
209		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
210				 IB_LINKCMD_DOWN | IB_LINKINITCMD_POLL);
211		/* don't wait */
212		ret = 0;
213		goto bail;
214
215	case QIB_IB_LINKDOWN_SLEEP:
216		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
217				 IB_LINKCMD_DOWN | IB_LINKINITCMD_SLEEP);
218		/* don't wait */
219		ret = 0;
220		goto bail;
221
222	case QIB_IB_LINKDOWN_DISABLE:
223		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
224				 IB_LINKCMD_DOWN | IB_LINKINITCMD_DISABLE);
225		/* don't wait */
226		ret = 0;
227		goto bail;
228
229	case QIB_IB_LINKARM:
230		if (ppd->lflags & QIBL_LINKARMED) {
231			ret = 0;
232			goto bail;
233		}
234		if (!(ppd->lflags & (QIBL_LINKINIT | QIBL_LINKACTIVE))) {
235			ret = -EINVAL;
236			goto bail;
237		}
238		/*
239		 * Since the port can be ACTIVE when we ask for ARMED,
240		 * clear QIBL_LINKV so we can wait for a transition.
241		 * If the link isn't ARMED, then something else happened
242		 * and there is no point waiting for ARMED.
243		 */
244		spin_lock_irqsave(&ppd->lflags_lock, flags);
245		ppd->lflags &= ~QIBL_LINKV;
246		spin_unlock_irqrestore(&ppd->lflags_lock, flags);
247		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
248				 IB_LINKCMD_ARMED | IB_LINKINITCMD_NOP);
249		lstate = QIBL_LINKV;
250		break;
251
252	case QIB_IB_LINKACTIVE:
253		if (ppd->lflags & QIBL_LINKACTIVE) {
254			ret = 0;
255			goto bail;
256		}
257		if (!(ppd->lflags & QIBL_LINKARMED)) {
258			ret = -EINVAL;
259			goto bail;
260		}
261		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
262				 IB_LINKCMD_ACTIVE | IB_LINKINITCMD_NOP);
263		lstate = QIBL_LINKACTIVE;
264		break;
265
266	default:
267		ret = -EINVAL;
268		goto bail;
269	}
270	ret = qib_wait_linkstate(ppd, lstate, 10);
271
272bail:
273	return ret;
274}
275
276/*
277 * Get address of eager buffer from it's index (allocated in chunks, not
278 * contiguous).
279 */
280static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail)
281{
282	const u32 chunk = etail >> rcd->rcvegrbufs_perchunk_shift;
283	const u32 idx =  etail & ((u32)rcd->rcvegrbufs_perchunk - 1);
284
285	return rcd->rcvegrbuf[chunk] + (idx << rcd->dd->rcvegrbufsize_shift);
286}
287
/*
 * Handle a receive header queue entry whose error flags are non-zero.
 *
 * Returns 1 if error was a CRC (ICRC or VCRC flag set), else 0.
 * Needed for some chip's synthesized error counters.
 *
 * When @eflags is exactly QLOGIC_IB_RHF_H_TIDERR the packet header is
 * parsed and, for a unicast RC QP in a receive-capable state, a PSN-error
 * NAK may be scheduled: the QP is put on rcd->qp_wait_list (with a
 * reference taken) so that qib_kreceive() sends the response after its
 * receive loop.  The return value is still 0 on that path.
 *
 * @ctxt, @l and @etail are accepted but not used by this function.
 */
static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
			  u32 ctxt, u32 eflags, u32 l, u32 etail,
			  __le32 *rhf_addr, struct qib_message_header *rhdr)
{
	u32 ret = 0;

	if (eflags & (QLOGIC_IB_RHF_H_ICRCERR | QLOGIC_IB_RHF_H_VCRCERR))
		ret = 1;
	else if (eflags == QLOGIC_IB_RHF_H_TIDERR) {
		/* For TIDERR and RC QPs preemptively schedule a NAK */
		struct ib_header *hdr = (struct ib_header *)rhdr;
		struct ib_other_headers *ohdr = NULL;
		struct qib_ibport *ibp = &ppd->ibport_data;
		struct qib_devdata *dd = ppd->dd;
		struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
		struct rvt_qp *qp = NULL;
		u32 tlen = qib_hdrget_length_in_bytes(rhf_addr);
		u16 lid  = be16_to_cpu(hdr->lrh[1]);
		int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
		u32 qp_num;
		u32 opcode;
		u32 psn;
		int diff;

		/* Sanity check packet */
		if (tlen < 24)
			goto drop;

		/*
		 * Below the multicast LID base: clear the low LMC bits and
		 * require the result to match this port's LID.
		 */
		if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
			lid &= ~((1 << ppd->lmc) - 1);
			if (unlikely(lid != ppd->lid))
				goto drop;
		}

		/* Check for GRH */
		if (lnh == QIB_LRH_BTH)
			ohdr = &hdr->u.oth;
		else if (lnh == QIB_LRH_GRH) {
			u32 vtf;

			ohdr = &hdr->u.l.oth;
			if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
				goto drop;
			vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
			if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
				goto drop;
		} else
			goto drop;

		/* Get opcode (top byte of bth[0]) and PSN from packet */
		opcode = be32_to_cpu(ohdr->bth[0]);
		opcode >>= 24;
		psn = be32_to_cpu(ohdr->bth[2]);

		/* Get the destination QP number. */
		qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
		if (qp_num != QIB_MULTICAST_QPN) {
			int ruc_res;

			/* The QP lookup and everything using qp run under RCU. */
			rcu_read_lock();
			qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
			if (!qp) {
				rcu_read_unlock();
				goto drop;
			}

			/*
			 * Handle only RC QPs - for other QP types drop error
			 * packet.
			 */
			spin_lock(&qp->r_lock);

			/* Check for valid receive state. */
			if (!(ib_rvt_state_ops[qp->state] &
			      RVT_PROCESS_RECV_OK)) {
				ibp->rvp.n_pkt_drops++;
				goto unlock;
			}

			switch (qp->ibqp.qp_type) {
			case IB_QPT_RC:
				/* A non-zero header check result ends processing. */
				ruc_res =
					qib_ruc_check_hdr(
						ibp, hdr,
						lnh == QIB_LRH_GRH,
						qp,
						be32_to_cpu(ohdr->bth[0]));
				if (ruc_res)
					goto unlock;

				/* Only deal with RDMA Writes for now */
				if (opcode <
				    IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
					/*
					 * NOTE(review): diff >= 0 presumably
					 * means the PSN is at or past the
					 * expected one - confirm against
					 * qib_cmp24().
					 */
					diff = qib_cmp24(psn, qp->r_psn);
					if (!qp->r_nak_state && diff >= 0) {
						ibp->rvp.n_rc_seqnak++;
						qp->r_nak_state =
							IB_NAK_PSN_ERROR;
						/* Use the expected PSN. */
						qp->r_ack_psn = qp->r_psn;
						/*
						 * Wait to send the sequence
						 * NAK until all packets
						 * in the receive queue have
						 * been processed.
						 * Otherwise, we end up
						 * propagating congestion.
						 */
						if (list_empty(&qp->rspwait)) {
							qp->r_flags |=
								RVT_R_RSP_NAK;
							/*
							 * Reference is dropped
							 * by qib_kreceive()
							 * when it drains the
							 * wait list.
							 */
							rvt_get_qp(qp);
							list_add_tail(
							 &qp->rspwait,
							 &rcd->qp_wait_list);
						}
					} /* Out of sequence NAK */
				} /* QP Request NAKs */
				break;
			case IB_QPT_SMI:
			case IB_QPT_GSI:
			case IB_QPT_UD:
			case IB_QPT_UC:
			default:
				/* For now don't handle any other QP types */
				break;
			}

unlock:
			spin_unlock(&qp->r_lock);
			rcu_read_unlock();
		} /* Unicast QP */
	} /* Valid packet with TIDErr */

drop:
	return ret;
}
429
/*
 * qib_kreceive - receive a packet
 * @rcd: the qlogic_ib context
 * @llic: gets count of good packets needed to clear lli,
 *          (used with chips that need to track crcs for lli);
 *          may be NULL
 * @npkts: if non-NULL, receives the number of receive header queue
 *          entries consumed by this call (0 if the queue was empty)
 *
 * called from interrupt handler for errors or receive interrupt
 * Returns number of CRC error packets, needed by some chips for
 * local link integrity tracking.   crcs are adjusted down by following
 * good packets, if any, and count of good packets is also tracked.
 *
 * At most QIB_MAX_PKT_RECV entries are processed per call.  The head
 * register is always written on exit via f_update_usrhead().
 */
u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
{
	struct qib_devdata *dd = rcd->dd;
	struct qib_pportdata *ppd = rcd->ppd;
	__le32 *rhf_addr;
	void *ebuf;
	const u32 rsize = dd->rcvhdrentsize;        /* words */
	const u32 maxcnt = dd->rcvhdrcnt * rsize;   /* words */
	u32 etail = -1, l, hdrqtail;
	struct qib_message_header *hdr;
	u32 eflags, etype, tlen, i = 0, updegr = 0, crcs = 0;
	int last;
	u64 lval;
	struct rvt_qp *qp, *nqp;

	l = rcd->head;
	rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
	/*
	 * Decide whether there is anything to do: with QIB_NODMA_RTAIL the
	 * entry's sequence number is compared with the expected one,
	 * otherwise the head is compared with the tail.
	 */
	if (dd->flags & QIB_NODMA_RTAIL) {
		u32 seq = qib_hdrget_seq(rhf_addr);

		if (seq != rcd->seq_cnt)
			goto bail;
		hdrqtail = 0;
	} else {
		hdrqtail = qib_get_rcvhdrtail(rcd);
		if (l == hdrqtail)
			goto bail;
		smp_rmb();  /* prevent speculative reads of dma'ed hdrq */
	}

	/*
	 * i counts processed entries: it starts at 1 and is only bumped
	 * when another iteration follows, so on exit it equals the number
	 * of entries handled.
	 */
	for (last = 0, i = 1; !last; i += !last) {
		hdr = dd->f_get_msgheader(dd, rhf_addr);
		eflags = qib_hdrget_err_flags(rhf_addr);
		etype = qib_hdrget_rcv_type(rhf_addr);
		/* total length */
		tlen = qib_hdrget_length_in_bytes(rhf_addr);
		ebuf = NULL;
		if ((dd->flags & QIB_NODMA_RTAIL) ?
		    qib_hdrget_use_egr_buf(rhf_addr) :
		    (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
			/* entry refers to an eager buffer; remember its index */
			etail = qib_hdrget_index(rhf_addr);
			updegr = 1;
			if (tlen > sizeof(*hdr) ||
			    etype >= RCVHQ_RCV_TYPE_NON_KD) {
				ebuf = qib_get_egrbuf(rcd, etail);
				prefetch_range(ebuf, tlen - sizeof(*hdr));
			}
		}
		if (!eflags) {
			/* lrh[2] shifted left by 2 is compared against tlen */
			u16 lrh_len = be16_to_cpu(hdr->lrh[2]) << 2;

			if (lrh_len != tlen) {
				qib_stats.sps_lenerrs++;
				goto move_along;
			}
		}
		/*
		 * Skip an error-free NON_KD packet that has no eager buffer
		 * but claims more data than the header entry leaves room for.
		 */
		if (etype == RCVHQ_RCV_TYPE_NON_KD && !eflags &&
		    ebuf == NULL &&
		    tlen > (dd->rcvhdrentsize - 2 + 1 -
				qib_hdrget_offset(rhf_addr)) << 2) {
			goto move_along;
		}

		/*
		 * Both tiderr and qibhdrerr are set for all plain IB
		 * packets; only qibhdrerr should be set.
		 */
		if (unlikely(eflags))
			crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l,
					       etail, rhf_addr, hdr);
		else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
			qib_ib_rcv(rcd, hdr, ebuf, tlen);
			/* a good packet cancels one CRC, else counts for lli */
			if (crcs)
				crcs--;
			else if (llic && *llic)
				--*llic;
		}
move_along:
		/* advance to the next entry, wrapping at the end of the queue */
		l += rsize;
		if (l >= maxcnt)
			l = 0;
		if (i == QIB_MAX_PKT_RECV)
			last = 1;

		rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
		if (dd->flags & QIB_NODMA_RTAIL) {
			u32 seq = qib_hdrget_seq(rhf_addr);

			/* expected sequence number cycles through 1..13 */
			if (++rcd->seq_cnt > 13)
				rcd->seq_cnt = 1;
			if (seq != rcd->seq_cnt)
				last = 1;
		} else if (l == hdrqtail)
			last = 1;
		/*
		 * Update head regs etc., every 16 packets, if not last pkt,
		 * to help prevent rcvhdrq overflows, when many packets
		 * are processed and queue is nearly full.
		 * Don't request an interrupt for intermediate updates.
		 */
		lval = l;
		if (!last && !(i & 0xf)) {
			dd->f_update_usrhead(rcd, lval, updegr, etail, i);
			updegr = 0;
		}
	}

	rcd->head = l;

	/*
	 * Iterate over all QPs waiting to respond.
	 * The list won't change since the IRQ is only run on one CPU.
	 */
	list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
		list_del_init(&qp->rspwait);
		if (qp->r_flags & RVT_R_RSP_NAK) {
			qp->r_flags &= ~RVT_R_RSP_NAK;
			qib_send_rc_ack(qp);
		}
		if (qp->r_flags & RVT_R_RSP_SEND) {
			unsigned long flags;

			qp->r_flags &= ~RVT_R_RSP_SEND;
			spin_lock_irqsave(&qp->s_lock, flags);
			if (ib_rvt_state_ops[qp->state] &
					RVT_PROCESS_OR_FLUSH_SEND)
				qib_schedule_send(qp);
			spin_unlock_irqrestore(&qp->s_lock, flags);
		}
		/* drop the reference held while the QP sat on the wait list */
		rvt_put_qp(qp);
	}

bail:
	/* Report number of packets consumed */
	if (npkts)
		*npkts = i;

	/*
	 * Always write head at end, and setup rcv interrupt, even
	 * if no packets were processed.
	 */
	lval = (u64)rcd->head | dd->rhdrhead_intr_off;
	dd->f_update_usrhead(rcd, lval, updegr, etail, i);
	return crcs;
}
586
587/**
588 * qib_set_mtu - set the MTU
589 * @ppd: the perport data
590 * @arg: the new MTU
591 *
592 * We can handle "any" incoming size, the issue here is whether we
593 * need to restrict our outgoing size.   For now, we don't do any
594 * sanity checking on this, and we don't deal with what happens to
595 * programs that are already running when the size changes.
596 * NOTE: changing the MTU will usually cause the IBC to go back to
597 * link INIT state...
598 */
599int qib_set_mtu(struct qib_pportdata *ppd, u16 arg)
600{
601	u32 piosize;
602	int ret, chk;
603
604	if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
605	    arg != 4096) {
606		ret = -EINVAL;
607		goto bail;
608	}
609	chk = ib_mtu_enum_to_int(qib_ibmtu);
610	if (chk > 0 && arg > chk) {
611		ret = -EINVAL;
612		goto bail;
613	}
614
615	piosize = ppd->ibmaxlen;
616	ppd->ibmtu = arg;
617
618	if (arg >= (piosize - QIB_PIO_MAXIBHDR)) {
619		/* Only if it's not the initial value (or reset to it) */
620		if (piosize != ppd->init_ibmaxlen) {
621			if (arg > piosize && arg <= ppd->init_ibmaxlen)
622				piosize = ppd->init_ibmaxlen - 2 * sizeof(u32);
623			ppd->ibmaxlen = piosize;
624		}
625	} else if ((arg + QIB_PIO_MAXIBHDR) != ppd->ibmaxlen) {
626		piosize = arg + QIB_PIO_MAXIBHDR - 2 * sizeof(u32);
627		ppd->ibmaxlen = piosize;
628	}
629
630	ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_MTU, 0);
631
632	ret = 0;
633
634bail:
635	return ret;
636}
637
638int qib_set_lid(struct qib_pportdata *ppd, u32 lid, u8 lmc)
639{
640	struct qib_devdata *dd = ppd->dd;
641
642	ppd->lid = lid;
643	ppd->lmc = lmc;
644
645	dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LIDLMC,
646			 lid | (~((1U << lmc) - 1)) << 16);
647
648	qib_devinfo(dd->pcidev, "IB%u:%u got a lid: 0x%x\n",
649		    dd->unit, ppd->port, lid);
650
651	return 0;
652}
653
/*
 * The following deals with the "obviously simple" task of overriding the
 * state of the LEDs, which normally indicate link physical and logical
 * status.  The complications arise in dealing with different hardware
 * mappings and the board-dependent routine being called from interrupts,
 * and then there's the requirement to _flash_ them.
 */
/* The blink frequency occupies bits 8..15 of the override value. */
#define LED_OVER_FREQ_SHIFT 8
#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
/* Below is "non-zero" to force override, but both actual LEDs are off */
#define LED_OVER_BOTH_OFF (8)
665
666static void qib_run_led_override(struct timer_list *t)
667{
668	struct qib_pportdata *ppd = from_timer(ppd, t,
669						    led_override_timer);
670	struct qib_devdata *dd = ppd->dd;
671	int timeoff;
672	int ph_idx;
673
674	if (!(dd->flags & QIB_INITTED))
675		return;
676
677	ph_idx = ppd->led_override_phase++ & 1;
678	ppd->led_override = ppd->led_override_vals[ph_idx];
679	timeoff = ppd->led_override_timeoff;
680
681	dd->f_setextled(ppd, 1);
682	/*
683	 * don't re-fire the timer if user asked for it to be off; we let
684	 * it fire one more time after they turn it off to simplify
685	 */
686	if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
687		mod_timer(&ppd->led_override_timer, jiffies + timeoff);
688}
689
690void qib_set_led_override(struct qib_pportdata *ppd, unsigned int val)
691{
692	struct qib_devdata *dd = ppd->dd;
693	int timeoff, freq;
694
695	if (!(dd->flags & QIB_INITTED))
696		return;
697
698	/* First check if we are blinking. If not, use 1HZ polling */
699	timeoff = HZ;
700	freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
701
702	if (freq) {
703		/* For blink, set each phase from one nybble of val */
704		ppd->led_override_vals[0] = val & 0xF;
705		ppd->led_override_vals[1] = (val >> 4) & 0xF;
706		timeoff = (HZ << 4)/freq;
707	} else {
708		/* Non-blink set both phases the same. */
709		ppd->led_override_vals[0] = val & 0xF;
710		ppd->led_override_vals[1] = val & 0xF;
711	}
712	ppd->led_override_timeoff = timeoff;
713
714	/*
715	 * If the timer has not already been started, do so. Use a "quick"
716	 * timeout so the function will be called soon, to look at our request.
717	 */
718	if (atomic_inc_return(&ppd->led_override_timer_active) == 1) {
719		/* Need to start timer */
720		timer_setup(&ppd->led_override_timer, qib_run_led_override, 0);
 
 
721		ppd->led_override_timer.expires = jiffies + 1;
722		add_timer(&ppd->led_override_timer);
723	} else {
724		if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
725			mod_timer(&ppd->led_override_timer, jiffies + 1);
726		atomic_dec(&ppd->led_override_timer_active);
727	}
728}
729
730/**
731 * qib_reset_device - reset the chip if possible
732 * @unit: the device to reset
733 *
734 * Whether or not reset is successful, we attempt to re-initialize the chip
735 * (that is, much like a driver unload/reload).  We clear the INITTED flag
736 * so that the various entry points will fail until we reinitialize.  For
737 * now, we only allow this if no user contexts are open that use chip resources
738 */
739int qib_reset_device(int unit)
740{
741	int ret, i;
742	struct qib_devdata *dd = qib_lookup(unit);
743	struct qib_pportdata *ppd;
744	unsigned long flags;
745	int pidx;
746
747	if (!dd) {
748		ret = -ENODEV;
749		goto bail;
750	}
751
752	qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit);
753
754	if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) {
755		qib_devinfo(dd->pcidev,
756			"Invalid unit number %u or not initialized or not present\n",
757			unit);
758		ret = -ENXIO;
759		goto bail;
760	}
761
762	spin_lock_irqsave(&dd->uctxt_lock, flags);
763	if (dd->rcd)
764		for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) {
765			if (!dd->rcd[i] || !dd->rcd[i]->cnt)
766				continue;
767			spin_unlock_irqrestore(&dd->uctxt_lock, flags);
768			ret = -EBUSY;
769			goto bail;
770		}
771	spin_unlock_irqrestore(&dd->uctxt_lock, flags);
772
773	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
774		ppd = dd->pport + pidx;
775		if (atomic_read(&ppd->led_override_timer_active)) {
776			/* Need to stop LED timer, _then_ shut off LEDs */
777			del_timer_sync(&ppd->led_override_timer);
778			atomic_set(&ppd->led_override_timer_active, 0);
779		}
780
781		/* Shut off LEDs after we are sure timer is not running */
782		ppd->led_override = LED_OVER_BOTH_OFF;
783		dd->f_setextled(ppd, 0);
784		if (dd->flags & QIB_HAS_SEND_DMA)
785			qib_teardown_sdma(ppd);
786	}
787
788	ret = dd->f_reset(dd);
789	if (ret == 1)
790		ret = qib_init(dd, 1);
791	else
792		ret = -EAGAIN;
793	if (ret)
794		qib_dev_err(dd,
795			"Reinitialize unit %u after reset failed with %d\n",
796			unit, ret);
797	else
798		qib_devinfo(dd->pcidev,
799			"Reinitialized unit %u after resetting\n",
800			unit);
801
802bail:
803	return ret;
804}
v3.5.6
  1/*
 
 
  2 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
  3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  4 *
  5 * This software is available to you under a choice of one of two
  6 * licenses.  You may choose to be licensed under the terms of the GNU
  7 * General Public License (GPL) Version 2, available from the file
  8 * COPYING in the main directory of this source tree, or the
  9 * OpenIB.org BSD license below:
 10 *
 11 *     Redistribution and use in source and binary forms, with or
 12 *     without modification, are permitted provided that the following
 13 *     conditions are met:
 14 *
 15 *      - Redistributions of source code must retain the above
 16 *        copyright notice, this list of conditions and the following
 17 *        disclaimer.
 18 *
 19 *      - Redistributions in binary form must reproduce the above
 20 *        copyright notice, this list of conditions and the following
 21 *        disclaimer in the documentation and/or other materials
 22 *        provided with the distribution.
 23 *
 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 31 * SOFTWARE.
 32 */
 33
 34#include <linux/spinlock.h>
 35#include <linux/pci.h>
 36#include <linux/io.h>
 37#include <linux/delay.h>
 38#include <linux/netdevice.h>
 39#include <linux/vmalloc.h>
 40#include <linux/module.h>
 41#include <linux/prefetch.h>
 42
 43#include "qib.h"
 44
 45/*
 46 * The size has to be longer than this string, so we can append
 47 * board/chip information to it in the init code.
 48 */
 49const char ib_qib_version[] = QIB_IDSTR "\n";
 50
 51DEFINE_SPINLOCK(qib_devs_lock);
 52LIST_HEAD(qib_dev_list);
 53DEFINE_MUTEX(qib_mutex);	/* general driver use */
 54
/* "ibmtu" module parameter, registered with mode S_IRUGO. */
unsigned qib_ibmtu;
module_param_named(ibmtu, qib_ibmtu, uint, S_IRUGO);
MODULE_PARM_DESC(ibmtu, "Set max IB MTU (0=2KB, 1=256, 2=512, ... 5=4096");

/*
 * "compat_ddr_negotiate" module parameter, registered with mode
 * S_IWUSR | S_IRUGO and enabled (1) by default.
 */
unsigned qib_compat_ddr_negotiate = 1;
module_param_named(compat_ddr_negotiate, qib_compat_ddr_negotiate, uint,
		   S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(compat_ddr_negotiate,
		 "Attempt pre-IBTA 1.2 DDR speed negotiation");
 64
 65MODULE_LICENSE("Dual BSD/GPL");
 66MODULE_AUTHOR("QLogic <support@qlogic.com>");
 67MODULE_DESCRIPTION("QLogic IB driver");
 68
 69/*
 70 * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our
 71 * PIO send buffers.  This is well beyond anything currently
 72 * defined in the InfiniBand spec.
 73 */
 74#define QIB_PIO_MAXIBHDR 128
 75
/*
 * QIB_MAX_PKT_RECV is the max # of packets processed per receive interrupt.
 */
#define QIB_MAX_PKT_RECV 64
 80
 81struct qlogic_ib_stats qib_stats;
 82
 83const char *qib_get_unit_name(int unit)
 84{
 85	static char iname[16];
 86
 87	snprintf(iname, sizeof iname, "infinipath%u", unit);
 88	return iname;
 89}
 90
 91/*
 92 * Return count of units with at least one port ACTIVE.
 93 */
 94int qib_count_active_units(void)
 95{
 96	struct qib_devdata *dd;
 97	struct qib_pportdata *ppd;
 98	unsigned long flags;
 99	int pidx, nunits_active = 0;
100
101	spin_lock_irqsave(&qib_devs_lock, flags);
102	list_for_each_entry(dd, &qib_dev_list, list) {
103		if (!(dd->flags & QIB_PRESENT) || !dd->kregbase)
104			continue;
105		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
106			ppd = dd->pport + pidx;
107			if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT |
108					 QIBL_LINKARMED | QIBL_LINKACTIVE))) {
109				nunits_active++;
110				break;
111			}
112		}
113	}
114	spin_unlock_irqrestore(&qib_devs_lock, flags);
115	return nunits_active;
116}
117
118/*
119 * Return count of all units, optionally return in arguments
120 * the number of usable (present) units, and the number of
121 * ports that are up.
122 */
123int qib_count_units(int *npresentp, int *nupp)
124{
125	int nunits = 0, npresent = 0, nup = 0;
126	struct qib_devdata *dd;
127	unsigned long flags;
128	int pidx;
129	struct qib_pportdata *ppd;
130
131	spin_lock_irqsave(&qib_devs_lock, flags);
132
133	list_for_each_entry(dd, &qib_dev_list, list) {
134		nunits++;
135		if ((dd->flags & QIB_PRESENT) && dd->kregbase)
136			npresent++;
137		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
138			ppd = dd->pport + pidx;
139			if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT |
140					 QIBL_LINKARMED | QIBL_LINKACTIVE)))
141				nup++;
142		}
143	}
144
145	spin_unlock_irqrestore(&qib_devs_lock, flags);
146
147	if (npresentp)
148		*npresentp = npresent;
149	if (nupp)
150		*nupp = nup;
151
152	return nunits;
153}
154
155/**
156 * qib_wait_linkstate - wait for an IB link state change to occur
157 * @dd: the qlogic_ib device
158 * @state: the state to wait for
159 * @msecs: the number of milliseconds to wait
160 *
161 * wait up to msecs milliseconds for IB link state change to occur for
162 * now, take the easy polling route.  Currently used only by
163 * qib_set_linkstate.  Returns 0 if state reached, otherwise
164 * -ETIMEDOUT state can have multiple states set, for any of several
165 * transitions.
166 */
167int qib_wait_linkstate(struct qib_pportdata *ppd, u32 state, int msecs)
168{
169	int ret;
170	unsigned long flags;
171
172	spin_lock_irqsave(&ppd->lflags_lock, flags);
173	if (ppd->state_wanted) {
174		spin_unlock_irqrestore(&ppd->lflags_lock, flags);
175		ret = -EBUSY;
176		goto bail;
177	}
178	ppd->state_wanted = state;
179	spin_unlock_irqrestore(&ppd->lflags_lock, flags);
180	wait_event_interruptible_timeout(ppd->state_wait,
181					 (ppd->lflags & state),
182					 msecs_to_jiffies(msecs));
183	spin_lock_irqsave(&ppd->lflags_lock, flags);
184	ppd->state_wanted = 0;
185	spin_unlock_irqrestore(&ppd->lflags_lock, flags);
186
187	if (!(ppd->lflags & state))
188		ret = -ETIMEDOUT;
189	else
190		ret = 0;
191bail:
192	return ret;
193}
194
195int qib_set_linkstate(struct qib_pportdata *ppd, u8 newstate)
196{
197	u32 lstate;
198	int ret;
199	struct qib_devdata *dd = ppd->dd;
200	unsigned long flags;
201
202	switch (newstate) {
203	case QIB_IB_LINKDOWN_ONLY:
204		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
205				 IB_LINKCMD_DOWN | IB_LINKINITCMD_NOP);
206		/* don't wait */
207		ret = 0;
208		goto bail;
209
210	case QIB_IB_LINKDOWN:
211		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
212				 IB_LINKCMD_DOWN | IB_LINKINITCMD_POLL);
213		/* don't wait */
214		ret = 0;
215		goto bail;
216
217	case QIB_IB_LINKDOWN_SLEEP:
218		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
219				 IB_LINKCMD_DOWN | IB_LINKINITCMD_SLEEP);
220		/* don't wait */
221		ret = 0;
222		goto bail;
223
224	case QIB_IB_LINKDOWN_DISABLE:
225		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
226				 IB_LINKCMD_DOWN | IB_LINKINITCMD_DISABLE);
227		/* don't wait */
228		ret = 0;
229		goto bail;
230
231	case QIB_IB_LINKARM:
232		if (ppd->lflags & QIBL_LINKARMED) {
233			ret = 0;
234			goto bail;
235		}
236		if (!(ppd->lflags & (QIBL_LINKINIT | QIBL_LINKACTIVE))) {
237			ret = -EINVAL;
238			goto bail;
239		}
240		/*
241		 * Since the port can be ACTIVE when we ask for ARMED,
242		 * clear QIBL_LINKV so we can wait for a transition.
243		 * If the link isn't ARMED, then something else happened
244		 * and there is no point waiting for ARMED.
245		 */
246		spin_lock_irqsave(&ppd->lflags_lock, flags);
247		ppd->lflags &= ~QIBL_LINKV;
248		spin_unlock_irqrestore(&ppd->lflags_lock, flags);
249		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
250				 IB_LINKCMD_ARMED | IB_LINKINITCMD_NOP);
251		lstate = QIBL_LINKV;
252		break;
253
254	case QIB_IB_LINKACTIVE:
255		if (ppd->lflags & QIBL_LINKACTIVE) {
256			ret = 0;
257			goto bail;
258		}
259		if (!(ppd->lflags & QIBL_LINKARMED)) {
260			ret = -EINVAL;
261			goto bail;
262		}
263		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
264				 IB_LINKCMD_ACTIVE | IB_LINKINITCMD_NOP);
265		lstate = QIBL_LINKACTIVE;
266		break;
267
268	default:
269		ret = -EINVAL;
270		goto bail;
271	}
272	ret = qib_wait_linkstate(ppd, lstate, 10);
273
274bail:
275	return ret;
276}
277
278/*
279 * Get address of eager buffer from it's index (allocated in chunks, not
280 * contiguous).
281 */
282static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail)
283{
284	const u32 chunk = etail >> rcd->rcvegrbufs_perchunk_shift;
285	const u32 idx =  etail & ((u32)rcd->rcvegrbufs_perchunk - 1);
286
287	return rcd->rcvegrbuf[chunk] + (idx << rcd->dd->rcvegrbufsize_shift);
288}
289
290/*
291 * Returns 1 if error was a CRC, else 0.
292 * Needed for some chip's synthesized error counters.
293 */
294static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
295			  u32 ctxt, u32 eflags, u32 l, u32 etail,
296			  __le32 *rhf_addr, struct qib_message_header *rhdr)
297{
298	u32 ret = 0;
299
300	if (eflags & (QLOGIC_IB_RHF_H_ICRCERR | QLOGIC_IB_RHF_H_VCRCERR))
301		ret = 1;
302	else if (eflags == QLOGIC_IB_RHF_H_TIDERR) {
303		/* For TIDERR and RC QPs premptively schedule a NAK */
304		struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
305		struct qib_other_headers *ohdr = NULL;
306		struct qib_ibport *ibp = &ppd->ibport_data;
307		struct qib_qp *qp = NULL;
 
 
308		u32 tlen = qib_hdrget_length_in_bytes(rhf_addr);
309		u16 lid  = be16_to_cpu(hdr->lrh[1]);
310		int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
311		u32 qp_num;
312		u32 opcode;
313		u32 psn;
314		int diff;
315
316		/* Sanity check packet */
317		if (tlen < 24)
318			goto drop;
319
320		if (lid < QIB_MULTICAST_LID_BASE) {
321			lid &= ~((1 << ppd->lmc) - 1);
322			if (unlikely(lid != ppd->lid))
323				goto drop;
324		}
325
326		/* Check for GRH */
327		if (lnh == QIB_LRH_BTH)
328			ohdr = &hdr->u.oth;
329		else if (lnh == QIB_LRH_GRH) {
330			u32 vtf;
331
332			ohdr = &hdr->u.l.oth;
333			if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
334				goto drop;
335			vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
336			if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
337				goto drop;
338		} else
339			goto drop;
340
341		/* Get opcode and PSN from packet */
342		opcode = be32_to_cpu(ohdr->bth[0]);
343		opcode >>= 24;
344		psn = be32_to_cpu(ohdr->bth[2]);
345
346		/* Get the destination QP number. */
347		qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
348		if (qp_num != QIB_MULTICAST_QPN) {
349			int ruc_res;
350			qp = qib_lookup_qpn(ibp, qp_num);
351			if (!qp)
 
 
 
352				goto drop;
 
353
354			/*
355			 * Handle only RC QPs - for other QP types drop error
356			 * packet.
357			 */
358			spin_lock(&qp->r_lock);
359
360			/* Check for valid receive state. */
361			if (!(ib_qib_state_ops[qp->state] &
362			      QIB_PROCESS_RECV_OK)) {
363				ibp->n_pkt_drops++;
364				goto unlock;
365			}
366
367			switch (qp->ibqp.qp_type) {
368			case IB_QPT_RC:
369				ruc_res =
370					qib_ruc_check_hdr(
371						ibp, hdr,
372						lnh == QIB_LRH_GRH,
373						qp,
374						be32_to_cpu(ohdr->bth[0]));
375				if (ruc_res)
376					goto unlock;
377
378				/* Only deal with RDMA Writes for now */
379				if (opcode <
380				    IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
381					diff = qib_cmp24(psn, qp->r_psn);
382					if (!qp->r_nak_state && diff >= 0) {
383						ibp->n_rc_seqnak++;
384						qp->r_nak_state =
385							IB_NAK_PSN_ERROR;
386						/* Use the expected PSN. */
387						qp->r_ack_psn = qp->r_psn;
388						/*
389						 * Wait to send the sequence
390						 * NAK until all packets
391						 * in the receive queue have
392						 * been processed.
393						 * Otherwise, we end up
394						 * propagating congestion.
395						 */
396						if (list_empty(&qp->rspwait)) {
397							qp->r_flags |=
398								QIB_R_RSP_NAK;
399							atomic_inc(
400								&qp->refcount);
401							list_add_tail(
402							 &qp->rspwait,
403							 &rcd->qp_wait_list);
404						}
405					} /* Out of sequence NAK */
406				} /* QP Request NAKs */
407				break;
408			case IB_QPT_SMI:
409			case IB_QPT_GSI:
410			case IB_QPT_UD:
411			case IB_QPT_UC:
412			default:
413				/* For now don't handle any other QP types */
414				break;
415			}
416
417unlock:
418			spin_unlock(&qp->r_lock);
419			/*
420			 * Notify qib_destroy_qp() if it is waiting
421			 * for us to finish.
422			 */
423			if (atomic_dec_and_test(&qp->refcount))
424				wake_up(&qp->wait);
425		} /* Unicast QP */
426	} /* Valid packet with TIDErr */
427
428drop:
429	return ret;
430}
431
432/*
433 * qib_kreceive - receive a packet
434 * @rcd: the qlogic_ib context
435 * @llic: gets count of good packets needed to clear lli,
436 *          (used with chips that need need to track crcs for lli)
437 *
438 * called from interrupt handler for errors or receive interrupt
439 * Returns number of CRC error packets, needed by some chips for
440 * local link integrity tracking.   crcs are adjusted down by following
441 * good packets, if any, and count of good packets is also tracked.
442 */
443u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
444{
445	struct qib_devdata *dd = rcd->dd;
446	struct qib_pportdata *ppd = rcd->ppd;
447	__le32 *rhf_addr;
448	void *ebuf;
449	const u32 rsize = dd->rcvhdrentsize;        /* words */
450	const u32 maxcnt = dd->rcvhdrcnt * rsize;   /* words */
451	u32 etail = -1, l, hdrqtail;
452	struct qib_message_header *hdr;
453	u32 eflags, etype, tlen, i = 0, updegr = 0, crcs = 0;
454	int last;
455	u64 lval;
456	struct qib_qp *qp, *nqp;
457
458	l = rcd->head;
459	rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
460	if (dd->flags & QIB_NODMA_RTAIL) {
461		u32 seq = qib_hdrget_seq(rhf_addr);
 
462		if (seq != rcd->seq_cnt)
463			goto bail;
464		hdrqtail = 0;
465	} else {
466		hdrqtail = qib_get_rcvhdrtail(rcd);
467		if (l == hdrqtail)
468			goto bail;
469		smp_rmb();  /* prevent speculative reads of dma'ed hdrq */
470	}
471
472	for (last = 0, i = 1; !last; i += !last) {
473		hdr = dd->f_get_msgheader(dd, rhf_addr);
474		eflags = qib_hdrget_err_flags(rhf_addr);
475		etype = qib_hdrget_rcv_type(rhf_addr);
476		/* total length */
477		tlen = qib_hdrget_length_in_bytes(rhf_addr);
478		ebuf = NULL;
479		if ((dd->flags & QIB_NODMA_RTAIL) ?
480		    qib_hdrget_use_egr_buf(rhf_addr) :
481		    (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
482			etail = qib_hdrget_index(rhf_addr);
483			updegr = 1;
484			if (tlen > sizeof(*hdr) ||
485			    etype >= RCVHQ_RCV_TYPE_NON_KD) {
486				ebuf = qib_get_egrbuf(rcd, etail);
487				prefetch_range(ebuf, tlen - sizeof(*hdr));
488			}
489		}
490		if (!eflags) {
491			u16 lrh_len = be16_to_cpu(hdr->lrh[2]) << 2;
492
493			if (lrh_len != tlen) {
494				qib_stats.sps_lenerrs++;
495				goto move_along;
496			}
497		}
498		if (etype == RCVHQ_RCV_TYPE_NON_KD && !eflags &&
499		    ebuf == NULL &&
500		    tlen > (dd->rcvhdrentsize - 2 + 1 -
501				qib_hdrget_offset(rhf_addr)) << 2) {
502			goto move_along;
503		}
504
505		/*
506		 * Both tiderr and qibhdrerr are set for all plain IB
507		 * packets; only qibhdrerr should be set.
508		 */
509		if (unlikely(eflags))
510			crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l,
511					       etail, rhf_addr, hdr);
512		else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
513			qib_ib_rcv(rcd, hdr, ebuf, tlen);
514			if (crcs)
515				crcs--;
516			else if (llic && *llic)
517				--*llic;
518		}
519move_along:
520		l += rsize;
521		if (l >= maxcnt)
522			l = 0;
523		if (i == QIB_MAX_PKT_RECV)
524			last = 1;
525
526		rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
527		if (dd->flags & QIB_NODMA_RTAIL) {
528			u32 seq = qib_hdrget_seq(rhf_addr);
529
530			if (++rcd->seq_cnt > 13)
531				rcd->seq_cnt = 1;
532			if (seq != rcd->seq_cnt)
533				last = 1;
534		} else if (l == hdrqtail)
535			last = 1;
536		/*
537		 * Update head regs etc., every 16 packets, if not last pkt,
538		 * to help prevent rcvhdrq overflows, when many packets
539		 * are processed and queue is nearly full.
540		 * Don't request an interrupt for intermediate updates.
541		 */
542		lval = l;
543		if (!last && !(i & 0xf)) {
544			dd->f_update_usrhead(rcd, lval, updegr, etail, i);
545			updegr = 0;
546		}
547	}
548	/*
549	 * Notify qib_destroy_qp() if it is waiting
550	 * for lookaside_qp to finish.
551	 */
552	if (rcd->lookaside_qp) {
553		if (atomic_dec_and_test(&rcd->lookaside_qp->refcount))
554			wake_up(&rcd->lookaside_qp->wait);
555		rcd->lookaside_qp = NULL;
556	}
557
558	rcd->head = l;
559	rcd->pkt_count += i;
560
561	/*
562	 * Iterate over all QPs waiting to respond.
563	 * The list won't change since the IRQ is only run on one CPU.
564	 */
565	list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
566		list_del_init(&qp->rspwait);
567		if (qp->r_flags & QIB_R_RSP_NAK) {
568			qp->r_flags &= ~QIB_R_RSP_NAK;
569			qib_send_rc_ack(qp);
570		}
571		if (qp->r_flags & QIB_R_RSP_SEND) {
572			unsigned long flags;
573
574			qp->r_flags &= ~QIB_R_RSP_SEND;
575			spin_lock_irqsave(&qp->s_lock, flags);
576			if (ib_qib_state_ops[qp->state] &
577					QIB_PROCESS_OR_FLUSH_SEND)
578				qib_schedule_send(qp);
579			spin_unlock_irqrestore(&qp->s_lock, flags);
580		}
581		if (atomic_dec_and_test(&qp->refcount))
582			wake_up(&qp->wait);
583	}
584
585bail:
586	/* Report number of packets consumed */
587	if (npkts)
588		*npkts = i;
589
590	/*
591	 * Always write head at end, and setup rcv interrupt, even
592	 * if no packets were processed.
593	 */
594	lval = (u64)rcd->head | dd->rhdrhead_intr_off;
595	dd->f_update_usrhead(rcd, lval, updegr, etail, i);
596	return crcs;
597}
598
599/**
600 * qib_set_mtu - set the MTU
601 * @ppd: the perport data
602 * @arg: the new MTU
603 *
604 * We can handle "any" incoming size, the issue here is whether we
605 * need to restrict our outgoing size.   For now, we don't do any
606 * sanity checking on this, and we don't deal with what happens to
607 * programs that are already running when the size changes.
608 * NOTE: changing the MTU will usually cause the IBC to go back to
609 * link INIT state...
610 */
611int qib_set_mtu(struct qib_pportdata *ppd, u16 arg)
612{
613	u32 piosize;
614	int ret, chk;
615
616	if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
617	    arg != 4096) {
618		ret = -EINVAL;
619		goto bail;
620	}
621	chk = ib_mtu_enum_to_int(qib_ibmtu);
622	if (chk > 0 && arg > chk) {
623		ret = -EINVAL;
624		goto bail;
625	}
626
627	piosize = ppd->ibmaxlen;
628	ppd->ibmtu = arg;
629
630	if (arg >= (piosize - QIB_PIO_MAXIBHDR)) {
631		/* Only if it's not the initial value (or reset to it) */
632		if (piosize != ppd->init_ibmaxlen) {
633			if (arg > piosize && arg <= ppd->init_ibmaxlen)
634				piosize = ppd->init_ibmaxlen - 2 * sizeof(u32);
635			ppd->ibmaxlen = piosize;
636		}
637	} else if ((arg + QIB_PIO_MAXIBHDR) != ppd->ibmaxlen) {
638		piosize = arg + QIB_PIO_MAXIBHDR - 2 * sizeof(u32);
639		ppd->ibmaxlen = piosize;
640	}
641
642	ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_MTU, 0);
643
644	ret = 0;
645
646bail:
647	return ret;
648}
649
650int qib_set_lid(struct qib_pportdata *ppd, u32 lid, u8 lmc)
651{
652	struct qib_devdata *dd = ppd->dd;
 
653	ppd->lid = lid;
654	ppd->lmc = lmc;
655
656	dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LIDLMC,
657			 lid | (~((1U << lmc) - 1)) << 16);
658
659	qib_devinfo(dd->pcidev, "IB%u:%u got a lid: 0x%x\n",
660		    dd->unit, ppd->port, lid);
661
662	return 0;
663}
664
/*
 * The following deal with the "obviously simple" task of overriding the
 * state of the LEDs, which normally indicate link physical and logical
 * status.  The complications arise in dealing with different hardware
 * mappings and the board-dependent routine being called from interrupts,
 * and then there's the requirement to _flash_ them.
 */
#define LED_OVER_FREQ_SHIFT	8
#define LED_OVER_FREQ_MASK	(0xFF << LED_OVER_FREQ_SHIFT)
/* Below is "non-zero" to force override, but both actual LEDs are off */
#define LED_OVER_BOTH_OFF	8
676
677static void qib_run_led_override(unsigned long opaque)
678{
679	struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
 
680	struct qib_devdata *dd = ppd->dd;
681	int timeoff;
682	int ph_idx;
683
684	if (!(dd->flags & QIB_INITTED))
685		return;
686
687	ph_idx = ppd->led_override_phase++ & 1;
688	ppd->led_override = ppd->led_override_vals[ph_idx];
689	timeoff = ppd->led_override_timeoff;
690
691	dd->f_setextled(ppd, 1);
692	/*
693	 * don't re-fire the timer if user asked for it to be off; we let
694	 * it fire one more time after they turn it off to simplify
695	 */
696	if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
697		mod_timer(&ppd->led_override_timer, jiffies + timeoff);
698}
699
700void qib_set_led_override(struct qib_pportdata *ppd, unsigned int val)
701{
702	struct qib_devdata *dd = ppd->dd;
703	int timeoff, freq;
704
705	if (!(dd->flags & QIB_INITTED))
706		return;
707
708	/* First check if we are blinking. If not, use 1HZ polling */
709	timeoff = HZ;
710	freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
711
712	if (freq) {
713		/* For blink, set each phase from one nybble of val */
714		ppd->led_override_vals[0] = val & 0xF;
715		ppd->led_override_vals[1] = (val >> 4) & 0xF;
716		timeoff = (HZ << 4)/freq;
717	} else {
718		/* Non-blink set both phases the same. */
719		ppd->led_override_vals[0] = val & 0xF;
720		ppd->led_override_vals[1] = val & 0xF;
721	}
722	ppd->led_override_timeoff = timeoff;
723
724	/*
725	 * If the timer has not already been started, do so. Use a "quick"
726	 * timeout so the function will be called soon, to look at our request.
727	 */
728	if (atomic_inc_return(&ppd->led_override_timer_active) == 1) {
729		/* Need to start timer */
730		init_timer(&ppd->led_override_timer);
731		ppd->led_override_timer.function = qib_run_led_override;
732		ppd->led_override_timer.data = (unsigned long) ppd;
733		ppd->led_override_timer.expires = jiffies + 1;
734		add_timer(&ppd->led_override_timer);
735	} else {
736		if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
737			mod_timer(&ppd->led_override_timer, jiffies + 1);
738		atomic_dec(&ppd->led_override_timer_active);
739	}
740}
741
742/**
743 * qib_reset_device - reset the chip if possible
744 * @unit: the device to reset
745 *
746 * Whether or not reset is successful, we attempt to re-initialize the chip
747 * (that is, much like a driver unload/reload).  We clear the INITTED flag
748 * so that the various entry points will fail until we reinitialize.  For
749 * now, we only allow this if no user contexts are open that use chip resources
750 */
751int qib_reset_device(int unit)
752{
753	int ret, i;
754	struct qib_devdata *dd = qib_lookup(unit);
755	struct qib_pportdata *ppd;
756	unsigned long flags;
757	int pidx;
758
759	if (!dd) {
760		ret = -ENODEV;
761		goto bail;
762	}
763
764	qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit);
765
766	if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) {
767		qib_devinfo(dd->pcidev, "Invalid unit number %u or "
768			    "not initialized or not present\n", unit);
 
769		ret = -ENXIO;
770		goto bail;
771	}
772
773	spin_lock_irqsave(&dd->uctxt_lock, flags);
774	if (dd->rcd)
775		for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) {
776			if (!dd->rcd[i] || !dd->rcd[i]->cnt)
777				continue;
778			spin_unlock_irqrestore(&dd->uctxt_lock, flags);
779			ret = -EBUSY;
780			goto bail;
781		}
782	spin_unlock_irqrestore(&dd->uctxt_lock, flags);
783
784	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
785		ppd = dd->pport + pidx;
786		if (atomic_read(&ppd->led_override_timer_active)) {
787			/* Need to stop LED timer, _then_ shut off LEDs */
788			del_timer_sync(&ppd->led_override_timer);
789			atomic_set(&ppd->led_override_timer_active, 0);
790		}
791
792		/* Shut off LEDs after we are sure timer is not running */
793		ppd->led_override = LED_OVER_BOTH_OFF;
794		dd->f_setextled(ppd, 0);
795		if (dd->flags & QIB_HAS_SEND_DMA)
796			qib_teardown_sdma(ppd);
797	}
798
799	ret = dd->f_reset(dd);
800	if (ret == 1)
801		ret = qib_init(dd, 1);
802	else
803		ret = -EAGAIN;
804	if (ret)
805		qib_dev_err(dd, "Reinitialize unit %u after "
806			    "reset failed with %d\n", unit, ret);
 
807	else
808		qib_devinfo(dd->pcidev, "Reinitialized unit %u after "
809			    "resetting\n", unit);
 
810
811bail:
812	return ret;
813}