   1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
   2/*
   3 * Copyright(c) 2015-2018 Intel Corporation.
   4 */
   5
   6#include <linux/net.h>
   7#include <rdma/opa_addr.h>
   8#define OPA_NUM_PKEY_BLOCKS_PER_SMP (OPA_SMP_DR_DATA_SIZE \
   9			/ (OPA_PARTITION_TABLE_BLK_SIZE * sizeof(u16)))
  10
  11#include "hfi.h"
  12#include "mad.h"
  13#include "trace.h"
  14#include "qp.h"
  15#include "vnic.h"
  16
  17/* the reset value from the FM is supposed to be 0xffff, handle both */
  18#define OPA_LINK_WIDTH_RESET_OLD 0x0fff
  19#define OPA_LINK_WIDTH_RESET 0xffff
  20
  21struct trap_node {
  22	struct list_head list;
  23	struct opa_mad_notice_attr data;
  24	__be64 tid;
  25	int len;
  26	u32 retry;
  27	u8 in_use;
  28	u8 repress;
  29};
  30
  31static int smp_length_check(u32 data_size, u32 request_len)
  32{
  33	if (unlikely(request_len < data_size))
  34		return -EINVAL;
  35
  36	return 0;
  37}
  38
  39static int reply(struct ib_mad_hdr *smp)
  40{
  41	/*
  42	 * The verbs framework will handle the directed/LID route
  43	 * packet changes.
  44	 */
  45	smp->method = IB_MGMT_METHOD_GET_RESP;
  46	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
  47		smp->status |= IB_SMP_DIRECTION;
  48	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
  49}
  50
  51static inline void clear_opa_smp_data(struct opa_smp *smp)
  52{
  53	void *data = opa_get_smp_data(smp);
  54	size_t size = opa_get_smp_data_size(smp);
  55
  56	memset(data, 0, size);
  57}
  58
  59static u16 hfi1_lookup_pkey_value(struct hfi1_ibport *ibp, int pkey_idx)
  60{
  61	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
  62
  63	if (pkey_idx < ARRAY_SIZE(ppd->pkeys))
  64		return ppd->pkeys[pkey_idx];
  65
  66	return 0;
  67}
  68
  69void hfi1_event_pkey_change(struct hfi1_devdata *dd, u32 port)
  70{
  71	struct ib_event event;
  72
  73	event.event = IB_EVENT_PKEY_CHANGE;
  74	event.device = &dd->verbs_dev.rdi.ibdev;
  75	event.element.port_num = port;
  76	ib_dispatch_event(&event);
  77}
  78
  79/*
  80 * If the port is down, clean up all pending traps.  We need to be careful
  81 * with the given trap, because it may be queued.
  82 */
  83static void cleanup_traps(struct hfi1_ibport *ibp, struct trap_node *trap)
  84{
  85	struct trap_node *node, *q;
  86	unsigned long flags;
  87	struct list_head trap_list;
  88	int i;
  89
  90	for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) {
  91		spin_lock_irqsave(&ibp->rvp.lock, flags);
  92		list_replace_init(&ibp->rvp.trap_lists[i].list, &trap_list);
  93		ibp->rvp.trap_lists[i].list_len = 0;
  94		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
  95
  96		/*
  97		 * Remove all items from the list, freeing all the non-given
  98		 * traps.
  99		 */
 100		list_for_each_entry_safe(node, q, &trap_list, list) {
 101			list_del(&node->list);
 102			if (node != trap)
 103				kfree(node);
 104		}
 105	}
 106
 107	/*
 108	 * If this wasn't on one of the lists it would not be freed.  If it
 109	 * was on the list, it is now safe to free.
 110	 */
 111	kfree(trap);
 112}
 113
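     /*
      * Queue a trap for transmission.  If it is already on its list, just
      * bump the retry count; otherwise append it, or drop it when the list
      * is full.  When no trap timer is pending, arm the timer and return
      * the trap at the head of the queue so the caller can send it now;
      * otherwise return NULL and let the timer handler send it later.
      */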
 114static struct trap_node *check_and_add_trap(struct hfi1_ibport *ibp,
 115					    struct trap_node *trap)
 116{
 117	struct trap_node *node;
 118	struct trap_list *trap_list;
 119	unsigned long flags;
 120	unsigned long timeout;
 121	int found = 0;
 122	unsigned int queue_id;
 123	static int trap_count;
 124
 125	queue_id = trap->data.generic_type & 0x0F;
 126	if (queue_id >= RVT_MAX_TRAP_LISTS) {
 127		trap_count++;
 128		pr_err_ratelimited("hfi1: Invalid trap 0x%0x dropped. Total dropped: %d\n",
 129				   trap->data.generic_type, trap_count);
 130		kfree(trap);
 131		return NULL;
 132	}
 133
 134	/*
 135	 * Since the retry (handle timeout) does not remove a trap request
 136	 * from the list, all we have to do is compare the node.
 137	 */
 138	spin_lock_irqsave(&ibp->rvp.lock, flags);
 139	trap_list = &ibp->rvp.trap_lists[queue_id];
 140
 141	list_for_each_entry(node, &trap_list->list, list) {
 142		if (node == trap) {
 143			node->retry++;
 144			found = 1;
 145			break;
 146		}
 147	}
 148
  149	/* If it is not on the list, add it, limited to RVT_MAX_TRAP_LEN. */
 150	if (!found) {
 151		if (trap_list->list_len < RVT_MAX_TRAP_LEN) {
 152			trap_list->list_len++;
 153			list_add_tail(&trap->list, &trap_list->list);
 154		} else {
 155			pr_warn_ratelimited("hfi1: Maximum trap limit reached for 0x%0x traps\n",
 156					    trap->data.generic_type);
 157			kfree(trap);
 158		}
 159	}
 160
 161	/*
 162	 * Next check to see if there is a timer pending.  If not, set it up
 163	 * and get the first trap from the list.
 164	 */
 165	node = NULL;
 166	if (!timer_pending(&ibp->rvp.trap_timer)) {
 167		/*
 168		 * o14-2
 169		 * If the time out is set we have to wait until it expires
 170		 * before the trap can be sent.
 171		 * This should be > RVT_TRAP_TIMEOUT
 172		 */
 173		timeout = (RVT_TRAP_TIMEOUT *
 174			   (1UL << ibp->rvp.subnet_timeout)) / 1000;
 175		mod_timer(&ibp->rvp.trap_timer,
 176			  jiffies + usecs_to_jiffies(timeout));
 177		node = list_first_entry(&trap_list->list, struct trap_node,
 178					list);
 179		node->in_use = 1;
 180	}
 181	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 182
 183	return node;
 184}
 185
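     /*
      * Handle a TrapRepress from the SM: if the trap at the head of a
      * queue matches the repressed TID, either flag it as repressed (when
      * a send is still in progress) or remove and free it.
      */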
 186static void subn_handle_opa_trap_repress(struct hfi1_ibport *ibp,
 187					 struct opa_smp *smp)
 188{
 189	struct trap_list *trap_list;
 190	struct trap_node *trap;
 191	unsigned long flags;
 192	int i;
 193
 194	if (smp->attr_id != IB_SMP_ATTR_NOTICE)
 195		return;
 196
 197	spin_lock_irqsave(&ibp->rvp.lock, flags);
 198	for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) {
 199		trap_list = &ibp->rvp.trap_lists[i];
 200		trap = list_first_entry_or_null(&trap_list->list,
 201						struct trap_node, list);
 202		if (trap && trap->tid == smp->tid) {
 203			if (trap->in_use) {
 204				trap->repress = 1;
 205			} else {
 206				trap_list->list_len--;
 207				list_del(&trap->list);
 208				kfree(trap);
 209			}
 210			break;
 211		}
 212	}
 213	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 214}
 215
 216static void hfi1_update_sm_ah_attr(struct hfi1_ibport *ibp,
 217				   struct rdma_ah_attr *attr, u32 dlid)
 218{
 219	rdma_ah_set_dlid(attr, dlid);
 220	rdma_ah_set_port_num(attr, ppd_from_ibp(ibp)->port);
 221	if (dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 222		struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
 223
 224		rdma_ah_set_ah_flags(attr, IB_AH_GRH);
 225		grh->sgid_index = 0;
 226		grh->hop_limit = 1;
 227		grh->dgid.global.subnet_prefix =
 228			ibp->rvp.gid_prefix;
 229		grh->dgid.global.interface_id = OPA_MAKE_ID(dlid);
 230	}
 231}
 232
 233static int hfi1_modify_qp0_ah(struct hfi1_ibport *ibp,
 234			      struct rvt_ah *ah, u32 dlid)
 235{
 236	struct rdma_ah_attr attr;
 237	struct rvt_qp *qp0;
 238	int ret = -EINVAL;
 239
 240	memset(&attr, 0, sizeof(attr));
 241	attr.type = ah->ibah.type;
 242	hfi1_update_sm_ah_attr(ibp, &attr, dlid);
 243	rcu_read_lock();
 244	qp0 = rcu_dereference(ibp->rvp.qp[0]);
 245	if (qp0)
 246		ret = rdma_modify_ah(&ah->ibah, &attr);
 247	rcu_read_unlock();
 248	return ret;
 249}
 250
 251static struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u32 dlid)
 252{
 253	struct rdma_ah_attr attr;
 254	struct ib_ah *ah = ERR_PTR(-EINVAL);
 255	struct rvt_qp *qp0;
 256	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 257	struct hfi1_devdata *dd = dd_from_ppd(ppd);
 258	u32 port_num = ppd->port;
 259
 260	memset(&attr, 0, sizeof(attr));
 261	attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num);
 262	hfi1_update_sm_ah_attr(ibp, &attr, dlid);
 263	rcu_read_lock();
 264	qp0 = rcu_dereference(ibp->rvp.qp[0]);
 265	if (qp0)
 266		ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0);
 267	rcu_read_unlock();
 268	return ah;
 269}
 270
 271static void send_trap(struct hfi1_ibport *ibp, struct trap_node *trap)
 272{
 273	struct ib_mad_send_buf *send_buf;
 274	struct ib_mad_agent *agent;
 275	struct opa_smp *smp;
 276	unsigned long flags;
 277	int pkey_idx;
 278	u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;
 279
 280	agent = ibp->rvp.send_agent;
 281	if (!agent) {
 282		cleanup_traps(ibp, trap);
 283		return;
 284	}
 285
 286	/* o14-3.2.1 */
 287	if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) {
 288		cleanup_traps(ibp, trap);
 289		return;
 290	}
 291
 292	/* Add the trap to the list if necessary and see if we can send it */
 293	trap = check_and_add_trap(ibp, trap);
 294	if (!trap)
 295		return;
 296
 297	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
 298	if (pkey_idx < 0) {
 299		pr_warn("%s: failed to find limited mgmt pkey, defaulting 0x%x\n",
 300			__func__, hfi1_get_pkey(ibp, 1));
 301		pkey_idx = 1;
 302	}
 303
 304	send_buf = ib_create_send_mad(agent, qpn, pkey_idx, 0,
 305				      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 306				      GFP_ATOMIC, IB_MGMT_BASE_VERSION);
 307	if (IS_ERR(send_buf))
 308		return;
 309
 310	smp = send_buf->mad;
 311	smp->base_version = OPA_MGMT_BASE_VERSION;
 312	smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
 313	smp->class_version = OPA_SM_CLASS_VERSION;
 314	smp->method = IB_MGMT_METHOD_TRAP;
 315
 316	/* Only update the transaction ID for new traps (o13-5). */
 317	if (trap->tid == 0) {
 318		ibp->rvp.tid++;
 319		/* make sure that tid != 0 */
 320		if (ibp->rvp.tid == 0)
 321			ibp->rvp.tid++;
 322		trap->tid = cpu_to_be64(ibp->rvp.tid);
 323	}
 324	smp->tid = trap->tid;
 325
 326	smp->attr_id = IB_SMP_ATTR_NOTICE;
 327	/* o14-1: smp->mkey = 0; */
 328
 329	memcpy(smp->route.lid.data, &trap->data, trap->len);
 330
 331	spin_lock_irqsave(&ibp->rvp.lock, flags);
 332	if (!ibp->rvp.sm_ah) {
 333		if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
 334			struct ib_ah *ah;
 335
 336			ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
 337			if (IS_ERR(ah)) {
 338				spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 339				return;
 340			}
 341			send_buf->ah = ah;
 342			ibp->rvp.sm_ah = ibah_to_rvtah(ah);
 343		} else {
 344			spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 345			return;
 346		}
 347	} else {
 348		send_buf->ah = &ibp->rvp.sm_ah->ibah;
 349	}
 350
 351	/*
 352	 * If the trap was repressed while things were getting set up, don't
 353	 * bother sending it. This could happen for a retry.
 354	 */
 355	if (trap->repress) {
 356		list_del(&trap->list);
 357		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 358		kfree(trap);
 359		ib_free_send_mad(send_buf);
 360		return;
 361	}
 362
 363	trap->in_use = 0;
 364	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 365
 366	if (ib_post_send_mad(send_buf, NULL))
 367		ib_free_send_mad(send_buf);
 368}
 369
 370void hfi1_handle_trap_timer(struct timer_list *t)
 371{
 372	struct hfi1_ibport *ibp = from_timer(ibp, t, rvp.trap_timer);
 373	struct trap_node *trap = NULL;
 374	unsigned long flags;
 375	int i;
 376
 377	/* Find the trap with the highest priority */
 378	spin_lock_irqsave(&ibp->rvp.lock, flags);
 379	for (i = 0; !trap && i < RVT_MAX_TRAP_LISTS; i++) {
 380		trap = list_first_entry_or_null(&ibp->rvp.trap_lists[i].list,
 381						struct trap_node, list);
 382	}
 383	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 384
 385	if (trap)
 386		send_trap(ibp, trap);
 387}
 388
 389static struct trap_node *create_trap_node(u8 type, __be16 trap_num, u32 lid)
 390{
 391	struct trap_node *trap;
 392
 393	trap = kzalloc(sizeof(*trap), GFP_ATOMIC);
 394	if (!trap)
 395		return NULL;
 396
 397	INIT_LIST_HEAD(&trap->list);
 398	trap->data.generic_type = type;
 399	trap->data.prod_type_lsb = IB_NOTICE_PROD_CA;
 400	trap->data.trap_num = trap_num;
 401	trap->data.issuer_lid = cpu_to_be32(lid);
 402
 403	return trap;
 404}
 405
 406/*
 407 * Send a bad P_Key trap (ch. 14.3.8).
 408 */
 409void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl,
 410		   u32 qp1, u32 qp2, u32 lid1, u32 lid2)
 411{
 412	struct trap_node *trap;
 413	u32 lid = ppd_from_ibp(ibp)->lid;
 414
 415	ibp->rvp.n_pkt_drops++;
 416	ibp->rvp.pkey_violations++;
 417
 418	trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_P_KEY,
 419				lid);
 420	if (!trap)
 421		return;
 422
 423	/* Send violation trap */
 424	trap->data.ntc_257_258.lid1 = cpu_to_be32(lid1);
 425	trap->data.ntc_257_258.lid2 = cpu_to_be32(lid2);
 426	trap->data.ntc_257_258.key = cpu_to_be32(key);
 427	trap->data.ntc_257_258.sl = sl << 3;
 428	trap->data.ntc_257_258.qp1 = cpu_to_be32(qp1);
 429	trap->data.ntc_257_258.qp2 = cpu_to_be32(qp2);
 430
 431	trap->len = sizeof(trap->data);
 432	send_trap(ibp, trap);
 433}
 434
 435/*
 436 * Send a bad M_Key trap (ch. 14.3.9).
 437 */
 438static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
 439		     __be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt)
 440{
 441	struct trap_node *trap;
 442	u32 lid = ppd_from_ibp(ibp)->lid;
 443
 444	trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_M_KEY,
 445				lid);
 446	if (!trap)
 447		return;
 448
 449	/* Send violation trap */
 450	trap->data.ntc_256.lid = trap->data.issuer_lid;
 451	trap->data.ntc_256.method = mad->method;
 452	trap->data.ntc_256.attr_id = mad->attr_id;
 453	trap->data.ntc_256.attr_mod = mad->attr_mod;
 454	trap->data.ntc_256.mkey = mkey;
 455	if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
 456		trap->data.ntc_256.dr_slid = dr_slid;
 457		trap->data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
 458		if (hop_cnt > ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path)) {
 459			trap->data.ntc_256.dr_trunc_hop |=
 460				IB_NOTICE_TRAP_DR_TRUNC;
 461			hop_cnt = ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path);
 462		}
 463		trap->data.ntc_256.dr_trunc_hop |= hop_cnt;
 464		memcpy(trap->data.ntc_256.dr_rtn_path, return_path,
 465		       hop_cnt);
 466	}
 467
 468	trap->len = sizeof(trap->data);
 469
 470	send_trap(ibp, trap);
 471}
 472
 473/*
 474 * Send a Port Capability Mask Changed trap (ch. 14.3.11).
 475 */
 476void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u32 port_num)
 477{
 478	struct trap_node *trap;
 479	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
 480	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
 481	struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
 482	u32 lid = ppd_from_ibp(ibp)->lid;
 483
 484	trap = create_trap_node(IB_NOTICE_TYPE_INFO,
 485				OPA_TRAP_CHANGE_CAPABILITY,
 486				lid);
 487	if (!trap)
 488		return;
 489
 490	trap->data.ntc_144.lid = trap->data.issuer_lid;
 491	trap->data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
 492	trap->data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags);
 493
 494	trap->len = sizeof(trap->data);
 495	send_trap(ibp, trap);
 496}
 497
 498/*
 499 * Send a System Image GUID Changed trap (ch. 14.3.12).
 500 */
 501void hfi1_sys_guid_chg(struct hfi1_ibport *ibp)
 502{
 503	struct trap_node *trap;
 504	u32 lid = ppd_from_ibp(ibp)->lid;
 505
 506	trap = create_trap_node(IB_NOTICE_TYPE_INFO, OPA_TRAP_CHANGE_SYSGUID,
 507				lid);
 508	if (!trap)
 509		return;
 510
 511	trap->data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid;
 512	trap->data.ntc_145.lid = trap->data.issuer_lid;
 513
 514	trap->len = sizeof(trap->data);
 515	send_trap(ibp, trap);
 516}
 517
 518/*
 519 * Send a Node Description Changed trap (ch. 14.3.13).
 520 */
 521void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
 522{
 523	struct trap_node *trap;
 524	u32 lid = ppd_from_ibp(ibp)->lid;
 525
 526	trap = create_trap_node(IB_NOTICE_TYPE_INFO,
 527				OPA_TRAP_CHANGE_CAPABILITY,
 528				lid);
 529	if (!trap)
 530		return;
 531
 532	trap->data.ntc_144.lid = trap->data.issuer_lid;
 533	trap->data.ntc_144.change_flags =
 534		cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG);
 535
 536	trap->len = sizeof(trap->data);
 537	send_trap(ibp, trap);
 538}
 539
 540static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
 541				   u8 *data, struct ib_device *ibdev,
 542				   u32 port, u32 *resp_len, u32 max_len)
 543{
 544	struct opa_node_description *nd;
 545
 546	if (am || smp_length_check(sizeof(*nd), max_len)) {
 547		smp->status |= IB_SMP_INVALID_FIELD;
 548		return reply((struct ib_mad_hdr *)smp);
 549	}
 550
 551	nd = (struct opa_node_description *)data;
 552
 553	memcpy(nd->data, ibdev->node_desc, sizeof(nd->data));
 554
 555	if (resp_len)
 556		*resp_len += sizeof(*nd);
 557
 558	return reply((struct ib_mad_hdr *)smp);
 559}
 560
 561static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
 562				   struct ib_device *ibdev, u32 port,
 563				   u32 *resp_len, u32 max_len)
 564{
 565	struct opa_node_info *ni;
 566	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
  567	u32 pidx = port - 1; /* IB numbers ports from 1, hw from 0 */
 568
 569	ni = (struct opa_node_info *)data;
 570
 571	/* GUID 0 is illegal */
 572	if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
 573	    smp_length_check(sizeof(*ni), max_len) ||
 574	    get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
 575		smp->status |= IB_SMP_INVALID_FIELD;
 576		return reply((struct ib_mad_hdr *)smp);
 577	}
 578
 579	ni->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
 580	ni->base_version = OPA_MGMT_BASE_VERSION;
 581	ni->class_version = OPA_SM_CLASS_VERSION;
 582	ni->node_type = 1;     /* channel adapter */
 583	ni->num_ports = ibdev->phys_port_cnt;
 584	/* This is already in network order */
 585	ni->system_image_guid = ib_hfi1_sys_image_guid;
 586	ni->node_guid = ibdev->node_guid;
 587	ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
 588	ni->device_id = cpu_to_be16(dd->pcidev->device);
 589	ni->revision = cpu_to_be32(dd->minrev);
 590	ni->local_port_num = port;
 591	ni->vendor_id[0] = dd->oui1;
 592	ni->vendor_id[1] = dd->oui2;
 593	ni->vendor_id[2] = dd->oui3;
 594
 595	if (resp_len)
 596		*resp_len += sizeof(*ni);
 597
 598	return reply((struct ib_mad_hdr *)smp);
 599}
 600
 601static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
 602			     u32 port)
 603{
 604	struct ib_node_info *nip = (struct ib_node_info *)&smp->data;
 605	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
  606	u32 pidx = port - 1; /* IB numbers ports from 1, hw from 0 */
 607
 608	/* GUID 0 is illegal */
 609	if (smp->attr_mod || pidx >= dd->num_pports ||
 610	    ibdev->node_guid == 0 ||
 611	    get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
 612		smp->status |= IB_SMP_INVALID_FIELD;
 613		return reply((struct ib_mad_hdr *)smp);
 614	}
 615
 616	nip->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
 617	nip->base_version = OPA_MGMT_BASE_VERSION;
 618	nip->class_version = OPA_SM_CLASS_VERSION;
 619	nip->node_type = 1;     /* channel adapter */
 620	nip->num_ports = ibdev->phys_port_cnt;
 621	/* This is already in network order */
 622	nip->sys_guid = ib_hfi1_sys_image_guid;
 623	nip->node_guid = ibdev->node_guid;
 624	nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
 625	nip->device_id = cpu_to_be16(dd->pcidev->device);
 626	nip->revision = cpu_to_be32(dd->minrev);
 627	nip->local_port_num = port;
 628	nip->vendor_id[0] = dd->oui1;
 629	nip->vendor_id[1] = dd->oui2;
 630	nip->vendor_id[2] = dd->oui3;
 631
 632	return reply((struct ib_mad_hdr *)smp);
 633}
 634
 635static void set_link_width_enabled(struct hfi1_pportdata *ppd, u32 w)
 636{
 637	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_ENB, w);
 638}
 639
 640static void set_link_width_downgrade_enabled(struct hfi1_pportdata *ppd, u32 w)
 641{
 642	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_DG_ENB, w);
 643}
 644
 645static void set_link_speed_enabled(struct hfi1_pportdata *ppd, u32 s)
 646{
 647	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_SPD_ENB, s);
 648}
 649
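     /*
      * Validate the M_Key of an incoming SMP.  A zero local mkey, a
      * matching mkey, or IB_MAD_IGNORE_MKEY always passes.  On a failed
      * Set/TrapRepress (or Get with mkeyprot >= 2), count the violation,
      * arm the mkey lease timer if one is configured, and send a Bad M_Key
      * trap.  Returns 1 if the request must not be processed further,
      * 0 otherwise.
      */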
 650static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
 651		      int mad_flags, __be64 mkey, __be32 dr_slid,
 652		      u8 return_path[], u8 hop_cnt)
 653{
 654	int valid_mkey = 0;
 655	int ret = 0;
 656
 657	/* Is the mkey in the process of expiring? */
 658	if (ibp->rvp.mkey_lease_timeout &&
 659	    time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
 660		/* Clear timeout and mkey protection field. */
 661		ibp->rvp.mkey_lease_timeout = 0;
 662		ibp->rvp.mkeyprot = 0;
 663	}
 664
 665	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->rvp.mkey == 0 ||
 666	    ibp->rvp.mkey == mkey)
 667		valid_mkey = 1;
 668
 669	/* Unset lease timeout on any valid Get/Set/TrapRepress */
 670	if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
 671	    (mad->method == IB_MGMT_METHOD_GET ||
 672	     mad->method == IB_MGMT_METHOD_SET ||
 673	     mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
 674		ibp->rvp.mkey_lease_timeout = 0;
 675
 676	if (!valid_mkey) {
 677		switch (mad->method) {
 678		case IB_MGMT_METHOD_GET:
 679			/* Bad mkey not a violation below level 2 */
 680			if (ibp->rvp.mkeyprot < 2)
 681				break;
 682			fallthrough;
 683		case IB_MGMT_METHOD_SET:
 684		case IB_MGMT_METHOD_TRAP_REPRESS:
 685			if (ibp->rvp.mkey_violations != 0xFFFF)
 686				++ibp->rvp.mkey_violations;
 687			if (!ibp->rvp.mkey_lease_timeout &&
 688			    ibp->rvp.mkey_lease_period)
 689				ibp->rvp.mkey_lease_timeout = jiffies +
 690					ibp->rvp.mkey_lease_period * HZ;
 691			/* Generate a trap notice. */
 692			bad_mkey(ibp, mad, mkey, dr_slid, return_path,
 693				 hop_cnt);
 694			ret = 1;
 695		}
 696	}
 697
 698	return ret;
 699}
 700
 701/*
 702 * The SMA caches reads from LCB registers in case the LCB is unavailable.
 703 * (The LCB is unavailable in certain link states, for example.)
 704 */
 705struct lcb_datum {
 706	u32 off;
 707	u64 val;
 708};
 709
 710static struct lcb_datum lcb_cache[] = {
 711	{ DC_LCB_STS_ROUND_TRIP_LTP_CNT, 0 },
 712};
 713
 714static int write_lcb_cache(u32 off, u64 val)
 715{
 716	int i;
 717
 718	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
 719		if (lcb_cache[i].off == off) {
 720			lcb_cache[i].val = val;
 721			return 0;
 722		}
 723	}
 724
 725	pr_warn("%s bad offset 0x%x\n", __func__, off);
 726	return -1;
 727}
 728
 729static int read_lcb_cache(u32 off, u64 *val)
 730{
 731	int i;
 732
 733	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
 734		if (lcb_cache[i].off == off) {
 735			*val = lcb_cache[i].val;
 736			return 0;
 737		}
 738	}
 739
 740	pr_warn("%s bad offset 0x%x\n", __func__, off);
 741	return -1;
 742}
 743
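     /*
      * Snapshot DC_LCB_STS_ROUND_TRIP_LTP_CNT into the LCB cache while the
      * LCB is readable, so later PortInfo queries can report it even when
      * the LCB has become inaccessible.
      */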
 744void read_ltp_rtt(struct hfi1_devdata *dd)
 745{
 746	u64 reg;
 747
 748	if (read_lcb_csr(dd, DC_LCB_STS_ROUND_TRIP_LTP_CNT, &reg))
 749		dd_dev_err(dd, "%s: unable to read LTP RTT\n", __func__);
 750	else
 751		write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
 752}
 753
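     /*
      * Fill an opa_port_info reply with the current port state: LIDs,
      * M_Key fields, link width/speed settings, per-VL MTUs, violation
      * counters, buffer units and the cached LCB round-trip LTP count.
      */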
 754static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 755				   struct ib_device *ibdev, u32 port,
 756				   u32 *resp_len, u32 max_len)
 757{
 758	int i;
 759	struct hfi1_devdata *dd;
 760	struct hfi1_pportdata *ppd;
 761	struct hfi1_ibport *ibp;
 762	struct opa_port_info *pi = (struct opa_port_info *)data;
 763	u8 mtu;
 764	u8 credit_rate;
 765	u8 is_beaconing_active;
 766	u32 state;
 767	u32 num_ports = OPA_AM_NPORT(am);
 768	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
 769	u32 buffer_units;
 770	u64 tmp = 0;
 771
 772	if (num_ports != 1 || smp_length_check(sizeof(*pi), max_len)) {
 773		smp->status |= IB_SMP_INVALID_FIELD;
 774		return reply((struct ib_mad_hdr *)smp);
 775	}
 776
 777	dd = dd_from_ibdev(ibdev);
 778	/* IB numbers ports from 1, hw from 0 */
 779	ppd = dd->pport + (port - 1);
 780	ibp = &ppd->ibport_data;
 781
 782	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
 783	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
 784		smp->status |= IB_SMP_INVALID_FIELD;
 785		return reply((struct ib_mad_hdr *)smp);
 786	}
 787
 788	pi->lid = cpu_to_be32(ppd->lid);
 789
 790	/* Only return the mkey if the protection field allows it. */
 791	if (!(smp->method == IB_MGMT_METHOD_GET &&
 792	      ibp->rvp.mkey != smp->mkey &&
 793	      ibp->rvp.mkeyprot == 1))
 794		pi->mkey = ibp->rvp.mkey;
 795
 796	pi->subnet_prefix = ibp->rvp.gid_prefix;
 797	pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
 798	pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
 799	pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
 800	pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
 801	pi->sa_qp = cpu_to_be32(ppd->sa_qp);
 802
 803	pi->link_width.enabled = cpu_to_be16(ppd->link_width_enabled);
 804	pi->link_width.supported = cpu_to_be16(ppd->link_width_supported);
 805	pi->link_width.active = cpu_to_be16(ppd->link_width_active);
 806
 807	pi->link_width_downgrade.supported =
 808			cpu_to_be16(ppd->link_width_downgrade_supported);
 809	pi->link_width_downgrade.enabled =
 810			cpu_to_be16(ppd->link_width_downgrade_enabled);
 811	pi->link_width_downgrade.tx_active =
 812			cpu_to_be16(ppd->link_width_downgrade_tx_active);
 813	pi->link_width_downgrade.rx_active =
 814			cpu_to_be16(ppd->link_width_downgrade_rx_active);
 815
 816	pi->link_speed.supported = cpu_to_be16(ppd->link_speed_supported);
 817	pi->link_speed.active = cpu_to_be16(ppd->link_speed_active);
 818	pi->link_speed.enabled = cpu_to_be16(ppd->link_speed_enabled);
 819
 820	state = driver_lstate(ppd);
 821
 822	if (start_of_sm_config && (state == IB_PORT_INIT))
 823		ppd->is_sm_config_started = 1;
 824
 825	pi->port_phys_conf = (ppd->port_type & 0xf);
 826
 827	pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
 828	pi->port_states.ledenable_offlinereason |=
 829		ppd->is_sm_config_started << 5;
 830	/*
 831	 * This pairs with the memory barrier in hfi1_start_led_override to
 832	 * ensure that we read the correct state of LED beaconing represented
 833	 * by led_override_timer_active
 834	 */
 835	smp_rmb();
 836	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
 837	pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
 838	pi->port_states.ledenable_offlinereason |=
 839		ppd->offline_disabled_reason;
 840
 841	pi->port_states.portphysstate_portstate =
 842		(driver_pstate(ppd) << 4) | state;
 843
 844	pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
 845
 846	memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
 847	for (i = 0; i < ppd->vls_supported; i++) {
 848		mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
 849		if ((i % 2) == 0)
 850			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
 851		else
 852			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
 853	}
 854	/* don't forget VL 15 */
 855	mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
 856	pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
 857	pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
 858	pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
 859	pi->partenforce_filterraw |=
 860		(ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
 861	if (ppd->part_enforce & HFI1_PART_ENFORCE_IN)
 862		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
 863	if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
 864		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
 865	pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
 866	/* P_KeyViolations are counted by hardware. */
 867	pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
 868	pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);
 869
 870	pi->vl.cap = ppd->vls_supported;
 871	pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
 872	pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
 873	pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);
 874
 875	pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;
 876
 877	pi->port_link_mode  = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
 878					  OPA_PORT_LINK_MODE_OPA << 5 |
 879					  OPA_PORT_LINK_MODE_OPA);
 880
 881	pi->port_ltp_crc_mode = cpu_to_be16(ppd->port_ltp_crc_mode);
 882
 883	pi->port_mode = cpu_to_be16(
 884				ppd->is_active_optimize_enabled ?
 885					OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);
 886
 887	pi->port_packet_format.supported =
 888		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
 889			    OPA_PORT_PACKET_FORMAT_16B);
 890	pi->port_packet_format.enabled =
 891		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
 892			    OPA_PORT_PACKET_FORMAT_16B);
 893
 894	/* flit_control.interleave is (OPA V1, version .76):
 895	 * bits		use
 896	 * ----		---
 897	 * 2		res
 898	 * 2		DistanceSupported
 899	 * 2		DistanceEnabled
 900	 * 5		MaxNextLevelTxEnabled
 901	 * 5		MaxNestLevelRxSupported
 902	 *
 903	 * HFI supports only "distance mode 1" (see OPA V1, version .76,
 904	 * section 9.6.2), so set DistanceSupported, DistanceEnabled
 905	 * to 0x1.
 906	 */
 907	pi->flit_control.interleave = cpu_to_be16(0x1400);
 908
 909	pi->link_down_reason = ppd->local_link_down_reason.sma;
 910	pi->neigh_link_down_reason = ppd->neigh_link_down_reason.sma;
 911	pi->port_error_action = cpu_to_be32(ppd->port_error_action);
 912	pi->mtucap = mtu_to_enum(hfi1_max_mtu, IB_MTU_4096);
 913
 914	/* 32.768 usec. response time (guessing) */
 915	pi->resptimevalue = 3;
 916
 917	pi->local_port_num = port;
 918
 919	/* buffer info for FM */
 920	pi->overall_buffer_space = cpu_to_be16(dd->link_credits);
 921
 922	pi->neigh_node_guid = cpu_to_be64(ppd->neighbor_guid);
 923	pi->neigh_port_num = ppd->neighbor_port_number;
 924	pi->port_neigh_mode =
 925		(ppd->neighbor_type & OPA_PI_MASK_NEIGH_NODE_TYPE) |
 926		(ppd->mgmt_allowed ? OPA_PI_MASK_NEIGH_MGMT_ALLOWED : 0) |
 927		(ppd->neighbor_fm_security ?
 928			OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS : 0);
 929
 930	/* HFIs shall always return VL15 credits to their
 931	 * neighbor in a timely manner, without any credit return pacing.
 932	 */
 933	credit_rate = 0;
 934	buffer_units  = (dd->vau) & OPA_PI_MASK_BUF_UNIT_BUF_ALLOC;
 935	buffer_units |= (dd->vcu << 3) & OPA_PI_MASK_BUF_UNIT_CREDIT_ACK;
 936	buffer_units |= (credit_rate << 6) &
 937				OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE;
 938	buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
 939	pi->buffer_units = cpu_to_be32(buffer_units);
 940
 941	pi->opa_cap_mask = cpu_to_be16(ibp->rvp.port_cap3_flags);
 942	pi->collectivemask_multicastmask = ((OPA_COLLECTIVE_NR & 0x7)
 943					    << 3 | (OPA_MCAST_NR & 0x7));
 944
 945	/* HFI supports a replay buffer 128 LTPs in size */
 946	pi->replay_depth.buffer = 0x80;
 947	/* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
 948	read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);
 949
 950	/*
 951	 * this counter is 16 bits wide, but the replay_depth.wire
 952	 * variable is only 8 bits
 953	 */
 954	if (tmp > 0xff)
 955		tmp = 0xff;
 956	pi->replay_depth.wire = tmp;
 957
 958	if (resp_len)
 959		*resp_len += sizeof(struct opa_port_info);
 960
 961	return reply((struct ib_mad_hdr *)smp);
 962}
 963
 964/**
 965 * get_pkeys - return the PKEY table
 966 * @dd: the hfi1_ib device
 967 * @port: the IB port number
 968 * @pkeys: the pkey table is placed here
 969 */
 970static int get_pkeys(struct hfi1_devdata *dd, u32 port, u16 *pkeys)
 971{
 972	struct hfi1_pportdata *ppd = dd->pport + port - 1;
 973
 974	memcpy(pkeys, ppd->pkeys, sizeof(ppd->pkeys));
 975
 976	return 0;
 977}
 978
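     /*
      * Return the requested partition table blocks in network byte order.
      * Only block 0 is backed by the hardware table, so any other start
      * block is rejected.
      */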
 979static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
 980				    struct ib_device *ibdev, u32 port,
 981				    u32 *resp_len, u32 max_len)
 982{
 983	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 984	u32 n_blocks_req = OPA_AM_NBLK(am);
 985	u32 start_block = am & 0x7ff;
 986	__be16 *p;
 987	u16 *q;
 988	int i;
 989	u16 n_blocks_avail;
 990	unsigned npkeys = hfi1_get_npkeys(dd);
 991	size_t size;
 992
 993	if (n_blocks_req == 0) {
  994		pr_warn("OPA Get PKey AM Invalid : P = %u; B = 0x%x; N = 0x%x\n",
 995			port, start_block, n_blocks_req);
 996		smp->status |= IB_SMP_INVALID_FIELD;
 997		return reply((struct ib_mad_hdr *)smp);
 998	}
 999
1000	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
1001
1002	size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
1003
1004	if (smp_length_check(size, max_len)) {
1005		smp->status |= IB_SMP_INVALID_FIELD;
1006		return reply((struct ib_mad_hdr *)smp);
1007	}
1008
1009	if (start_block + n_blocks_req > n_blocks_avail ||
1010	    n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
 1011		pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
 1012			start_block, n_blocks_req,
 1013			n_blocks_avail,
 1014			OPA_NUM_PKEY_BLOCKS_PER_SMP);
1015		smp->status |= IB_SMP_INVALID_FIELD;
1016		return reply((struct ib_mad_hdr *)smp);
1017	}
1018
1019	p = (__be16 *)data;
1020	q = (u16 *)data;
1021	/* get the real pkeys if we are requesting the first block */
1022	if (start_block == 0) {
1023		get_pkeys(dd, port, q);
1024		for (i = 0; i < npkeys; i++)
1025			p[i] = cpu_to_be16(q[i]);
1026		if (resp_len)
1027			*resp_len += size;
1028	} else {
1029		smp->status |= IB_SMP_INVALID_FIELD;
1030	}
1031	return reply((struct ib_mad_hdr *)smp);
1032}
1033
1034enum {
1035	HFI_TRANSITION_DISALLOWED,
1036	HFI_TRANSITION_IGNORED,
1037	HFI_TRANSITION_ALLOWED,
1038	HFI_TRANSITION_UNDEFINED,
1039};
1040
1041/*
1042 * Use shortened names to improve readability of
1043 * {logical,physical}_state_transitions
1044 */
1045enum {
1046	__D = HFI_TRANSITION_DISALLOWED,
1047	__I = HFI_TRANSITION_IGNORED,
1048	__A = HFI_TRANSITION_ALLOWED,
1049	__U = HFI_TRANSITION_UNDEFINED,
1050};
1051
1052/*
1053 * IB_PORTPHYSSTATE_POLLING (2) through OPA_PORTPHYSSTATE_MAX (11) are
1054 * represented in physical_state_transitions.
1055 */
1056#define __N_PHYSTATES (OPA_PORTPHYSSTATE_MAX - IB_PORTPHYSSTATE_POLLING + 1)
1057
1058/*
1059 * Within physical_state_transitions, rows represent "old" states,
1060 * columns "new" states, and physical_state_transitions.allowed[old][new]
1061 * indicates if the transition from old state to new state is legal (see
1062 * OPAg1v1, Table 6-4).
1063 */
1064static const struct {
1065	u8 allowed[__N_PHYSTATES][__N_PHYSTATES];
1066} physical_state_transitions = {
1067	{
1068		/* 2    3    4    5    6    7    8    9   10   11 */
1069	/* 2 */	{ __A, __A, __D, __D, __D, __D, __D, __D, __D, __D },
1070	/* 3 */	{ __A, __I, __D, __D, __D, __D, __D, __D, __D, __A },
1071	/* 4 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
1072	/* 5 */	{ __A, __A, __D, __I, __D, __D, __D, __D, __D, __D },
1073	/* 6 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
1074	/* 7 */	{ __D, __A, __D, __D, __D, __I, __D, __D, __D, __D },
1075	/* 8 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
1076	/* 9 */	{ __I, __A, __D, __D, __D, __D, __D, __I, __D, __D },
1077	/*10 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
1078	/*11 */	{ __D, __A, __D, __D, __D, __D, __D, __D, __D, __I },
1079	}
1080};
1081
1082/*
1083 * IB_PORT_DOWN (1) through IB_PORT_ACTIVE_DEFER (5) are represented
 1084 * in logical_state_transitions.
1085 */
1086
1087#define __N_LOGICAL_STATES (IB_PORT_ACTIVE_DEFER - IB_PORT_DOWN + 1)
1088
1089/*
1090 * Within logical_state_transitions rows represent "old" states,
1091 * columns "new" states, and logical_state_transitions.allowed[old][new]
1092 * indicates if the transition from old state to new state is legal (see
1093 * OPAg1v1, Table 9-12).
1094 */
1095static const struct {
1096	u8 allowed[__N_LOGICAL_STATES][__N_LOGICAL_STATES];
1097} logical_state_transitions = {
1098	{
1099		/* 1    2    3    4    5 */
1100	/* 1 */	{ __I, __D, __D, __D, __U},
1101	/* 2 */	{ __D, __I, __A, __D, __U},
1102	/* 3 */	{ __D, __D, __I, __A, __U},
1103	/* 4 */	{ __D, __D, __I, __I, __U},
1104	/* 5 */	{ __U, __U, __U, __U, __U},
1105	}
1106};
1107
1108static int logical_transition_allowed(int old, int new)
1109{
1110	if (old < IB_PORT_NOP || old > IB_PORT_ACTIVE_DEFER ||
1111	    new < IB_PORT_NOP || new > IB_PORT_ACTIVE_DEFER) {
1112		pr_warn("invalid logical state(s) (old %d new %d)\n",
1113			old, new);
1114		return HFI_TRANSITION_UNDEFINED;
1115	}
1116
1117	if (new == IB_PORT_NOP)
1118		return HFI_TRANSITION_ALLOWED; /* always allowed */
1119
1120	/* adjust states for indexing into logical_state_transitions */
1121	old -= IB_PORT_DOWN;
1122	new -= IB_PORT_DOWN;
1123
1124	if (old < 0 || new < 0)
1125		return HFI_TRANSITION_UNDEFINED;
1126	return logical_state_transitions.allowed[old][new];
1127}
1128
1129static int physical_transition_allowed(int old, int new)
1130{
1131	if (old < IB_PORTPHYSSTATE_NOP || old > OPA_PORTPHYSSTATE_MAX ||
1132	    new < IB_PORTPHYSSTATE_NOP || new > OPA_PORTPHYSSTATE_MAX) {
1133		pr_warn("invalid physical state(s) (old %d new %d)\n",
1134			old, new);
1135		return HFI_TRANSITION_UNDEFINED;
1136	}
1137
1138	if (new == IB_PORTPHYSSTATE_NOP)
1139		return HFI_TRANSITION_ALLOWED; /* always allowed */
1140
1141	/* adjust states for indexing into physical_state_transitions */
1142	old -= IB_PORTPHYSSTATE_POLLING;
1143	new -= IB_PORTPHYSSTATE_POLLING;
1144
1145	if (old < 0 || new < 0)
1146		return HFI_TRANSITION_UNDEFINED;
1147	return physical_state_transitions.allowed[old][new];
1148}
1149
1150static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
1151					  u32 logical_new, u32 physical_new)
1152{
1153	u32 physical_old = driver_pstate(ppd);
1154	u32 logical_old = driver_lstate(ppd);
1155	int ret, logical_allowed, physical_allowed;
1156
1157	ret = logical_transition_allowed(logical_old, logical_new);
1158	logical_allowed = ret;
1159
1160	if (ret == HFI_TRANSITION_DISALLOWED ||
1161	    ret == HFI_TRANSITION_UNDEFINED) {
1162		pr_warn("invalid logical state transition %s -> %s\n",
1163			opa_lstate_name(logical_old),
1164			opa_lstate_name(logical_new));
1165		return ret;
1166	}
1167
1168	ret = physical_transition_allowed(physical_old, physical_new);
1169	physical_allowed = ret;
1170
1171	if (ret == HFI_TRANSITION_DISALLOWED ||
1172	    ret == HFI_TRANSITION_UNDEFINED) {
1173		pr_warn("invalid physical state transition %s -> %s\n",
1174			opa_pstate_name(physical_old),
1175			opa_pstate_name(physical_new));
1176		return ret;
1177	}
1178
1179	if (logical_allowed == HFI_TRANSITION_IGNORED &&
1180	    physical_allowed == HFI_TRANSITION_IGNORED)
1181		return HFI_TRANSITION_IGNORED;
1182
1183	/*
1184	 * A change request of Physical Port State from
1185	 * 'Offline' to 'Polling' should be ignored.
1186	 */
1187	if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
1188	    (physical_new == IB_PORTPHYSSTATE_POLLING))
1189		return HFI_TRANSITION_IGNORED;
1190
1191	/*
1192	 * Either physical_allowed or logical_allowed is
1193	 * HFI_TRANSITION_ALLOWED.
1194	 */
1195	return HFI_TRANSITION_ALLOWED;
1196}
1197
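     /*
      * Apply the requested logical and physical port state change after
      * validating the transition.  Returns a MAD result only when the
      * reply must be suppressed (a non-local Disable would send it through
      * the port being disabled); otherwise returns 0.
      */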
1198static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
1199			   u32 logical_state, u32 phys_state, int local_mad)
1200{
1201	struct hfi1_devdata *dd = ppd->dd;
1202	u32 link_state;
1203	int ret;
1204
1205	ret = port_states_transition_allowed(ppd, logical_state, phys_state);
1206	if (ret == HFI_TRANSITION_DISALLOWED ||
1207	    ret == HFI_TRANSITION_UNDEFINED) {
1208		/* error message emitted above */
1209		smp->status |= IB_SMP_INVALID_FIELD;
1210		return 0;
1211	}
1212
1213	if (ret == HFI_TRANSITION_IGNORED)
1214		return 0;
1215
1216	if ((phys_state != IB_PORTPHYSSTATE_NOP) &&
1217	    !(logical_state == IB_PORT_DOWN ||
 1218	      logical_state == IB_PORT_NOP)) {
1219		pr_warn("SubnSet(OPA_PortInfo) port state invalid: logical_state 0x%x physical_state 0x%x\n",
1220			logical_state, phys_state);
1221		smp->status |= IB_SMP_INVALID_FIELD;
1222	}
1223
1224	/*
1225	 * Logical state changes are summarized in OPAv1g1 spec.,
1226	 * Table 9-12; physical state changes are summarized in
 1227	 * OPAv1g1 spec., Table 6-4.
1228	 */
1229	switch (logical_state) {
1230	case IB_PORT_NOP:
1231		if (phys_state == IB_PORTPHYSSTATE_NOP)
1232			break;
1233		fallthrough;
1234	case IB_PORT_DOWN:
1235		if (phys_state == IB_PORTPHYSSTATE_NOP) {
1236			link_state = HLS_DN_DOWNDEF;
1237		} else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
1238			link_state = HLS_DN_POLL;
1239			set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
1240					     0, OPA_LINKDOWN_REASON_FM_BOUNCE);
1241		} else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
1242			link_state = HLS_DN_DISABLE;
1243		} else {
1244			pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
1245				phys_state);
1246			smp->status |= IB_SMP_INVALID_FIELD;
1247			break;
1248		}
1249
1250		if ((link_state == HLS_DN_POLL ||
1251		     link_state == HLS_DN_DOWNDEF)) {
1252			/*
1253			 * Going to poll.  No matter what the current state,
1254			 * always move offline first, then tune and start the
1255			 * link.  This correctly handles a FM link bounce and
1256			 * a link enable.  Going offline is a no-op if already
1257			 * offline.
1258			 */
1259			set_link_state(ppd, HLS_DN_OFFLINE);
1260			start_link(ppd);
1261		} else {
1262			set_link_state(ppd, link_state);
1263		}
1264		if (link_state == HLS_DN_DISABLE &&
1265		    (ppd->offline_disabled_reason >
1266		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
1267		     ppd->offline_disabled_reason ==
1268		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
1269			ppd->offline_disabled_reason =
1270			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
1271		/*
1272		 * Don't send a reply if the response would be sent
1273		 * through the disabled port.
1274		 */
1275		if (link_state == HLS_DN_DISABLE && !local_mad)
1276			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
1277		break;
1278	case IB_PORT_ARMED:
1279		ret = set_link_state(ppd, HLS_UP_ARMED);
1280		if (!ret)
1281			send_idle_sma(dd, SMA_IDLE_ARM);
1282		break;
1283	case IB_PORT_ACTIVE:
1284		if (ppd->neighbor_normal) {
1285			ret = set_link_state(ppd, HLS_UP_ACTIVE);
1286			if (ret == 0)
1287				send_idle_sma(dd, SMA_IDLE_ACTIVE);
1288		} else {
1289			pr_warn("SubnSet(OPA_PortInfo) Cannot move to Active with NeighborNormal 0\n");
1290			smp->status |= IB_SMP_INVALID_FIELD;
1291		}
1292		break;
1293	default:
1294		pr_warn("SubnSet(OPA_PortInfo) invalid logical state 0x%x\n",
1295			logical_state);
1296		smp->status |= IB_SMP_INVALID_FIELD;
1297	}
1298
1299	return 0;
1300}
1301
1302/*
 1303 * __subn_set_opa_portinfo - set port information
1304 * @smp: the incoming SM packet
1305 * @ibdev: the infiniband device
1306 * @port: the port on the device
1307 *
1308 */
1309static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
1310				   struct ib_device *ibdev, u32 port,
1311				   u32 *resp_len, u32 max_len, int local_mad)
1312{
1313	struct opa_port_info *pi = (struct opa_port_info *)data;
1314	struct ib_event event;
1315	struct hfi1_devdata *dd;
1316	struct hfi1_pportdata *ppd;
1317	struct hfi1_ibport *ibp;
1318	u8 clientrereg;
1319	unsigned long flags;
1320	u32 smlid;
1321	u32 lid;
1322	u8 ls_old, ls_new, ps_new;
1323	u8 vls;
1324	u8 msl;
1325	u8 crc_enabled;
1326	u16 lse, lwe, mtu;
1327	u32 num_ports = OPA_AM_NPORT(am);
1328	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1329	int ret, i, invalid = 0, call_set_mtu = 0;
1330	int call_link_downgrade_policy = 0;
1331
1332	if (num_ports != 1 ||
1333	    smp_length_check(sizeof(*pi), max_len)) {
1334		smp->status |= IB_SMP_INVALID_FIELD;
1335		return reply((struct ib_mad_hdr *)smp);
1336	}
1337
1338	lid = be32_to_cpu(pi->lid);
1339	if (lid & 0xFF000000) {
1340		pr_warn("OPA_PortInfo lid out of range: %X\n", lid);
1341		smp->status |= IB_SMP_INVALID_FIELD;
1342		goto get_only;
1343	}
1344
1345
1346	smlid = be32_to_cpu(pi->sm_lid);
1347	if (smlid & 0xFF000000) {
1348		pr_warn("OPA_PortInfo SM lid out of range: %X\n", smlid);
1349		smp->status |= IB_SMP_INVALID_FIELD;
1350		goto get_only;
1351	}
1352
1353	clientrereg = (pi->clientrereg_subnettimeout &
1354			OPA_PI_MASK_CLIENT_REREGISTER);
1355
1356	dd = dd_from_ibdev(ibdev);
1357	/* IB numbers ports from 1, hw from 0 */
1358	ppd = dd->pport + (port - 1);
1359	ibp = &ppd->ibport_data;
1360	event.device = ibdev;
1361	event.element.port_num = port;
1362
1363	ls_old = driver_lstate(ppd);
1364
1365	ibp->rvp.mkey = pi->mkey;
1366	if (ibp->rvp.gid_prefix != pi->subnet_prefix) {
1367		ibp->rvp.gid_prefix = pi->subnet_prefix;
1368		event.event = IB_EVENT_GID_CHANGE;
1369		ib_dispatch_event(&event);
1370	}
1371	ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
1372
1373	/* Must be a valid unicast LID address. */
1374	if ((lid == 0 && ls_old > IB_PORT_INIT) ||
1375	     (hfi1_is_16B_mcast(lid))) {
1376		smp->status |= IB_SMP_INVALID_FIELD;
1377		pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
1378			lid);
1379	} else if (ppd->lid != lid ||
1380		 ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC)) {
1381		if (ppd->lid != lid)
1382			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LID_CHANGE_BIT);
1383		if (ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC))
1384			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LMC_CHANGE_BIT);
1385		hfi1_set_lid(ppd, lid, pi->mkeyprotect_lmc & OPA_PI_MASK_LMC);
1386		event.event = IB_EVENT_LID_CHANGE;
1387		ib_dispatch_event(&event);
1388
1389		if (HFI1_PORT_GUID_INDEX + 1 < HFI1_GUIDS_PER_PORT) {
1390			/* Manufacture GID from LID to support extended
1391			 * addresses
1392			 */
1393			ppd->guids[HFI1_PORT_GUID_INDEX + 1] =
1394				be64_to_cpu(OPA_MAKE_ID(lid));
1395			event.event = IB_EVENT_GID_CHANGE;
1396			ib_dispatch_event(&event);
1397		}
1398	}
1399
1400	msl = pi->smsl & OPA_PI_MASK_SMSL;
1401	if (pi->partenforce_filterraw & OPA_PI_MASK_LINKINIT_REASON)
1402		ppd->linkinit_reason =
1403			(pi->partenforce_filterraw &
1404			 OPA_PI_MASK_LINKINIT_REASON);
1405
1406	/* Must be a valid unicast LID address. */
1407	if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
1408	     (hfi1_is_16B_mcast(smlid))) {
1409		smp->status |= IB_SMP_INVALID_FIELD;
1410		pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
1411	} else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
1412		pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
1413		spin_lock_irqsave(&ibp->rvp.lock, flags);
1414		if (ibp->rvp.sm_ah) {
1415			if (smlid != ibp->rvp.sm_lid)
1416				hfi1_modify_qp0_ah(ibp, ibp->rvp.sm_ah, smlid);
1417			if (msl != ibp->rvp.sm_sl)
1418				rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl);
1419		}
1420		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
1421		if (smlid != ibp->rvp.sm_lid)
1422			ibp->rvp.sm_lid = smlid;
1423		if (msl != ibp->rvp.sm_sl)
1424			ibp->rvp.sm_sl = msl;
1425		event.event = IB_EVENT_SM_CHANGE;
1426		ib_dispatch_event(&event);
1427	}
1428
1429	if (pi->link_down_reason == 0) {
1430		ppd->local_link_down_reason.sma = 0;
1431		ppd->local_link_down_reason.latest = 0;
1432	}
1433
1434	if (pi->neigh_link_down_reason == 0) {
1435		ppd->neigh_link_down_reason.sma = 0;
1436		ppd->neigh_link_down_reason.latest = 0;
1437	}
1438
1439	ppd->sm_trap_qp = be32_to_cpu(pi->sm_trap_qp);
1440	ppd->sa_qp = be32_to_cpu(pi->sa_qp);
1441
1442	ppd->port_error_action = be32_to_cpu(pi->port_error_action);
1443	lwe = be16_to_cpu(pi->link_width.enabled);
1444	if (lwe) {
1445		if (lwe == OPA_LINK_WIDTH_RESET ||
1446		    lwe == OPA_LINK_WIDTH_RESET_OLD)
1447			set_link_width_enabled(ppd, ppd->link_width_supported);
1448		else if ((lwe & ~ppd->link_width_supported) == 0)
1449			set_link_width_enabled(ppd, lwe);
1450		else
1451			smp->status |= IB_SMP_INVALID_FIELD;
1452	}
1453	lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
1454	/* LWD.E is always applied - 0 means "disabled" */
1455	if (lwe == OPA_LINK_WIDTH_RESET ||
1456	    lwe == OPA_LINK_WIDTH_RESET_OLD) {
1457		set_link_width_downgrade_enabled(ppd,
1458						 ppd->
1459						 link_width_downgrade_supported
1460						 );
1461	} else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
1462		/* only set and apply if something changed */
1463		if (lwe != ppd->link_width_downgrade_enabled) {
1464			set_link_width_downgrade_enabled(ppd, lwe);
1465			call_link_downgrade_policy = 1;
1466		}
1467	} else {
1468		smp->status |= IB_SMP_INVALID_FIELD;
1469	}
1470	lse = be16_to_cpu(pi->link_speed.enabled);
1471	if (lse) {
1472		if (lse & be16_to_cpu(pi->link_speed.supported))
1473			set_link_speed_enabled(ppd, lse);
1474		else
1475			smp->status |= IB_SMP_INVALID_FIELD;
1476	}
1477
1478	ibp->rvp.mkeyprot =
1479		(pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
1480	ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
1481	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
1482				    ibp->rvp.vl_high_limit);
1483
1484	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
1485	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
1486		smp->status |= IB_SMP_INVALID_FIELD;
1487		return reply((struct ib_mad_hdr *)smp);
1488	}
1489	for (i = 0; i < ppd->vls_supported; i++) {
1490		if ((i % 2) == 0)
1491			mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
1492					   4) & 0xF);
1493		else
1494			mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
1495					  0xF);
1496		if (mtu == 0xffff) {
1497			pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
1498				mtu,
1499				(pi->neigh_mtu.pvlx_to_mtu[0] >> 4) & 0xF);
1500			smp->status |= IB_SMP_INVALID_FIELD;
1501			mtu = hfi1_max_mtu; /* use a valid MTU */
1502		}
1503		if (dd->vld[i].mtu != mtu) {
1504			dd_dev_info(dd,
1505				    "MTU change on vl %d from %d to %d\n",
1506				    i, dd->vld[i].mtu, mtu);
1507			dd->vld[i].mtu = mtu;
1508			call_set_mtu++;
1509		}
1510	}
1511	/* As per OPAV1 spec: VL15 must support and be configured
1512	 * for operation with a 2048 or larger MTU.
1513	 */
1514	mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
1515	if (mtu < 2048 || mtu == 0xffff)
1516		mtu = 2048;
1517	if (dd->vld[15].mtu != mtu) {
1518		dd_dev_info(dd,
1519			    "MTU change on vl 15 from %d to %d\n",
1520			    dd->vld[15].mtu, mtu);
1521		dd->vld[15].mtu = mtu;
1522		call_set_mtu++;
1523	}
1524	if (call_set_mtu)
1525		set_mtu(ppd);
1526
1527	/* Set operational VLs */
1528	vls = pi->operational_vls & OPA_PI_MASK_OPERATIONAL_VL;
1529	if (vls) {
1530		if (vls > ppd->vls_supported) {
1531			pr_warn("SubnSet(OPA_PortInfo) VL's supported invalid %d\n",
1532				pi->operational_vls);
1533			smp->status |= IB_SMP_INVALID_FIELD;
1534		} else {
1535			if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
1536					    vls) == -EINVAL)
1537				smp->status |= IB_SMP_INVALID_FIELD;
1538		}
1539	}
1540
1541	if (pi->mkey_violations == 0)
1542		ibp->rvp.mkey_violations = 0;
1543
1544	if (pi->pkey_violations == 0)
1545		ibp->rvp.pkey_violations = 0;
1546
1547	if (pi->qkey_violations == 0)
1548		ibp->rvp.qkey_violations = 0;
1549
1550	ibp->rvp.subnet_timeout =
1551		pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;
1552
1553	crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
1554	crc_enabled >>= 4;
1555	crc_enabled &= 0xf;
1556
1557	if (crc_enabled != 0)
1558		ppd->port_crc_mode_enabled = port_ltp_to_cap(crc_enabled);
1559
1560	ppd->is_active_optimize_enabled =
1561			!!(be16_to_cpu(pi->port_mode)
1562					& OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE);
1563
1564	ls_new = pi->port_states.portphysstate_portstate &
1565			OPA_PI_MASK_PORT_STATE;
1566	ps_new = (pi->port_states.portphysstate_portstate &
1567			OPA_PI_MASK_PORT_PHYSICAL_STATE) >> 4;
1568
1569	if (ls_old == IB_PORT_INIT) {
1570		if (start_of_sm_config) {
1571			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1572				ppd->is_sm_config_started = 1;
1573		} else if (ls_new == IB_PORT_ARMED) {
1574			if (ppd->is_sm_config_started == 0) {
1575				invalid = 1;
1576				smp->status |= IB_SMP_INVALID_FIELD;
1577			}
1578		}
1579	}
1580
1581	/* Handle CLIENT_REREGISTER event b/c SM asked us for it */
1582	if (clientrereg) {
1583		event.event = IB_EVENT_CLIENT_REREGISTER;
1584		ib_dispatch_event(&event);
1585	}
1586
1587	/*
1588	 * Do the port state change now that the other link parameters
1589	 * have been set.
1590	 * Changing the port physical state only makes sense if the link
1591	 * is down or is being set to down.
1592	 */
1593
1594	if (!invalid) {
1595		ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad);
1596		if (ret)
1597			return ret;
1598	}
1599
1600	ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
1601				      max_len);
1602
1603	/* restore re-reg bit per o14-12.2.1 */
1604	pi->clientrereg_subnettimeout |= clientrereg;
1605
1606	/*
1607	 * Apply the new link downgrade policy.  This may result in a link
1608	 * bounce.  Do this after everything else so things are settled.
1609	 * Possible problem: if setting the port state above fails, then
1610	 * the policy change is not applied.
1611	 */
1612	if (call_link_downgrade_policy)
1613		apply_link_downgrade_policy(ppd, 0);
1614
1615	return ret;
1616
1617get_only:
1618	return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
1619				       max_len);
1620}
1621
1622/**
1623 * set_pkeys - set the PKEY table for ctxt 0
1624 * @dd: the hfi1_ib device
1625 * @port: the IB port number
1626 * @pkeys: the PKEY table
1627 */
1628static int set_pkeys(struct hfi1_devdata *dd, u32 port, u16 *pkeys)
1629{
1630	struct hfi1_pportdata *ppd;
1631	int i;
1632	int changed = 0;
1633	int update_includes_mgmt_partition = 0;
1634
1635	/*
1636	 * IB port one/two always maps to context zero/one,
 1637	 * always a kernel context, no locking needed.
1638	 * If we get here with ppd setup, no need to check
1639	 * that rcd is valid.
1640	 */
1641	ppd = dd->pport + (port - 1);
1642	/*
1643	 * If the update does not include the management pkey, don't do it.
1644	 */
1645	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1646		if (pkeys[i] == LIM_MGMT_P_KEY) {
1647			update_includes_mgmt_partition = 1;
1648			break;
1649		}
1650	}
1651
1652	if (!update_includes_mgmt_partition)
1653		return 1;
1654
1655	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1656		u16 key = pkeys[i];
1657		u16 okey = ppd->pkeys[i];
1658
1659		if (key == okey)
1660			continue;
1661		/*
1662		 * The SM gives us the complete PKey table. We have
1663		 * to ensure that we put the PKeys in the matching
1664		 * slots.
1665		 */
1666		ppd->pkeys[i] = key;
1667		changed = 1;
1668	}
1669
1670	if (changed) {
1671		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
1672		hfi1_event_pkey_change(dd, port);
1673	}
1674
1675	return 0;
1676}
1677
1678static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
1679				    struct ib_device *ibdev, u32 port,
1680				    u32 *resp_len, u32 max_len)
1681{
1682	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1683	u32 n_blocks_sent = OPA_AM_NBLK(am);
1684	u32 start_block = am & 0x7ff;
1685	u16 *p = (u16 *)data;
1686	__be16 *q = (__be16 *)data;
1687	int i;
1688	u16 n_blocks_avail;
1689	unsigned npkeys = hfi1_get_npkeys(dd);
1690	u32 size = 0;
1691
1692	if (n_blocks_sent == 0) {
 1693		pr_warn("OPA Set PKey AM Invalid : P = %u; B = 0x%x; N = 0x%x\n",
1694			port, start_block, n_blocks_sent);
1695		smp->status |= IB_SMP_INVALID_FIELD;
1696		return reply((struct ib_mad_hdr *)smp);
1697	}
1698
1699	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
1700
1701	size = sizeof(u16) * (n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE);
1702
1703	if (smp_length_check(size, max_len)) {
1704		smp->status |= IB_SMP_INVALID_FIELD;
1705		return reply((struct ib_mad_hdr *)smp);
1706	}
1707
1708	if (start_block + n_blocks_sent > n_blocks_avail ||
1709	    n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
1710		pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
1711			start_block, n_blocks_sent, n_blocks_avail,
1712			OPA_NUM_PKEY_BLOCKS_PER_SMP);
1713		smp->status |= IB_SMP_INVALID_FIELD;
1714		return reply((struct ib_mad_hdr *)smp);
1715	}
1716
1717	for (i = 0; i < n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE; i++)
1718		p[i] = be16_to_cpu(q[i]);
1719
1720	if (start_block == 0 && set_pkeys(dd, port, p) != 0) {
1721		smp->status |= IB_SMP_INVALID_FIELD;
1722		return reply((struct ib_mad_hdr *)smp);
1723	}
1724
1725	return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len,
1726					max_len);
1727}
1728
1729#define ILLEGAL_VL 12
1730/*
1731 * filter_sc2vlt changes mappings to VL15 to ILLEGAL_VL (except
1732 * for SC15, which must map to VL15). If we don't remap things this
1733 * way it is possible for VL15 counters to increment when we try to
1734 * send on a SC which is mapped to an invalid VL.
1735 * When getting the table convert ILLEGAL_VL back to VL15.
1736 */
1737static void filter_sc2vlt(void *data, bool set)
1738{
1739	int i;
1740	u8 *pd = data;
1741
1742	for (i = 0; i < OPA_MAX_SCS; i++) {
1743		if (i == 15)
1744			continue;
1745
1746		if (set) {
1747			if ((pd[i] & 0x1f) == 0xf)
1748				pd[i] = ILLEGAL_VL;
1749		} else {
1750			if ((pd[i] & 0x1f) == ILLEGAL_VL)
1751				pd[i] = 0xf;
1752		}
1753	}
1754}
1755
1756static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1757{
1758	u64 *val = data;
1759
1760	filter_sc2vlt(data, true);
1761
1762	write_csr(dd, SEND_SC2VLT0, *val++);
1763	write_csr(dd, SEND_SC2VLT1, *val++);
1764	write_csr(dd, SEND_SC2VLT2, *val++);
1765	write_csr(dd, SEND_SC2VLT3, *val++);
1766	write_seqlock_irq(&dd->sc2vl_lock);
1767	memcpy(dd->sc2vl, data, sizeof(dd->sc2vl));
1768	write_sequnlock_irq(&dd->sc2vl_lock);
1769	return 0;
1770}
1771
1772static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1773{
1774	u64 *val = (u64 *)data;
1775
1776	*val++ = read_csr(dd, SEND_SC2VLT0);
1777	*val++ = read_csr(dd, SEND_SC2VLT1);
1778	*val++ = read_csr(dd, SEND_SC2VLT2);
1779	*val++ = read_csr(dd, SEND_SC2VLT3);
1780
1781	filter_sc2vlt((u64 *)data, false);
1782	return 0;
1783}
1784
1785static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1786				   struct ib_device *ibdev, u32 port,
1787				   u32 *resp_len, u32 max_len)
1788{
1789	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1790	u8 *p = data;
1791	size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */
1792	unsigned i;
1793
1794	if (am || smp_length_check(size, max_len)) {
1795		smp->status |= IB_SMP_INVALID_FIELD;
1796		return reply((struct ib_mad_hdr *)smp);
1797	}
1798
1799	for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++)
1800		*p++ = ibp->sl_to_sc[i];
1801
1802	if (resp_len)
1803		*resp_len += size;
1804
1805	return reply((struct ib_mad_hdr *)smp);
1806}
1807
1808static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1809				   struct ib_device *ibdev, u32 port,
1810				   u32 *resp_len, u32 max_len)
1811{
1812	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1813	u8 *p = data;
1814	size_t size = ARRAY_SIZE(ibp->sl_to_sc);
1815	int i;
1816	u8 sc;
1817
1818	if (am || smp_length_check(size, max_len)) {
1819		smp->status |= IB_SMP_INVALID_FIELD;
1820		return reply((struct ib_mad_hdr *)smp);
1821	}
1822
1823	for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++) {
1824		sc = *p++;
1825		if (ibp->sl_to_sc[i] != sc) {
1826			ibp->sl_to_sc[i] = sc;
1827
1828			/* Put all stale qps into error state */
1829			hfi1_error_port_qps(ibp, i);
1830		}
1831	}
1832
1833	return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len,
1834				       max_len);
1835}
1836
1837static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1838				   struct ib_device *ibdev, u32 port,
1839				   u32 *resp_len, u32 max_len)
1840{
1841	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1842	u8 *p = data;
1843	size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */
1844	unsigned i;
1845
1846	if (am || smp_length_check(size, max_len)) {
1847		smp->status |= IB_SMP_INVALID_FIELD;
1848		return reply((struct ib_mad_hdr *)smp);
1849	}
1850
1851	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1852		*p++ = ibp->sc_to_sl[i];
1853
1854	if (resp_len)
1855		*resp_len += size;
1856
1857	return reply((struct ib_mad_hdr *)smp);
1858}
1859
1860static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1861				   struct ib_device *ibdev, u32 port,
1862				   u32 *resp_len, u32 max_len)
1863{
1864	struct hfi1_ibport *ibp = to_iport(ibdev, port);
1865	size_t size = ARRAY_SIZE(ibp->sc_to_sl);
1866	u8 *p = data;
1867	int i;
1868
1869	if (am || smp_length_check(size, max_len)) {
1870		smp->status |= IB_SMP_INVALID_FIELD;
1871		return reply((struct ib_mad_hdr *)smp);
1872	}
1873
1874	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1875		ibp->sc_to_sl[i] = *p++;
1876
1877	return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len,
1878				       max_len);
1879}
1880
1881static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1882				    struct ib_device *ibdev, u32 port,
1883				    u32 *resp_len, u32 max_len)
1884{
1885	u32 n_blocks = OPA_AM_NBLK(am);
1886	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1887	void *vp = (void *)data;
1888	size_t size = 4 * sizeof(u64);
1889
1890	if (n_blocks != 1 || smp_length_check(size, max_len)) {
1891		smp->status |= IB_SMP_INVALID_FIELD;
1892		return reply((struct ib_mad_hdr *)smp);
1893	}
1894
1895	get_sc2vlt_tables(dd, vp);
1896
1897	if (resp_len)
1898		*resp_len += size;
1899
1900	return reply((struct ib_mad_hdr *)smp);
1901}
1902
1903static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1904				    struct ib_device *ibdev, u32 port,
1905				    u32 *resp_len, u32 max_len)
1906{
1907	u32 n_blocks = OPA_AM_NBLK(am);
1908	int async_update = OPA_AM_ASYNC(am);
1909	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1910	void *vp = (void *)data;
1911	struct hfi1_pportdata *ppd;
1912	int lstate;
1913	/*
1914	 * set_sc2vlt_tables writes the information contained in *data
1915	 * to four 64-bit registers, SendSC2VLt[0-3]. We need to make
1916	 * sure max_len is not smaller than the total size of the four
1917	 * SendSC2VLt[0-3] registers.
1918	 */
1919	size_t size = 4 * sizeof(u64);
1920
1921	if (n_blocks != 1 || async_update || smp_length_check(size, max_len)) {
1922		smp->status |= IB_SMP_INVALID_FIELD;
1923		return reply((struct ib_mad_hdr *)smp);
1924	}
1925
1926	/* IB numbers ports from 1, hw from 0 */
1927	ppd = dd->pport + (port - 1);
1928	lstate = driver_lstate(ppd);
1929	/*
1930	 * it's known that async_update is 0 by this point, but include
1931	 * the explicit check for clarity
1932	 */
1933	if (!async_update &&
1934	    (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
1935		smp->status |= IB_SMP_INVALID_FIELD;
1936		return reply((struct ib_mad_hdr *)smp);
1937	}
1938
1939	set_sc2vlt_tables(dd, vp);
1940
1941	return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len,
1942					max_len);
1943}
1944
1945static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1946				     struct ib_device *ibdev, u32 port,
1947				     u32 *resp_len, u32 max_len)
1948{
1949	u32 n_blocks = OPA_AM_NPORT(am);
1950	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1951	struct hfi1_pportdata *ppd;
1952	void *vp = (void *)data;
1953	int size = sizeof(struct sc2vlnt);
1954
1955	if (n_blocks != 1 || smp_length_check(size, max_len)) {
1956		smp->status |= IB_SMP_INVALID_FIELD;
1957		return reply((struct ib_mad_hdr *)smp);
1958	}
1959
1960	ppd = dd->pport + (port - 1);
1961
1962	fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
1963
1964	if (resp_len)
1965		*resp_len += size;
1966
1967	return reply((struct ib_mad_hdr *)smp);
1968}
1969
1970static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1971				     struct ib_device *ibdev, u32 port,
1972				     u32 *resp_len, u32 max_len)
1973{
1974	u32 n_blocks = OPA_AM_NPORT(am);
1975	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1976	struct hfi1_pportdata *ppd;
1977	void *vp = (void *)data;
1978	int lstate;
1979	int size = sizeof(struct sc2vlnt);
1980
1981	if (n_blocks != 1 || smp_length_check(size, max_len)) {
1982		smp->status |= IB_SMP_INVALID_FIELD;
1983		return reply((struct ib_mad_hdr *)smp);
1984	}
1985
1986	/* IB numbers ports from 1, hw from 0 */
1987	ppd = dd->pport + (port - 1);
1988	lstate = driver_lstate(ppd);
1989	if (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE) {
1990		smp->status |= IB_SMP_INVALID_FIELD;
1991		return reply((struct ib_mad_hdr *)smp);
1992	}
1993
1996	fm_set_table(ppd, FM_TBL_SC2VLNT, vp);
1997
1998	return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
1999					 resp_len, max_len);
2000}
2001
2002static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
2003			      struct ib_device *ibdev, u32 port,
2004			      u32 *resp_len, u32 max_len)
2005{
2006	u32 nports = OPA_AM_NPORT(am);
2007	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
2008	u32 lstate;
2009	struct hfi1_ibport *ibp;
2010	struct hfi1_pportdata *ppd;
2011	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
2012
2013	if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
2014		smp->status |= IB_SMP_INVALID_FIELD;
2015		return reply((struct ib_mad_hdr *)smp);
2016	}
2017
2018	ibp = to_iport(ibdev, port);
2019	ppd = ppd_from_ibp(ibp);
2020
2021	lstate = driver_lstate(ppd);
2022
2023	if (start_of_sm_config && (lstate == IB_PORT_INIT))
2024		ppd->is_sm_config_started = 1;
2025
2026	psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
2027	psi->port_states.ledenable_offlinereason |=
2028		ppd->is_sm_config_started << 5;
2029	psi->port_states.ledenable_offlinereason |=
2030		ppd->offline_disabled_reason;
2031
2032	psi->port_states.portphysstate_portstate =
2033		(driver_pstate(ppd) << 4) | (lstate & 0xf);
2034	psi->link_width_downgrade_tx_active =
2035		cpu_to_be16(ppd->link_width_downgrade_tx_active);
2036	psi->link_width_downgrade_rx_active =
2037		cpu_to_be16(ppd->link_width_downgrade_rx_active);
2038	if (resp_len)
2039		*resp_len += sizeof(struct opa_port_state_info);
2040
2041	return reply((struct ib_mad_hdr *)smp);
2042}
2043
2044static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
2045			      struct ib_device *ibdev, u32 port,
2046			      u32 *resp_len, u32 max_len, int local_mad)
2047{
2048	u32 nports = OPA_AM_NPORT(am);
2049	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
2050	u32 ls_old;
2051	u8 ls_new, ps_new;
2052	struct hfi1_ibport *ibp;
2053	struct hfi1_pportdata *ppd;
2054	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
2055	int ret, invalid = 0;
2056
2057	if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
2058		smp->status |= IB_SMP_INVALID_FIELD;
2059		return reply((struct ib_mad_hdr *)smp);
2060	}
2061
2062	ibp = to_iport(ibdev, port);
2063	ppd = ppd_from_ibp(ibp);
2064
2065	ls_old = driver_lstate(ppd);
2066
2067	ls_new = port_states_to_logical_state(&psi->port_states);
2068	ps_new = port_states_to_phys_state(&psi->port_states);
2069
2070	if (ls_old == IB_PORT_INIT) {
2071		if (start_of_sm_config) {
2072			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
2073				ppd->is_sm_config_started = 1;
2074		} else if (ls_new == IB_PORT_ARMED) {
2075			if (ppd->is_sm_config_started == 0) {
2076				invalid = 1;
2077				smp->status |= IB_SMP_INVALID_FIELD;
2078			}
2079		}
2080	}
2081
2082	if (!invalid) {
2083		ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad);
2084		if (ret)
2085			return ret;
2086	}
2087
2088	return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len,
2089				  max_len);
2090}
2091
2092static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
2093				     struct ib_device *ibdev, u32 port,
2094				     u32 *resp_len, u32 max_len)
2095{
2096	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2097	u32 addr = OPA_AM_CI_ADDR(am);
2098	u32 len = OPA_AM_CI_LEN(am) + 1;
2099	int ret;
2100
2101	if (dd->pport->port_type != PORT_TYPE_QSFP ||
2102	    smp_length_check(len, max_len)) {
2103		smp->status |= IB_SMP_INVALID_FIELD;
2104		return reply((struct ib_mad_hdr *)smp);
2105	}
2106
2107#define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
2108#define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
2109#define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
2110
2111	/*
2112	 * check that addr is within spec, and
2113	 * addr and (addr + len - 1) are on the same "page"
2114	 */
2115	if (addr >= 4096 ||
2116	    (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
2117		smp->status |= IB_SMP_INVALID_FIELD;
2118		return reply((struct ib_mad_hdr *)smp);
2119	}
2120
2121	ret = get_cable_info(dd, port, addr, len, data);
2122
2123	if (ret == -ENODEV) {
2124		smp->status |= IB_SMP_UNSUP_METH_ATTR;
2125		return reply((struct ib_mad_hdr *)smp);
2126	}
2127
2128	/* The address range for the CableInfo SMA query is wider than the
2129	 * memory available on the QSFP cable. We want to return a valid
2130	 * response, albeit zeroed out, for address ranges beyond available
2131	 * memory but within the CableInfo query spec.
2132	 */
2133	if (ret < 0 && ret != -ERANGE) {
2134		smp->status |= IB_SMP_INVALID_FIELD;
2135		return reply((struct ib_mad_hdr *)smp);
2136	}
2137
2138	if (resp_len)
2139		*resp_len += len;
2140
2141	return reply((struct ib_mad_hdr *)smp);
2142}
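
/*
 * Illustrative sketch (not part of the driver): the __CI_PAGE_* macros
 * above restrict a CableInfo request to a single 128-byte page.  For
 * example, addr = 0x80 with len = 64 is accepted because
 * __CI_PAGE_NUM(0x80) == __CI_PAGE_NUM(0xbf) == 0x80, while addr = 0x7f
 * with len = 2 spans two pages and is rejected with IB_SMP_INVALID_FIELD.
 */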
2143
2144static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
2145			      struct ib_device *ibdev, u32 port, u32 *resp_len,
2146			      u32 max_len)
2147{
2148	u32 num_ports = OPA_AM_NPORT(am);
2149	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2150	struct hfi1_pportdata *ppd;
2151	struct buffer_control *p = (struct buffer_control *)data;
2152	int size = sizeof(struct buffer_control);
2153
2154	if (num_ports != 1 || smp_length_check(size, max_len)) {
2155		smp->status |= IB_SMP_INVALID_FIELD;
2156		return reply((struct ib_mad_hdr *)smp);
2157	}
2158
2159	ppd = dd->pport + (port - 1);
2160	fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
2161	trace_bct_get(dd, p);
2162	if (resp_len)
2163		*resp_len += size;
2164
2165	return reply((struct ib_mad_hdr *)smp);
2166}
2167
2168static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
2169			      struct ib_device *ibdev, u32 port, u32 *resp_len,
2170			      u32 max_len)
2171{
2172	u32 num_ports = OPA_AM_NPORT(am);
2173	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2174	struct hfi1_pportdata *ppd;
2175	struct buffer_control *p = (struct buffer_control *)data;
2176
2177	if (num_ports != 1 || smp_length_check(sizeof(*p), max_len)) {
2178		smp->status |= IB_SMP_INVALID_FIELD;
2179		return reply((struct ib_mad_hdr *)smp);
2180	}
2181	ppd = dd->pport + (port - 1);
2182	trace_bct_set(dd, p);
2183	if (fm_set_table(ppd, FM_TBL_BUFFER_CONTROL, p) < 0) {
2184		smp->status |= IB_SMP_INVALID_FIELD;
2185		return reply((struct ib_mad_hdr *)smp);
2186	}
2187
2188	return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len,
2189				  max_len);
2190}
2191
2192static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
2193				 struct ib_device *ibdev, u32 port,
2194				 u32 *resp_len, u32 max_len)
2195{
2196	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
2197	u32 num_ports = OPA_AM_NPORT(am);
2198	u8 section = (am & 0x00ff0000) >> 16;
2199	u8 *p = data;
2200	int size = 256;
2201
2202	if (num_ports != 1 || smp_length_check(size, max_len)) {
2203		smp->status |= IB_SMP_INVALID_FIELD;
2204		return reply((struct ib_mad_hdr *)smp);
2205	}
2206
2207	switch (section) {
2208	case OPA_VLARB_LOW_ELEMENTS:
2209		fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
2210		break;
2211	case OPA_VLARB_HIGH_ELEMENTS:
2212		fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
2213		break;
2214	case OPA_VLARB_PREEMPT_ELEMENTS:
2215		fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
2216		break;
2217	case OPA_VLARB_PREEMPT_MATRIX:
2218		fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
2219		break;
2220	default:
2221		pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n",
2222			be32_to_cpu(smp->attr_mod));
2223		smp->status |= IB_SMP_INVALID_FIELD;
2224		size = 0;
2225		break;
2226	}
2227
2228	if (size > 0 && resp_len)
2229		*resp_len += size;
2230
2231	return reply((struct ib_mad_hdr *)smp);
2232}
2233
2234static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
2235				 struct ib_device *ibdev, u32 port,
2236				 u32 *resp_len, u32 max_len)
2237{
2238	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
2239	u32 num_ports = OPA_AM_NPORT(am);
2240	u8 section = (am & 0x00ff0000) >> 16;
2241	u8 *p = data;
2242	int size = 256;
2243
2244	if (num_ports != 1 || smp_length_check(size, max_len)) {
2245		smp->status |= IB_SMP_INVALID_FIELD;
2246		return reply((struct ib_mad_hdr *)smp);
2247	}
2248
2249	switch (section) {
2250	case OPA_VLARB_LOW_ELEMENTS:
2251		(void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
2252		break;
2253	case OPA_VLARB_HIGH_ELEMENTS:
2254		(void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
2255		break;
2256	/*
2257	 * Neither OPA_VLARB_PREEMPT_ELEMENTS nor OPA_VLARB_PREEMPT_MATRIX
2258	 * can be changed from the default values.
2259	 */
2260	case OPA_VLARB_PREEMPT_ELEMENTS:
2261	case OPA_VLARB_PREEMPT_MATRIX:
2262		smp->status |= IB_SMP_UNSUP_METH_ATTR;
2263		break;
2264	default:
2265		pr_warn("OPA SubnSet(VL Arb) AM Invalid : 0x%x\n",
2266			be32_to_cpu(smp->attr_mod));
2267		smp->status |= IB_SMP_INVALID_FIELD;
2268		break;
2269	}
2270
2271	return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len,
2272				     max_len);
2273}
2274
2275struct opa_pma_mad {
2276	struct ib_mad_hdr mad_hdr;
2277	u8 data[2024];
2278} __packed;
2279
2280struct opa_port_status_req {
2281	__u8 port_num;
2282	__u8 reserved[3];
2283	__be32 vl_select_mask;
2284};
2285
2286#define VL_MASK_ALL		0x00000000000080ffUL
2287
2288struct opa_port_status_rsp {
2289	__u8 port_num;
2290	__u8 reserved[3];
2291	__be32  vl_select_mask;
2292
2293	/* Data counters */
2294	__be64 port_xmit_data;
2295	__be64 port_rcv_data;
2296	__be64 port_xmit_pkts;
2297	__be64 port_rcv_pkts;
2298	__be64 port_multicast_xmit_pkts;
2299	__be64 port_multicast_rcv_pkts;
2300	__be64 port_xmit_wait;
2301	__be64 sw_port_congestion;
2302	__be64 port_rcv_fecn;
2303	__be64 port_rcv_becn;
2304	__be64 port_xmit_time_cong;
2305	__be64 port_xmit_wasted_bw;
2306	__be64 port_xmit_wait_data;
2307	__be64 port_rcv_bubble;
2308	__be64 port_mark_fecn;
2309	/* Error counters */
2310	__be64 port_rcv_constraint_errors;
2311	__be64 port_rcv_switch_relay_errors;
2312	__be64 port_xmit_discards;
2313	__be64 port_xmit_constraint_errors;
2314	__be64 port_rcv_remote_physical_errors;
2315	__be64 local_link_integrity_errors;
2316	__be64 port_rcv_errors;
2317	__be64 excessive_buffer_overruns;
2318	__be64 fm_config_errors;
2319	__be32 link_error_recovery;
2320	__be32 link_downed;
2321	u8 uncorrectable_errors;
2322
2323	u8 link_quality_indicator; /* 5res, 3bit */
2324	u8 res2[6];
2325	struct _vls_pctrs {
2326		/* per-VL Data counters */
2327		__be64 port_vl_xmit_data;
2328		__be64 port_vl_rcv_data;
2329		__be64 port_vl_xmit_pkts;
2330		__be64 port_vl_rcv_pkts;
2331		__be64 port_vl_xmit_wait;
2332		__be64 sw_port_vl_congestion;
2333		__be64 port_vl_rcv_fecn;
2334		__be64 port_vl_rcv_becn;
2335		__be64 port_xmit_time_cong;
2336		__be64 port_vl_xmit_wasted_bw;
2337		__be64 port_vl_xmit_wait_data;
2338		__be64 port_vl_rcv_bubble;
2339		__be64 port_vl_mark_fecn;
2340		__be64 port_vl_xmit_discards;
2341	} vls[]; /* real array size defined by # bits set in vl_select_mask */
2342};
2343
2344enum counter_selects {
2345	CS_PORT_XMIT_DATA			= (1 << 31),
2346	CS_PORT_RCV_DATA			= (1 << 30),
2347	CS_PORT_XMIT_PKTS			= (1 << 29),
2348	CS_PORT_RCV_PKTS			= (1 << 28),
2349	CS_PORT_MCAST_XMIT_PKTS			= (1 << 27),
2350	CS_PORT_MCAST_RCV_PKTS			= (1 << 26),
2351	CS_PORT_XMIT_WAIT			= (1 << 25),
2352	CS_SW_PORT_CONGESTION			= (1 << 24),
2353	CS_PORT_RCV_FECN			= (1 << 23),
2354	CS_PORT_RCV_BECN			= (1 << 22),
2355	CS_PORT_XMIT_TIME_CONG			= (1 << 21),
2356	CS_PORT_XMIT_WASTED_BW			= (1 << 20),
2357	CS_PORT_XMIT_WAIT_DATA			= (1 << 19),
2358	CS_PORT_RCV_BUBBLE			= (1 << 18),
2359	CS_PORT_MARK_FECN			= (1 << 17),
2360	CS_PORT_RCV_CONSTRAINT_ERRORS		= (1 << 16),
2361	CS_PORT_RCV_SWITCH_RELAY_ERRORS		= (1 << 15),
2362	CS_PORT_XMIT_DISCARDS			= (1 << 14),
2363	CS_PORT_XMIT_CONSTRAINT_ERRORS		= (1 << 13),
2364	CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS	= (1 << 12),
2365	CS_LOCAL_LINK_INTEGRITY_ERRORS		= (1 << 11),
2366	CS_PORT_RCV_ERRORS			= (1 << 10),
2367	CS_EXCESSIVE_BUFFER_OVERRUNS		= (1 << 9),
2368	CS_FM_CONFIG_ERRORS			= (1 << 8),
2369	CS_LINK_ERROR_RECOVERY			= (1 << 7),
2370	CS_LINK_DOWNED				= (1 << 6),
2371	CS_UNCORRECTABLE_ERRORS			= (1 << 5),
2372};
2373
2374struct opa_clear_port_status {
2375	__be64 port_select_mask[4];
2376	__be32 counter_select_mask;
2377};
2378
2379struct opa_aggregate {
2380	__be16 attr_id;
2381	__be16 err_reqlength;	/* 1 bit, 8 res, 7 bit */
2382	__be32 attr_mod;
2383	u8 data[];
2384};
2385
2386#define MSK_LLI 0x000000f0
2387#define MSK_LLI_SFT 4
2388#define MSK_LER 0x0000000f
2389#define MSK_LER_SFT 0
2390#define ADD_LLI 8
2391#define ADD_LER 2
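
/*
 * Illustrative sketch (not part of the driver): the resolution field of a
 * data counters request (opa_port_data_counters_msg) selects how far the
 * LocalLinkIntegrity and LinkErrorRecovery counts are right-shifted before
 * being folded into port_error_counter_summary.  For resolution = 0x23:
 *
 *	res_lli = (0x23 & MSK_LLI) >> MSK_LLI_SFT = 2, shift = 2 + ADD_LLI = 10
 *	res_ler = (0x23 & MSK_LER) >> MSK_LER_SFT = 3, shift = 3 + ADD_LER = 5
 *
 * matching the handling in pma_get_opa_datacounters() and
 * get_error_counter_summary() below.
 */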
2392
2393/* Request contains first three fields, response contains those plus the rest */
2394struct opa_port_data_counters_msg {
2395	__be64 port_select_mask[4];
2396	__be32 vl_select_mask;
2397	__be32 resolution;
2398
2399	/* Response fields follow */
2400	struct _port_dctrs {
2401		u8 port_number;
2402		u8 reserved2[3];
2403		__be32 link_quality_indicator; /* 29res, 3bit */
2404
2405		/* Data counters */
2406		__be64 port_xmit_data;
2407		__be64 port_rcv_data;
2408		__be64 port_xmit_pkts;
2409		__be64 port_rcv_pkts;
2410		__be64 port_multicast_xmit_pkts;
2411		__be64 port_multicast_rcv_pkts;
2412		__be64 port_xmit_wait;
2413		__be64 sw_port_congestion;
2414		__be64 port_rcv_fecn;
2415		__be64 port_rcv_becn;
2416		__be64 port_xmit_time_cong;
2417		__be64 port_xmit_wasted_bw;
2418		__be64 port_xmit_wait_data;
2419		__be64 port_rcv_bubble;
2420		__be64 port_mark_fecn;
2421
2422		__be64 port_error_counter_summary;
2423		/* Sum of error counts/port */
2424
2425		struct _vls_dctrs {
2426			/* per-VL Data counters */
2427			__be64 port_vl_xmit_data;
2428			__be64 port_vl_rcv_data;
2429			__be64 port_vl_xmit_pkts;
2430			__be64 port_vl_rcv_pkts;
2431			__be64 port_vl_xmit_wait;
2432			__be64 sw_port_vl_congestion;
2433			__be64 port_vl_rcv_fecn;
2434			__be64 port_vl_rcv_becn;
2435			__be64 port_xmit_time_cong;
2436			__be64 port_vl_xmit_wasted_bw;
2437			__be64 port_vl_xmit_wait_data;
2438			__be64 port_vl_rcv_bubble;
2439			__be64 port_vl_mark_fecn;
2440		} vls[];
2441		/* array size defined by #bits set in vl_select_mask*/
2442	} port;
2443};
2444
2445struct opa_port_error_counters64_msg {
2446	/*
2447	 * Request contains first two fields, response contains the
2448	 * whole structure
2449	 */
2450	__be64 port_select_mask[4];
2451	__be32 vl_select_mask;
2452
2453	/* Response-only fields follow */
2454	__be32 reserved1;
2455	struct _port_ectrs {
2456		u8 port_number;
2457		u8 reserved2[7];
2458		__be64 port_rcv_constraint_errors;
2459		__be64 port_rcv_switch_relay_errors;
2460		__be64 port_xmit_discards;
2461		__be64 port_xmit_constraint_errors;
2462		__be64 port_rcv_remote_physical_errors;
2463		__be64 local_link_integrity_errors;
2464		__be64 port_rcv_errors;
2465		__be64 excessive_buffer_overruns;
2466		__be64 fm_config_errors;
2467		__be32 link_error_recovery;
2468		__be32 link_downed;
2469		u8 uncorrectable_errors;
2470		u8 reserved3[7];
2471		struct _vls_ectrs {
2472			__be64 port_vl_xmit_discards;
2473		} vls[];
2474		/* array size defined by #bits set in vl_select_mask */
2475	} port;
2476};
2477
2478struct opa_port_error_info_msg {
2479	__be64 port_select_mask[4];
2480	__be32 error_info_select_mask;
2481	__be32 reserved1;
2482	struct _port_ei {
2483		u8 port_number;
2484		u8 reserved2[7];
2485
2486		/* PortRcvErrorInfo */
2487		struct {
2488			u8 status_and_code;
2489			union {
2490				u8 raw[17];
2491				struct {
2492					/* EI1to12 format */
2493					u8 packet_flit1[8];
2494					u8 packet_flit2[8];
2495					u8 remaining_flit_bits12;
2496				} ei1to12;
2497				struct {
2498					u8 packet_bytes[8];
2499					u8 remaining_flit_bits;
2500				} ei13;
2501			} ei;
2502			u8 reserved3[6];
2503		} __packed port_rcv_ei;
2504
2505		/* ExcessiveBufferOverrunInfo */
2506		struct {
2507			u8 status_and_sc;
2508			u8 reserved4[7];
2509		} __packed excessive_buffer_overrun_ei;
2510
2511		/* PortXmitConstraintErrorInfo */
2512		struct {
2513			u8 status;
2514			u8 reserved5;
2515			__be16 pkey;
2516			__be32 slid;
2517		} __packed port_xmit_constraint_ei;
2518
2519		/* PortRcvConstraintErrorInfo */
2520		struct {
2521			u8 status;
2522			u8 reserved6;
2523			__be16 pkey;
2524			__be32 slid;
2525		} __packed port_rcv_constraint_ei;
2526
2527		/* PortRcvSwitchRelayErrorInfo */
2528		struct {
2529			u8 status_and_code;
2530			u8 reserved7[3];
2531			__u32 error_info;
2532		} __packed port_rcv_switch_relay_ei;
2533
2534		/* UncorrectableErrorInfo */
2535		struct {
2536			u8 status_and_code;
2537			u8 reserved8;
2538		} __packed uncorrectable_ei;
2539
2540		/* FMConfigErrorInfo */
2541		struct {
2542			u8 status_and_code;
2543			u8 error_info;
2544		} __packed fm_config_ei;
2545		__u32 reserved9;
2546	} port;
2547};
2548
2549/* opa_port_error_info_msg error_info_select_mask bit definitions */
2550enum error_info_selects {
2551	ES_PORT_RCV_ERROR_INFO			= (1 << 31),
2552	ES_EXCESSIVE_BUFFER_OVERRUN_INFO	= (1 << 30),
2553	ES_PORT_XMIT_CONSTRAINT_ERROR_INFO	= (1 << 29),
2554	ES_PORT_RCV_CONSTRAINT_ERROR_INFO	= (1 << 28),
2555	ES_PORT_RCV_SWITCH_RELAY_ERROR_INFO	= (1 << 27),
2556	ES_UNCORRECTABLE_ERROR_INFO		= (1 << 26),
2557	ES_FM_CONFIG_ERROR_INFO			= (1 << 25)
2558};
2559
2560static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
2561				     struct ib_device *ibdev, u32 *resp_len)
2562{
2563	struct opa_class_port_info *p =
2564		(struct opa_class_port_info *)pmp->data;
2565
2566	memset(pmp->data, 0, sizeof(pmp->data));
2567
2568	if (pmp->mad_hdr.attr_mod != 0)
2569		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2570
2571	p->base_version = OPA_MGMT_BASE_VERSION;
2572	p->class_version = OPA_SM_CLASS_VERSION;
2573	/*
2574	 * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
2575	 */
2576	p->cap_mask2_resp_time = cpu_to_be32(18);
2577
2578	if (resp_len)
2579		*resp_len += sizeof(*p);
2580
2581	return reply((struct ib_mad_hdr *)pmp);
2582}
2583
2584static void a0_portstatus(struct hfi1_pportdata *ppd,
2585			  struct opa_port_status_rsp *rsp)
2586{
2587	if (!is_bx(ppd->dd)) {
2588		unsigned long vl;
2589		u64 sum_vl_xmit_wait = 0;
2590		unsigned long vl_all_mask = VL_MASK_ALL;
2591
2592		for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
2593			u64 tmp = sum_vl_xmit_wait +
2594				  read_port_cntr(ppd, C_TX_WAIT_VL,
2595						 idx_from_vl(vl));
2596			if (tmp < sum_vl_xmit_wait) {
2597				/* we wrapped */
2598				sum_vl_xmit_wait = (u64)~0;
2599				break;
2600			}
2601			sum_vl_xmit_wait = tmp;
2602		}
2603		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2604			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2605	}
2606}
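
/*
 * Illustrative sketch (not part of the driver): the loop above is a
 * saturating sum, equivalent to repeatedly applying
 *
 *	static u64 saturating_add(u64 a, u64 b)
 *	{
 *		return (a + b < a) ? (u64)~0 : a + b;
 *	}
 *
 * to the per-VL xmit-wait counters.  If the chip-level PortXmitWait value
 * exceeds that per-VL total, it is clamped down to the total.
 */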
2607
2608/**
2609 * tx_link_width - convert link width bitmask to integer
2610 * value representing actual link width.
2611 * @link_width: width of active link
2612 * @return: index of the highest bit set in link_width
2613 *
2614 * The function converts the link width bitmask and returns the index
2615 * of the set bit that indicates the current link width.
2616 */
2617u16 tx_link_width(u16 link_width)
2618{
2619	int n = LINK_WIDTH_DEFAULT;
2620	u16 tx_width = n;
2621
2622	while (link_width && n) {
2623		if (link_width & (1 << (n - 1))) {
2624			tx_width = n;
2625			break;
2626		}
2627		n--;
2628	}
2629
2630	return tx_width;
2631}
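
/*
 * Illustrative sketch (not part of the driver): tx_link_width() returns
 * the position of the highest set bit in the active-width bitmask, so a
 * mask of 0x2 yields 2 and 0x8 yields 4, and a zero mask falls back to
 * LINK_WIDTH_DEFAULT.  Assuming no bits above LINK_WIDTH_DEFAULT are ever
 * set, this is equivalent to:
 *
 *	static u16 tx_link_width_fls(u16 link_width)
 *	{
 *		return link_width ? fls(link_width) : LINK_WIDTH_DEFAULT;
 *	}
 */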
2632
2633/**
2634 * get_xmit_wait_counters - convert the HFI's SendWaitCnt/SendWaitVlCnt
2635 * counters from units of TXE cycle times to flit times.
2636 * @ppd: info of the physical HFI port
2637 * @link_width: width of the active link
2638 * @link_speed: speed of the active link
2639 * @vl: VL0-VL7 or VL15 for a PortVLXmitWait counter request; if vl is
2640 * C_VL_COUNT, the request is for the SendWaitCnt counter
2641 * @return: the accumulated SendWaitCnt/SendWaitVlCnt value for this vl
2642 *
2643 * Convert the SendWaitCnt/SendWaitVlCnt counter from TXE cycle times to
2644 * flit times. Call this function to sample these counters. The delta
2645 * since the previous sample is converted using the link width recorded
2646 * at the previous sample, then ppd->port_vl_xmit_wait_last and
2647 * ppd->prev_link_width are updated to the current counter value and
2648 * link width.
2649 */
2650u64 get_xmit_wait_counters(struct hfi1_pportdata *ppd,
2651			   u16 link_width, u16 link_speed, int vl)
2652{
2653	u64 port_vl_xmit_wait_curr;
2654	u64 delta_vl_xmit_wait;
2655	u64 xmit_wait_val;
2656
2657	if (vl > C_VL_COUNT)
2658		return  0;
2659	if (vl < C_VL_COUNT)
2660		port_vl_xmit_wait_curr =
2661			read_port_cntr(ppd, C_TX_WAIT_VL, vl);
2662	else
2663		port_vl_xmit_wait_curr =
2664			read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL);
2665
2666	xmit_wait_val =
2667		port_vl_xmit_wait_curr -
2668		ppd->port_vl_xmit_wait_last[vl];
2669	delta_vl_xmit_wait =
2670		convert_xmit_counter(xmit_wait_val,
2671				     ppd->prev_link_width,
2672				     link_speed);
2673
2674	ppd->vl_xmit_flit_cnt[vl] += delta_vl_xmit_wait;
2675	ppd->port_vl_xmit_wait_last[vl] = port_vl_xmit_wait_curr;
2676	ppd->prev_link_width = link_width;
2677
2678	return ppd->vl_xmit_flit_cnt[vl];
2679}
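
/*
 * Illustrative sketch (not part of the driver): get_xmit_wait_counters()
 * accumulates converted deltas rather than converting the raw counter,
 * because the link width used for the cycle-to-flit conversion may have
 * changed since the last sample.  Conceptually each call does:
 *
 *	delta_cycles	 = raw_now - raw_last;
 *	flit_total	+= convert_xmit_counter(delta_cycles,
 *						prev_link_width, link_speed);
 *	raw_last	 = raw_now;
 *	prev_link_width	 = link_width;
 *
 * so every interval is converted with the width that was active while its
 * cycles accumulated, and the running flit total is returned.
 */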
2680
2681static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
2682				  struct ib_device *ibdev,
2683				  u32 port, u32 *resp_len)
2684{
2685	struct opa_port_status_req *req =
2686		(struct opa_port_status_req *)pmp->data;
2687	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2688	struct opa_port_status_rsp *rsp;
2689	unsigned long vl_select_mask = be32_to_cpu(req->vl_select_mask);
2690	unsigned long vl;
2691	size_t response_data_size;
2692	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2693	u32 port_num = req->port_num;
2694	u8 num_vls = hweight64(vl_select_mask);
2695	struct _vls_pctrs *vlinfo;
2696	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2697	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2698	int vfi;
2699	u64 tmp, tmp2;
2700	u16 link_width;
2701	u16 link_speed;
2702
2703	response_data_size = struct_size(rsp, vls, num_vls);
2704	if (response_data_size > sizeof(pmp->data)) {
2705		pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE;
2706		return reply((struct ib_mad_hdr *)pmp);
2707	}
2708
2709	if (nports != 1 || (port_num && port_num != port) ||
2710	    num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
2711		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2712		return reply((struct ib_mad_hdr *)pmp);
2713	}
2714
2715	memset(pmp->data, 0, sizeof(pmp->data));
2716
2717	rsp = (struct opa_port_status_rsp *)pmp->data;
2718	if (port_num)
2719		rsp->port_num = port_num;
2720	else
2721		rsp->port_num = port;
2722
2723	rsp->port_rcv_constraint_errors =
2724		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2725					   CNTR_INVALID_VL));
2726
2727	hfi1_read_link_quality(dd, &rsp->link_quality_indicator);
2728
2729	rsp->vl_select_mask = cpu_to_be32((u32)vl_select_mask);
2730	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2731					  CNTR_INVALID_VL));
2732	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2733					 CNTR_INVALID_VL));
2734	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2735					  CNTR_INVALID_VL));
2736	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2737					 CNTR_INVALID_VL));
2738	rsp->port_multicast_xmit_pkts =
2739		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2740					  CNTR_INVALID_VL));
2741	rsp->port_multicast_rcv_pkts =
2742		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2743					  CNTR_INVALID_VL));
2744	/*
2745	 * Convert PortXmitWait counter from TXE cycle times
2746	 * to flit times.
2747	 */
2748	link_width =
2749		tx_link_width(ppd->link_width_downgrade_tx_active);
2750	link_speed = get_link_speed(ppd->link_speed_active);
2751	rsp->port_xmit_wait =
2752		cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
2753						   link_speed, C_VL_COUNT));
2754	rsp->port_rcv_fecn =
2755		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2756	rsp->port_rcv_becn =
2757		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2758	rsp->port_xmit_discards =
2759		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
2760					   CNTR_INVALID_VL));
2761	rsp->port_xmit_constraint_errors =
2762		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2763					   CNTR_INVALID_VL));
2764	rsp->port_rcv_remote_physical_errors =
2765		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2766					  CNTR_INVALID_VL));
2767	rsp->local_link_integrity_errors =
2768		cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
2769					  CNTR_INVALID_VL));
2770	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2771	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
2772				   CNTR_INVALID_VL);
2773	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
2774		/* overflow/wrapped */
2775		rsp->link_error_recovery = cpu_to_be32(~0);
2776	} else {
2777		rsp->link_error_recovery = cpu_to_be32(tmp2);
2778	}
2779	rsp->port_rcv_errors =
2780		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2781	rsp->excessive_buffer_overruns =
2782		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
2783	rsp->fm_config_errors =
2784		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2785					  CNTR_INVALID_VL));
2786	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
2787						      CNTR_INVALID_VL));
2788
2789	/* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
2790	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2791	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
2792
2793	vlinfo = &rsp->vls[0];
2794	vfi = 0;
2795	/* The vl_select_mask has been checked above, and we know
2796	 * that it contains only entries which represent valid VLs.
2797	 * So in the for_each_set_bit() loop below, we don't need
2798	 * any additional checks for vl.
2799	 */
2800	for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
2801		memset(vlinfo, 0, sizeof(*vlinfo));
2802
2803		tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl));
2804		rsp->vls[vfi].port_vl_rcv_data = cpu_to_be64(tmp);
2805
2806		rsp->vls[vfi].port_vl_rcv_pkts =
2807			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2808						  idx_from_vl(vl)));
2809
2810		rsp->vls[vfi].port_vl_xmit_data =
2811			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2812						   idx_from_vl(vl)));
2813
2814		rsp->vls[vfi].port_vl_xmit_pkts =
2815			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2816						   idx_from_vl(vl)));
2817		/*
2818		 * Convert PortVlXmitWait counter from TXE cycle
2819		 * times to flit times.
2820		 */
2821		rsp->vls[vfi].port_vl_xmit_wait =
2822			cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
2823							   link_speed,
2824							   idx_from_vl(vl)));
2825
2826		rsp->vls[vfi].port_vl_rcv_fecn =
2827			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2828						  idx_from_vl(vl)));
2829
2830		rsp->vls[vfi].port_vl_rcv_becn =
2831			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2832						  idx_from_vl(vl)));
2833
2834		rsp->vls[vfi].port_vl_xmit_discards =
2835			cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
2836						   idx_from_vl(vl)));
2837		vlinfo++;
2838		vfi++;
2839	}
2840
2841	a0_portstatus(ppd, rsp);
2842
2843	if (resp_len)
2844		*resp_len += response_data_size;
2845
2846	return reply((struct ib_mad_hdr *)pmp);
2847}
2848
2849static u64 get_error_counter_summary(struct ib_device *ibdev, u32 port,
2850				     u8 res_lli, u8 res_ler)
2851{
2852	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2853	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2854	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2855	u64 error_counter_summary = 0, tmp;
2856
2857	error_counter_summary += read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2858						CNTR_INVALID_VL);
2859	/* port_rcv_switch_relay_errors is 0 for HFIs */
2860	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_DSCD,
2861						CNTR_INVALID_VL);
2862	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2863						CNTR_INVALID_VL);
2864	error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2865					       CNTR_INVALID_VL);
2866	/* local link integrity must be right-shifted by the lli resolution */
2867	error_counter_summary += (read_dev_cntr(dd, C_DC_RX_REPLAY,
2868						CNTR_INVALID_VL) >> res_lli);
2869	/* link error recovery must be right-shifted by the ler resolution */
2870	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2871	tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
2872	error_counter_summary += (tmp >> res_ler);
2873	error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
2874					       CNTR_INVALID_VL);
2875	error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2876	error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2877					       CNTR_INVALID_VL);
2878	/* ppd->link_downed is a 32-bit value */
2879	error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
2880						CNTR_INVALID_VL);
2881	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2882	/* this is an 8-bit quantity */
2883	error_counter_summary += tmp < 0x100 ? (tmp & 0xff) : 0xff;
2884
2885	return error_counter_summary;
2886}
2887
2888static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp)
2889{
2890	if (!is_bx(ppd->dd)) {
2891		unsigned long vl;
2892		u64 sum_vl_xmit_wait = 0;
2893		unsigned long vl_all_mask = VL_MASK_ALL;
2894
2895		for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
2896			u64 tmp = sum_vl_xmit_wait +
2897				  read_port_cntr(ppd, C_TX_WAIT_VL,
2898						 idx_from_vl(vl));
2899			if (tmp < sum_vl_xmit_wait) {
2900				/* we wrapped */
2901				sum_vl_xmit_wait = (u64)~0;
2902				break;
2903			}
2904			sum_vl_xmit_wait = tmp;
2905		}
2906		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2907			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2908	}
2909}
2910
2911static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
2912				   struct _port_dctrs *rsp)
2913{
2914	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2915
2916	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2917						CNTR_INVALID_VL));
2918	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2919						CNTR_INVALID_VL));
2920	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2921						CNTR_INVALID_VL));
2922	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2923						CNTR_INVALID_VL));
2924	rsp->port_multicast_xmit_pkts =
2925		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2926					  CNTR_INVALID_VL));
2927	rsp->port_multicast_rcv_pkts =
2928		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2929					  CNTR_INVALID_VL));
2930}
2931
2932static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
2933				    struct ib_device *ibdev,
2934				    u32 port, u32 *resp_len)
2935{
2936	struct opa_port_data_counters_msg *req =
2937		(struct opa_port_data_counters_msg *)pmp->data;
2938	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2939	struct hfi1_ibport *ibp = to_iport(ibdev, port);
2940	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2941	struct _port_dctrs *rsp;
2942	struct _vls_dctrs *vlinfo;
2943	size_t response_data_size;
2944	u32 num_ports;
2945	u8 lq, num_vls;
2946	u8 res_lli, res_ler;
2947	u64 port_mask;
2948	u32 port_num;
2949	unsigned long vl;
2950	unsigned long vl_select_mask;
2951	int vfi;
2952	u16 link_width;
2953	u16 link_speed;
2954
2955	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2956	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2957	vl_select_mask = be32_to_cpu(req->vl_select_mask);
2958	res_lli = (u8)(be32_to_cpu(req->resolution) & MSK_LLI) >> MSK_LLI_SFT;
2959	res_lli = res_lli ? res_lli + ADD_LLI : 0;
2960	res_ler = (u8)(be32_to_cpu(req->resolution) & MSK_LER) >> MSK_LER_SFT;
2961	res_ler = res_ler ? res_ler + ADD_LER : 0;
2962
2963	if (num_ports != 1 || (vl_select_mask & ~VL_MASK_ALL)) {
2964		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2965		return reply((struct ib_mad_hdr *)pmp);
2966	}
2967
2968	/* Sanity check */
2969	response_data_size = struct_size(req, port.vls, num_vls);
2970
2971	if (response_data_size > sizeof(pmp->data)) {
2972		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2973		return reply((struct ib_mad_hdr *)pmp);
2974	}
2975
2976	/*
2977	 * The bit set in the mask needs to be consistent with the
2978	 * port the request came in on.
2979	 */
2980	port_mask = be64_to_cpu(req->port_select_mask[3]);
2981	port_num = find_first_bit((unsigned long *)&port_mask,
2982				  sizeof(port_mask) * 8);
2983
2984	if (port_num != port) {
2985		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2986		return reply((struct ib_mad_hdr *)pmp);
2987	}
2988
2989	rsp = &req->port;
2990	memset(rsp, 0, sizeof(*rsp));
2991
2992	rsp->port_number = port;
2993	/*
2994	 * Note that link_quality_indicator is a 32 bit quantity in
2995	 * 'datacounters' queries (as opposed to 'portinfo' queries,
2996	 * where it's a byte).
2997	 */
2998	hfi1_read_link_quality(dd, &lq);
2999	rsp->link_quality_indicator = cpu_to_be32((u32)lq);
3000	pma_get_opa_port_dctrs(ibdev, rsp);
3001
3002	/*
3003	 * Convert PortXmitWait counter from TXE
3004	 * cycle times to flit times.
3005	 */
3006	link_width =
3007		tx_link_width(ppd->link_width_downgrade_tx_active);
3008	link_speed = get_link_speed(ppd->link_speed_active);
3009	rsp->port_xmit_wait =
3010		cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
3011						   link_speed, C_VL_COUNT));
3012	rsp->port_rcv_fecn =
3013		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
3014	rsp->port_rcv_becn =
3015		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
3016	rsp->port_error_counter_summary =
3017		cpu_to_be64(get_error_counter_summary(ibdev, port,
3018						      res_lli, res_ler));
3019
3020	vlinfo = &rsp->vls[0];
3021	vfi = 0;
3022	/* The vl_select_mask has been checked above, and we know
3023	 * that it contains only entries which represent valid VLs.
3024	 * So in the for_each_set_bit() loop below, we don't need
3025	 * any additional checks for vl.
3026	 */
3027	for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
3028		memset(vlinfo, 0, sizeof(*vlinfo));
3029
3030		rsp->vls[vfi].port_vl_xmit_data =
3031			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
3032						   idx_from_vl(vl)));
3033
3034		rsp->vls[vfi].port_vl_rcv_data =
3035			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
3036						  idx_from_vl(vl)));
3037
3038		rsp->vls[vfi].port_vl_xmit_pkts =
3039			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
3040						   idx_from_vl(vl)));
3041
3042		rsp->vls[vfi].port_vl_rcv_pkts =
3043			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
3044						  idx_from_vl(vl)));
3045
3046		/*
3047		 * Convert PortVlXmitWait counter from TXE
3048		 * cycle times to flit times.
3049		 */
3050		rsp->vls[vfi].port_vl_xmit_wait =
3051			cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
3052							   link_speed,
3053							   idx_from_vl(vl)));
3054
3055		rsp->vls[vfi].port_vl_rcv_fecn =
3056			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
3057						  idx_from_vl(vl)));
3058		rsp->vls[vfi].port_vl_rcv_becn =
3059			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
3060						  idx_from_vl(vl)));
3061
3062		/* rsp->port_vl_xmit_time_cong is 0 for HFIs */
3063		/* rsp->port_vl_xmit_wasted_bw ??? */
3064		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
3065		 * does this differ from rsp->vls[vfi].port_vl_xmit_wait
3066		 */
3067		/*rsp->vls[vfi].port_vl_mark_fecn =
3068		 *	cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
3069		 *		+ offset));
3070		 */
3071		vlinfo++;
3072		vfi++;
3073	}
3074
3075	a0_datacounters(ppd, rsp);
3076
3077	if (resp_len)
3078		*resp_len += response_data_size;
3079
3080	return reply((struct ib_mad_hdr *)pmp);
3081}
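
/*
 * Illustrative sketch (not part of the driver): the port-select checks in
 * the PMA handlers expect exactly the bit for the ingress port to be set
 * in port_select_mask[3].  For a request arriving on port 1, a well-formed
 * request has port_select_mask[3] == cpu_to_be64(1 << 1); find_first_bit()
 * then yields 1, which must match the port the MAD arrived on, otherwise
 * IB_SMP_INVALID_FIELD is returned.
 */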
3082
3083static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
3084				       struct ib_device *ibdev, u32 port)
3085{
3086	struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
3087						pmp->data;
3088	struct _port_dctrs rsp;
3089
3090	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
3091		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3092		goto bail;
3093	}
3094
3095	memset(&rsp, 0, sizeof(rsp));
3096	pma_get_opa_port_dctrs(ibdev, &rsp);
3097
3098	p->port_xmit_data = rsp.port_xmit_data;
3099	p->port_rcv_data = rsp.port_rcv_data;
3100	p->port_xmit_packets = rsp.port_xmit_pkts;
3101	p->port_rcv_packets = rsp.port_rcv_pkts;
3102	p->port_unicast_xmit_packets = 0;
3103	p->port_unicast_rcv_packets =  0;
3104	p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
3105	p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
3106
3107bail:
3108	return reply((struct ib_mad_hdr *)pmp);
3109}
3110
3111static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
3112				   struct _port_ectrs *rsp, u32 port)
3113{
3114	u64 tmp, tmp2;
3115	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3116	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3117	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3118
3119	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
3120	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
3121					CNTR_INVALID_VL);
3122	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
3123		/* overflow/wrapped */
3124		rsp->link_error_recovery = cpu_to_be32(~0);
3125	} else {
3126		rsp->link_error_recovery = cpu_to_be32(tmp2);
3127	}
3128
3129	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
3130						CNTR_INVALID_VL));
3131	rsp->port_rcv_errors =
3132		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
3133	rsp->port_rcv_remote_physical_errors =
3134		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
3135					  CNTR_INVALID_VL));
3136	rsp->port_rcv_switch_relay_errors = 0;
3137	rsp->port_xmit_discards =
3138		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
3139					   CNTR_INVALID_VL));
3140	rsp->port_xmit_constraint_errors =
3141		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
3142					   CNTR_INVALID_VL));
3143	rsp->port_rcv_constraint_errors =
3144		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
3145					   CNTR_INVALID_VL));
3146	rsp->local_link_integrity_errors =
3147		cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
3148					  CNTR_INVALID_VL));
3149	rsp->excessive_buffer_overruns =
3150		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
3151}
3152
3153static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
3154				  struct ib_device *ibdev,
3155				  u32 port, u32 *resp_len)
3156{
3157	size_t response_data_size;
3158	struct _port_ectrs *rsp;
3159	u32 port_num;
3160	struct opa_port_error_counters64_msg *req;
3161	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3162	u32 num_ports;
3163	u8 num_pslm;
3164	u8 num_vls;
3165	struct hfi1_ibport *ibp;
3166	struct hfi1_pportdata *ppd;
3167	struct _vls_ectrs *vlinfo;
3168	unsigned long vl;
3169	u64 port_mask, tmp;
3170	unsigned long vl_select_mask;
3171	int vfi;
3172
3173	req = (struct opa_port_error_counters64_msg *)pmp->data;
3174
3175	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
3176
3177	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3178	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
3179
3180	if (num_ports != 1 || num_ports != num_pslm) {
3181		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3182		return reply((struct ib_mad_hdr *)pmp);
3183	}
3184
3185	response_data_size = struct_size(req, port.vls, num_vls);
3186
3187	if (response_data_size > sizeof(pmp->data)) {
3188		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3189		return reply((struct ib_mad_hdr *)pmp);
3190	}
3191	/*
3192	 * The bit set in the mask needs to be consistent with the
3193	 * port the request came in on.
3194	 */
3195	port_mask = be64_to_cpu(req->port_select_mask[3]);
3196	port_num = find_first_bit((unsigned long *)&port_mask,
3197				  sizeof(port_mask) * 8);
3198
3199	if (port_num != port) {
3200		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3201		return reply((struct ib_mad_hdr *)pmp);
3202	}
3203
3204	rsp = &req->port;
3205
3206	ibp = to_iport(ibdev, port_num);
3207	ppd = ppd_from_ibp(ibp);
3208
3209	memset(rsp, 0, sizeof(*rsp));
3210	rsp->port_number = port_num;
3211
3212	pma_get_opa_port_ectrs(ibdev, rsp, port_num);
3213
3214	rsp->port_rcv_remote_physical_errors =
3215		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
3216					  CNTR_INVALID_VL));
3217	rsp->fm_config_errors =
3218		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
3219					  CNTR_INVALID_VL));
3220	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
3221
3222	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
3223	rsp->port_rcv_errors =
3224		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
3225	vlinfo = &rsp->vls[0];
3226	vfi = 0;
3227	vl_select_mask = be32_to_cpu(req->vl_select_mask);
3228	for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
3229		memset(vlinfo, 0, sizeof(*vlinfo));
3230		rsp->vls[vfi].port_vl_xmit_discards =
3231			cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
3232						   idx_from_vl(vl)));
3233		vlinfo += 1;
3234		vfi++;
3235	}
3236
3237	if (resp_len)
3238		*resp_len += response_data_size;
3239
3240	return reply((struct ib_mad_hdr *)pmp);
3241}
3242
3243static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
3244				   struct ib_device *ibdev, u32 port)
3245{
3246	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
3247		pmp->data;
3248	struct _port_ectrs rsp;
3249	u64 temp_link_overrun_errors;
3250	u64 temp_64;
3251	u32 temp_32;
3252
3253	memset(&rsp, 0, sizeof(rsp));
3254	pma_get_opa_port_ectrs(ibdev, &rsp, port);
3255
3256	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
3257		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3258		goto bail;
3259	}
3260
3261	p->symbol_error_counter = 0; /* N/A for OPA */
3262
3263	temp_32 = be32_to_cpu(rsp.link_error_recovery);
3264	if (temp_32 > 0xFFUL)
3265		p->link_error_recovery_counter = 0xFF;
3266	else
3267		p->link_error_recovery_counter = (u8)temp_32;
3268
3269	temp_32 = be32_to_cpu(rsp.link_downed);
3270	if (temp_32 > 0xFFUL)
3271		p->link_downed_counter = 0xFF;
3272	else
3273		p->link_downed_counter = (u8)temp_32;
3274
3275	temp_64 = be64_to_cpu(rsp.port_rcv_errors);
3276	if (temp_64 > 0xFFFFUL)
3277		p->port_rcv_errors = cpu_to_be16(0xFFFF);
3278	else
3279		p->port_rcv_errors = cpu_to_be16((u16)temp_64);
3280
3281	temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
3282	if (temp_64 > 0xFFFFUL)
3283		p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
3284	else
3285		p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
3286
3287	temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
3288	p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
3289
3290	temp_64 = be64_to_cpu(rsp.port_xmit_discards);
3291	if (temp_64 > 0xFFFFUL)
3292		p->port_xmit_discards = cpu_to_be16(0xFFFF);
3293	else
3294		p->port_xmit_discards = cpu_to_be16((u16)temp_64);
3295
3296	temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
3297	if (temp_64 > 0xFFUL)
3298		p->port_xmit_constraint_errors = 0xFF;
3299	else
3300		p->port_xmit_constraint_errors = (u8)temp_64;
3301
3302	temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
3303	if (temp_64 > 0xFFUL)
3304		p->port_rcv_constraint_errors = 0xFFUL;
3305	else
3306		p->port_rcv_constraint_errors = (u8)temp_64;
3307
3308	/* LocalLink: 7:4, BufferOverrun: 3:0 */
3309	temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
3310	if (temp_64 > 0xFUL)
3311		temp_64 = 0xFUL;
3312
3313	temp_link_overrun_errors = temp_64 << 4;
3314
3315	temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
3316	if (temp_64 > 0xFUL)
3317		temp_64 = 0xFUL;
3318	temp_link_overrun_errors |= temp_64;
3319
3320	p->link_overrun_errors = (u8)temp_link_overrun_errors;
3321
3322	p->vl15_dropped = 0; /* N/A for OPA */
3323
3324bail:
3325	return reply((struct ib_mad_hdr *)pmp);
3326}
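
/*
 * Illustrative sketch (not part of the driver): link_overrun_errors above
 * packs two saturated 4-bit counts into one byte, LocalLinkIntegrity in
 * bits 7:4 and ExcessiveBufferOverruns in bits 3:0.  For example, 9 local
 * link integrity errors and 20 buffer overruns encode as
 *
 *	(min(9, 0xf) << 4) | min(20, 0xf) == 0x9f
 */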
3327
3328static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
3329				 struct ib_device *ibdev,
3330				 u32 port, u32 *resp_len)
3331{
3332	size_t response_data_size;
3333	struct _port_ei *rsp;
3334	struct opa_port_error_info_msg *req;
3335	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3336	u64 port_mask;
3337	u32 num_ports;
3338	u32 port_num;
3339	u8 num_pslm;
3340	u64 reg;
3341
3342	req = (struct opa_port_error_info_msg *)pmp->data;
3343	rsp = &req->port;
3344
3345	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3346	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3347
3348	memset(rsp, 0, sizeof(*rsp));
3349
3350	if (num_ports != 1 || num_ports != num_pslm) {
3351		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3352		return reply((struct ib_mad_hdr *)pmp);
3353	}
3354
3355	/* Sanity check */
3356	response_data_size = sizeof(struct opa_port_error_info_msg);
3357
3358	if (response_data_size > sizeof(pmp->data)) {
3359		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3360		return reply((struct ib_mad_hdr *)pmp);
3361	}
3362
3363	/*
3364	 * The bit set in the mask needs to be consistent with the port
3365	 * the request came in on.
3366	 */
3367	port_mask = be64_to_cpu(req->port_select_mask[3]);
3368	port_num = find_first_bit((unsigned long *)&port_mask,
3369				  sizeof(port_mask) * 8);
3370
3371	if (port_num != port) {
3372		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3373		return reply((struct ib_mad_hdr *)pmp);
3374	}
3375	rsp->port_number = port;
3376
3377	/* PortRcvErrorInfo */
3378	rsp->port_rcv_ei.status_and_code =
3379		dd->err_info_rcvport.status_and_code;
3380	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
3381	       &dd->err_info_rcvport.packet_flit1, sizeof(u64));
3382	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
3383	       &dd->err_info_rcvport.packet_flit2, sizeof(u64));
3384
3385	/* ExcessiveBufferOverrunInfo */
3386	reg = read_csr(dd, RCV_ERR_INFO);
3387	if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
3388		/*
3389		 * if the RcvExcessBufferOverrun bit is set, save SC of
3390		 * first pkt that encountered an excess buffer overrun
3391		 */
3392		u8 tmp = (u8)reg;
3393
3394		tmp &=  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
3395		tmp <<= 2;
3396		rsp->excessive_buffer_overrun_ei.status_and_sc = tmp;
3397		/* set the status bit */
3398		rsp->excessive_buffer_overrun_ei.status_and_sc |= 0x80;
3399	}
3400
3401	rsp->port_xmit_constraint_ei.status =
3402		dd->err_info_xmit_constraint.status;
3403	rsp->port_xmit_constraint_ei.pkey =
3404		cpu_to_be16(dd->err_info_xmit_constraint.pkey);
3405	rsp->port_xmit_constraint_ei.slid =
3406		cpu_to_be32(dd->err_info_xmit_constraint.slid);
3407
3408	rsp->port_rcv_constraint_ei.status =
3409		dd->err_info_rcv_constraint.status;
3410	rsp->port_rcv_constraint_ei.pkey =
3411		cpu_to_be16(dd->err_info_rcv_constraint.pkey);
3412	rsp->port_rcv_constraint_ei.slid =
3413		cpu_to_be32(dd->err_info_rcv_constraint.slid);
3414
3415	/* UncorrectableErrorInfo */
3416	rsp->uncorrectable_ei.status_and_code = dd->err_info_uncorrectable;
3417
3418	/* FMConfigErrorInfo */
3419	rsp->fm_config_ei.status_and_code = dd->err_info_fmconfig;
3420
3421	if (resp_len)
3422		*resp_len += response_data_size;
3423
3424	return reply((struct ib_mad_hdr *)pmp);
3425}
3426
3427static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
3428				  struct ib_device *ibdev,
3429				  u32 port, u32 *resp_len)
3430{
3431	struct opa_clear_port_status *req =
3432		(struct opa_clear_port_status *)pmp->data;
3433	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3434	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3435	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3436	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
3437	u64 portn = be64_to_cpu(req->port_select_mask[3]);
3438	u32 counter_select = be32_to_cpu(req->counter_select_mask);
3439	unsigned long vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
3440	unsigned long vl;
3441
3442	if ((nports != 1) || (portn != 1 << port)) {
3443		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3444		return reply((struct ib_mad_hdr *)pmp);
3445	}
3446	/*
3447	 * only counters returned by pma_get_opa_portstatus() are
3448	 * handled, so when pma_get_opa_portstatus() gets a fix,
3449	 * the corresponding change should be made here as well.
3450	 */
3451
3452	if (counter_select & CS_PORT_XMIT_DATA)
3453		write_dev_cntr(dd, C_DC_XMIT_FLITS, CNTR_INVALID_VL, 0);
3454
3455	if (counter_select & CS_PORT_RCV_DATA)
3456		write_dev_cntr(dd, C_DC_RCV_FLITS, CNTR_INVALID_VL, 0);
3457
3458	if (counter_select & CS_PORT_XMIT_PKTS)
3459		write_dev_cntr(dd, C_DC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3460
3461	if (counter_select & CS_PORT_RCV_PKTS)
3462		write_dev_cntr(dd, C_DC_RCV_PKTS, CNTR_INVALID_VL, 0);
3463
3464	if (counter_select & CS_PORT_MCAST_XMIT_PKTS)
3465		write_dev_cntr(dd, C_DC_MC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3466
3467	if (counter_select & CS_PORT_MCAST_RCV_PKTS)
3468		write_dev_cntr(dd, C_DC_MC_RCV_PKTS, CNTR_INVALID_VL, 0);
3469
3470	if (counter_select & CS_PORT_XMIT_WAIT) {
3471		write_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL, 0);
3472		ppd->port_vl_xmit_wait_last[C_VL_COUNT] = 0;
3473		ppd->vl_xmit_flit_cnt[C_VL_COUNT] = 0;
3474	}
3475	/* ignore cs_sw_port_congestion for HFIs */
3476
3477	if (counter_select & CS_PORT_RCV_FECN)
3478		write_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL, 0);
3479
3480	if (counter_select & CS_PORT_RCV_BECN)
3481		write_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL, 0);
3482
3483	/* ignore cs_port_xmit_time_cong for HFIs */
3484	/* ignore cs_port_xmit_wasted_bw for now */
3485	/* ignore cs_port_xmit_wait_data for now */
3486	if (counter_select & CS_PORT_RCV_BUBBLE)
3487		write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
3488
3489	/* Only applicable for switch */
3490	/* if (counter_select & CS_PORT_MARK_FECN)
3491	 *	write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
3492	 */
3493
3494	if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
3495		write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
3496
3497	/* ignore cs_port_rcv_switch_relay_errors for HFIs */
3498	if (counter_select & CS_PORT_XMIT_DISCARDS)
3499		write_port_cntr(ppd, C_SW_XMIT_DSCD, CNTR_INVALID_VL, 0);
3500
3501	if (counter_select & CS_PORT_XMIT_CONSTRAINT_ERRORS)
3502		write_port_cntr(ppd, C_SW_XMIT_CSTR_ERR, CNTR_INVALID_VL, 0);
3503
3504	if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS)
3505		write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0);
3506
3507	if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS)
3508		write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3509
3510	if (counter_select & CS_LINK_ERROR_RECOVERY) {
3511		write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3512		write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
3513			       CNTR_INVALID_VL, 0);
3514	}
3515
3516	if (counter_select & CS_PORT_RCV_ERRORS)
3517		write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3518
3519	if (counter_select & CS_EXCESSIVE_BUFFER_OVERRUNS) {
3520		write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3521		dd->rcv_ovfl_cnt = 0;
3522	}
3523
3524	if (counter_select & CS_FM_CONFIG_ERRORS)
3525		write_dev_cntr(dd, C_DC_FM_CFG_ERR, CNTR_INVALID_VL, 0);
3526
3527	if (counter_select & CS_LINK_DOWNED)
3528		write_port_cntr(ppd, C_SW_LINK_DOWN, CNTR_INVALID_VL, 0);
3529
3530	if (counter_select & CS_UNCORRECTABLE_ERRORS)
3531		write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0);
3532
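	/* clear the selected counters for each VL as well */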
3533	for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
3534		if (counter_select & CS_PORT_XMIT_DATA)
3535			write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
3536
3537		if (counter_select & CS_PORT_RCV_DATA)
3538			write_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl), 0);
3539
3540		if (counter_select & CS_PORT_XMIT_PKTS)
3541			write_port_cntr(ppd, C_TX_PKT_VL, idx_from_vl(vl), 0);
3542
3543		if (counter_select & CS_PORT_RCV_PKTS)
3544			write_dev_cntr(dd, C_DC_RX_PKT_VL, idx_from_vl(vl), 0);
3545
3546		if (counter_select & CS_PORT_XMIT_WAIT) {
3547			write_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl), 0);
3548			ppd->port_vl_xmit_wait_last[idx_from_vl(vl)] = 0;
3549			ppd->vl_xmit_flit_cnt[idx_from_vl(vl)] = 0;
3550		}
3551
3552		/* sw_port_vl_congestion is 0 for HFIs */
3553		if (counter_select & CS_PORT_RCV_FECN)
3554			write_dev_cntr(dd, C_DC_RCV_FCN_VL, idx_from_vl(vl), 0);
3555
3556		if (counter_select & CS_PORT_RCV_BECN)
3557			write_dev_cntr(dd, C_DC_RCV_BCN_VL, idx_from_vl(vl), 0);
3558
3559		/* port_vl_xmit_time_cong is 0 for HFIs */
3560		/* port_vl_xmit_wasted_bw ??? */
3561		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ??? */
3562		if (counter_select & CS_PORT_RCV_BUBBLE)
3563			write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
3564
3565		/* if (counter_select & CS_PORT_MARK_FECN)
3566		 *     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
3567		 */
3568		if (counter_select & C_SW_XMIT_DSCD_VL)
3569			write_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
3570					idx_from_vl(vl), 0);
3571	}
3572
3573	if (resp_len)
3574		*resp_len += sizeof(*req);
3575
3576	return reply((struct ib_mad_hdr *)pmp);
3577}
3578
3579static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
3580				 struct ib_device *ibdev,
3581				 u32 port, u32 *resp_len)
3582{
3583	struct _port_ei *rsp;
3584	struct opa_port_error_info_msg *req;
3585	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3586	u64 port_mask;
3587	u32 num_ports;
3588	u32 port_num;
3589	u8 num_pslm;
3590	u32 error_info_select;
3591
3592	req = (struct opa_port_error_info_msg *)pmp->data;
3593	rsp = &req->port;
3594
3595	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3596	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3597
3598	memset(rsp, 0, sizeof(*rsp));
3599
3600	if (num_ports != 1 || num_ports != num_pslm) {
3601		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3602		return reply((struct ib_mad_hdr *)pmp);
3603	}
3604
3605	/*
3606	 * The bit set in the mask needs to be consistent with the port
3607	 * the request came in on.
3608	 */
3609	port_mask = be64_to_cpu(req->port_select_mask[3]);
3610	port_num = find_first_bit((unsigned long *)&port_mask,
3611				  sizeof(port_mask) * 8);
3612
3613	if (port_num != port) {
3614		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3615		return reply((struct ib_mad_hdr *)pmp);
3616	}
3617
3618	error_info_select = be32_to_cpu(req->error_info_select_mask);
3619
3620	/* PortRcvErrorInfo */
3621	if (error_info_select & ES_PORT_RCV_ERROR_INFO)
3622		/* turn off status bit */
3623		dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3624
3625	/* ExcessiveBufferOverrunInfo */
3626	if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
3627		/*
3628		 * status bit is essentially kept in the h/w - bit 5 of
3629		 * RCV_ERR_INFO
3630		 */
3631		write_csr(dd, RCV_ERR_INFO,
3632			  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
3633
3634	if (error_info_select & ES_PORT_XMIT_CONSTRAINT_ERROR_INFO)
3635		dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3636
3637	if (error_info_select & ES_PORT_RCV_CONSTRAINT_ERROR_INFO)
3638		dd->err_info_rcv_constraint.status &= ~OPA_EI_STATUS_SMASK;
3639
3640	/* UncorrectableErrorInfo */
3641	if (error_info_select & ES_UNCORRECTABLE_ERROR_INFO)
3642		/* turn off status bit */
3643		dd->err_info_uncorrectable &= ~OPA_EI_STATUS_SMASK;
3644
3645	/* FMConfigErrorInfo */
3646	if (error_info_select & ES_FM_CONFIG_ERROR_INFO)
3647		/* turn off status bit */
3648		dd->err_info_fmconfig &= ~OPA_EI_STATUS_SMASK;
3649
3650	if (resp_len)
3651		*resp_len += sizeof(*req);
3652
3653	return reply((struct ib_mad_hdr *)pmp);
3654}
3655
3656struct opa_congestion_info_attr {
3657	__be16 congestion_info;
3658	u8 control_table_cap;	/* Multiple of 64 entry unit CCTs */
3659	u8 congestion_log_length;
3660} __packed;
3661
3662static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
3663				    struct ib_device *ibdev, u32 port,
3664				    u32 *resp_len, u32 max_len)
3665{
3666	struct opa_congestion_info_attr *p =
3667		(struct opa_congestion_info_attr *)data;
3668	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3669	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3670
3671	if (smp_length_check(sizeof(*p), max_len)) {
3672		smp->status |= IB_SMP_INVALID_FIELD;
3673		return reply((struct ib_mad_hdr *)smp);
3674	}
3675
3676	p->congestion_info = 0;
3677	p->control_table_cap = ppd->cc_max_table_entries;
3678	p->congestion_log_length = OPA_CONG_LOG_ELEMS;
3679
3680	if (resp_len)
3681		*resp_len += sizeof(*p);
3682
3683	return reply((struct ib_mad_hdr *)smp);
3684}
3685
3686static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
3687				       u8 *data, struct ib_device *ibdev,
3688				       u32 port, u32 *resp_len, u32 max_len)
3689{
3690	int i;
3691	struct opa_congestion_setting_attr *p =
3692		(struct opa_congestion_setting_attr *)data;
3693	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3694	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3695	struct opa_congestion_setting_entry_shadow *entries;
3696	struct cc_state *cc_state;
3697
3698	if (smp_length_check(sizeof(*p), max_len)) {
3699		smp->status |= IB_SMP_INVALID_FIELD;
3700		return reply((struct ib_mad_hdr *)smp);
3701	}
3702
3703	rcu_read_lock();
3704
3705	cc_state = get_cc_state(ppd);
3706
3707	if (!cc_state) {
3708		rcu_read_unlock();
3709		return reply((struct ib_mad_hdr *)smp);
3710	}
3711
3712	entries = cc_state->cong_setting.entries;
3713	p->port_control = cpu_to_be16(cc_state->cong_setting.port_control);
3714	p->control_map = cpu_to_be32(cc_state->cong_setting.control_map);
3715	for (i = 0; i < OPA_MAX_SLS; i++) {
3716		p->entries[i].ccti_increase = entries[i].ccti_increase;
3717		p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
3718		p->entries[i].trigger_threshold =
3719			entries[i].trigger_threshold;
3720		p->entries[i].ccti_min = entries[i].ccti_min;
3721	}
3722
3723	rcu_read_unlock();
3724
3725	if (resp_len)
3726		*resp_len += sizeof(*p);
3727
3728	return reply((struct ib_mad_hdr *)smp);
3729}
3730
3731/*
3732 * Apply congestion control information stored in the ppd to the
3733 * active structure.
3734 */
3735static void apply_cc_state(struct hfi1_pportdata *ppd)
3736{
3737	struct cc_state *old_cc_state, *new_cc_state;
3738
3739	new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
3740	if (!new_cc_state)
3741		return;
3742
3743	/*
3744	 * Hold the lock for updating *and* to prevent ppd information
3745	 * from changing during the update.
3746	 */
3747	spin_lock(&ppd->cc_state_lock);
3748
3749	old_cc_state = get_cc_state_protected(ppd);
3750	if (!old_cc_state) {
3751		/* never active, or shutting down */
3752		spin_unlock(&ppd->cc_state_lock);
3753		kfree(new_cc_state);
3754		return;
3755	}
3756
3757	*new_cc_state = *old_cc_state;
3758
3759	if (ppd->total_cct_entry)
3760		new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
3761	else
3762		new_cc_state->cct.ccti_limit = 0;
3763
3764	memcpy(new_cc_state->cct.entries, ppd->ccti_entries,
3765	       ppd->total_cct_entry * sizeof(struct ib_cc_table_entry));
3766
3767	new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
3768	new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
3769	memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
3770	       OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
3771
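	/* publish the new state; RCU readers see either the old or new copy */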
3772	rcu_assign_pointer(ppd->cc_state, new_cc_state);
3773
3774	spin_unlock(&ppd->cc_state_lock);
3775
3776	kfree_rcu(old_cc_state, rcu);
3777}
3778
3779static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
3780				       struct ib_device *ibdev, u32 port,
3781				       u32 *resp_len, u32 max_len)
3782{
3783	struct opa_congestion_setting_attr *p =
3784		(struct opa_congestion_setting_attr *)data;
3785	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3786	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3787	struct opa_congestion_setting_entry_shadow *entries;
3788	int i;
3789
3790	if (smp_length_check(sizeof(*p), max_len)) {
3791		smp->status |= IB_SMP_INVALID_FIELD;
3792		return reply((struct ib_mad_hdr *)smp);
3793	}
3794
3795	/*
3796	 * Save details from packet into the ppd.  Hold the cc_state_lock so
3797	 * our information is consistent with anyone trying to apply the state.
3798	 */
3799	spin_lock(&ppd->cc_state_lock);
3800	ppd->cc_sl_control_map = be32_to_cpu(p->control_map);
3801
3802	entries = ppd->congestion_entries;
3803	for (i = 0; i < OPA_MAX_SLS; i++) {
3804		entries[i].ccti_increase = p->entries[i].ccti_increase;
3805		entries[i].ccti_timer = be16_to_cpu(p->entries[i].ccti_timer);
3806		entries[i].trigger_threshold =
3807			p->entries[i].trigger_threshold;
3808		entries[i].ccti_min = p->entries[i].ccti_min;
3809	}
3810	spin_unlock(&ppd->cc_state_lock);
3811
3812	/* now apply the information */
3813	apply_cc_state(ppd);
3814
3815	return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
3816					   resp_len, max_len);
3817}
3818
3819static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
3820					u8 *data, struct ib_device *ibdev,
3821					u32 port, u32 *resp_len, u32 max_len)
3822{
3823	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3824	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3825	struct opa_hfi1_cong_log *cong_log = (struct opa_hfi1_cong_log *)data;
3826	u64 ts;
3827	int i;
3828
3829	if (am || smp_length_check(sizeof(*cong_log), max_len)) {
3830		smp->status |= IB_SMP_INVALID_FIELD;
3831		return reply((struct ib_mad_hdr *)smp);
3832	}
3833
3834	spin_lock_irq(&ppd->cc_log_lock);
3835
3836	cong_log->log_type = OPA_CC_LOG_TYPE_HFI;
3837	cong_log->congestion_flags = 0;
3838	cong_log->threshold_event_counter =
3839		cpu_to_be16(ppd->threshold_event_counter);
3840	memcpy(cong_log->threshold_cong_event_map,
3841	       ppd->threshold_cong_event_map,
3842	       sizeof(cong_log->threshold_cong_event_map));
3843	/* keep timestamp in units of 1.024 usec */
3844	ts = ktime_get_ns() / 1024;
3845	cong_log->current_time_stamp = cpu_to_be32(ts);
3846	for (i = 0; i < OPA_CONG_LOG_ELEMS; i++) {
3847		struct opa_hfi1_cong_log_event_internal *cce =
3848			&ppd->cc_events[ppd->cc_mad_idx++];
3849		if (ppd->cc_mad_idx == OPA_CONG_LOG_ELEMS)
3850			ppd->cc_mad_idx = 0;
3851		/*
3852		 * Entries which are older than twice the time
3853		 * required to wrap the counter are supposed to
3854		 * be zeroed (CA10-49 IBTA, release 1.2.1, V1).
3855		 */
3856		if ((ts - cce->timestamp) / 2 > U32_MAX)
3857			continue;
3858		memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
3859		memcpy(cong_log->events[i].remote_qp_number_cn_entry,
3860		       &cce->rqpn, 3);
3861		cong_log->events[i].sl_svc_type_cn_entry =
3862			((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
3863		cong_log->events[i].remote_lid_cn_entry =
3864			cpu_to_be32(cce->rlid);
3865		cong_log->events[i].timestamp_cn_entry =
3866			cpu_to_be32(cce->timestamp);
3867	}
3868
3869	/*
3870	 * Reset threshold_cong_event_map, and threshold_event_counter
3871	 * to 0 when log is read.
3872	 */
3873	memset(ppd->threshold_cong_event_map, 0x0,
3874	       sizeof(ppd->threshold_cong_event_map));
3875	ppd->threshold_event_counter = 0;
3876
3877	spin_unlock_irq(&ppd->cc_log_lock);
3878
3879	if (resp_len)
3880		*resp_len += sizeof(struct opa_hfi1_cong_log);
3881
3882	return reply((struct ib_mad_hdr *)smp);
3883}
3884
3885static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3886				   struct ib_device *ibdev, u32 port,
3887				   u32 *resp_len, u32 max_len)
3888{
3889	struct ib_cc_table_attr *cc_table_attr =
3890		(struct ib_cc_table_attr *)data;
3891	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3892	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3893	u32 start_block = OPA_AM_START_BLK(am);
3894	u32 n_blocks = OPA_AM_NBLK(am);
3895	struct ib_cc_table_entry_shadow *entries;
3896	int i, j;
3897	u32 sentry, eentry;
3898	struct cc_state *cc_state;
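	/* one u16 for ccti_limit plus the requested table entries */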
3899	u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
3900
3901	/* sanity check n_blocks, start_block */
3902	if (n_blocks == 0 || smp_length_check(size, max_len) ||
3903	    start_block + n_blocks > ppd->cc_max_table_entries) {
3904		smp->status |= IB_SMP_INVALID_FIELD;
3905		return reply((struct ib_mad_hdr *)smp);
3906	}
3907
3908	rcu_read_lock();
3909
3910	cc_state = get_cc_state(ppd);
3911
3912	if (!cc_state) {
3913		rcu_read_unlock();
3914		return reply((struct ib_mad_hdr *)smp);
3915	}
3916
3917	sentry = start_block * IB_CCT_ENTRIES;
3918	eentry = sentry + (IB_CCT_ENTRIES * n_blocks);
3919
3920	cc_table_attr->ccti_limit = cpu_to_be16(cc_state->cct.ccti_limit);
3921
3922	entries = cc_state->cct.entries;
3923
3924	/* return n_blocks, though the last block may not be full */
3925	for (j = 0, i = sentry; i < eentry; j++, i++)
3926		cc_table_attr->ccti_entries[j].entry =
3927			cpu_to_be16(entries[i].entry);
3928
3929	rcu_read_unlock();
3930
3931	if (resp_len)
3932		*resp_len += size;
3933
3934	return reply((struct ib_mad_hdr *)smp);
3935}
3936
3937static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3938				   struct ib_device *ibdev, u32 port,
3939				   u32 *resp_len, u32 max_len)
3940{
3941	struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
3942	struct hfi1_ibport *ibp = to_iport(ibdev, port);
3943	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3944	u32 start_block = OPA_AM_START_BLK(am);
3945	u32 n_blocks = OPA_AM_NBLK(am);
3946	struct ib_cc_table_entry_shadow *entries;
3947	int i, j;
3948	u32 sentry, eentry;
3949	u16 ccti_limit;
3950	u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
3951
3952	/* sanity check n_blocks, start_block */
3953	if (n_blocks == 0 || smp_length_check(size, max_len) ||
3954	    start_block + n_blocks > ppd->cc_max_table_entries) {
3955		smp->status |= IB_SMP_INVALID_FIELD;
3956		return reply((struct ib_mad_hdr *)smp);
3957	}
3958
3959	sentry = start_block * IB_CCT_ENTRIES;
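	/*
	 * The last block may be only partially filled; ccti_limit is the
	 * index of the last valid table entry.
	 */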
3960	eentry = sentry + ((n_blocks - 1) * IB_CCT_ENTRIES) +
3961		 (be16_to_cpu(p->ccti_limit)) % IB_CCT_ENTRIES + 1;
3962
3963	/* sanity check ccti_limit */
3964	ccti_limit = be16_to_cpu(p->ccti_limit);
3965	if (ccti_limit + 1 > eentry) {
3966		smp->status |= IB_SMP_INVALID_FIELD;
3967		return reply((struct ib_mad_hdr *)smp);
3968	}
3969
3970	/*
3971	 * Save details from packet into the ppd.  Hold the cc_state_lock so
3972	 * our information is consistent with anyone trying to apply the state.
3973	 */
3974	spin_lock(&ppd->cc_state_lock);
3975	ppd->total_cct_entry = ccti_limit + 1;
3976	entries = ppd->ccti_entries;
3977	for (j = 0, i = sentry; i < eentry; j++, i++)
3978		entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry);
3979	spin_unlock(&ppd->cc_state_lock);
3980
3981	/* now apply the information */
3982	apply_cc_state(ppd);
3983
3984	return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len,
3985				       max_len);
3986}
3987
3988struct opa_led_info {
3989	__be32 rsvd_led_mask;
3990	__be32 rsvd;
3991};
3992
3993#define OPA_LED_SHIFT	31
3994#define OPA_LED_MASK	BIT(OPA_LED_SHIFT)
3995
3996static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3997				   struct ib_device *ibdev, u32 port,
3998				   u32 *resp_len, u32 max_len)
3999{
4000	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
4001	struct hfi1_pportdata *ppd = dd->pport;
4002	struct opa_led_info *p = (struct opa_led_info *)data;
4003	u32 nport = OPA_AM_NPORT(am);
4004	u32 is_beaconing_active;
4005
4006	if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
4007		smp->status |= IB_SMP_INVALID_FIELD;
4008		return reply((struct ib_mad_hdr *)smp);
4009	}
4010
4011	/*
4012	 * This pairs with the memory barrier in hfi1_start_led_override to
4013	 * ensure that we read the correct state of LED beaconing represented
4014	 * by led_override_timer_active
4015	 */
4016	smp_rmb();
4017	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
4018	p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
4019
4020	if (resp_len)
4021		*resp_len += sizeof(struct opa_led_info);
4022
4023	return reply((struct ib_mad_hdr *)smp);
4024}
4025
4026static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
4027				   struct ib_device *ibdev, u32 port,
4028				   u32 *resp_len, u32 max_len)
4029{
4030	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
4031	struct opa_led_info *p = (struct opa_led_info *)data;
4032	u32 nport = OPA_AM_NPORT(am);
4033	int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
4034
4035	if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
4036		smp->status |= IB_SMP_INVALID_FIELD;
4037		return reply((struct ib_mad_hdr *)smp);
4038	}
4039
4040	if (on)
4041		hfi1_start_led_override(dd->pport, 2000, 1500);
4042	else
4043		shutdown_led_override(dd->pport);
4044
4045	return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len,
4046				       max_len);
4047}
4048
4049static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
4050			    u8 *data, struct ib_device *ibdev, u32 port,
4051			    u32 *resp_len, u32 max_len)
4052{
4053	int ret;
4054	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4055
4056	switch (attr_id) {
4057	case IB_SMP_ATTR_NODE_DESC:
4058		ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port,
4059					      resp_len, max_len);
4060		break;
4061	case IB_SMP_ATTR_NODE_INFO:
4062		ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port,
4063					      resp_len, max_len);
4064		break;
4065	case IB_SMP_ATTR_PORT_INFO:
4066		ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port,
4067					      resp_len, max_len);
4068		break;
4069	case IB_SMP_ATTR_PKEY_TABLE:
4070		ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port,
4071					       resp_len, max_len);
4072		break;
4073	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
4074		ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port,
4075					      resp_len, max_len);
4076		break;
4077	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
4078		ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port,
4079					      resp_len, max_len);
4080		break;
4081	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
4082		ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port,
4083					       resp_len, max_len);
4084		break;
4085	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
4086		ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
4087						resp_len, max_len);
4088		break;
4089	case OPA_ATTRIB_ID_PORT_STATE_INFO:
4090		ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
4091					 resp_len, max_len);
4092		break;
4093	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
4094		ret = __subn_get_opa_bct(smp, am, data, ibdev, port,
4095					 resp_len, max_len);
4096		break;
4097	case OPA_ATTRIB_ID_CABLE_INFO:
4098		ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port,
4099						resp_len, max_len);
4100		break;
4101	case IB_SMP_ATTR_VL_ARB_TABLE:
4102		ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port,
4103					    resp_len, max_len);
4104		break;
4105	case OPA_ATTRIB_ID_CONGESTION_INFO:
4106		ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port,
4107					       resp_len, max_len);
4108		break;
4109	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
4110		ret = __subn_get_opa_cong_setting(smp, am, data, ibdev,
4111						  port, resp_len, max_len);
4112		break;
4113	case OPA_ATTRIB_ID_HFI_CONGESTION_LOG:
4114		ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev,
4115						   port, resp_len, max_len);
4116		break;
4117	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
4118		ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port,
4119					      resp_len, max_len);
4120		break;
4121	case IB_SMP_ATTR_LED_INFO:
4122		ret = __subn_get_opa_led_info(smp, am, data, ibdev, port,
4123					      resp_len, max_len);
4124		break;
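	/*
	 * SM_Info: if the SM is disabled, consume the MAD without
	 * replying; if a local SM is active, hand the MAD back for
	 * normal processing; otherwise the attribute is unsupported.
	 */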
4125	case IB_SMP_ATTR_SM_INFO:
4126		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
4127			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
4128		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
4129			return IB_MAD_RESULT_SUCCESS;
4130		fallthrough;
4131	default:
4132		smp->status |= IB_SMP_UNSUP_METH_ATTR;
4133		ret = reply((struct ib_mad_hdr *)smp);
4134		break;
4135	}
4136	return ret;
4137}
4138
4139static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
4140			    u8 *data, struct ib_device *ibdev, u32 port,
4141			    u32 *resp_len, u32 max_len, int local_mad)
4142{
4143	int ret;
4144	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4145
4146	switch (attr_id) {
4147	case IB_SMP_ATTR_PORT_INFO:
4148		ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
4149					      resp_len, max_len, local_mad);
4150		break;
4151	case IB_SMP_ATTR_PKEY_TABLE:
4152		ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
4153					       resp_len, max_len);
4154		break;
4155	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
4156		ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port,
4157					      resp_len, max_len);
4158		break;
4159	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
4160		ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port,
4161					      resp_len, max_len);
4162		break;
4163	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
4164		ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port,
4165					       resp_len, max_len);
4166		break;
4167	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
4168		ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
4169						resp_len, max_len);
4170		break;
4171	case OPA_ATTRIB_ID_PORT_STATE_INFO:
4172		ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
4173					 resp_len, max_len, local_mad);
4174		break;
4175	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
4176		ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
4177					 resp_len, max_len);
4178		break;
4179	case IB_SMP_ATTR_VL_ARB_TABLE:
4180		ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port,
4181					    resp_len, max_len);
4182		break;
4183	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
4184		ret = __subn_set_opa_cong_setting(smp, am, data, ibdev,
4185						  port, resp_len, max_len);
4186		break;
4187	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
4188		ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port,
4189					      resp_len, max_len);
4190		break;
4191	case IB_SMP_ATTR_LED_INFO:
4192		ret = __subn_set_opa_led_info(smp, am, data, ibdev, port,
4193					      resp_len, max_len);
4194		break;
4195	case IB_SMP_ATTR_SM_INFO:
4196		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
4197			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
4198		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
4199			return IB_MAD_RESULT_SUCCESS;
4200		fallthrough;
4201	default:
4202		smp->status |= IB_SMP_UNSUP_METH_ATTR;
4203		ret = reply((struct ib_mad_hdr *)smp);
4204		break;
4205	}
4206	return ret;
4207}
4208
4209static inline void set_aggr_error(struct opa_aggregate *ag)
4210{
4211	ag->err_reqlength |= cpu_to_be16(0x8000);
4212}
4213
4214static int subn_get_opa_aggregate(struct opa_smp *smp,
4215				  struct ib_device *ibdev, u32 port,
4216				  u32 *resp_len)
4217{
4218	int i;
4219	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
4220	u8 *next_smp = opa_get_smp_data(smp);
4221
4222	if (num_attr < 1 || num_attr > 117) {
4223		smp->status |= IB_SMP_INVALID_FIELD;
4224		return reply((struct ib_mad_hdr *)smp);
4225	}
4226
4227	for (i = 0; i < num_attr; i++) {
4228		struct opa_aggregate *agg;
4229		size_t agg_data_len;
4230		size_t agg_size;
4231		u32 am;
4232
4233		agg = (struct opa_aggregate *)next_smp;
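		/* the request length field counts 8-byte units */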
4234		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
4235		agg_size = sizeof(*agg) + agg_data_len;
4236		am = be32_to_cpu(agg->attr_mod);
4237
4238		*resp_len += agg_size;
4239
4240		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
4241			smp->status |= IB_SMP_INVALID_FIELD;
4242			return reply((struct ib_mad_hdr *)smp);
4243		}
4244
4245		/* zero the payload for this segment */
4246		memset(next_smp + sizeof(*agg), 0, agg_data_len);
4247
4248		(void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
4249				       ibdev, port, NULL, (u32)agg_data_len);
4250
4251		if (smp->status & IB_SMP_INVALID_FIELD)
4252			break;
4253		if (smp->status & ~IB_SMP_DIRECTION) {
4254			set_aggr_error(agg);
4255			return reply((struct ib_mad_hdr *)smp);
4256		}
4257		next_smp += agg_size;
4258	}
4259
4260	return reply((struct ib_mad_hdr *)smp);
4261}
4262
4263static int subn_set_opa_aggregate(struct opa_smp *smp,
4264				  struct ib_device *ibdev, u32 port,
4265				  u32 *resp_len, int local_mad)
4266{
4267	int i;
4268	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
4269	u8 *next_smp = opa_get_smp_data(smp);
4270
4271	if (num_attr < 1 || num_attr > 117) {
4272		smp->status |= IB_SMP_INVALID_FIELD;
4273		return reply((struct ib_mad_hdr *)smp);
4274	}
4275
4276	for (i = 0; i < num_attr; i++) {
4277		struct opa_aggregate *agg;
4278		size_t agg_data_len;
4279		size_t agg_size;
4280		u32 am;
4281
4282		agg = (struct opa_aggregate *)next_smp;
4283		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
4284		agg_size = sizeof(*agg) + agg_data_len;
4285		am = be32_to_cpu(agg->attr_mod);
4286
4287		*resp_len += agg_size;
4288
4289		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
4290			smp->status |= IB_SMP_INVALID_FIELD;
4291			return reply((struct ib_mad_hdr *)smp);
4292		}
4293
4294		(void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
4295				       ibdev, port, NULL, (u32)agg_data_len,
4296				       local_mad);
4297
4298		if (smp->status & IB_SMP_INVALID_FIELD)
4299			break;
4300		if (smp->status & ~IB_SMP_DIRECTION) {
4301			set_aggr_error(agg);
4302			return reply((struct ib_mad_hdr *)smp);
4303		}
4304		next_smp += agg_size;
4305	}
4306
4307	return reply((struct ib_mad_hdr *)smp);
4308}
4309
4310/*
4311 * OPAv1 specifies that, on the transition to link up, these counters
4312 * are cleared:
4313 *   PortRcvErrors [*]
4314 *   LinkErrorRecovery
4315 *   LocalLinkIntegrityErrors
4316 *   ExcessiveBufferOverruns [*]
4317 *
4318 * [*] Error info associated with these counters is retained, but the
4319 * error info status is reset to 0.
4320 */
4321void clear_linkup_counters(struct hfi1_devdata *dd)
4322{
4323	/* PortRcvErrors */
4324	write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
4325	dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
4326	/* LinkErrorRecovery */
4327	write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
4328	write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0);
4329	/* LocalLinkIntegrityErrors */
4330	write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
4331	/* ExcessiveBufferOverruns */
4332	write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
4333	dd->rcv_ovfl_cnt = 0;
4334	dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
4335}
4336
4337static int is_full_mgmt_pkey_in_table(struct hfi1_ibport *ibp)
4338{
4339	unsigned int i;
4340	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
4341
4342	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); ++i)
4343		if (ppd->pkeys[i] == FULL_MGMT_P_KEY)
4344			return 1;
4345
4346	return 0;
4347}
4348
4349/*
4350 * is_local_mad() returns 1 if 'mad' is sent from, and destined to, the
4351 * local node; 0 otherwise.
4352 */
4353static int is_local_mad(struct hfi1_ibport *ibp, const struct opa_mad *mad,
4354			const struct ib_wc *in_wc)
4355{
4356	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
4357	const struct opa_smp *smp = (const struct opa_smp *)mad;
4358
4359	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
4360		return (smp->hop_cnt == 0 &&
4361			smp->route.dr.dr_slid == OPA_LID_PERMISSIVE &&
4362			smp->route.dr.dr_dlid == OPA_LID_PERMISSIVE);
4363	}
4364
4365	return (in_wc->slid == ppd->lid);
4366}
4367
4368/*
4369 * opa_local_smp_check() should only be called on MADs for which
4370 * is_local_mad() returns true. It applies the SMP checks that are
4371 * specific to SMPs which are sent from, and destined to this node.
4372 * opa_local_smp_check() returns 0 if the SMP passes its checks, 1
4373 * otherwise.
4374 *
4375 * SMPs which arrive from other nodes are instead checked by
4376 * opa_smp_check().
4377 */
4378static int opa_local_smp_check(struct hfi1_ibport *ibp,
4379			       const struct ib_wc *in_wc)
4380{
4381	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
4382	u16 pkey;
4383
4384	if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys))
4385		return 1;
4386
4387	pkey = ppd->pkeys[in_wc->pkey_index];
4388	/*
4389	 * We need to do the "node-local" checks specified in OPAv1,
4390	 * rev 0.90, section 9.10.26, which are:
4391	 *   - pkey is 0x7fff, or 0xffff
4392	 *   - Source QPN == 0 || Destination QPN == 0
4393	 *   - the MAD header's management class is either
4394	 *     IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE or
4395	 *     IB_MGMT_CLASS_SUBN_LID_ROUTED
4396	 *   - SLID != 0
4397	 *
4398	 * However, we know (and so don't need to check again) that,
4399	 * for local SMPs, the MAD stack passes MADs with:
4400	 *   - Source QPN of 0
4401	 *   - MAD mgmt_class is IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4402	 *   - SLID is either: OPA_LID_PERMISSIVE (0xFFFFFFFF), or
4403	 *     our own port's lid
4404	 *
4405	 */
4406	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
4407		return 0;
4408	ingress_pkey_table_fail(ppd, pkey, in_wc->slid);
4409	return 1;
4410}
4411
4412/**
4413 * hfi1_pkey_validation_pma - validate pkeys for incoming PMA MAD packets
4414 * @ibp: IB port data
4415 * @in_mad: MAD packet with header and data
4416 * @in_wc: Work completion data such as source LID, port number, etc.
4417 *
4418 * These are all the possible logic rules for validating a pkey:
4419 *
4420 * a) If pkey neither FULL_MGMT_P_KEY nor LIM_MGMT_P_KEY,
4421 *    and NOT self-originated packet:
4422 *     Drop MAD packet as it should always be part of the
4423 *     management partition unless it's a self-originated packet.
4424 *
4425 * b) If pkey_index -> FULL_MGMT_P_KEY, and LIM_MGMT_P_KEY in pkey table:
4426 *     The packet is coming from a management node and the receiving node
4427 *     is also a management node, so it is safe for the packet to go through.
4428 *
4429 * c) If pkey_index -> FULL_MGMT_P_KEY, and LIM_MGMT_P_KEY is NOT in pkey table:
4430 *     Drop the packet as LIM_MGMT_P_KEY should always be in the pkey table.
4431 *     It could be an FM misconfiguration.
4432 *
4433 * d) If pkey_index -> LIM_MGMT_P_KEY and FULL_MGMT_P_KEY is NOT in pkey table:
4434 *     It is safe for the packet to go through since a non-management node is
4435 *     talking to another non-management node.
4436 *
4437 * e) If pkey_index -> LIM_MGMT_P_KEY and FULL_MGMT_P_KEY in pkey table:
4438 *     Drop the packet because a non-management node is talking to a
4439 *     management node, and it could be an attack.
4440 *
4441 * For the implementation, these rules can be simplified to only checking
4442 * for (a) and (e). There's no need to check for rule (b) as
4443 * the packet doesn't need to be dropped. Rule (c) is not possible in
4444 * the driver as LIM_MGMT_P_KEY is always in the pkey table.
4445 *
4446 * Return:
4447 * 0 - pkey is okay, -EINVAL - it's a bad pkey
4448 */
4449static int hfi1_pkey_validation_pma(struct hfi1_ibport *ibp,
4450				    const struct opa_mad *in_mad,
4451				    const struct ib_wc *in_wc)
4452{
4453	u16 pkey_value = hfi1_lookup_pkey_value(ibp, in_wc->pkey_index);
4454
4455	/* Rule (a) from above */
4456	if (!is_local_mad(ibp, in_mad, in_wc) &&
4457	    pkey_value != LIM_MGMT_P_KEY &&
4458	    pkey_value != FULL_MGMT_P_KEY)
4459		return -EINVAL;
4460
4461	/* Rule (e) from above */
4462	if (pkey_value == LIM_MGMT_P_KEY &&
4463	    is_full_mgmt_pkey_in_table(ibp))
4464		return -EINVAL;
4465
4466	return 0;
4467}
4468
4469static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
4470			    u32 port, const struct opa_mad *in_mad,
4471			    struct opa_mad *out_mad,
4472			    u32 *resp_len, int local_mad)
4473{
4474	struct opa_smp *smp = (struct opa_smp *)out_mad;
4475	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4476	u8 *data;
4477	u32 am, data_size;
4478	__be16 attr_id;
4479	int ret;
4480
4481	*out_mad = *in_mad;
4482	data = opa_get_smp_data(smp);
4483	data_size = (u32)opa_get_smp_data_size(smp);
4484
4485	am = be32_to_cpu(smp->attr_mod);
4486	attr_id = smp->attr_id;
4487	if (smp->class_version != OPA_SM_CLASS_VERSION) {
4488		smp->status |= IB_SMP_UNSUP_VERSION;
4489		ret = reply((struct ib_mad_hdr *)smp);
4490		return ret;
4491	}
4492	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
4493			 smp->route.dr.dr_slid, smp->route.dr.return_path,
4494			 smp->hop_cnt);
4495	if (ret) {
4496		u32 port_num = be32_to_cpu(smp->attr_mod);
4497
4498		/*
4499		 * If this is a get/set portinfo, we already check the
4500		 * M_Key if the MAD is for another port and the M_Key
4501		 * is OK on the receiving port. This check is needed
4502		 * to increment the error counters when the M_Key
4503		 * fails to match on *both* ports.
4504		 */
4505		if (attr_id == IB_SMP_ATTR_PORT_INFO &&
4506		    (smp->method == IB_MGMT_METHOD_GET ||
4507		     smp->method == IB_MGMT_METHOD_SET) &&
4508		    port_num && port_num <= ibdev->phys_port_cnt &&
4509		    port != port_num)
4510			(void)check_mkey(to_iport(ibdev, port_num),
4511					  (struct ib_mad_hdr *)smp, 0,
4512					  smp->mkey, smp->route.dr.dr_slid,
4513					  smp->route.dr.return_path,
4514					  smp->hop_cnt);
4515		ret = IB_MAD_RESULT_FAILURE;
4516		return ret;
4517	}
4518
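	/*
	 * Start with the SMP header size; each attribute handler adds
	 * its payload length to *resp_len.
	 */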
4519	*resp_len = opa_get_smp_header_size(smp);
4520
4521	switch (smp->method) {
4522	case IB_MGMT_METHOD_GET:
4523		switch (attr_id) {
4524		default:
4525			clear_opa_smp_data(smp);
4526			ret = subn_get_opa_sma(attr_id, smp, am, data,
4527					       ibdev, port, resp_len,
4528					       data_size);
4529			break;
4530		case OPA_ATTRIB_ID_AGGREGATE:
4531			ret = subn_get_opa_aggregate(smp, ibdev, port,
4532						     resp_len);
4533			break;
4534		}
4535		break;
4536	case IB_MGMT_METHOD_SET:
4537		switch (attr_id) {
4538		default:
4539			ret = subn_set_opa_sma(attr_id, smp, am, data,
4540					       ibdev, port, resp_len,
4541					       data_size, local_mad);
4542			break;
4543		case OPA_ATTRIB_ID_AGGREGATE:
4544			ret = subn_set_opa_aggregate(smp, ibdev, port,
4545						     resp_len, local_mad);
4546			break;
4547		}
4548		break;
4549	case IB_MGMT_METHOD_TRAP:
4550	case IB_MGMT_METHOD_REPORT:
4551	case IB_MGMT_METHOD_REPORT_RESP:
4552	case IB_MGMT_METHOD_GET_RESP:
4553		/*
4554		 * The ib_mad module will call us to process responses
4555		 * before checking for other consumers.
4556		 * Just tell the caller to process it normally.
4557		 */
4558		ret = IB_MAD_RESULT_SUCCESS;
4559		break;
4560	case IB_MGMT_METHOD_TRAP_REPRESS:
4561		subn_handle_opa_trap_repress(ibp, smp);
4562		/* Always successful */
4563		ret = IB_MAD_RESULT_SUCCESS;
4564		break;
4565	default:
4566		smp->status |= IB_SMP_UNSUP_METHOD;
4567		ret = reply((struct ib_mad_hdr *)smp);
4568		break;
4569	}
4570
4571	return ret;
4572}
4573
4574static int process_subn(struct ib_device *ibdev, int mad_flags,
4575			u32 port, const struct ib_mad *in_mad,
4576			struct ib_mad *out_mad)
4577{
4578	struct ib_smp *smp = (struct ib_smp *)out_mad;
4579	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4580	int ret;
4581
4582	*out_mad = *in_mad;
4583	if (smp->class_version != 1) {
4584		smp->status |= IB_SMP_UNSUP_VERSION;
4585		ret = reply((struct ib_mad_hdr *)smp);
4586		return ret;
4587	}
4588
4589	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
4590			 smp->mkey, (__force __be32)smp->dr_slid,
4591			 smp->return_path, smp->hop_cnt);
4592	if (ret) {
4593		u32 port_num = be32_to_cpu(smp->attr_mod);
4594
4595		/*
4596		 * If this is a get/set portinfo, we already check the
4597		 * M_Key if the MAD is for another port and the M_Key
4598		 * is OK on the receiving port. This check is needed
4599		 * to increment the error counters when the M_Key
4600		 * fails to match on *both* ports.
4601		 */
4602		if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
4603		    (smp->method == IB_MGMT_METHOD_GET ||
4604		     smp->method == IB_MGMT_METHOD_SET) &&
4605		    port_num && port_num <= ibdev->phys_port_cnt &&
4606		    port != port_num)
4607			(void)check_mkey(to_iport(ibdev, port_num),
4608					 (struct ib_mad_hdr *)smp, 0,
4609					 smp->mkey,
4610					 (__force __be32)smp->dr_slid,
4611					 smp->return_path, smp->hop_cnt);
4612		ret = IB_MAD_RESULT_FAILURE;
4613		return ret;
4614	}
4615
4616	switch (smp->method) {
4617	case IB_MGMT_METHOD_GET:
4618		switch (smp->attr_id) {
4619		case IB_SMP_ATTR_NODE_INFO:
4620			ret = subn_get_nodeinfo(smp, ibdev, port);
4621			break;
4622		default:
4623			smp->status |= IB_SMP_UNSUP_METH_ATTR;
4624			ret = reply((struct ib_mad_hdr *)smp);
4625			break;
4626		}
4627		break;
4628	}
4629
4630	return ret;
4631}
4632
4633static int process_perf(struct ib_device *ibdev, u32 port,
4634			const struct ib_mad *in_mad,
4635			struct ib_mad *out_mad)
4636{
4637	struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
4638	struct ib_class_port_info *cpi = (struct ib_class_port_info *)
4639						&pmp->data;
4640	int ret = IB_MAD_RESULT_FAILURE;
4641
4642	*out_mad = *in_mad;
4643	if (pmp->mad_hdr.class_version != 1) {
4644		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4645		ret = reply((struct ib_mad_hdr *)pmp);
4646		return ret;
4647	}
4648
4649	switch (pmp->mad_hdr.method) {
4650	case IB_MGMT_METHOD_GET:
4651		switch (pmp->mad_hdr.attr_id) {
4652		case IB_PMA_PORT_COUNTERS:
4653			ret = pma_get_ib_portcounters(pmp, ibdev, port);
4654			break;
4655		case IB_PMA_PORT_COUNTERS_EXT:
4656			ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
4657			break;
4658		case IB_PMA_CLASS_PORT_INFO:
4659			cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
4660			ret = reply((struct ib_mad_hdr *)pmp);
4661			break;
4662		default:
4663			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4664			ret = reply((struct ib_mad_hdr *)pmp);
4665			break;
4666		}
4667		break;
4668
4669	case IB_MGMT_METHOD_SET:
4670		if (pmp->mad_hdr.attr_id) {
4671			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4672			ret = reply((struct ib_mad_hdr *)pmp);
4673		}
4674		break;
4675
4676	case IB_MGMT_METHOD_TRAP:
4677	case IB_MGMT_METHOD_GET_RESP:
4678		/*
4679		 * The ib_mad module will call us to process responses
4680		 * before checking for other consumers.
4681		 * Just tell the caller to process it normally.
4682		 */
4683		ret = IB_MAD_RESULT_SUCCESS;
4684		break;
4685
4686	default:
4687		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4688		ret = reply((struct ib_mad_hdr *)pmp);
4689		break;
4690	}
4691
4692	return ret;
4693}
4694
4695static int process_perf_opa(struct ib_device *ibdev, u32 port,
4696			    const struct opa_mad *in_mad,
4697			    struct opa_mad *out_mad, u32 *resp_len)
4698{
4699	struct opa_pma_mad *pmp = (struct opa_pma_mad *)out_mad;
4700	int ret;
4701
4702	*out_mad = *in_mad;
4703
4704	if (pmp->mad_hdr.class_version != OPA_SM_CLASS_VERSION) {
4705		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4706		return reply((struct ib_mad_hdr *)pmp);
4707	}
4708
4709	*resp_len = sizeof(pmp->mad_hdr);
4710
4711	switch (pmp->mad_hdr.method) {
4712	case IB_MGMT_METHOD_GET:
4713		switch (pmp->mad_hdr.attr_id) {
4714		case IB_PMA_CLASS_PORT_INFO:
4715			ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
4716			break;
4717		case OPA_PM_ATTRIB_ID_PORT_STATUS:
4718			ret = pma_get_opa_portstatus(pmp, ibdev, port,
4719						     resp_len);
4720			break;
4721		case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
4722			ret = pma_get_opa_datacounters(pmp, ibdev, port,
4723						       resp_len);
4724			break;
4725		case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
4726			ret = pma_get_opa_porterrors(pmp, ibdev, port,
4727						     resp_len);
4728			break;
4729		case OPA_PM_ATTRIB_ID_ERROR_INFO:
4730			ret = pma_get_opa_errorinfo(pmp, ibdev, port,
4731						    resp_len);
4732			break;
4733		default:
4734			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4735			ret = reply((struct ib_mad_hdr *)pmp);
4736			break;
4737		}
4738		break;
4739
4740	case IB_MGMT_METHOD_SET:
4741		switch (pmp->mad_hdr.attr_id) {
4742		case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
4743			ret = pma_set_opa_portstatus(pmp, ibdev, port,
4744						     resp_len);
4745			break;
4746		case OPA_PM_ATTRIB_ID_ERROR_INFO:
4747			ret = pma_set_opa_errorinfo(pmp, ibdev, port,
4748						    resp_len);
4749			break;
4750		default:
4751			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4752			ret = reply((struct ib_mad_hdr *)pmp);
4753			break;
4754		}
4755		break;
4756
4757	case IB_MGMT_METHOD_TRAP:
4758	case IB_MGMT_METHOD_GET_RESP:
4759		/*
4760		 * The ib_mad module will call us to process responses
4761		 * before checking for other consumers.
4762		 * Just tell the caller to process it normally.
4763		 */
4764		ret = IB_MAD_RESULT_SUCCESS;
4765		break;
4766
4767	default:
4768		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4769		ret = reply((struct ib_mad_hdr *)pmp);
4770		break;
4771	}
4772
4773	return ret;
4774}
4775
4776static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
4777				u32 port, const struct ib_wc *in_wc,
4778				const struct ib_grh *in_grh,
4779				const struct opa_mad *in_mad,
4780				struct opa_mad *out_mad, size_t *out_mad_size,
4781				u16 *out_mad_pkey_index)
4782{
4783	int ret;
4784	int pkey_idx;
4785	int local_mad = 0;
4786	u32 resp_len = in_wc->byte_len - sizeof(*in_grh);
4787	struct hfi1_ibport *ibp = to_iport(ibdev, port);
4788
4789	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
4790	if (pkey_idx < 0) {
4791		pr_warn("failed to find limited mgmt pkey, defaulting 0x%x\n",
4792			hfi1_get_pkey(ibp, 1));
4793		pkey_idx = 1;
4794	}
4795	*out_mad_pkey_index = (u16)pkey_idx;
4796
4797	switch (in_mad->mad_hdr.mgmt_class) {
4798	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4799	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4800		local_mad = is_local_mad(ibp, in_mad, in_wc);
4801		if (local_mad) {
4802			ret = opa_local_smp_check(ibp, in_wc);
4803			if (ret)
4804				return IB_MAD_RESULT_FAILURE;
4805		}
4806		ret = process_subn_opa(ibdev, mad_flags, port, in_mad,
4807				       out_mad, &resp_len, local_mad);
4808		goto bail;
4809	case IB_MGMT_CLASS_PERF_MGMT:
4810		ret = hfi1_pkey_validation_pma(ibp, in_mad, in_wc);
4811		if (ret)
4812			return IB_MAD_RESULT_FAILURE;
4813
4814		ret = process_perf_opa(ibdev, port, in_mad, out_mad, &resp_len);
4815		goto bail;
4816
4817	default:
4818		ret = IB_MAD_RESULT_SUCCESS;
4819	}
4820
4821bail:
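	/* reply sizes are rounded up to a multiple of 8 bytes */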
4822	if (ret & IB_MAD_RESULT_REPLY)
4823		*out_mad_size = round_up(resp_len, 8);
4824	else if (ret & IB_MAD_RESULT_SUCCESS)
4825		*out_mad_size = in_wc->byte_len - sizeof(struct ib_grh);
4826
4827	return ret;
4828}
4829
4830static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u32 port,
4831			       const struct ib_wc *in_wc,
4832			       const struct ib_grh *in_grh,
4833			       const struct ib_mad *in_mad,
4834			       struct ib_mad *out_mad)
4835{
4836	int ret;
4837
4838	switch (in_mad->mad_hdr.mgmt_class) {
4839	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4840	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4841		ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
4842		break;
4843	case IB_MGMT_CLASS_PERF_MGMT:
4844		ret = process_perf(ibdev, port, in_mad, out_mad);
4845		break;
4846	default:
4847		ret = IB_MAD_RESULT_SUCCESS;
4848		break;
4849	}
4850
4851	return ret;
4852}
4853
4854/**
4855 * hfi1_process_mad - process an incoming MAD packet
4856 * @ibdev: the infiniband device this packet came in on
4857 * @mad_flags: MAD flags
4858 * @port: the port number this packet came in on
4859 * @in_wc: the work completion entry for this packet
4860 * @in_grh: the global route header for this packet
4861 * @in_mad: the incoming MAD
4862 * @out_mad: any outgoing MAD reply
4863 * @out_mad_size: size of the outgoing MAD reply
4864 * @out_mad_pkey_index: used to pass back the pkey index
4865 *
4866 * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
4867 * interested in processing.
4868 *
4869 * Note that the verbs framework has already done the MAD sanity checks,
4870 * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4871 * MADs.
4872 *
4873 * This is called by the ib_mad module.
4874 */
4875int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u32 port,
4876		     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
4877		     const struct ib_mad *in_mad, struct ib_mad *out_mad,
4878		     size_t *out_mad_size, u16 *out_mad_pkey_index)
4879{
4880	switch (in_mad->mad_hdr.base_version) {
4881	case OPA_MGMT_BASE_VERSION:
4882		return hfi1_process_opa_mad(ibdev, mad_flags, port,
4883					    in_wc, in_grh,
4884					    (struct opa_mad *)in_mad,
4885					    (struct opa_mad *)out_mad,
4886					    out_mad_size,
4887					    out_mad_pkey_index);
4888	case IB_MGMT_BASE_VERSION:
4889		return hfi1_process_ib_mad(ibdev, mad_flags, port, in_wc,
4890					   in_grh, in_mad, out_mad);
4891	default:
4892		break;
4893	}
4894
4895	return IB_MAD_RESULT_FAILURE;
4896}