   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
   4 *
   5 *  Basic Transport Functions exploiting Infiniband API
   6 *
   7 *  Copyright IBM Corp. 2016
   8 *
   9 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
  10 */
  11
  12#include <linux/socket.h>
  13#include <linux/if_vlan.h>
  14#include <linux/random.h>
  15#include <linux/workqueue.h>
  16#include <linux/wait.h>
  17#include <linux/reboot.h>
  18#include <linux/mutex.h>
  19#include <linux/list.h>
  20#include <linux/smc.h>
  21#include <net/tcp.h>
  22#include <net/sock.h>
  23#include <rdma/ib_verbs.h>
  24#include <rdma/ib_cache.h>
  25
  26#include "smc.h"
  27#include "smc_clc.h"
  28#include "smc_core.h"
  29#include "smc_ib.h"
  30#include "smc_wr.h"
  31#include "smc_llc.h"
  32#include "smc_cdc.h"
  33#include "smc_close.h"
  34#include "smc_ism.h"
  35#include "smc_netlink.h"
  36#include "smc_stats.h"
  37#include "smc_tracepoint.h"
  38
  39#define SMC_LGR_NUM_INCR		256
  40#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
  41#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
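/* With the values above a server-side link group is freed after being
 * unused for 10 minutes (600 * HZ jiffies); the client waits an extra
 * 10 seconds so that the server side always cleans up first (see the
 * comment in smc_lgr_schedule_free_work()).
 */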
  42
  43struct smc_lgr_list smc_lgr_list = {	/* established link groups */
  44	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
  45	.list = LIST_HEAD_INIT(smc_lgr_list.list),
  46	.num = 0,
  47};
  48
  49static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
  50static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
  51
  52static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
  53			 struct smc_buf_desc *buf_desc);
  54static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
  55
  56static void smc_link_down_work(struct work_struct *work);
  57
  58/* return head of link group list and its lock for a given link group */
  59static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
  60						  spinlock_t **lgr_lock)
  61{
  62	if (lgr->is_smcd) {
  63		*lgr_lock = &lgr->smcd->lgr_lock;
  64		return &lgr->smcd->lgr_list;
  65	}
  66
  67	*lgr_lock = &smc_lgr_list.lock;
  68	return &smc_lgr_list.list;
  69}
  70
  71static void smc_ibdev_cnt_inc(struct smc_link *lnk)
  72{
  73	atomic_inc(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
  74}
  75
  76static void smc_ibdev_cnt_dec(struct smc_link *lnk)
  77{
  78	atomic_dec(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
  79}
  80
  81static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
  82{
  83	/* client link group creation always follows the server link group
   84	 * creation. For the client, use a somewhat higher removal delay time,
  85	 * otherwise there is a risk of out-of-sync link groups.
  86	 */
  87	if (!lgr->freeing) {
  88		mod_delayed_work(system_wq, &lgr->free_work,
  89				 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
  90						SMC_LGR_FREE_DELAY_CLNT :
  91						SMC_LGR_FREE_DELAY_SERV);
  92	}
  93}
  94
  95/* Register connection's alert token in our lookup structure.
  96 * To use rbtrees we have to implement our own insert core.
  97 * Requires @conns_lock
   98 * @conn	connection to register
   99 * The token is stored in conn->alert_token_local; there is no return value.
 100 */
 101static void smc_lgr_add_alert_token(struct smc_connection *conn)
 102{
 103	struct rb_node **link, *parent = NULL;
 104	u32 token = conn->alert_token_local;
 105
 106	link = &conn->lgr->conns_all.rb_node;
 107	while (*link) {
 108		struct smc_connection *cur = rb_entry(*link,
 109					struct smc_connection, alert_node);
 110
 111		parent = *link;
 112		if (cur->alert_token_local > token)
 113			link = &parent->rb_left;
 114		else
 115			link = &parent->rb_right;
 116	}
 117	/* Put the new node there */
 118	rb_link_node(&conn->alert_node, parent, link);
 119	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
 120}
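/* Uniqueness of alert_token_local is guaranteed by the caller,
 * smc_lgr_register_conn(), which keeps allocating new token values until
 * smc_lgr_find_conn() reports the candidate as unused in this link group.
 */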
 121
 122/* assign an SMC-R link to the connection */
 123static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
 124{
 125	enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
 126				       SMC_LNK_ACTIVE;
 127	int i, j;
 128
 129	/* do link balancing */
 130	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 131		struct smc_link *lnk = &conn->lgr->lnk[i];
 132
 133		if (lnk->state != expected || lnk->link_is_asym)
 134			continue;
 135		if (conn->lgr->role == SMC_CLNT) {
  136			conn->lnk = lnk; /* temporary, SMC server assigns link */
 137			break;
 138		}
 139		if (conn->lgr->conns_num % 2) {
 140			for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
 141				struct smc_link *lnk2;
 142
 143				lnk2 = &conn->lgr->lnk[j];
 144				if (lnk2->state == expected &&
 145				    !lnk2->link_is_asym) {
 146					conn->lnk = lnk2;
 147					break;
 148				}
 149			}
 150		}
 151		if (!conn->lnk)
 152			conn->lnk = lnk;
 153		break;
 154	}
 155	if (!conn->lnk)
 156		return SMC_CLC_DECL_NOACTLINK;
 157	atomic_inc(&conn->lnk->conn_cnt);
 158	return 0;
 159}
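/* The balancing above spreads connections across usable links: every other
 * connection (odd conns_num) is assigned to the next usable link, so with
 * two active symmetric links connections roughly alternate between them.
 * On the client the assignment is only provisional; the server decides the
 * final link.
 */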
 160
 161/* Register connection in link group by assigning an alert token
 162 * registered in a search tree.
 163 * Requires @conns_lock
 164 * Note that '0' is a reserved value and not assigned.
 165 */
 166static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
 167{
 168	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
 169	static atomic_t nexttoken = ATOMIC_INIT(0);
 170	int rc;
 171
 172	if (!conn->lgr->is_smcd) {
 173		rc = smcr_lgr_conn_assign_link(conn, first);
 174		if (rc) {
 175			conn->lgr = NULL;
 176			return rc;
 177		}
 178	}
 179	/* find a new alert_token_local value not yet used by some connection
 180	 * in this link group
 181	 */
 182	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
 183	while (!conn->alert_token_local) {
 184		conn->alert_token_local = atomic_inc_return(&nexttoken);
 185		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
 186			conn->alert_token_local = 0;
 187	}
 188	smc_lgr_add_alert_token(conn);
 189	conn->lgr->conns_num++;
 190	return 0;
 191}
 192
  193/* Unregister connection and reset the alert token of the given connection
 194 */
 195static void __smc_lgr_unregister_conn(struct smc_connection *conn)
 196{
 197	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
 198	struct smc_link_group *lgr = conn->lgr;
 199
 200	rb_erase(&conn->alert_node, &lgr->conns_all);
 201	if (conn->lnk)
 202		atomic_dec(&conn->lnk->conn_cnt);
 203	lgr->conns_num--;
 204	conn->alert_token_local = 0;
 205	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
 206}
 207
 208/* Unregister connection from lgr
 209 */
 210static void smc_lgr_unregister_conn(struct smc_connection *conn)
 211{
 212	struct smc_link_group *lgr = conn->lgr;
 213
 214	if (!smc_conn_lgr_valid(conn))
 215		return;
 216	write_lock_bh(&lgr->conns_lock);
 217	if (conn->alert_token_local) {
 218		__smc_lgr_unregister_conn(conn);
 219	}
 220	write_unlock_bh(&lgr->conns_lock);
 221}
 222
 223int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
 224{
 225	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
 226	char hostname[SMC_MAX_HOSTNAME_LEN + 1];
 227	char smc_seid[SMC_MAX_EID_LEN + 1];
 228	struct nlattr *attrs;
 229	u8 *seid = NULL;
 230	u8 *host = NULL;
 231	void *nlh;
 232
 233	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 234			  &smc_gen_nl_family, NLM_F_MULTI,
 235			  SMC_NETLINK_GET_SYS_INFO);
 236	if (!nlh)
 237		goto errmsg;
 238	if (cb_ctx->pos[0])
 239		goto errout;
 240	attrs = nla_nest_start(skb, SMC_GEN_SYS_INFO);
 241	if (!attrs)
 242		goto errout;
 243	if (nla_put_u8(skb, SMC_NLA_SYS_VER, SMC_V2))
 244		goto errattr;
 245	if (nla_put_u8(skb, SMC_NLA_SYS_REL, SMC_RELEASE))
 246		goto errattr;
 247	if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable()))
 248		goto errattr;
 249	if (nla_put_u8(skb, SMC_NLA_SYS_IS_SMCR_V2, true))
 250		goto errattr;
 251	smc_clc_get_hostname(&host);
 252	if (host) {
 253		memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN);
 254		hostname[SMC_MAX_HOSTNAME_LEN] = 0;
 255		if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname))
 256			goto errattr;
 257	}
 258	if (smc_ism_is_v2_capable()) {
 259		smc_ism_get_system_eid(&seid);
 260		memcpy(smc_seid, seid, SMC_MAX_EID_LEN);
 261		smc_seid[SMC_MAX_EID_LEN] = 0;
 262		if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid))
 263			goto errattr;
 264	}
 265	nla_nest_end(skb, attrs);
 266	genlmsg_end(skb, nlh);
 267	cb_ctx->pos[0] = 1;
 268	return skb->len;
 269
 270errattr:
 271	nla_nest_cancel(skb, attrs);
 272errout:
 273	genlmsg_cancel(skb, nlh);
 274errmsg:
 275	return skb->len;
 276}
 277
 278/* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */
 279static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr,
 280				     struct sk_buff *skb,
 281				     struct netlink_callback *cb,
 282				     struct nlattr *v2_attrs)
 283{
 284	char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
 285	char smc_eid[SMC_MAX_EID_LEN + 1];
 286
 287	if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version))
 288		goto errv2attr;
 289	if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release))
 290		goto errv2attr;
 291	if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
 292		goto errv2attr;
 293	memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
 294	smc_host[SMC_MAX_HOSTNAME_LEN] = 0;
 295	if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
 296		goto errv2attr;
 297	memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
 298	smc_eid[SMC_MAX_EID_LEN] = 0;
 299	if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
 300		goto errv2attr;
 301
 302	nla_nest_end(skb, v2_attrs);
 303	return 0;
 304
 305errv2attr:
 306	nla_nest_cancel(skb, v2_attrs);
 307	return -EMSGSIZE;
 308}
 309
 310static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr,
 311				   struct sk_buff *skb,
 312				   struct netlink_callback *cb)
 313{
 314	struct nlattr *v2_attrs;
 315
 316	v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2);
 317	if (!v2_attrs)
 318		goto errattr;
 319	if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway))
 320		goto errv2attr;
 321
 322	nla_nest_end(skb, v2_attrs);
 323	return 0;
 324
 325errv2attr:
 326	nla_nest_cancel(skb, v2_attrs);
 327errattr:
 328	return -EMSGSIZE;
 329}
 330
 331static int smc_nl_fill_lgr(struct smc_link_group *lgr,
 332			   struct sk_buff *skb,
 333			   struct netlink_callback *cb)
 334{
 335	char smc_target[SMC_MAX_PNETID_LEN + 1];
 336	struct nlattr *attrs, *v2_attrs;
 337
 338	attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR);
 339	if (!attrs)
 340		goto errout;
 341
 342	if (nla_put_u32(skb, SMC_NLA_LGR_R_ID, *((u32 *)&lgr->id)))
 343		goto errattr;
 344	if (nla_put_u32(skb, SMC_NLA_LGR_R_CONNS_NUM, lgr->conns_num))
 345		goto errattr;
 346	if (nla_put_u8(skb, SMC_NLA_LGR_R_ROLE, lgr->role))
 347		goto errattr;
 348	if (nla_put_u8(skb, SMC_NLA_LGR_R_TYPE, lgr->type))
 349		goto errattr;
 350	if (nla_put_u8(skb, SMC_NLA_LGR_R_BUF_TYPE, lgr->buf_type))
 351		goto errattr;
 352	if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id))
 353		goto errattr;
 354	if (nla_put_u64_64bit(skb, SMC_NLA_LGR_R_NET_COOKIE,
 355			      lgr->net->net_cookie, SMC_NLA_LGR_R_PAD))
 356		goto errattr;
 357	memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN);
 358	smc_target[SMC_MAX_PNETID_LEN] = 0;
 359	if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
 360		goto errattr;
 361	if (lgr->smc_version > SMC_V1) {
 362		v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON);
 363		if (!v2_attrs)
 364			goto errattr;
 365		if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
 366			goto errattr;
 367		if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb))
 368			goto errattr;
 369	}
 370
 371	nla_nest_end(skb, attrs);
 372	return 0;
 373errattr:
 374	nla_nest_cancel(skb, attrs);
 375errout:
 376	return -EMSGSIZE;
 377}
 378
 379static int smc_nl_fill_lgr_link(struct smc_link_group *lgr,
 380				struct smc_link *link,
 381				struct sk_buff *skb,
 382				struct netlink_callback *cb)
 383{
 384	char smc_ibname[IB_DEVICE_NAME_MAX];
 385	u8 smc_gid_target[41];
 386	struct nlattr *attrs;
 387	u32 link_uid = 0;
 388	void *nlh;
 389
 390	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 391			  &smc_gen_nl_family, NLM_F_MULTI,
 392			  SMC_NETLINK_GET_LINK_SMCR);
 393	if (!nlh)
 394		goto errmsg;
 395
 396	attrs = nla_nest_start(skb, SMC_GEN_LINK_SMCR);
 397	if (!attrs)
 398		goto errout;
 399
 400	if (nla_put_u8(skb, SMC_NLA_LINK_ID, link->link_id))
 401		goto errattr;
 402	if (nla_put_u32(skb, SMC_NLA_LINK_STATE, link->state))
 403		goto errattr;
 404	if (nla_put_u32(skb, SMC_NLA_LINK_CONN_CNT,
 405			atomic_read(&link->conn_cnt)))
 406		goto errattr;
 407	if (nla_put_u8(skb, SMC_NLA_LINK_IB_PORT, link->ibport))
 408		goto errattr;
 409	if (nla_put_u32(skb, SMC_NLA_LINK_NET_DEV, link->ndev_ifidx))
 410		goto errattr;
 411	snprintf(smc_ibname, sizeof(smc_ibname), "%s", link->ibname);
 412	if (nla_put_string(skb, SMC_NLA_LINK_IB_DEV, smc_ibname))
 413		goto errattr;
 414	memcpy(&link_uid, link->link_uid, sizeof(link_uid));
 415	if (nla_put_u32(skb, SMC_NLA_LINK_UID, link_uid))
 416		goto errattr;
 417	memcpy(&link_uid, link->peer_link_uid, sizeof(link_uid));
 418	if (nla_put_u32(skb, SMC_NLA_LINK_PEER_UID, link_uid))
 419		goto errattr;
 420	memset(smc_gid_target, 0, sizeof(smc_gid_target));
 421	smc_gid_be16_convert(smc_gid_target, link->gid);
 422	if (nla_put_string(skb, SMC_NLA_LINK_GID, smc_gid_target))
 423		goto errattr;
 424	memset(smc_gid_target, 0, sizeof(smc_gid_target));
 425	smc_gid_be16_convert(smc_gid_target, link->peer_gid);
 426	if (nla_put_string(skb, SMC_NLA_LINK_PEER_GID, smc_gid_target))
 427		goto errattr;
 428
 429	nla_nest_end(skb, attrs);
 430	genlmsg_end(skb, nlh);
 431	return 0;
 432errattr:
 433	nla_nest_cancel(skb, attrs);
 434errout:
 435	genlmsg_cancel(skb, nlh);
 436errmsg:
 437	return -EMSGSIZE;
 438}
 439
 440static int smc_nl_handle_lgr(struct smc_link_group *lgr,
 441			     struct sk_buff *skb,
 442			     struct netlink_callback *cb,
 443			     bool list_links)
 444{
 445	void *nlh;
 446	int i;
 447
 448	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 449			  &smc_gen_nl_family, NLM_F_MULTI,
 450			  SMC_NETLINK_GET_LGR_SMCR);
 451	if (!nlh)
 452		goto errmsg;
 453	if (smc_nl_fill_lgr(lgr, skb, cb))
 454		goto errout;
 455
 456	genlmsg_end(skb, nlh);
 457	if (!list_links)
 458		goto out;
 459	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 460		if (!smc_link_usable(&lgr->lnk[i]))
 461			continue;
 462		if (smc_nl_fill_lgr_link(lgr, &lgr->lnk[i], skb, cb))
 463			goto errout;
 464	}
 465out:
 466	return 0;
 467
 468errout:
 469	genlmsg_cancel(skb, nlh);
 470errmsg:
 471	return -EMSGSIZE;
 472}
 473
 474static void smc_nl_fill_lgr_list(struct smc_lgr_list *smc_lgr,
 475				 struct sk_buff *skb,
 476				 struct netlink_callback *cb,
 477				 bool list_links)
 478{
 479	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
 480	struct smc_link_group *lgr;
 481	int snum = cb_ctx->pos[0];
 482	int num = 0;
 483
 484	spin_lock_bh(&smc_lgr->lock);
 485	list_for_each_entry(lgr, &smc_lgr->list, list) {
 486		if (num < snum)
 487			goto next;
 488		if (smc_nl_handle_lgr(lgr, skb, cb, list_links))
 489			goto errout;
 490next:
 491		num++;
 492	}
 493errout:
 494	spin_unlock_bh(&smc_lgr->lock);
 495	cb_ctx->pos[0] = num;
 496}
 497
 498static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
 499				struct sk_buff *skb,
 500				struct netlink_callback *cb)
 501{
 502	char smc_pnet[SMC_MAX_PNETID_LEN + 1];
 503	struct nlattr *attrs;
 504	void *nlh;
 505
 506	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 507			  &smc_gen_nl_family, NLM_F_MULTI,
 508			  SMC_NETLINK_GET_LGR_SMCD);
 509	if (!nlh)
 510		goto errmsg;
 511
 512	attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCD);
 513	if (!attrs)
 514		goto errout;
 515
 516	if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id)))
 517		goto errattr;
 518	if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID, lgr->smcd->local_gid,
 519			      SMC_NLA_LGR_D_PAD))
 520		goto errattr;
 521	if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid,
 522			      SMC_NLA_LGR_D_PAD))
 523		goto errattr;
 524	if (nla_put_u8(skb, SMC_NLA_LGR_D_VLAN_ID, lgr->vlan_id))
 525		goto errattr;
 526	if (nla_put_u32(skb, SMC_NLA_LGR_D_CONNS_NUM, lgr->conns_num))
 527		goto errattr;
 528	if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd)))
 529		goto errattr;
 530	memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN);
 531	smc_pnet[SMC_MAX_PNETID_LEN] = 0;
 532	if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
 533		goto errattr;
 534	if (lgr->smc_version > SMC_V1) {
 535		struct nlattr *v2_attrs;
 536
 537		v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_D_V2_COMMON);
 538		if (!v2_attrs)
 539			goto errattr;
 540		if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
 541			goto errattr;
 542	}
 543	nla_nest_end(skb, attrs);
 544	genlmsg_end(skb, nlh);
 545	return 0;
 546
 547errattr:
 548	nla_nest_cancel(skb, attrs);
 549errout:
 550	genlmsg_cancel(skb, nlh);
 551errmsg:
 552	return -EMSGSIZE;
 553}
 554
 555static int smc_nl_handle_smcd_lgr(struct smcd_dev *dev,
 556				  struct sk_buff *skb,
 557				  struct netlink_callback *cb)
 558{
 559	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
 560	struct smc_link_group *lgr;
 561	int snum = cb_ctx->pos[1];
 562	int rc = 0, num = 0;
 563
 564	spin_lock_bh(&dev->lgr_lock);
 565	list_for_each_entry(lgr, &dev->lgr_list, list) {
 566		if (!lgr->is_smcd)
 567			continue;
 568		if (num < snum)
 569			goto next;
 570		rc = smc_nl_fill_smcd_lgr(lgr, skb, cb);
 571		if (rc)
 572			goto errout;
 573next:
 574		num++;
 575	}
 576errout:
 577	spin_unlock_bh(&dev->lgr_lock);
 578	cb_ctx->pos[1] = num;
 579	return rc;
 580}
 581
 582static int smc_nl_fill_smcd_dev(struct smcd_dev_list *dev_list,
 583				struct sk_buff *skb,
 584				struct netlink_callback *cb)
 585{
 586	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
 587	struct smcd_dev *smcd_dev;
 588	int snum = cb_ctx->pos[0];
 589	int rc = 0, num = 0;
 590
 591	mutex_lock(&dev_list->mutex);
 592	list_for_each_entry(smcd_dev, &dev_list->list, list) {
 593		if (list_empty(&smcd_dev->lgr_list))
 594			continue;
 595		if (num < snum)
 596			goto next;
 597		rc = smc_nl_handle_smcd_lgr(smcd_dev, skb, cb);
 598		if (rc)
 599			goto errout;
 600next:
 601		num++;
 602	}
 603errout:
 604	mutex_unlock(&dev_list->mutex);
 605	cb_ctx->pos[0] = num;
 606	return rc;
 607}
 608
 609int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
 610{
 611	bool list_links = false;
 612
 613	smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
 614	return skb->len;
 615}
 616
 617int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb)
 618{
 619	bool list_links = true;
 620
 621	smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
 622	return skb->len;
 623}
 624
 625int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
 626{
 627	smc_nl_fill_smcd_dev(&smcd_dev_list, skb, cb);
 628	return skb->len;
 629}
 630
 631void smc_lgr_cleanup_early(struct smc_link_group *lgr)
 632{
 633	spinlock_t *lgr_lock;
 634
 635	if (!lgr)
 636		return;
 637
 638	smc_lgr_list_head(lgr, &lgr_lock);
 639	spin_lock_bh(lgr_lock);
 640	/* do not use this link group for new connections */
 641	if (!list_empty(&lgr->list))
 642		list_del_init(&lgr->list);
 643	spin_unlock_bh(lgr_lock);
 644	__smc_lgr_terminate(lgr, true);
 645}
 646
 647static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
 648{
 649	int i;
 650
 651	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 652		struct smc_link *lnk = &lgr->lnk[i];
 653
 654		if (smc_link_sendable(lnk))
 655			lnk->state = SMC_LNK_INACTIVE;
 656	}
 657	wake_up_all(&lgr->llc_msg_waiter);
 658	wake_up_all(&lgr->llc_flow_waiter);
 659}
 660
 661static void smc_lgr_free(struct smc_link_group *lgr);
 662
 663static void smc_lgr_free_work(struct work_struct *work)
 664{
 665	struct smc_link_group *lgr = container_of(to_delayed_work(work),
 666						  struct smc_link_group,
 667						  free_work);
 668	spinlock_t *lgr_lock;
 669	bool conns;
 670
 671	smc_lgr_list_head(lgr, &lgr_lock);
 672	spin_lock_bh(lgr_lock);
 673	if (lgr->freeing) {
 674		spin_unlock_bh(lgr_lock);
 675		return;
 676	}
 677	read_lock_bh(&lgr->conns_lock);
 678	conns = RB_EMPTY_ROOT(&lgr->conns_all);
 679	read_unlock_bh(&lgr->conns_lock);
 680	if (!conns) { /* number of lgr connections is no longer zero */
 681		spin_unlock_bh(lgr_lock);
 682		return;
 683	}
 684	list_del_init(&lgr->list); /* remove from smc_lgr_list */
 685	lgr->freeing = 1; /* this instance does the freeing, no new schedule */
 686	spin_unlock_bh(lgr_lock);
 687	cancel_delayed_work(&lgr->free_work);
 688
 689	if (!lgr->is_smcd && !lgr->terminating)
 690		smc_llc_send_link_delete_all(lgr, true,
 691					     SMC_LLC_DEL_PROG_INIT_TERM);
 692	if (lgr->is_smcd && !lgr->terminating)
 693		smc_ism_signal_shutdown(lgr);
 694	if (!lgr->is_smcd)
 695		smcr_lgr_link_deactivate_all(lgr);
 696	smc_lgr_free(lgr);
 697}
 698
 699static void smc_lgr_terminate_work(struct work_struct *work)
 700{
 701	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
 702						  terminate_work);
 703
 704	__smc_lgr_terminate(lgr, true);
 705}
 706
 707/* return next unique link id for the lgr */
 708static u8 smcr_next_link_id(struct smc_link_group *lgr)
 709{
 710	u8 link_id;
 711	int i;
 712
 713	while (1) {
 714again:
 715		link_id = ++lgr->next_link_id;
 716		if (!link_id)	/* skip zero as link_id */
 717			link_id = ++lgr->next_link_id;
 718		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 719			if (smc_link_usable(&lgr->lnk[i]) &&
 720			    lgr->lnk[i].link_id == link_id)
 721				goto again;
 722		}
 723		break;
 724	}
 725	return link_id;
 726}
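/* The link_id above is an 8-bit counter: it wraps around, skips the
 * reserved value zero, and retries whenever the candidate id is still in
 * use by a usable link of this link group.
 */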
 727
 728static void smcr_copy_dev_info_to_link(struct smc_link *link)
 729{
 730	struct smc_ib_device *smcibdev = link->smcibdev;
 731
 732	snprintf(link->ibname, sizeof(link->ibname), "%s",
 733		 smcibdev->ibdev->name);
 734	link->ndev_ifidx = smcibdev->ndev_ifidx[link->ibport - 1];
 735}
 736
 737int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
 738		   u8 link_idx, struct smc_init_info *ini)
 739{
 740	struct smc_ib_device *smcibdev;
 741	u8 rndvec[3];
 742	int rc;
 743
 744	if (lgr->smc_version == SMC_V2) {
 745		lnk->smcibdev = ini->smcrv2.ib_dev_v2;
 746		lnk->ibport = ini->smcrv2.ib_port_v2;
 747	} else {
 748		lnk->smcibdev = ini->ib_dev;
 749		lnk->ibport = ini->ib_port;
 750	}
 751	get_device(&lnk->smcibdev->ibdev->dev);
 752	atomic_inc(&lnk->smcibdev->lnk_cnt);
 753	refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */
 754	lnk->clearing = 0;
 755	lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
 756	lnk->link_id = smcr_next_link_id(lgr);
 757	lnk->lgr = lgr;
 758	smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
 759	lnk->link_idx = link_idx;
 760	lnk->wr_rx_id_compl = 0;
 761	smc_ibdev_cnt_inc(lnk);
 762	smcr_copy_dev_info_to_link(lnk);
 763	atomic_set(&lnk->conn_cnt, 0);
 764	smc_llc_link_set_uid(lnk);
 765	INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
 766	if (!lnk->smcibdev->initialized) {
 767		rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev);
 768		if (rc)
 769			goto out;
 770	}
 771	get_random_bytes(rndvec, sizeof(rndvec));
 772	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
 773		(rndvec[2] << 16);
 774	rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
 775				  ini->vlan_id, lnk->gid, &lnk->sgid_index,
 776				  lgr->smc_version == SMC_V2 ?
 777						  &ini->smcrv2 : NULL);
 778	if (rc)
 779		goto out;
 780	rc = smc_llc_link_init(lnk);
 781	if (rc)
 782		goto out;
 783	rc = smc_wr_alloc_link_mem(lnk);
 784	if (rc)
 785		goto clear_llc_lnk;
 786	rc = smc_ib_create_protection_domain(lnk);
 787	if (rc)
 788		goto free_link_mem;
 789	rc = smc_ib_create_queue_pair(lnk);
 790	if (rc)
 791		goto dealloc_pd;
 792	rc = smc_wr_create_link(lnk);
 793	if (rc)
 794		goto destroy_qp;
 795	lnk->state = SMC_LNK_ACTIVATING;
 796	return 0;
 797
 798destroy_qp:
 799	smc_ib_destroy_queue_pair(lnk);
 800dealloc_pd:
 801	smc_ib_dealloc_protection_domain(lnk);
 802free_link_mem:
 803	smc_wr_free_link_mem(lnk);
 804clear_llc_lnk:
 805	smc_llc_link_clear(lnk, false);
 806out:
 807	smc_ibdev_cnt_dec(lnk);
 808	put_device(&lnk->smcibdev->ibdev->dev);
 809	smcibdev = lnk->smcibdev;
 810	memset(lnk, 0, sizeof(struct smc_link));
 811	lnk->state = SMC_LNK_UNUSED;
 812	if (!atomic_dec_return(&smcibdev->lnk_cnt))
 813		wake_up(&smcibdev->lnks_deleted);
 814	smc_lgr_put(lgr); /* lgr_hold above */
 815	return rc;
 816}
 817
 818/* create a new SMC link group */
 819static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 820{
 821	struct smc_link_group *lgr;
 822	struct list_head *lgr_list;
 823	struct smc_link *lnk;
 824	spinlock_t *lgr_lock;
 825	u8 link_idx;
 826	int rc = 0;
 827	int i;
 828
 829	if (ini->is_smcd && ini->vlan_id) {
 830		if (smc_ism_get_vlan(ini->ism_dev[ini->ism_selected],
 831				     ini->vlan_id)) {
 832			rc = SMC_CLC_DECL_ISMVLANERR;
 833			goto out;
 834		}
 835	}
 836
 837	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
 838	if (!lgr) {
 839		rc = SMC_CLC_DECL_MEM;
 840		goto ism_put_vlan;
 841	}
 842	lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", 0, 0,
 843				     SMC_LGR_ID_SIZE, &lgr->id);
 844	if (!lgr->tx_wq) {
 845		rc = -ENOMEM;
 846		goto free_lgr;
 847	}
 848	lgr->is_smcd = ini->is_smcd;
 849	lgr->sync_err = 0;
 850	lgr->terminating = 0;
 851	lgr->freeing = 0;
 852	lgr->vlan_id = ini->vlan_id;
 853	refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
 854	mutex_init(&lgr->sndbufs_lock);
 855	mutex_init(&lgr->rmbs_lock);
 856	rwlock_init(&lgr->conns_lock);
 857	for (i = 0; i < SMC_RMBE_SIZES; i++) {
 858		INIT_LIST_HEAD(&lgr->sndbufs[i]);
 859		INIT_LIST_HEAD(&lgr->rmbs[i]);
 860	}
 861	lgr->next_link_id = 0;
 862	smc_lgr_list.num += SMC_LGR_NUM_INCR;
 863	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
 864	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
 865	INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
 866	lgr->conns_all = RB_ROOT;
 867	if (ini->is_smcd) {
 868		/* SMC-D specific settings */
 869		get_device(&ini->ism_dev[ini->ism_selected]->dev);
 870		lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected];
 871		lgr->smcd = ini->ism_dev[ini->ism_selected];
 872		lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
 873		lgr_lock = &lgr->smcd->lgr_lock;
 874		lgr->smc_version = ini->smcd_version;
 875		lgr->peer_shutdown = 0;
 876		atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
 877	} else {
 878		/* SMC-R specific settings */
 879		struct smc_ib_device *ibdev;
 880		int ibport;
 881
 882		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
 883		lgr->smc_version = ini->smcr_version;
 884		memcpy(lgr->peer_systemid, ini->peer_systemid,
 885		       SMC_SYSTEMID_LEN);
 886		if (lgr->smc_version == SMC_V2) {
 887			ibdev = ini->smcrv2.ib_dev_v2;
 888			ibport = ini->smcrv2.ib_port_v2;
 889			lgr->saddr = ini->smcrv2.saddr;
 890			lgr->uses_gateway = ini->smcrv2.uses_gateway;
 891			memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
 892			       ETH_ALEN);
 893		} else {
 894			ibdev = ini->ib_dev;
 895			ibport = ini->ib_port;
 896		}
 897		memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
 898		       SMC_MAX_PNETID_LEN);
 899		rc = smc_wr_alloc_lgr_mem(lgr);
 900		if (rc)
 901			goto free_wq;
 902		smc_llc_lgr_init(lgr, smc);
 903
 904		link_idx = SMC_SINGLE_LINK;
 905		lnk = &lgr->lnk[link_idx];
 906		rc = smcr_link_init(lgr, lnk, link_idx, ini);
 907		if (rc) {
 908			smc_wr_free_lgr_mem(lgr);
 909			goto free_wq;
 910		}
 911		lgr->net = smc_ib_net(lnk->smcibdev);
 912		lgr_list = &smc_lgr_list.list;
 913		lgr_lock = &smc_lgr_list.lock;
 914		lgr->buf_type = lgr->net->smc.sysctl_smcr_buf_type;
 915		atomic_inc(&lgr_cnt);
 916	}
 917	smc->conn.lgr = lgr;
 918	spin_lock_bh(lgr_lock);
 919	list_add_tail(&lgr->list, lgr_list);
 920	spin_unlock_bh(lgr_lock);
 921	return 0;
 922
 923free_wq:
 924	destroy_workqueue(lgr->tx_wq);
 925free_lgr:
 926	kfree(lgr);
 927ism_put_vlan:
 928	if (ini->is_smcd && ini->vlan_id)
 929		smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id);
 930out:
 931	if (rc < 0) {
 932		if (rc == -ENOMEM)
 933			rc = SMC_CLC_DECL_MEM;
 934		else
 935			rc = SMC_CLC_DECL_INTERR;
 936	}
 937	return rc;
 938}
 939
 940static int smc_write_space(struct smc_connection *conn)
 941{
 942	int buffer_len = conn->peer_rmbe_size;
 943	union smc_host_cursor prod;
 944	union smc_host_cursor cons;
 945	int space;
 946
 947	smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
 948	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
 949	/* determine rx_buf space */
 950	space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
 951	return space;
 952}
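/* Rough example, assuming smc_curs_diff() yields the number of bytes
 * between the consumer and producer cursors: with a 64KB peer RMB and the
 * producer 40KB ahead of the consumer, the remaining write space is
 * 64KB - 40KB = 24KB.
 */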
 953
 954static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
 955			     struct smc_wr_buf *wr_buf)
 956{
 957	struct smc_connection *conn = &smc->conn;
 958	union smc_host_cursor cons, fin;
 959	int rc = 0;
 960	int diff;
 961
 962	smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
 963	smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
 964	/* set prod cursor to old state, enforce tx_rdma_writes() */
 965	smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
 966	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
 967
 968	if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
  969		/* cons cursor advanced more than fin, and prod was set to
  970		 * fin above, so now prod is smaller than cons. Fix that.
 971		 */
 972		diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
 973		smc_curs_add(conn->sndbuf_desc->len,
 974			     &conn->tx_curs_sent, diff);
 975		smc_curs_add(conn->sndbuf_desc->len,
 976			     &conn->tx_curs_fin, diff);
 977
 978		smp_mb__before_atomic();
 979		atomic_add(diff, &conn->sndbuf_space);
 980		smp_mb__after_atomic();
 981
 982		smc_curs_add(conn->peer_rmbe_size,
 983			     &conn->local_tx_ctrl.prod, diff);
 984		smc_curs_add(conn->peer_rmbe_size,
 985			     &conn->local_tx_ctrl_fin, diff);
 986	}
 987	/* recalculate, value is used by tx_rdma_writes() */
 988	atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));
 989
 990	if (smc->sk.sk_state != SMC_INIT &&
 991	    smc->sk.sk_state != SMC_CLOSED) {
 992		rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
 993		if (!rc) {
 994			queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 0);
 995			smc->sk.sk_data_ready(&smc->sk);
 996		}
 997	} else {
 998		smc_wr_tx_put_slot(conn->lnk,
 999				   (struct smc_wr_tx_pend_priv *)pend);
1000	}
1001	return rc;
1002}
1003
1004void smc_switch_link_and_count(struct smc_connection *conn,
1005			       struct smc_link *to_lnk)
1006{
1007	atomic_dec(&conn->lnk->conn_cnt);
1008	/* link_hold in smc_conn_create() */
1009	smcr_link_put(conn->lnk);
1010	conn->lnk = to_lnk;
1011	atomic_inc(&conn->lnk->conn_cnt);
1012	/* link_put in smc_conn_free() */
1013	smcr_link_hold(conn->lnk);
1014}
1015
1016struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
1017				  struct smc_link *from_lnk, bool is_dev_err)
1018{
1019	struct smc_link *to_lnk = NULL;
1020	struct smc_cdc_tx_pend *pend;
1021	struct smc_connection *conn;
1022	struct smc_wr_buf *wr_buf;
1023	struct smc_sock *smc;
1024	struct rb_node *node;
1025	int i, rc = 0;
1026
1027	/* link is inactive, wake up tx waiters */
1028	smc_wr_wakeup_tx_wait(from_lnk);
1029
1030	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1031		if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx)
1032			continue;
1033		if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
1034		    from_lnk->ibport == lgr->lnk[i].ibport) {
1035			continue;
1036		}
1037		to_lnk = &lgr->lnk[i];
1038		break;
1039	}
1040	if (!to_lnk || !smc_wr_tx_link_hold(to_lnk)) {
1041		smc_lgr_terminate_sched(lgr);
1042		return NULL;
1043	}
1044again:
1045	read_lock_bh(&lgr->conns_lock);
1046	for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
1047		conn = rb_entry(node, struct smc_connection, alert_node);
1048		if (conn->lnk != from_lnk)
1049			continue;
1050		smc = container_of(conn, struct smc_sock, conn);
1051		/* conn->lnk not yet set in SMC_INIT state */
1052		if (smc->sk.sk_state == SMC_INIT)
1053			continue;
1054		if (smc->sk.sk_state == SMC_CLOSED ||
1055		    smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
1056		    smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
1057		    smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
1058		    smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
1059		    smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
1060		    smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
1061		    smc->sk.sk_state == SMC_PEERABORTWAIT ||
1062		    smc->sk.sk_state == SMC_PROCESSABORT) {
1063			spin_lock_bh(&conn->send_lock);
1064			smc_switch_link_and_count(conn, to_lnk);
1065			spin_unlock_bh(&conn->send_lock);
1066			continue;
1067		}
1068		sock_hold(&smc->sk);
1069		read_unlock_bh(&lgr->conns_lock);
1070		/* pre-fetch buffer outside of send_lock, might sleep */
1071		rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
1072		if (rc)
1073			goto err_out;
1074		/* avoid race with smcr_tx_sndbuf_nonempty() */
1075		spin_lock_bh(&conn->send_lock);
1076		smc_switch_link_and_count(conn, to_lnk);
1077		rc = smc_switch_cursor(smc, pend, wr_buf);
1078		spin_unlock_bh(&conn->send_lock);
1079		sock_put(&smc->sk);
1080		if (rc)
1081			goto err_out;
1082		goto again;
1083	}
1084	read_unlock_bh(&lgr->conns_lock);
1085	smc_wr_tx_link_put(to_lnk);
1086	return to_lnk;
1087
1088err_out:
1089	smcr_link_down_cond_sched(to_lnk);
1090	smc_wr_tx_link_put(to_lnk);
1091	return NULL;
1092}
1093
1094static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
1095			   struct smc_link_group *lgr)
1096{
1097	struct mutex *lock;	/* lock buffer list */
1098	int rc;
1099
1100	if (is_rmb && buf_desc->is_conf_rkey && !list_empty(&lgr->list)) {
1101		/* unregister rmb with peer */
1102		rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
1103		if (!rc) {
1104			/* protect against smc_llc_cli_rkey_exchange() */
1105			mutex_lock(&lgr->llc_conf_mutex);
1106			smc_llc_do_delete_rkey(lgr, buf_desc);
1107			buf_desc->is_conf_rkey = false;
1108			mutex_unlock(&lgr->llc_conf_mutex);
1109			smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
1110		}
1111	}
1112
1113	if (buf_desc->is_reg_err) {
1114		/* buf registration failed, reuse not possible */
1115		lock = is_rmb ? &lgr->rmbs_lock :
1116				&lgr->sndbufs_lock;
1117		mutex_lock(lock);
1118		list_del(&buf_desc->list);
1119		mutex_unlock(lock);
1120
1121		smc_buf_free(lgr, is_rmb, buf_desc);
1122	} else {
1123		buf_desc->used = 0;
1124		memset(buf_desc->cpu_addr, 0, buf_desc->len);
1125	}
1126}
1127
1128static void smc_buf_unuse(struct smc_connection *conn,
1129			  struct smc_link_group *lgr)
1130{
1131	if (conn->sndbuf_desc) {
1132		if (!lgr->is_smcd && conn->sndbuf_desc->is_vm) {
1133			smcr_buf_unuse(conn->sndbuf_desc, false, lgr);
1134		} else {
1135			conn->sndbuf_desc->used = 0;
1136			memset(conn->sndbuf_desc->cpu_addr, 0,
1137			       conn->sndbuf_desc->len);
1138		}
1139	}
1140	if (conn->rmb_desc) {
1141		if (!lgr->is_smcd) {
1142			smcr_buf_unuse(conn->rmb_desc, true, lgr);
1143		} else {
1144			conn->rmb_desc->used = 0;
1145			memset(conn->rmb_desc->cpu_addr, 0,
1146			       conn->rmb_desc->len +
1147			       sizeof(struct smcd_cdc_msg));
1148		}
1149	}
1150}
1151
1152/* remove a finished connection from its link group */
1153void smc_conn_free(struct smc_connection *conn)
1154{
1155	struct smc_link_group *lgr = conn->lgr;
1156
1157	if (!lgr || conn->freed)
1158		/* Connection has never been registered in a
1159		 * link group, or has already been freed.
1160		 */
1161		return;
1162
1163	conn->freed = 1;
1164	if (!smc_conn_lgr_valid(conn))
1165		/* Connection has already unregistered from
1166		 * link group.
1167		 */
1168		goto lgr_put;
1169
1170	if (lgr->is_smcd) {
1171		if (!list_empty(&lgr->list))
1172			smc_ism_unset_conn(conn);
1173		tasklet_kill(&conn->rx_tsklet);
1174	} else {
1175		smc_cdc_wait_pend_tx_wr(conn);
1176		if (current_work() != &conn->abort_work)
1177			cancel_work_sync(&conn->abort_work);
1178	}
1179	if (!list_empty(&lgr->list)) {
1180		smc_buf_unuse(conn, lgr); /* allow buffer reuse */
1181		smc_lgr_unregister_conn(conn);
1182	}
1183
1184	if (!lgr->conns_num)
1185		smc_lgr_schedule_free_work(lgr);
1186lgr_put:
1187	if (!lgr->is_smcd)
1188		smcr_link_put(conn->lnk); /* link_hold in smc_conn_create() */
1189	smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */
1190}
1191
1192/* unregister a link from a buf_desc */
1193static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
1194				struct smc_link *lnk)
1195{
1196	if (is_rmb || buf_desc->is_vm)
1197		buf_desc->is_reg_mr[lnk->link_idx] = false;
1198	if (!buf_desc->is_map_ib[lnk->link_idx])
1199		return;
1200
1201	if ((is_rmb || buf_desc->is_vm) &&
1202	    buf_desc->mr[lnk->link_idx]) {
1203		smc_ib_put_memory_region(buf_desc->mr[lnk->link_idx]);
1204		buf_desc->mr[lnk->link_idx] = NULL;
1205	}
1206	if (is_rmb)
1207		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
1208	else
1209		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
1210
1211	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
1212	buf_desc->is_map_ib[lnk->link_idx] = false;
1213}
1214
1215/* unmap all buffers of lgr for a deleted link */
1216static void smcr_buf_unmap_lgr(struct smc_link *lnk)
1217{
1218	struct smc_link_group *lgr = lnk->lgr;
1219	struct smc_buf_desc *buf_desc, *bf;
1220	int i;
1221
1222	for (i = 0; i < SMC_RMBE_SIZES; i++) {
1223		mutex_lock(&lgr->rmbs_lock);
1224		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
1225			smcr_buf_unmap_link(buf_desc, true, lnk);
1226		mutex_unlock(&lgr->rmbs_lock);
1227		mutex_lock(&lgr->sndbufs_lock);
1228		list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
1229					 list)
1230			smcr_buf_unmap_link(buf_desc, false, lnk);
1231		mutex_unlock(&lgr->sndbufs_lock);
1232	}
1233}
1234
1235static void smcr_rtoken_clear_link(struct smc_link *lnk)
1236{
1237	struct smc_link_group *lgr = lnk->lgr;
1238	int i;
1239
1240	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1241		lgr->rtokens[i][lnk->link_idx].rkey = 0;
1242		lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
1243	}
1244}
1245
1246static void __smcr_link_clear(struct smc_link *lnk)
1247{
1248	struct smc_link_group *lgr = lnk->lgr;
1249	struct smc_ib_device *smcibdev;
1250
1251	smc_wr_free_link_mem(lnk);
1252	smc_ibdev_cnt_dec(lnk);
1253	put_device(&lnk->smcibdev->ibdev->dev);
1254	smcibdev = lnk->smcibdev;
1255	memset(lnk, 0, sizeof(struct smc_link));
1256	lnk->state = SMC_LNK_UNUSED;
1257	if (!atomic_dec_return(&smcibdev->lnk_cnt))
1258		wake_up(&smcibdev->lnks_deleted);
1259	smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */
1260}
1261
1262/* must be called under lgr->llc_conf_mutex lock */
1263void smcr_link_clear(struct smc_link *lnk, bool log)
1264{
1265	if (!lnk->lgr || lnk->clearing ||
1266	    lnk->state == SMC_LNK_UNUSED)
1267		return;
1268	lnk->clearing = 1;
1269	lnk->peer_qpn = 0;
1270	smc_llc_link_clear(lnk, log);
1271	smcr_buf_unmap_lgr(lnk);
1272	smcr_rtoken_clear_link(lnk);
1273	smc_ib_modify_qp_error(lnk);
1274	smc_wr_free_link(lnk);
1275	smc_ib_destroy_queue_pair(lnk);
1276	smc_ib_dealloc_protection_domain(lnk);
1277	smcr_link_put(lnk); /* theoretically last link_put */
1278}
1279
1280void smcr_link_hold(struct smc_link *lnk)
1281{
1282	refcount_inc(&lnk->refcnt);
1283}
1284
1285void smcr_link_put(struct smc_link *lnk)
1286{
1287	if (refcount_dec_and_test(&lnk->refcnt))
1288		__smcr_link_clear(lnk);
1289}
1290
1291static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
1292			  struct smc_buf_desc *buf_desc)
1293{
1294	int i;
1295
1296	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
1297		smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
1298
1299	if (!buf_desc->is_vm && buf_desc->pages)
1300		__free_pages(buf_desc->pages, buf_desc->order);
1301	else if (buf_desc->is_vm && buf_desc->cpu_addr)
1302		vfree(buf_desc->cpu_addr);
1303	kfree(buf_desc);
1304}
1305
1306static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
1307			  struct smc_buf_desc *buf_desc)
1308{
1309	if (is_dmb) {
1310		/* restore original buf len */
1311		buf_desc->len += sizeof(struct smcd_cdc_msg);
1312		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
1313	} else {
1314		kfree(buf_desc->cpu_addr);
1315	}
1316	kfree(buf_desc);
1317}
1318
1319static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
1320			 struct smc_buf_desc *buf_desc)
1321{
1322	if (lgr->is_smcd)
1323		smcd_buf_free(lgr, is_rmb, buf_desc);
1324	else
1325		smcr_buf_free(lgr, is_rmb, buf_desc);
1326}
1327
1328static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
1329{
1330	struct smc_buf_desc *buf_desc, *bf_desc;
1331	struct list_head *buf_list;
1332	int i;
1333
1334	for (i = 0; i < SMC_RMBE_SIZES; i++) {
1335		if (is_rmb)
1336			buf_list = &lgr->rmbs[i];
1337		else
1338			buf_list = &lgr->sndbufs[i];
1339		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
1340					 list) {
1341			list_del(&buf_desc->list);
1342			smc_buf_free(lgr, is_rmb, buf_desc);
1343		}
1344	}
1345}
1346
1347static void smc_lgr_free_bufs(struct smc_link_group *lgr)
1348{
1349	/* free send buffers */
1350	__smc_lgr_free_bufs(lgr, false);
1351	/* free rmbs */
1352	__smc_lgr_free_bufs(lgr, true);
1353}
1354
 1355/* won't be freed until no one accesses the lgr anymore */
1356static void __smc_lgr_free(struct smc_link_group *lgr)
1357{
1358	smc_lgr_free_bufs(lgr);
1359	if (lgr->is_smcd) {
1360		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
1361			wake_up(&lgr->smcd->lgrs_deleted);
1362	} else {
1363		smc_wr_free_lgr_mem(lgr);
1364		if (!atomic_dec_return(&lgr_cnt))
1365			wake_up(&lgrs_deleted);
1366	}
1367	kfree(lgr);
1368}
1369
1370/* remove a link group */
1371static void smc_lgr_free(struct smc_link_group *lgr)
1372{
1373	int i;
1374
1375	if (!lgr->is_smcd) {
1376		mutex_lock(&lgr->llc_conf_mutex);
1377		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1378			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
1379				smcr_link_clear(&lgr->lnk[i], false);
1380		}
1381		mutex_unlock(&lgr->llc_conf_mutex);
1382		smc_llc_lgr_clear(lgr);
1383	}
1384
1385	destroy_workqueue(lgr->tx_wq);
1386	if (lgr->is_smcd) {
1387		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
1388		put_device(&lgr->smcd->dev);
1389	}
1390	smc_lgr_put(lgr); /* theoretically last lgr_put */
1391}
1392
1393void smc_lgr_hold(struct smc_link_group *lgr)
1394{
1395	refcount_inc(&lgr->refcnt);
1396}
1397
1398void smc_lgr_put(struct smc_link_group *lgr)
1399{
1400	if (refcount_dec_and_test(&lgr->refcnt))
1401		__smc_lgr_free(lgr);
1402}
1403
1404static void smc_sk_wake_ups(struct smc_sock *smc)
1405{
1406	smc->sk.sk_write_space(&smc->sk);
1407	smc->sk.sk_data_ready(&smc->sk);
1408	smc->sk.sk_state_change(&smc->sk);
1409}
1410
1411/* kill a connection */
1412static void smc_conn_kill(struct smc_connection *conn, bool soft)
1413{
1414	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
1415
1416	if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
1417		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
1418	else
1419		smc_close_abort(conn);
1420	conn->killed = 1;
1421	smc->sk.sk_err = ECONNABORTED;
1422	smc_sk_wake_ups(smc);
1423	if (conn->lgr->is_smcd) {
1424		smc_ism_unset_conn(conn);
1425		if (soft)
1426			tasklet_kill(&conn->rx_tsklet);
1427		else
1428			tasklet_unlock_wait(&conn->rx_tsklet);
1429	} else {
1430		smc_cdc_wait_pend_tx_wr(conn);
1431	}
1432	smc_lgr_unregister_conn(conn);
1433	smc_close_active_abort(smc);
1434}
1435
1436static void smc_lgr_cleanup(struct smc_link_group *lgr)
1437{
1438	if (lgr->is_smcd) {
1439		smc_ism_signal_shutdown(lgr);
1440	} else {
1441		u32 rsn = lgr->llc_termination_rsn;
1442
1443		if (!rsn)
1444			rsn = SMC_LLC_DEL_PROG_INIT_TERM;
1445		smc_llc_send_link_delete_all(lgr, false, rsn);
1446		smcr_lgr_link_deactivate_all(lgr);
1447	}
1448}
1449
1450/* terminate link group
1451 * @soft: true if link group shutdown can take its time
1452 *	  false if immediate link group shutdown is required
1453 */
1454static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
1455{
1456	struct smc_connection *conn;
1457	struct smc_sock *smc;
1458	struct rb_node *node;
1459
1460	if (lgr->terminating)
1461		return;	/* lgr already terminating */
1462	/* cancel free_work sync, will terminate when lgr->freeing is set */
1463	cancel_delayed_work_sync(&lgr->free_work);
1464	lgr->terminating = 1;
1465
1466	/* kill remaining link group connections */
1467	read_lock_bh(&lgr->conns_lock);
1468	node = rb_first(&lgr->conns_all);
1469	while (node) {
1470		read_unlock_bh(&lgr->conns_lock);
1471		conn = rb_entry(node, struct smc_connection, alert_node);
1472		smc = container_of(conn, struct smc_sock, conn);
1473		sock_hold(&smc->sk); /* sock_put below */
1474		lock_sock(&smc->sk);
1475		smc_conn_kill(conn, soft);
1476		release_sock(&smc->sk);
1477		sock_put(&smc->sk); /* sock_hold above */
1478		read_lock_bh(&lgr->conns_lock);
1479		node = rb_first(&lgr->conns_all);
1480	}
1481	read_unlock_bh(&lgr->conns_lock);
1482	smc_lgr_cleanup(lgr);
1483	smc_lgr_free(lgr);
1484}
1485
1486/* unlink link group and schedule termination */
1487void smc_lgr_terminate_sched(struct smc_link_group *lgr)
1488{
1489	spinlock_t *lgr_lock;
1490
1491	smc_lgr_list_head(lgr, &lgr_lock);
1492	spin_lock_bh(lgr_lock);
1493	if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
1494		spin_unlock_bh(lgr_lock);
1495		return;	/* lgr already terminating */
1496	}
1497	list_del_init(&lgr->list);
1498	lgr->freeing = 1;
1499	spin_unlock_bh(lgr_lock);
1500	schedule_work(&lgr->terminate_work);
1501}
1502
 1503/* Called when a peer lgr shutdown (regular or abnormal) is received */
1504void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
1505{
1506	struct smc_link_group *lgr, *l;
1507	LIST_HEAD(lgr_free_list);
1508
1509	/* run common cleanup function and build free list */
1510	spin_lock_bh(&dev->lgr_lock);
1511	list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
1512		if ((!peer_gid || lgr->peer_gid == peer_gid) &&
1513		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
1514			if (peer_gid) /* peer triggered termination */
1515				lgr->peer_shutdown = 1;
1516			list_move(&lgr->list, &lgr_free_list);
1517			lgr->freeing = 1;
1518		}
1519	}
1520	spin_unlock_bh(&dev->lgr_lock);
1521
1522	/* cancel the regular free workers and actually free lgrs */
1523	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
1524		list_del_init(&lgr->list);
1525		schedule_work(&lgr->terminate_work);
1526	}
1527}
1528
1529/* Called when an SMCD device is removed or the smc module is unloaded */
1530void smc_smcd_terminate_all(struct smcd_dev *smcd)
1531{
1532	struct smc_link_group *lgr, *lg;
1533	LIST_HEAD(lgr_free_list);
1534
1535	spin_lock_bh(&smcd->lgr_lock);
1536	list_splice_init(&smcd->lgr_list, &lgr_free_list);
1537	list_for_each_entry(lgr, &lgr_free_list, list)
1538		lgr->freeing = 1;
1539	spin_unlock_bh(&smcd->lgr_lock);
1540
1541	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
1542		list_del_init(&lgr->list);
1543		__smc_lgr_terminate(lgr, false);
1544	}
1545
1546	if (atomic_read(&smcd->lgr_cnt))
1547		wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
1548}
1549
1550/* Called when an SMCR device is removed or the smc module is unloaded.
1551 * If smcibdev is given, all SMCR link groups using this device are terminated.
1552 * If smcibdev is NULL, all SMCR link groups are terminated.
1553 */
1554void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
1555{
1556	struct smc_link_group *lgr, *lg;
1557	LIST_HEAD(lgr_free_list);
1558	int i;
1559
1560	spin_lock_bh(&smc_lgr_list.lock);
1561	if (!smcibdev) {
1562		list_splice_init(&smc_lgr_list.list, &lgr_free_list);
1563		list_for_each_entry(lgr, &lgr_free_list, list)
1564			lgr->freeing = 1;
1565	} else {
1566		list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
1567			for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1568				if (lgr->lnk[i].smcibdev == smcibdev)
1569					smcr_link_down_cond_sched(&lgr->lnk[i]);
1570			}
1571		}
1572	}
1573	spin_unlock_bh(&smc_lgr_list.lock);
1574
1575	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
1576		list_del_init(&lgr->list);
1577		smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
1578		__smc_lgr_terminate(lgr, false);
1579	}
1580
1581	if (smcibdev) {
1582		if (atomic_read(&smcibdev->lnk_cnt))
1583			wait_event(smcibdev->lnks_deleted,
1584				   !atomic_read(&smcibdev->lnk_cnt));
1585	} else {
1586		if (atomic_read(&lgr_cnt))
1587			wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
1588	}
1589}
1590
1591/* set new lgr type and clear all asymmetric link tagging */
1592void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
1593{
1594	char *lgr_type = "";
1595	int i;
1596
1597	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
1598		if (smc_link_usable(&lgr->lnk[i]))
1599			lgr->lnk[i].link_is_asym = false;
1600	if (lgr->type == new_type)
1601		return;
1602	lgr->type = new_type;
1603
1604	switch (lgr->type) {
1605	case SMC_LGR_NONE:
1606		lgr_type = "NONE";
1607		break;
1608	case SMC_LGR_SINGLE:
1609		lgr_type = "SINGLE";
1610		break;
1611	case SMC_LGR_SYMMETRIC:
1612		lgr_type = "SYMMETRIC";
1613		break;
1614	case SMC_LGR_ASYMMETRIC_PEER:
1615		lgr_type = "ASYMMETRIC_PEER";
1616		break;
1617	case SMC_LGR_ASYMMETRIC_LOCAL:
1618		lgr_type = "ASYMMETRIC_LOCAL";
1619		break;
1620	}
1621	pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu state changed: "
1622			    "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
1623			    lgr->net->net_cookie, lgr_type, lgr->pnet_id);
1624}
1625
1626/* set new lgr type and tag a link as asymmetric */
1627void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
1628			    enum smc_lgr_type new_type, int asym_lnk_idx)
1629{
1630	smcr_lgr_set_type(lgr, new_type);
1631	lgr->lnk[asym_lnk_idx].link_is_asym = true;
1632}
1633
1634/* abort connection, abort_work scheduled from tasklet context */
1635static void smc_conn_abort_work(struct work_struct *work)
1636{
1637	struct smc_connection *conn = container_of(work,
1638						   struct smc_connection,
1639						   abort_work);
1640	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
1641
1642	lock_sock(&smc->sk);
1643	smc_conn_kill(conn, true);
1644	release_sock(&smc->sk);
1645	sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
1646}
1647
1648void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
1649{
1650	struct smc_link_group *lgr, *n;
1651
1652	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
1653		struct smc_link *link;
1654
1655		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
1656			    SMC_MAX_PNETID_LEN) ||
1657		    lgr->type == SMC_LGR_SYMMETRIC ||
1658		    lgr->type == SMC_LGR_ASYMMETRIC_PEER ||
1659		    !rdma_dev_access_netns(smcibdev->ibdev, lgr->net))
1660			continue;
1661
1662		/* trigger local add link processing */
1663		link = smc_llc_usable_link(lgr);
1664		if (link)
1665			smc_llc_add_link_local(link);
1666	}
1667}
1668
1669/* link is down - switch connections to alternate link,
1670 * must be called under lgr->llc_conf_mutex lock
1671 */
1672static void smcr_link_down(struct smc_link *lnk)
1673{
1674	struct smc_link_group *lgr = lnk->lgr;
1675	struct smc_link *to_lnk;
1676	int del_link_id;
1677
1678	if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
1679		return;
1680
1681	to_lnk = smc_switch_conns(lgr, lnk, true);
1682	if (!to_lnk) { /* no backup link available */
1683		smcr_link_clear(lnk, true);
1684		return;
1685	}
1686	smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
1687	del_link_id = lnk->link_id;
1688
1689	if (lgr->role == SMC_SERV) {
1690		/* trigger local delete link processing */
1691		smc_llc_srv_delete_link_local(to_lnk, del_link_id);
1692	} else {
1693		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
1694			/* another llc task is ongoing */
1695			mutex_unlock(&lgr->llc_conf_mutex);
1696			wait_event_timeout(lgr->llc_flow_waiter,
1697				(list_empty(&lgr->list) ||
1698				 lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
1699				SMC_LLC_WAIT_TIME);
1700			mutex_lock(&lgr->llc_conf_mutex);
1701		}
1702		if (!list_empty(&lgr->list)) {
1703			smc_llc_send_delete_link(to_lnk, del_link_id,
1704						 SMC_LLC_REQ, true,
1705						 SMC_LLC_DEL_LOST_PATH);
1706			smcr_link_clear(lnk, true);
1707		}
1708		wake_up(&lgr->llc_flow_waiter);	/* wake up next waiter */
1709	}
1710}
1711
1712/* must be called under lgr->llc_conf_mutex lock */
1713void smcr_link_down_cond(struct smc_link *lnk)
1714{
1715	if (smc_link_downing(&lnk->state)) {
1716		trace_smcr_link_down(lnk, __builtin_return_address(0));
1717		smcr_link_down(lnk);
1718	}
1719}
1720
1721/* will get the lgr->llc_conf_mutex lock */
1722void smcr_link_down_cond_sched(struct smc_link *lnk)
1723{
1724	if (smc_link_downing(&lnk->state)) {
1725		trace_smcr_link_down(lnk, __builtin_return_address(0));
1726		schedule_work(&lnk->link_down_wrk);
1727	}
1728}
1729
1730void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
1731{
1732	struct smc_link_group *lgr, *n;
1733	int i;
1734
1735	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
1736		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
1737			    SMC_MAX_PNETID_LEN))
1738			continue; /* lgr is not affected */
1739		if (list_empty(&lgr->list))
1740			continue;
1741		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1742			struct smc_link *lnk = &lgr->lnk[i];
1743
1744			if (smc_link_usable(lnk) &&
1745			    lnk->smcibdev == smcibdev && lnk->ibport == ibport)
1746				smcr_link_down_cond_sched(lnk);
1747		}
1748	}
1749}
1750
1751static void smc_link_down_work(struct work_struct *work)
1752{
1753	struct smc_link *link = container_of(work, struct smc_link,
1754					     link_down_wrk);
1755	struct smc_link_group *lgr = link->lgr;
1756
1757	if (list_empty(&lgr->list))
1758		return;
1759	wake_up_all(&lgr->llc_msg_waiter);
1760	mutex_lock(&lgr->llc_conf_mutex);
1761	smcr_link_down(link);
1762	mutex_unlock(&lgr->llc_conf_mutex);
1763}
1764
1765static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
1766				  struct netdev_nested_priv *priv)
1767{
1768	unsigned short *vlan_id = (unsigned short *)priv->data;
1769
1770	if (is_vlan_dev(lower_dev)) {
1771		*vlan_id = vlan_dev_vlan_id(lower_dev);
1772		return 1;
1773	}
1774
1775	return 0;
1776}
1777
1778/* Determine vlan of internal TCP socket. */
1779int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
1780{
1781	struct dst_entry *dst = sk_dst_get(clcsock->sk);
1782	struct netdev_nested_priv priv;
1783	struct net_device *ndev;
1784	int rc = 0;
1785
1786	ini->vlan_id = 0;
1787	if (!dst) {
1788		rc = -ENOTCONN;
1789		goto out;
1790	}
1791	if (!dst->dev) {
1792		rc = -ENODEV;
1793		goto out_rel;
1794	}
1795
1796	ndev = dst->dev;
1797	if (is_vlan_dev(ndev)) {
1798		ini->vlan_id = vlan_dev_vlan_id(ndev);
1799		goto out_rel;
1800	}
1801
1802	priv.data = (void *)&ini->vlan_id;
1803	rtnl_lock();
1804	netdev_walk_all_lower_dev(ndev, smc_vlan_by_tcpsk_walk, &priv);
1805	rtnl_unlock();
1806
1807out_rel:
1808	dst_release(dst);
1809out:
1810	return rc;
1811}
1812
1813static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
1814			   u8 peer_systemid[],
1815			   u8 peer_gid[],
1816			   u8 peer_mac_v1[],
1817			   enum smc_lgr_role role, u32 clcqpn,
1818			   struct net *net)
1819{
1820	struct smc_link *lnk;
1821	int i;
1822
1823	if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) ||
1824	    lgr->role != role)
1825		return false;
1826
1827	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1828		lnk = &lgr->lnk[i];
1829
1830		if (!smc_link_active(lnk))
1831			continue;
1832		/* use verbs API to check netns, instead of lgr->net */
1833		if (!rdma_dev_access_netns(lnk->smcibdev->ibdev, net))
1834			return false;
1835		if ((lgr->role == SMC_SERV || lnk->peer_qpn == clcqpn) &&
1836		    !memcmp(lnk->peer_gid, peer_gid, SMC_GID_SIZE) &&
1837		    (smcr_version == SMC_V2 ||
1838		     !memcmp(lnk->peer_mac, peer_mac_v1, ETH_ALEN)))
1839			return true;
1840	}
1841	return false;
1842}
1843
1844static bool smcd_lgr_match(struct smc_link_group *lgr,
1845			   struct smcd_dev *smcismdev, u64 peer_gid)
1846{
1847	return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
1848}
1849
1850/* create a new SMC connection (and a new link group if necessary) */
1851int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
1852{
1853	struct smc_connection *conn = &smc->conn;
1854	struct net *net = sock_net(&smc->sk);
1855	struct list_head *lgr_list;
1856	struct smc_link_group *lgr;
1857	enum smc_lgr_role role;
1858	spinlock_t *lgr_lock;
1859	int rc = 0;
1860
1861	lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list :
1862				  &smc_lgr_list.list;
1863	lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock :
1864				  &smc_lgr_list.lock;
1865	ini->first_contact_local = 1;
1866	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
1867	if (role == SMC_CLNT && ini->first_contact_peer)
1868		/* create new link group as well */
1869		goto create;
1870
1871	/* determine if an existing link group can be reused */
1872	spin_lock_bh(lgr_lock);
1873	list_for_each_entry(lgr, lgr_list, list) {
1874		write_lock_bh(&lgr->conns_lock);
1875		if ((ini->is_smcd ?
1876		     smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
1877				    ini->ism_peer_gid[ini->ism_selected]) :
1878		     smcr_lgr_match(lgr, ini->smcr_version,
1879				    ini->peer_systemid,
1880				    ini->peer_gid, ini->peer_mac, role,
1881				    ini->ib_clcqpn, net)) &&
1882		    !lgr->sync_err &&
1883		    (ini->smcd_version == SMC_V2 ||
1884		     lgr->vlan_id == ini->vlan_id) &&
1885		    (role == SMC_CLNT || ini->is_smcd ||
1886		    (lgr->conns_num < SMC_RMBS_PER_LGR_MAX &&
1887		      !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
1888			/* link group found */
1889			ini->first_contact_local = 0;
1890			conn->lgr = lgr;
1891			rc = smc_lgr_register_conn(conn, false);
1892			write_unlock_bh(&lgr->conns_lock);
1893			if (!rc && delayed_work_pending(&lgr->free_work))
1894				cancel_delayed_work(&lgr->free_work);
1895			break;
1896		}
1897		write_unlock_bh(&lgr->conns_lock);
1898	}
1899	spin_unlock_bh(lgr_lock);
1900	if (rc)
1901		return rc;
1902
1903	if (role == SMC_CLNT && !ini->first_contact_peer &&
1904	    ini->first_contact_local) {
1905		/* Server reuses a link group, but Client wants to start
1906		 * a new one; send an out_of_sync decline,
1907		 * reason: synchronization error
1908		 */
1909		return SMC_CLC_DECL_SYNCERR;
1910	}
1911
1912create:
1913	if (ini->first_contact_local) {
1914		rc = smc_lgr_create(smc, ini);
1915		if (rc)
1916			goto out;
1917		lgr = conn->lgr;
1918		write_lock_bh(&lgr->conns_lock);
1919		rc = smc_lgr_register_conn(conn, true);
1920		write_unlock_bh(&lgr->conns_lock);
1921		if (rc) {
1922			smc_lgr_cleanup_early(lgr);
1923			goto out;
1924		}
1925	}
1926	smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */
1927	if (!conn->lgr->is_smcd)
1928		smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */
1929	conn->freed = 0;
1930	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
1931	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
1932	conn->urg_state = SMC_URG_READ;
1933	init_waitqueue_head(&conn->cdc_pend_tx_wq);
1934	INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
1935	if (ini->is_smcd) {
1936		conn->rx_off = sizeof(struct smcd_cdc_msg);
1937		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
1938	} else {
1939		conn->rx_off = 0;
1940	}
1941#ifndef KERNEL_HAS_ATOMIC64
1942	spin_lock_init(&conn->acurs_lock);
1943#endif
1944
1945out:
1946	return rc;
1947}
1948
1949#define SMCD_DMBE_SIZES		6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
1950#define SMCR_RMBE_SIZES		5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */
1951
1952/* convert the RMB size into the compressed notation (minimum 16K, see
1953 * SMCD/R_DMBE_SIZES).
1954 * In contrast to plain ilog2, this rounds towards the next power of 2,
1955 * so the socket application gets at least its desired sndbuf / rcvbuf size.
1956 */
1957static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
1958{
1959	const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE;
1960	u8 compressed;
1961
1962	if (size <= SMC_BUF_MIN_SIZE)
1963		return 0;
1964
1965	size = (size - 1) >> 14;  /* convert to 16K multiple */
1966	compressed = min_t(u8, ilog2(size) + 1,
1967			   is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);
1968
1969	if (!is_smcd && is_rmb)
1970		/* RMBs are backed by & limited to max size of scatterlists */
1971		compressed = min_t(u8, compressed, ilog2(max_scat >> 14));
1972
1973	return compressed;
1974}
1975
1976/* convert the RMB size from compressed notation into integer */
1977int smc_uncompress_bufsize(u8 compressed)
1978{
1979	u32 size;
1980
1981	size = 0x00000001 << (((int)compressed) + 14);
1982	return (int)size;
1983}
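
/*
 * Illustrative sketch (not part of the original source, guarded with
 * #if 0 so it cannot affect a build): a stand-alone, user-space model
 * of the compressed buffer-size notation above. All names are made up
 * for the example; only the 16K granularity and the round-up behaviour
 * mirror smc_compress_bufsize() / smc_uncompress_bufsize(), while the
 * SMCD/SMCR upper clamps and the scatterlist cap for RMBs are omitted.
 */
#if 0
#include <stdio.h>

static unsigned char demo_compress_bufsize(int size)
{
	unsigned char compressed = 0;

	size = (size - 1) >> 14;	/* convert to 16K multiple */
	while (size) {			/* ilog2(size) + 1 */
		size >>= 1;
		compressed++;
	}
	return compressed;
}

static int demo_uncompress_bufsize(unsigned char compressed)
{
	return 1 << (compressed + 14);
}

int main(void)
{
	/* 100000 bytes compress to 3, which uncompresses to 128KB:
	 * the application gets at least the size it asked for
	 */
	unsigned char c = demo_compress_bufsize(100000);

	printf("compressed=%u size=%d\n", (unsigned int)c,
	       demo_uncompress_bufsize(c));
	return 0;
}
#endif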
1984
1985/* try to reuse a sndbuf or rmb description slot for a certain
1986 * buffer size; if not available, return NULL
1987 */
1988static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
1989					     struct mutex *lock,
1990					     struct list_head *buf_list)
1991{
1992	struct smc_buf_desc *buf_slot;
1993
1994	mutex_lock(lock);
1995	list_for_each_entry(buf_slot, buf_list, list) {
1996		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
1997			mutex_unlock(lock);
1998			return buf_slot;
1999		}
2000	}
2001	mutex_unlock(lock);
2002	return NULL;
2003}
2004
2005/* one of the conditions for announcing a receiver's current window size is
2006 * that it "results in a minimum increase in the window size of 10% of the
2007 * receive buffer space" [RFC7609]
2008 */
2009static inline int smc_rmb_wnd_update_limit(int rmbe_size)
2010{
2011	return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
2012}
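
/* worked example for the limit above (illustrative): with a 64KB RMB the
 * 10% term is 65536 / 10 = 6553 bytes, so a window update is only
 * announced once it would grow the peer's window by at least that much;
 * for small RMBs the SOCK_MIN_SNDBUF / 2 floor (a few KB, configuration
 * dependent) dominates instead
 */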
2013
2014/* map a buf to a link */
2015static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
2016			     struct smc_link *lnk)
2017{
2018	int rc, i, nents, offset, buf_size, size, access_flags;
2019	struct scatterlist *sg;
2020	void *buf;
2021
2022	if (buf_desc->is_map_ib[lnk->link_idx])
2023		return 0;
2024
2025	if (buf_desc->is_vm) {
2026		buf = buf_desc->cpu_addr;
2027		buf_size = buf_desc->len;
2028		offset = offset_in_page(buf_desc->cpu_addr);
2029		nents = PAGE_ALIGN(buf_size + offset) / PAGE_SIZE;
2030	} else {
2031		nents = 1;
2032	}
2033
2034	rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], nents, GFP_KERNEL);
2035	if (rc)
2036		return rc;
2037
2038	if (buf_desc->is_vm) {
2039		/* virtually contiguous buffer */
2040		for_each_sg(buf_desc->sgt[lnk->link_idx].sgl, sg, nents, i) {
2041			size = min_t(int, PAGE_SIZE - offset, buf_size);
2042			sg_set_page(sg, vmalloc_to_page(buf), size, offset);
2043			buf += size / sizeof(*buf);
2044			buf_size -= size;
2045			offset = 0;
2046		}
2047	} else {
2048		/* physically contiguous buffer */
2049		sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
2050			   buf_desc->cpu_addr, buf_desc->len);
2051	}
2052
2053	/* map sg table to DMA address */
2054	rc = smc_ib_buf_map_sg(lnk, buf_desc,
2055			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
2056	/* SMC protocol depends on mapping to one DMA address only */
2057	if (rc != nents) {
2058		rc = -EAGAIN;
2059		goto free_table;
2060	}
2061
2062	buf_desc->is_dma_need_sync |=
2063		smc_ib_is_sg_need_sync(lnk, buf_desc) << lnk->link_idx;
2064
2065	if (is_rmb || buf_desc->is_vm) {
2066		/* create a new memory region for the RMB or vzalloced sndbuf */
2067		access_flags = is_rmb ?
2068			       IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
2069			       IB_ACCESS_LOCAL_WRITE;
2070
2071		rc = smc_ib_get_memory_region(lnk->roce_pd, access_flags,
2072					      buf_desc, lnk->link_idx);
2073		if (rc)
2074			goto buf_unmap;
2075		smc_ib_sync_sg_for_device(lnk, buf_desc,
2076					  is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
2077	}
2078	buf_desc->is_map_ib[lnk->link_idx] = true;
2079	return 0;
2080
2081buf_unmap:
2082	smc_ib_buf_unmap_sg(lnk, buf_desc,
2083			    is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
2084free_table:
2085	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
2086	return rc;
2087}
2088
2089/* register a new buf on IB device, rmb or vzalloced sndbuf
2090 * must be called under lgr->llc_conf_mutex lock
2091 */
2092int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc)
2093{
2094	if (list_empty(&link->lgr->list))
2095		return -ENOLINK;
2096	if (!buf_desc->is_reg_mr[link->link_idx]) {
2097		/* register memory region for new buf */
2098		if (buf_desc->is_vm)
2099			buf_desc->mr[link->link_idx]->iova =
2100				(uintptr_t)buf_desc->cpu_addr;
2101		if (smc_wr_reg_send(link, buf_desc->mr[link->link_idx])) {
2102			buf_desc->is_reg_err = true;
2103			return -EFAULT;
2104		}
2105		buf_desc->is_reg_mr[link->link_idx] = true;
2106	}
2107	return 0;
2108}
2109
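/* map all in-use buffers of one rmb/sndbuf size bucket to a link;
 * takes the corresponding buffer list lock internally
 */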
2110static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
2111			     struct list_head *lst, bool is_rmb)
2112{
2113	struct smc_buf_desc *buf_desc, *bf;
2114	int rc = 0;
2115
2116	mutex_lock(lock);
2117	list_for_each_entry_safe(buf_desc, bf, lst, list) {
2118		if (!buf_desc->used)
2119			continue;
2120		rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
2121		if (rc)
2122			goto out;
2123	}
2124out:
2125	mutex_unlock(lock);
2126	return rc;
2127}
2128
2129/* map all used buffers of lgr for a new link */
2130int smcr_buf_map_lgr(struct smc_link *lnk)
2131{
2132	struct smc_link_group *lgr = lnk->lgr;
2133	int i, rc = 0;
2134
2135	for (i = 0; i < SMC_RMBE_SIZES; i++) {
2136		rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
2137				       &lgr->rmbs[i], true);
2138		if (rc)
2139			return rc;
2140		rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
2141				       &lgr->sndbufs[i], false);
2142		if (rc)
2143			return rc;
2144	}
2145	return 0;
2146}
2147
2148/* register all used buffers of lgr for a new link,
2149 * must be called under lgr->llc_conf_mutex lock
2150 */
2151int smcr_buf_reg_lgr(struct smc_link *lnk)
2152{
2153	struct smc_link_group *lgr = lnk->lgr;
2154	struct smc_buf_desc *buf_desc, *bf;
2155	int i, rc = 0;
2156
2157	/* reg all RMBs for a new link */
2158	mutex_lock(&lgr->rmbs_lock);
2159	for (i = 0; i < SMC_RMBE_SIZES; i++) {
2160		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
2161			if (!buf_desc->used)
2162				continue;
2163			rc = smcr_link_reg_buf(lnk, buf_desc);
2164			if (rc) {
2165				mutex_unlock(&lgr->rmbs_lock);
2166				return rc;
2167			}
2168		}
2169	}
2170	mutex_unlock(&lgr->rmbs_lock);
2171
2172	if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
2173		return rc;
2174
2175	/* reg all vzalloced sndbufs for a new link */
2176	mutex_lock(&lgr->sndbufs_lock);
2177	for (i = 0; i < SMC_RMBE_SIZES; i++) {
2178		list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) {
2179			if (!buf_desc->used || !buf_desc->is_vm)
2180				continue;
2181			rc = smcr_link_reg_buf(lnk, buf_desc);
2182			if (rc) {
2183				mutex_unlock(&lgr->sndbufs_lock);
2184				return rc;
2185			}
2186		}
2187	}
2188	mutex_unlock(&lgr->sndbufs_lock);
2189	return rc;
2190}
2191
2192static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
2193						bool is_rmb, int bufsize)
2194{
2195	struct smc_buf_desc *buf_desc;
2196
2197	/* try to alloc a new buffer */
2198	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
2199	if (!buf_desc)
2200		return ERR_PTR(-ENOMEM);
2201
2202	switch (lgr->buf_type) {
2203	case SMCR_PHYS_CONT_BUFS:
2204	case SMCR_MIXED_BUFS:
2205		buf_desc->order = get_order(bufsize);
2206		buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
2207					      __GFP_NOMEMALLOC | __GFP_COMP |
2208					      __GFP_NORETRY | __GFP_ZERO,
2209					      buf_desc->order);
2210		if (buf_desc->pages) {
2211			buf_desc->cpu_addr =
2212				(void *)page_address(buf_desc->pages);
2213			buf_desc->len = bufsize;
2214			buf_desc->is_vm = false;
2215			break;
2216		}
2217		if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
2218			goto out;
2219		fallthrough;	// try virtually contiguous buf
2220	case SMCR_VIRT_CONT_BUFS:
2221		buf_desc->order = get_order(bufsize);
2222		buf_desc->cpu_addr = vzalloc(PAGE_SIZE << buf_desc->order);
2223		if (!buf_desc->cpu_addr)
2224			goto out;
2225		buf_desc->pages = NULL;
2226		buf_desc->len = bufsize;
2227		buf_desc->is_vm = true;
2228		break;
2229	}
2230	return buf_desc;
2231
2232out:
2233	kfree(buf_desc);
2234	return ERR_PTR(-EAGAIN);
2235}
2236
2237/* map buf_desc on all usable links;
2238 * unused buffers stay mapped as long as the link is up
2239 */
2240static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
2241				     struct smc_buf_desc *buf_desc, bool is_rmb)
2242{
2243	int i, rc = 0, cnt = 0;
2244
2245	/* protect against parallel link reconfiguration */
2246	mutex_lock(&lgr->llc_conf_mutex);
2247	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
2248		struct smc_link *lnk = &lgr->lnk[i];
2249
2250		if (!smc_link_usable(lnk))
2251			continue;
2252		if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
2253			rc = -ENOMEM;
2254			goto out;
2255		}
2256		cnt++;
2257	}
2258out:
2259	mutex_unlock(&lgr->llc_conf_mutex);
2260	if (!rc && !cnt)
2261		rc = -EINVAL;
2262	return rc;
2263}
2264
2265static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
2266						bool is_dmb, int bufsize)
2267{
2268	struct smc_buf_desc *buf_desc;
2269	int rc;
2270
2271	/* try to alloc a new DMB */
2272	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
2273	if (!buf_desc)
2274		return ERR_PTR(-ENOMEM);
2275	if (is_dmb) {
2276		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
2277		if (rc) {
2278			kfree(buf_desc);
2279			if (rc == -ENOMEM)
2280				return ERR_PTR(-EAGAIN);
2281			if (rc == -ENOSPC)
2282				return ERR_PTR(-ENOSPC);
2283			return ERR_PTR(-EIO);
2284		}
2285		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
2286		/* CDC header is stored in the buffer, so pretend it is smaller */
2287		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
2288	} else {
2289		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
2290					     __GFP_NOWARN | __GFP_NORETRY |
2291					     __GFP_NOMEMALLOC);
2292		if (!buf_desc->cpu_addr) {
2293			kfree(buf_desc);
2294			return ERR_PTR(-EAGAIN);
2295		}
2296		buf_desc->len = bufsize;
2297	}
2298	return buf_desc;
2299}
2300
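/* create a send buffer or RMB for a connection: start from the compressed
 * size derived from sk_sndbuf / sk_rcvbuf, reuse a free slot of that size
 * if one exists, otherwise allocate a new buffer; on allocation failure
 * (other than -ENOMEM) retry with the next smaller size, e.g. 512KB ->
 * 256KB -> ... -> 16KB, and write the size actually obtained back to the
 * socket buffer fields
 */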
2301static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
2302{
2303	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
2304	struct smc_connection *conn = &smc->conn;
2305	struct smc_link_group *lgr = conn->lgr;
2306	struct list_head *buf_list;
2307	int bufsize, bufsize_short;
2308	bool is_dgraded = false;
2309	struct mutex *lock;	/* lock buffer list */
2310	int sk_buf_size;
2311
2312	if (is_rmb)
2313		/* use socket recv buffer size (w/o overhead) as start value */
2314		sk_buf_size = smc->sk.sk_rcvbuf;
2315	else
2316		/* use socket send buffer size (w/o overhead) as start value */
2317		sk_buf_size = smc->sk.sk_sndbuf;
2318
2319	for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
2320	     bufsize_short >= 0; bufsize_short--) {
2321		if (is_rmb) {
2322			lock = &lgr->rmbs_lock;
2323			buf_list = &lgr->rmbs[bufsize_short];
2324		} else {
2325			lock = &lgr->sndbufs_lock;
2326			buf_list = &lgr->sndbufs[bufsize_short];
2327		}
2328		bufsize = smc_uncompress_bufsize(bufsize_short);
2329
2330		/* check for reusable slot in the link group */
2331		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
2332		if (buf_desc) {
2333			buf_desc->is_dma_need_sync = 0;
2334			SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
2335			SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
2336			break; /* found reusable slot */
2337		}
2338
2339		if (is_smcd)
2340			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
2341		else
2342			buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
2343
2344		if (PTR_ERR(buf_desc) == -ENOMEM)
2345			break;
2346		if (IS_ERR(buf_desc)) {
2347			if (!is_dgraded) {
2348				is_dgraded = true;
2349				SMC_STAT_RMB_DOWNGRADED(smc, is_smcd, is_rmb);
2350			}
2351			continue;
2352		}
2353
2354		SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
2355		SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
2356		buf_desc->used = 1;
2357		mutex_lock(lock);
2358		list_add(&buf_desc->list, buf_list);
2359		mutex_unlock(lock);
2360		break; /* found */
2361	}
2362
2363	if (IS_ERR(buf_desc))
2364		return PTR_ERR(buf_desc);
2365
2366	if (!is_smcd) {
2367		if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
2368			smcr_buf_unuse(buf_desc, is_rmb, lgr);
2369			return -ENOMEM;
2370		}
2371	}
2372
2373	if (is_rmb) {
2374		conn->rmb_desc = buf_desc;
2375		conn->rmbe_size_short = bufsize_short;
2376		smc->sk.sk_rcvbuf = bufsize;
2377		atomic_set(&conn->bytes_to_rcv, 0);
2378		conn->rmbe_update_limit =
2379			smc_rmb_wnd_update_limit(buf_desc->len);
2380		if (is_smcd)
2381			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
2382	} else {
2383		conn->sndbuf_desc = buf_desc;
2384		smc->sk.sk_sndbuf = bufsize;
2385		atomic_set(&conn->sndbuf_space, bufsize);
2386	}
2387	return 0;
2388}
2389
2390void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
2391{
2392	if (!conn->sndbuf_desc->is_dma_need_sync)
2393		return;
2394	if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
2395	    !smc_link_active(conn->lnk))
2396		return;
2397	smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
2398}
2399
2400void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
2401{
2402	int i;
2403
2404	if (!conn->rmb_desc->is_dma_need_sync)
2405		return;
2406	if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
2407		return;
2408	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
2409		if (!smc_link_active(&conn->lgr->lnk[i]))
2410			continue;
2411		smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
2412				       DMA_FROM_DEVICE);
2413	}
2414}
2415
2416/* create the send and receive buffer for an SMC socket;
2417 * receive buffers are called RMBs;
2418 * (even though the SMC protocol allows more than one RMB-element per RMB,
2419 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
2420 * extra RMB for every connection in a link group)
2421 */
2422int smc_buf_create(struct smc_sock *smc, bool is_smcd)
2423{
2424	int rc;
2425
2426	/* create send buffer */
2427	rc = __smc_buf_create(smc, is_smcd, false);
2428	if (rc)
2429		return rc;
2430	/* create rmb */
2431	rc = __smc_buf_create(smc, is_smcd, true);
2432	if (rc) {
2433		mutex_lock(&smc->conn.lgr->sndbufs_lock);
2434		list_del(&smc->conn.sndbuf_desc->list);
2435		mutex_unlock(&smc->conn.lgr->sndbufs_lock);
2436		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
2437		smc->conn.sndbuf_desc = NULL;
2438	}
2439	return rc;
2440}
2441
2442static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
2443{
2444	int i;
2445
2446	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
2447		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
2448			return i;
2449	}
2450	return -ENOSPC;
2451}
2452
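/* find the rtoken index that holds @rkey for link index @lnk_idx,
 * returns -ENOENT if the rkey is unknown
 */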
2453static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
2454				   u32 rkey)
2455{
2456	int i;
2457
2458	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
2459		if (test_bit(i, lgr->rtokens_used_mask) &&
2460		    lgr->rtokens[i][lnk_idx].rkey == rkey)
2461			return i;
2462	}
2463	return -ENOENT;
2464}
2465
2466/* set rtoken for a new link to an existing rmb */
2467void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
2468		    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
2469{
2470	int rtok_idx;
2471
2472	rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
2473	if (rtok_idx == -ENOENT)
2474		return;
2475	lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
2476	lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
2477}
2478
2479/* set rtoken for a new link whose link_id is given */
2480void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
2481		     __be64 nw_vaddr, __be32 nw_rkey)
2482{
2483	u64 dma_addr = be64_to_cpu(nw_vaddr);
2484	u32 rkey = ntohl(nw_rkey);
2485	bool found = false;
2486	int link_idx;
2487
2488	for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
2489		if (lgr->lnk[link_idx].link_id == link_id) {
2490			found = true;
2491			break;
2492		}
2493	}
2494	if (!found)
2495		return;
2496	lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
2497	lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
2498}
2499
2500/* add a new rtoken from peer */
2501int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
2502{
2503	struct smc_link_group *lgr = smc_get_lgr(lnk);
2504	u64 dma_addr = be64_to_cpu(nw_vaddr);
2505	u32 rkey = ntohl(nw_rkey);
2506	int i;
2507
2508	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
2509		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
2510		    lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
2511		    test_bit(i, lgr->rtokens_used_mask)) {
2512			/* already in list */
2513			return i;
2514		}
2515	}
2516	i = smc_rmb_reserve_rtoken_idx(lgr);
2517	if (i < 0)
2518		return i;
2519	lgr->rtokens[i][lnk->link_idx].rkey = rkey;
2520	lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
2521	return i;
2522}
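
/*
 * Illustrative sketch (not part of the original source, guarded with
 * #if 0): a minimal stand-alone model of the rtoken table filled in by
 * the functions above. rtokens are indexed by [rtoken index][link index];
 * one row describes the same remote RMB on every link, so adding a link
 * only fills in that link's column. All names are made up for the example.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define DEMO_RMBS_PER_LGR	255
#define DEMO_LINKS_PER_LGR	3

struct demo_rtoken {
	uint64_t dma_addr;	/* peer RMB address */
	uint32_t rkey;		/* peer memory region key */
};

static struct demo_rtoken demo_rtokens[DEMO_RMBS_PER_LGR][DEMO_LINKS_PER_LGR];

/* record what the peer announced for one RMB on one link */
static void demo_rtoken_set(int rtok_idx, int link_idx,
			    uint64_t dma_addr, uint32_t rkey)
{
	demo_rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
	demo_rtokens[rtok_idx][link_idx].rkey = rkey;
}

int main(void)
{
	/* the same RMB (row 0) reached via two different links */
	demo_rtoken_set(0, 0, 0x1000, 0x11);
	demo_rtoken_set(0, 1, 0x1000, 0x22);
	printf("rkey on link 1: 0x%x\n", demo_rtokens[0][1].rkey);
	return 0;
}
#endif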
2523
2524/* delete an rtoken from all links */
2525int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
2526{
2527	struct smc_link_group *lgr = smc_get_lgr(lnk);
2528	u32 rkey = ntohl(nw_rkey);
2529	int i, j;
2530
2531	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
2532		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
2533		    test_bit(i, lgr->rtokens_used_mask)) {
2534			for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
2535				lgr->rtokens[i][j].rkey = 0;
2536				lgr->rtokens[i][j].dma_addr = 0;
2537			}
2538			clear_bit(i, lgr->rtokens_used_mask);
2539			return 0;
2540		}
2541	}
2542	return -ENOENT;
2543}
2544
2545/* save rkey and dma_addr received from peer during clc handshake */
2546int smc_rmb_rtoken_handling(struct smc_connection *conn,
2547			    struct smc_link *lnk,
2548			    struct smc_clc_msg_accept_confirm *clc)
2549{
2550	conn->rtoken_idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr,
2551					  clc->r0.rmb_rkey);
2552	if (conn->rtoken_idx < 0)
2553		return conn->rtoken_idx;
2554	return 0;
2555}
2556
2557static void smc_core_going_away(void)
2558{
2559	struct smc_ib_device *smcibdev;
2560	struct smcd_dev *smcd;
2561
2562	mutex_lock(&smc_ib_devices.mutex);
2563	list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
2564		int i;
2565
2566		for (i = 0; i < SMC_MAX_PORTS; i++)
2567			set_bit(i, smcibdev->ports_going_away);
2568	}
2569	mutex_unlock(&smc_ib_devices.mutex);
2570
2571	mutex_lock(&smcd_dev_list.mutex);
2572	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
2573		smcd->going_away = 1;
2574	}
2575	mutex_unlock(&smcd_dev_list.mutex);
2576}
2577
2578/* Clean up all SMC link groups */
2579static void smc_lgrs_shutdown(void)
2580{
2581	struct smcd_dev *smcd;
2582
2583	smc_core_going_away();
2584
2585	smc_smcr_terminate_all(NULL);
2586
2587	mutex_lock(&smcd_dev_list.mutex);
2588	list_for_each_entry(smcd, &smcd_dev_list.list, list)
2589		smc_smcd_terminate_all(smcd);
2590	mutex_unlock(&smcd_dev_list.mutex);
2591}
2592
2593static int smc_core_reboot_event(struct notifier_block *this,
2594				 unsigned long event, void *ptr)
2595{
2596	smc_lgrs_shutdown();
2597	smc_ib_unregister_client();
2598	return 0;
2599}
2600
2601static struct notifier_block smc_reboot_notifier = {
2602	.notifier_call = smc_core_reboot_event,
2603};
2604
2605int __init smc_core_init(void)
2606{
2607	return register_reboot_notifier(&smc_reboot_notifier);
2608}
2609
2610/* Called (from smc_exit) when module is removed */
2611void smc_core_exit(void)
2612{
2613	unregister_reboot_notifier(&smc_reboot_notifier);
2614	smc_lgrs_shutdown();
2615}