v6.13.7
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
   4 *
   5 *  CLC (connection layer control) handshake over initial TCP socket to
   6 *  prepare for RDMA traffic
   7 *
   8 *  Copyright IBM Corp. 2016, 2018
   9 *
  10 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
  11 */
  12
  13#include <linux/in.h>
  14#include <linux/inetdevice.h>
  15#include <linux/if_ether.h>
  16#include <linux/sched/signal.h>
  17#include <linux/utsname.h>
  18#include <linux/ctype.h>
  19
  20#include <net/addrconf.h>
  21#include <net/sock.h>
  22#include <net/tcp.h>
  23
  24#include "smc.h"
  25#include "smc_core.h"
  26#include "smc_clc.h"
  27#include "smc_ib.h"
  28#include "smc_ism.h"
  29#include "smc_netlink.h"
  30
  31#define SMCR_CLC_ACCEPT_CONFIRM_LEN 68
  32#define SMCD_CLC_ACCEPT_CONFIRM_LEN 48
  33#define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78
  34#define SMCR_CLC_ACCEPT_CONFIRM_LEN_V2 108
  35#define SMC_CLC_RECV_BUF_LEN	100
  36
  37/* eye catcher "SMCR" EBCDIC for CLC messages */
  38static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
  39/* eye catcher "SMCD" EBCDIC for CLC messages */
  40static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'};
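/* (In EBCDIC, 0xe2 0xd4 0xc3 0xd9 spells "SMCR" and 0xe2 0xd4 0xc3 0xc4
 * spells "SMCD"; each CLC message is framed by these eyecatchers.)
 */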
  41
  42static u8 smc_hostname[SMC_MAX_HOSTNAME_LEN];
  43
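/* table of user-defined EIDs (UEIDs) offered in SMC v2 CLC proposals;
 * 'lock' protects 'list' and 'ueid_cnt', and 'seid_enabled' controls
 * whether the system EID (SEID) may be used as well
 */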
  44struct smc_clc_eid_table {
  45	rwlock_t lock;
  46	struct list_head list;
  47	u8 ueid_cnt;
  48	u8 seid_enabled;
  49};
  50
  51static struct smc_clc_eid_table smc_clc_eid_table;
  52
  53struct smc_clc_eid_entry {
  54	struct list_head list;
  55	u8 eid[SMC_MAX_EID_LEN];
  56};
  57
  58/* The size of a user EID is 32 characters.
  59 * Valid characters should be (single-byte character set) A-Z, 0-9, '.' and '-'.
  60 * Blanks should only be used to pad to the expected size.
  61 * First character must be alphanumeric.
  62 */
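/* For example, a UEID such as "EXAMPLE.EID-1" blank-padded to 32 bytes
 * passes the check below, while a lowercase or all-blank string does not.
 */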
  63static bool smc_clc_ueid_valid(char *ueid)
  64{
  65	char *end = ueid + SMC_MAX_EID_LEN;
  66
  67	while (--end >= ueid && isspace(*end))
  68		;
  69	if (end < ueid)
  70		return false;
  71	if (!isalnum(*ueid) || islower(*ueid))
  72		return false;
  73	while (ueid <= end) {
  74		if ((!isalnum(*ueid) || islower(*ueid)) && *ueid != '.' &&
  75		    *ueid != '-')
  76			return false;
  77		ueid++;
  78	}
  79	return true;
  80}
  81
  82static int smc_clc_ueid_add(char *ueid)
  83{
  84	struct smc_clc_eid_entry *new_ueid, *tmp_ueid;
  85	int rc;
  86
  87	if (!smc_clc_ueid_valid(ueid))
  88		return -EINVAL;
  89
  90	/* add a new ueid entry to the ueid table if there isn't one */
  91	new_ueid = kzalloc(sizeof(*new_ueid), GFP_KERNEL);
  92	if (!new_ueid)
  93		return -ENOMEM;
  94	memcpy(new_ueid->eid, ueid, SMC_MAX_EID_LEN);
  95
  96	write_lock(&smc_clc_eid_table.lock);
  97	if (smc_clc_eid_table.ueid_cnt >= SMC_MAX_UEID) {
  98		rc = -ERANGE;
  99		goto err_out;
 100	}
 101	list_for_each_entry(tmp_ueid, &smc_clc_eid_table.list, list) {
 102		if (!memcmp(tmp_ueid->eid, ueid, SMC_MAX_EID_LEN)) {
 103			rc = -EEXIST;
 104			goto err_out;
 105		}
 106	}
 107	list_add_tail(&new_ueid->list, &smc_clc_eid_table.list);
 108	smc_clc_eid_table.ueid_cnt++;
 109	write_unlock(&smc_clc_eid_table.lock);
 110	return 0;
 111
 112err_out:
 113	write_unlock(&smc_clc_eid_table.lock);
 114	kfree(new_ueid);
 115	return rc;
 116}
 117
 118int smc_clc_ueid_count(void)
 119{
 120	int count;
 121
 122	read_lock(&smc_clc_eid_table.lock);
 123	count = smc_clc_eid_table.ueid_cnt;
 124	read_unlock(&smc_clc_eid_table.lock);
 125
 126	return count;
 127}
 128
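/* netlink handler for adding a UEID; the SMC_NLA_EID_TABLE_ENTRY attribute
 * must carry exactly SMC_MAX_EID_LEN (32) bytes plus a terminating NUL
 */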
 129int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info)
 130{
 131	struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY];
 132	char *ueid;
 133
 134	if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1)
 135		return -EINVAL;
 136	ueid = (char *)nla_data(nla_ueid);
 137
 138	return smc_clc_ueid_add(ueid);
 139}
 140
 141/* remove one or all ueid entries from the table */
 142static int smc_clc_ueid_remove(char *ueid)
 143{
 144	struct smc_clc_eid_entry *lst_ueid, *tmp_ueid;
 145	int rc = -ENOENT;
 146
 147	/* remove table entry */
 148	write_lock(&smc_clc_eid_table.lock);
 149	list_for_each_entry_safe(lst_ueid, tmp_ueid, &smc_clc_eid_table.list,
 150				 list) {
 151		if (!ueid || !memcmp(lst_ueid->eid, ueid, SMC_MAX_EID_LEN)) {
 152			list_del(&lst_ueid->list);
 153			smc_clc_eid_table.ueid_cnt--;
 154			kfree(lst_ueid);
 155			rc = 0;
 156		}
 157	}
 158#if IS_ENABLED(CONFIG_S390)
 159	if (!rc && !smc_clc_eid_table.ueid_cnt) {
 160		smc_clc_eid_table.seid_enabled = 1;
 161		rc = -EAGAIN;	/* indicate success and enabling of seid */
 162	}
 163#endif
 164	write_unlock(&smc_clc_eid_table.lock);
 165	return rc;
 166}
 167
 168int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info)
 169{
 170	struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY];
 171	char *ueid;
 172
 173	if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1)
 174		return -EINVAL;
 175	ueid = (char *)nla_data(nla_ueid);
 176
 177	return smc_clc_ueid_remove(ueid);
 178}
 179
 180int smc_nl_flush_ueid(struct sk_buff *skb, struct genl_info *info)
 181{
 182	smc_clc_ueid_remove(NULL);
 183	return 0;
 184}
 185
 186static int smc_nl_ueid_dumpinfo(struct sk_buff *skb, u32 portid, u32 seq,
 187				u32 flags, char *ueid)
 188{
 189	char ueid_str[SMC_MAX_EID_LEN + 1];
 190	void *hdr;
 191
 192	hdr = genlmsg_put(skb, portid, seq, &smc_gen_nl_family,
 193			  flags, SMC_NETLINK_DUMP_UEID);
 194	if (!hdr)
 195		return -ENOMEM;
 196	memcpy(ueid_str, ueid, SMC_MAX_EID_LEN);
 197	ueid_str[SMC_MAX_EID_LEN] = 0;
 198	if (nla_put_string(skb, SMC_NLA_EID_TABLE_ENTRY, ueid_str)) {
 199		genlmsg_cancel(skb, hdr);
 200		return -EMSGSIZE;
 201	}
 202	genlmsg_end(skb, hdr);
 203	return 0;
 204}
 205
 206static int _smc_nl_ueid_dump(struct sk_buff *skb, u32 portid, u32 seq,
 207			     int start_idx)
 208{
 209	struct smc_clc_eid_entry *lst_ueid;
 210	int idx = 0;
 211
 212	read_lock(&smc_clc_eid_table.lock);
 213	list_for_each_entry(lst_ueid, &smc_clc_eid_table.list, list) {
 214		if (idx++ < start_idx)
 215			continue;
 216		if (smc_nl_ueid_dumpinfo(skb, portid, seq, NLM_F_MULTI,
 217					 lst_ueid->eid)) {
 218			--idx;
 219			break;
 220		}
 221	}
 222	read_unlock(&smc_clc_eid_table.lock);
 223	return idx;
 224}
 225
 226int smc_nl_dump_ueid(struct sk_buff *skb, struct netlink_callback *cb)
 227{
 228	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
 229	int idx;
 230
 231	idx = _smc_nl_ueid_dump(skb, NETLINK_CB(cb->skb).portid,
 232				cb->nlh->nlmsg_seq, cb_ctx->pos[0]);
 233
 234	cb_ctx->pos[0] = idx;
 235	return skb->len;
 236}
 237
 238int smc_nl_dump_seid(struct sk_buff *skb, struct netlink_callback *cb)
 239{
 240	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
 241	char seid_str[SMC_MAX_EID_LEN + 1];
 242	u8 seid_enabled;
 243	void *hdr;
 244	u8 *seid;
 245
 246	if (cb_ctx->pos[0])
 247		return skb->len;
 248
 249	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 250			  &smc_gen_nl_family, NLM_F_MULTI,
 251			  SMC_NETLINK_DUMP_SEID);
 252	if (!hdr)
 253		return -ENOMEM;
 254	if (!smc_ism_is_v2_capable())
 255		goto end;
 256
 257	smc_ism_get_system_eid(&seid);
 258	memcpy(seid_str, seid, SMC_MAX_EID_LEN);
 259	seid_str[SMC_MAX_EID_LEN] = 0;
 260	if (nla_put_string(skb, SMC_NLA_SEID_ENTRY, seid_str))
 261		goto err;
 262	read_lock(&smc_clc_eid_table.lock);
 263	seid_enabled = smc_clc_eid_table.seid_enabled;
 264	read_unlock(&smc_clc_eid_table.lock);
 265	if (nla_put_u8(skb, SMC_NLA_SEID_ENABLED, seid_enabled))
 266		goto err;
 267end:
 268	genlmsg_end(skb, hdr);
 269	cb_ctx->pos[0]++;
 270	return skb->len;
 271err:
 272	genlmsg_cancel(skb, hdr);
 273	return -EMSGSIZE;
 274}
 275
 276int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info)
 277{
 278#if IS_ENABLED(CONFIG_S390)
 279	write_lock(&smc_clc_eid_table.lock);
 280	smc_clc_eid_table.seid_enabled = 1;
 281	write_unlock(&smc_clc_eid_table.lock);
 282	return 0;
 283#else
 284	return -EOPNOTSUPP;
 285#endif
 286}
 287
 288int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info)
 289{
 290	int rc = 0;
 291
 292#if IS_ENABLED(CONFIG_S390)
 293	write_lock(&smc_clc_eid_table.lock);
 294	if (!smc_clc_eid_table.ueid_cnt)
 295		rc = -ENOENT;
 296	else
 297		smc_clc_eid_table.seid_enabled = 0;
 298	write_unlock(&smc_clc_eid_table.lock);
 299#else
 300	rc = -EOPNOTSUPP;
 301#endif
 302	return rc;
 303}
 304
 305static bool _smc_clc_match_ueid(u8 *peer_ueid)
 306{
 307	struct smc_clc_eid_entry *tmp_ueid;
 308
 309	list_for_each_entry(tmp_ueid, &smc_clc_eid_table.list, list) {
 310		if (!memcmp(tmp_ueid->eid, peer_ueid, SMC_MAX_EID_LEN))
 311			return true;
 312	}
 313	return false;
 314}
 315
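/* pick the EID for a v2 connection: prefer the SEID when both sides enable
 * it and the peer's SEID equals the local one, otherwise use the first of
 * the peer's UEIDs that is also configured locally; returns true and copies
 * the choice into negotiated_eid on a match
 */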
 316bool smc_clc_match_eid(u8 *negotiated_eid,
 317		       struct smc_clc_v2_extension *smc_v2_ext,
 318		       u8 *peer_eid, u8 *local_eid)
 319{
 320	bool match = false;
 321	int i;
 322
 323	negotiated_eid[0] = 0;
 324	read_lock(&smc_clc_eid_table.lock);
 325	if (peer_eid && local_eid &&
 326	    smc_clc_eid_table.seid_enabled &&
 327	    smc_v2_ext->hdr.flag.seid &&
 328	    !memcmp(peer_eid, local_eid, SMC_MAX_EID_LEN)) {
 329		memcpy(negotiated_eid, peer_eid, SMC_MAX_EID_LEN);
 330		match = true;
 331		goto out;
 332	}
 333
 334	for (i = 0; i < smc_v2_ext->hdr.eid_cnt; i++) {
 335		if (_smc_clc_match_ueid(smc_v2_ext->user_eids[i])) {
 336			memcpy(negotiated_eid, smc_v2_ext->user_eids[i],
 337			       SMC_MAX_EID_LEN);
 338			match = true;
 339			goto out;
 340		}
 341	}
 342out:
 343	read_unlock(&smc_clc_eid_table.lock);
 344	return match;
 345}
 346
 347/* check arriving CLC proposal */
 348static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc)
 349{
 350	struct smc_clc_msg_proposal_prefix *pclc_prfx;
 351	struct smc_clc_smcd_v2_extension *smcd_v2_ext;
 352	struct smc_clc_msg_hdr *hdr = &pclc->hdr;
 353	struct smc_clc_v2_extension *v2_ext;
 354
 355	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
 356	if (!pclc_prfx ||
 357	    pclc_prfx->ipv6_prefixes_cnt > SMC_CLC_MAX_V6_PREFIX)
 358		return false;
 359
 360	if (hdr->version == SMC_V1) {
 361		if (hdr->typev1 == SMC_TYPE_N)
 362			return false;
 363		if (ntohs(hdr->length) !=
 364			sizeof(*pclc) + ntohs(pclc->iparea_offset) +
 365			sizeof(*pclc_prfx) +
 366			pclc_prfx->ipv6_prefixes_cnt *
 367				sizeof(struct smc_clc_ipv6_prefix) +
 368			sizeof(struct smc_clc_msg_trail))
 369			return false;
 370	} else {
 371		v2_ext = smc_get_clc_v2_ext(pclc);
 372		if ((hdr->typev2 != SMC_TYPE_N &&
 373		     (!v2_ext || v2_ext->hdr.eid_cnt > SMC_CLC_MAX_UEID)) ||
 374		    (smcd_indicated(hdr->typev2) &&
 375		     v2_ext->hdr.ism_gid_cnt > SMCD_CLC_MAX_V2_GID_ENTRIES))
 376			return false;
 377
 378		if (ntohs(hdr->length) !=
 379			sizeof(*pclc) +
 380			sizeof(struct smc_clc_msg_smcd) +
 381			(hdr->typev1 != SMC_TYPE_N ?
 382				sizeof(*pclc_prfx) +
 383				pclc_prfx->ipv6_prefixes_cnt *
 384				sizeof(struct smc_clc_ipv6_prefix) : 0) +
 385			(hdr->typev2 != SMC_TYPE_N ?
 386				sizeof(*v2_ext) +
 387				v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN : 0) +
 388			(smcd_indicated(hdr->typev2) ?
 389				sizeof(*smcd_v2_ext) + v2_ext->hdr.ism_gid_cnt *
 390					sizeof(struct smc_clc_smcd_gid_chid) :
 391				0) +
 392			sizeof(struct smc_clc_msg_trail))
 393			return false;
 394	}
 395	return true;
 396}
 397
 398/* check arriving CLC accept or confirm */
 399static bool
 400smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm *clc)
 401{
 402	struct smc_clc_msg_hdr *hdr = &clc->hdr;
 403
 404	if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D)
 405		return false;
 406	if (hdr->version == SMC_V1) {
 407		if ((hdr->typev1 == SMC_TYPE_R &&
 408		     ntohs(hdr->length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) ||
 409		    (hdr->typev1 == SMC_TYPE_D &&
 410		     ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN))
 411			return false;
 412	} else {
 413		if (hdr->typev1 == SMC_TYPE_D &&
 414		    ntohs(hdr->length) < SMCD_CLC_ACCEPT_CONFIRM_LEN_V2)
 415			return false;
 416		if (hdr->typev1 == SMC_TYPE_R &&
 417		    ntohs(hdr->length) < SMCR_CLC_ACCEPT_CONFIRM_LEN_V2)
 418			return false;
 419	}
 420	return true;
 421}
 422
 423/* check arriving CLC decline */
 424static bool
 425smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc)
 426{
 427	struct smc_clc_msg_hdr *hdr = &dclc->hdr;
 428
 429	if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D)
 430		return false;
 431	if (hdr->version == SMC_V1) {
 432		if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline))
 433			return false;
 434	} else {
 435		if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline_v2))
 436			return false;
 437	}
 438	return true;
 439}
 440
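/* fill the first contact extension of a v2 ACCEPT/CONFIRM: OS type, SMC
 * release and blank-padded hostname; from release 1 on also the negotiated
 * max_conns/max_links (SMC-R only) and the feature mask; returns the number
 * of extension bytes to send
 */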
 441static int smc_clc_fill_fce_v2x(struct smc_clc_first_contact_ext_v2x *fce_v2x,
 442				struct smc_init_info *ini)
 443{
 444	int ret = sizeof(*fce_v2x);
 445
 446	memset(fce_v2x, 0, sizeof(*fce_v2x));
 447	fce_v2x->fce_v2_base.os_type = SMC_CLC_OS_LINUX;
 448	fce_v2x->fce_v2_base.release = ini->release_nr;
 449	memcpy(fce_v2x->fce_v2_base.hostname,
 450	       smc_hostname, sizeof(smc_hostname));
 451	if (ini->is_smcd && ini->release_nr < SMC_RELEASE_1) {
 452		ret = sizeof(struct smc_clc_first_contact_ext);
 453		goto out;
 454	}
 455
 456	if (ini->release_nr >= SMC_RELEASE_1) {
 457		if (!ini->is_smcd) {
 458			fce_v2x->max_conns = ini->max_conns;
 459			fce_v2x->max_links = ini->max_links;
 460		}
 461		fce_v2x->feature_mask = htons(ini->feature_mask);
 462	}
 463
 464out:
 465	return ret;
 466}
 467
 468/* check if received message has a correct header length and contains valid
 469 * heading and trailing eyecatchers
 470 */
  471static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl)
  472{
 473	struct smc_clc_msg_accept_confirm *clc;
 474	struct smc_clc_msg_proposal *pclc;
 475	struct smc_clc_msg_decline *dclc;
 476	struct smc_clc_msg_trail *trl;
 477
 478	if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
 479	    memcmp(clcm->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
 480		return false;
 481	switch (clcm->type) {
  482	case SMC_CLC_PROPOSAL:
  483		pclc = (struct smc_clc_msg_proposal *)clcm;
  484		if (!smc_clc_msg_prop_valid(pclc))
 485			return false;
 486		trl = (struct smc_clc_msg_trail *)
 487			((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl));
 488		break;
  489	case SMC_CLC_ACCEPT:
  490	case SMC_CLC_CONFIRM:
  491		clc = (struct smc_clc_msg_accept_confirm *)clcm;
  492		if (!smc_clc_msg_acc_conf_valid(clc))
 493			return false;
 494		trl = (struct smc_clc_msg_trail *)
 495			((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl));
 496		break;
 497	case SMC_CLC_DECLINE:
 498		dclc = (struct smc_clc_msg_decline *)clcm;
 499		if (!smc_clc_msg_decl_valid(dclc))
 500			return false;
 501		check_trl = false;
 502		break;
 503	default:
 504		return false;
 505	}
 506	if (check_trl &&
 507	    memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
 508	    memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
 509		return false;
 510	return true;
 511}
 512
 513/* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */
 514static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4,
 515				 struct smc_clc_msg_proposal_prefix *prop)
 516{
 517	struct in_device *in_dev = __in_dev_get_rcu(dst->dev);
 518	const struct in_ifaddr *ifa;
 519
 520	if (!in_dev)
 521		return -ENODEV;
 522
 523	in_dev_for_each_ifa_rcu(ifa, in_dev) {
 524		if (!inet_ifa_match(ipv4, ifa))
 525			continue;
 526		prop->prefix_len = inet_mask_len(ifa->ifa_mask);
 527		prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask;
 528		/* prop->ipv6_prefixes_cnt = 0; already done by memset before */
 529		return 0;
 530	}
 531	return -ENOENT;
 532}
 533
 534/* fill CLC proposal msg with ipv6 prefixes from device */
 535static int smc_clc_prfx_set6_rcu(struct dst_entry *dst,
 536				 struct smc_clc_msg_proposal_prefix *prop,
 537				 struct smc_clc_ipv6_prefix *ipv6_prfx)
 538{
 539#if IS_ENABLED(CONFIG_IPV6)
 540	struct inet6_dev *in6_dev = __in6_dev_get(dst->dev);
 541	struct inet6_ifaddr *ifa;
 542	int cnt = 0;
 543
 544	if (!in6_dev)
 545		return -ENODEV;
 546	/* use a maximum of 8 IPv6 prefixes from device */
 547	list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
 548		if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
 549			continue;
 550		ipv6_addr_prefix(&ipv6_prfx[cnt].prefix,
 551				 &ifa->addr, ifa->prefix_len);
 552		ipv6_prfx[cnt].prefix_len = ifa->prefix_len;
 553		cnt++;
 554		if (cnt == SMC_CLC_MAX_V6_PREFIX)
 555			break;
 556	}
 557	prop->ipv6_prefixes_cnt = cnt;
 558	if (cnt)
 559		return 0;
 560#endif
 561	return -ENOENT;
 562}
 563
 564/* retrieve and set prefixes in CLC proposal msg */
 565static int smc_clc_prfx_set(struct socket *clcsock,
 566			    struct smc_clc_msg_proposal_prefix *prop,
 567			    struct smc_clc_ipv6_prefix *ipv6_prfx)
 568{
 569	struct dst_entry *dst = sk_dst_get(clcsock->sk);
 570	struct sockaddr_storage addrs;
 571	struct sockaddr_in6 *addr6;
 572	struct sockaddr_in *addr;
 573	int rc = -ENOENT;
 574
 
 575	if (!dst) {
 576		rc = -ENOTCONN;
 577		goto out;
 578	}
 579	if (!dst->dev) {
 580		rc = -ENODEV;
 581		goto out_rel;
 582	}
 583	/* get address to which the internal TCP socket is bound */
 584	if (kernel_getsockname(clcsock, (struct sockaddr *)&addrs) < 0)
 585		goto out_rel;
 586	/* analyze IP specific data of net_device belonging to TCP socket */
 587	addr6 = (struct sockaddr_in6 *)&addrs;
 588	rcu_read_lock();
 589	if (addrs.ss_family == PF_INET) {
 590		/* IPv4 */
 591		addr = (struct sockaddr_in *)&addrs;
 592		rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop);
 593	} else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) {
 594		/* mapped IPv4 address - peer is IPv4 only */
 595		rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3],
 596					   prop);
 597	} else {
 598		/* IPv6 */
 599		rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx);
 600	}
 601	rcu_read_unlock();
 602out_rel:
 603	dst_release(dst);
 604out:
 605	return rc;
 606}
 607
 608/* match ipv4 addrs of dev against addr in CLC proposal */
 609static int smc_clc_prfx_match4_rcu(struct net_device *dev,
 610				   struct smc_clc_msg_proposal_prefix *prop)
 611{
 612	struct in_device *in_dev = __in_dev_get_rcu(dev);
 613	const struct in_ifaddr *ifa;
 614
 615	if (!in_dev)
 616		return -ENODEV;
 617	in_dev_for_each_ifa_rcu(ifa, in_dev) {
 618		if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) &&
 619		    inet_ifa_match(prop->outgoing_subnet, ifa))
 620			return 0;
 621	}
 622
 623	return -ENOENT;
 624}
 625
 626/* match ipv6 addrs of dev against addrs in CLC proposal */
 627static int smc_clc_prfx_match6_rcu(struct net_device *dev,
 628				   struct smc_clc_msg_proposal_prefix *prop)
 629{
 630#if IS_ENABLED(CONFIG_IPV6)
 631	struct inet6_dev *in6_dev = __in6_dev_get(dev);
 632	struct smc_clc_ipv6_prefix *ipv6_prfx;
 633	struct inet6_ifaddr *ifa;
 634	int i, max;
 635
 636	if (!in6_dev)
 637		return -ENODEV;
 638	/* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */
 639	ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop));
 640	max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX);
 641	list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
 642		if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
 643			continue;
 644		for (i = 0; i < max; i++) {
 645			if (ifa->prefix_len == ipv6_prfx[i].prefix_len &&
 646			    ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix,
 647					      ifa->prefix_len))
 648				return 0;
 649		}
 650	}
 651#endif
 652	return -ENOENT;
 653}
 654
 655/* check if proposed prefixes match one of our device prefixes */
 656int smc_clc_prfx_match(struct socket *clcsock,
 657		       struct smc_clc_msg_proposal_prefix *prop)
 658{
 659	struct dst_entry *dst = sk_dst_get(clcsock->sk);
 660	int rc;
 661
 662	if (!dst) {
 663		rc = -ENOTCONN;
 664		goto out;
 665	}
 666	if (!dst->dev) {
 667		rc = -ENODEV;
 668		goto out_rel;
 669	}
 670	rcu_read_lock();
 671	if (!prop->ipv6_prefixes_cnt)
 672		rc = smc_clc_prfx_match4_rcu(dst->dev, prop);
 673	else
 674		rc = smc_clc_prfx_match6_rcu(dst->dev, prop);
 675	rcu_read_unlock();
 676out_rel:
 677	dst_release(dst);
 678out:
 679	return rc;
 680}
 681
 682/* Wait for data on the tcp-socket, analyze received data
 683 * Returns:
 684 * 0 if success and it was not a decline that we received.
 685 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
 686 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
 687 */
 688int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 689		     u8 expected_type, unsigned long timeout)
 690{
 691	long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo;
 692	struct sock *clc_sk = smc->clcsock->sk;
 693	struct smc_clc_msg_hdr *clcm = buf;
 694	struct msghdr msg = {NULL, 0};
 695	int reason_code = 0;
 696	struct kvec vec = {buf, buflen};
 697	int len, datlen, recvlen;
 698	bool check_trl = true;
 699	int krflags;
 700
 701	/* peek the first few bytes to determine length of data to receive
 702	 * so we don't consume any subsequent CLC message or payload data
 703	 * in the TCP byte stream
 704	 */
 705	/*
 706	 * Caller must make sure that buflen is no less than
 707	 * sizeof(struct smc_clc_msg_hdr)
 708	 */
 709	krflags = MSG_PEEK | MSG_WAITALL;
 710	clc_sk->sk_rcvtimeo = timeout;
 711	iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1,
 712			sizeof(struct smc_clc_msg_hdr));
 713	len = sock_recvmsg(smc->clcsock, &msg, krflags);
 714	if (signal_pending(current)) {
 715		reason_code = -EINTR;
 716		clc_sk->sk_err = EINTR;
 717		smc->sk.sk_err = EINTR;
 718		goto out;
 719	}
 720	if (clc_sk->sk_err) {
 721		reason_code = -clc_sk->sk_err;
 722		if (clc_sk->sk_err == EAGAIN &&
 723		    expected_type == SMC_CLC_DECLINE)
 724			clc_sk->sk_err = 0; /* reset for fallback usage */
 725		else
 726			smc->sk.sk_err = clc_sk->sk_err;
 727		goto out;
 728	}
 729	if (!len) { /* peer has performed orderly shutdown */
 730		smc->sk.sk_err = ECONNRESET;
 731		reason_code = -ECONNRESET;
 732		goto out;
 733	}
 734	if (len < 0) {
 735		if (len != -EAGAIN || expected_type != SMC_CLC_DECLINE)
 736			smc->sk.sk_err = -len;
 737		reason_code = len;
 738		goto out;
 739	}
 740	datlen = ntohs(clcm->length);
 741	if ((len < sizeof(struct smc_clc_msg_hdr)) ||
  742	    (clcm->version < SMC_V1) ||
 743	    ((clcm->type != SMC_CLC_DECLINE) &&
 744	     (clcm->type != expected_type))) {
 745		smc->sk.sk_err = EPROTO;
 746		reason_code = -EPROTO;
 747		goto out;
 748	}
 749
 750	/* receive the complete CLC message */
 751	memset(&msg, 0, sizeof(struct msghdr));
 752	if (datlen > buflen) {
 753		check_trl = false;
 754		recvlen = buflen;
 755	} else {
 756		recvlen = datlen;
 757	}
 758	iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, recvlen);
 759	krflags = MSG_WAITALL;
 760	len = sock_recvmsg(smc->clcsock, &msg, krflags);
 761	if (len < recvlen || !smc_clc_msg_hdr_valid(clcm, check_trl)) {
 762		smc->sk.sk_err = EPROTO;
 763		reason_code = -EPROTO;
 764		goto out;
 765	}
 766	datlen -= len;
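	/* a CLC proposal may be larger than the caller's buffer; drain the
	 * remainder in SMC_CLC_RECV_BUF_LEN chunks so the next CLC message
	 * starts at the expected position in the TCP byte stream
	 */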
 767	while (datlen) {
 768		u8 tmp[SMC_CLC_RECV_BUF_LEN];
 769
 770		vec.iov_base = &tmp;
 771		vec.iov_len = SMC_CLC_RECV_BUF_LEN;
 772		/* receive remaining proposal message */
 773		recvlen = datlen > SMC_CLC_RECV_BUF_LEN ?
 774						SMC_CLC_RECV_BUF_LEN : datlen;
 775		iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, recvlen);
 776		len = sock_recvmsg(smc->clcsock, &msg, krflags);
 777		if (len < recvlen) {
 778			smc->sk.sk_err = EPROTO;
 779			reason_code = -EPROTO;
 780			goto out;
 781		}
 782		datlen -= len;
 783	}
 784	if (clcm->type == SMC_CLC_DECLINE) {
 785		struct smc_clc_msg_decline *dclc;
 786
 787		dclc = (struct smc_clc_msg_decline *)clcm;
 788		reason_code = SMC_CLC_DECL_PEERDECL;
 789		smc->peer_diagnosis = ntohl(dclc->peer_diagnosis);
 790		if (((struct smc_clc_msg_decline *)buf)->hdr.typev2 &
 791						SMC_FIRST_CONTACT_MASK) {
 792			smc->conn.lgr->sync_err = 1;
 793			smc_lgr_terminate_sched(smc->conn.lgr);
 794		}
 795	}
 796
 797out:
 798	clc_sk->sk_rcvtimeo = rcvtimeo;
 799	return reason_code;
 800}
 801
 802/* send CLC DECLINE message across internal TCP socket */
 803int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version)
 804{
 805	struct smc_clc_msg_decline *dclc_v1;
 806	struct smc_clc_msg_decline_v2 dclc;
 807	struct msghdr msg;
 808	int len, send_len;
 809	struct kvec vec;
 
 810
 811	dclc_v1 = (struct smc_clc_msg_decline *)&dclc;
 812	memset(&dclc, 0, sizeof(dclc));
 813	memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 814	dclc.hdr.type = SMC_CLC_DECLINE;
 815	dclc.hdr.version = version;
 816	dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX;
 817	dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ?
 818						SMC_FIRST_CONTACT_MASK : 0;
 819	if ((!smc_conn_lgr_valid(&smc->conn) || !smc->conn.lgr->is_smcd) &&
 820	    smc_ib_is_valid_local_systemid())
 821		memcpy(dclc.id_for_peer, local_systemid,
 822		       sizeof(local_systemid));
 823	dclc.peer_diagnosis = htonl(peer_diag_info);
 824	if (version == SMC_V1) {
 825		memcpy(dclc_v1->trl.eyecatcher, SMC_EYECATCHER,
 826		       sizeof(SMC_EYECATCHER));
 827		send_len = sizeof(*dclc_v1);
 828	} else {
 829		memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER,
 830		       sizeof(SMC_EYECATCHER));
 831		send_len = sizeof(dclc);
 832	}
 833	dclc.hdr.length = htons(send_len);
 834
 835	memset(&msg, 0, sizeof(msg));
 836	vec.iov_base = &dclc;
 837	vec.iov_len = send_len;
 838	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, send_len);
  839	if (len < 0 || len < send_len)
 840		len = -EPROTO;
 841	return len > 0 ? 0 : len;
 842}
 843
 844/* send CLC PROPOSAL message across internal TCP socket */
  845int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
 846{
 847	struct smc_clc_smcd_v2_extension *smcd_v2_ext;
 848	struct smc_clc_msg_proposal_prefix *pclc_prfx;
 849	struct smc_clc_msg_proposal *pclc_base;
 850	struct smc_clc_smcd_gid_chid *gidchids;
 851	struct smc_clc_msg_proposal_area *pclc;
 852	struct smc_clc_ipv6_prefix *ipv6_prfx;
 853	struct net *net = sock_net(&smc->sk);
 854	struct smc_clc_v2_extension *v2_ext;
 855	struct smc_clc_msg_smcd *pclc_smcd;
 856	struct smc_clc_msg_trail *trl;
 857	struct smcd_dev *smcd;
 858	int len, i, plen, rc;
 859	int reason_code = 0;
 860	struct kvec vec[8];
 861	struct msghdr msg;
 862
 863	pclc = kzalloc(sizeof(*pclc), GFP_KERNEL);
 864	if (!pclc)
 865		return -ENOMEM;
 866
 867	pclc_base = &pclc->pclc_base;
 868	pclc_smcd = &pclc->pclc_smcd;
 869	pclc_prfx = &pclc->pclc_prfx;
 870	ipv6_prfx = pclc->pclc_prfx_ipv6;
 871	v2_ext = container_of(&pclc->pclc_v2_ext,
 872			      struct smc_clc_v2_extension, fixed);
 873	smcd_v2_ext = container_of(&pclc->pclc_smcd_v2_ext,
 874				   struct smc_clc_smcd_v2_extension, fixed);
 875	gidchids = pclc->pclc_gidchids;
 876	trl = &pclc->pclc_trl;
 877
 878	pclc_base->hdr.version = SMC_V2;
 879	pclc_base->hdr.typev1 = ini->smc_type_v1;
 880	pclc_base->hdr.typev2 = ini->smc_type_v2;
 881	plen = sizeof(*pclc_base) + sizeof(*pclc_smcd) + sizeof(*trl);
 882
 883	/* retrieve ip prefixes for CLC proposal msg */
 884	if (ini->smc_type_v1 != SMC_TYPE_N) {
 885		rc = smc_clc_prfx_set(smc->clcsock, pclc_prfx, ipv6_prfx);
 886		if (rc) {
 887			if (ini->smc_type_v2 == SMC_TYPE_N) {
 888				kfree(pclc);
 889				return SMC_CLC_DECL_CNFERR;
 890			}
 891			pclc_base->hdr.typev1 = SMC_TYPE_N;
 892		} else {
 893			pclc_base->iparea_offset = htons(sizeof(*pclc_smcd));
 894			plen += sizeof(*pclc_prfx) +
 895					pclc_prfx->ipv6_prefixes_cnt *
 896					sizeof(ipv6_prfx[0]);
 897		}
 898	}
 899
 900	/* build SMC Proposal CLC message */
 901	memcpy(pclc_base->hdr.eyecatcher, SMC_EYECATCHER,
 902	       sizeof(SMC_EYECATCHER));
 903	pclc_base->hdr.type = SMC_CLC_PROPOSAL;
  904	if (smcr_indicated(ini->smc_type_v1)) {
 905		/* add SMC-R specifics */
 906		memcpy(pclc_base->lcl.id_for_peer, local_systemid,
 907		       sizeof(local_systemid));
 908		memcpy(pclc_base->lcl.gid, ini->ib_gid, SMC_GID_SIZE);
 909		memcpy(pclc_base->lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1],
 910		       ETH_ALEN);
 
 911	}
 912	if (smcd_indicated(ini->smc_type_v1)) {
 913		struct smcd_gid smcd_gid;
 914
 915		/* add SMC-D specifics */
 916		if (ini->ism_dev[0]) {
 917			smcd = ini->ism_dev[0];
 918			smcd->ops->get_local_gid(smcd, &smcd_gid);
 919			pclc_smcd->ism.gid = htonll(smcd_gid.gid);
 920			pclc_smcd->ism.chid =
 921				htons(smc_ism_get_chid(ini->ism_dev[0]));
 922		}
 923	}
 924	if (ini->smc_type_v2 == SMC_TYPE_N) {
 925		pclc_smcd->v2_ext_offset = 0;
 926	} else {
 927		struct smc_clc_eid_entry *ueident;
 928		u16 v2_ext_offset;
 929
 930		v2_ext->hdr.flag.release = SMC_RELEASE;
 931		v2_ext_offset = sizeof(*pclc_smcd) -
 932			offsetofend(struct smc_clc_msg_smcd, v2_ext_offset);
 933		if (ini->smc_type_v1 != SMC_TYPE_N)
 934			v2_ext_offset += sizeof(*pclc_prfx) +
 935						pclc_prfx->ipv6_prefixes_cnt *
 936						sizeof(ipv6_prfx[0]);
 937		pclc_smcd->v2_ext_offset = htons(v2_ext_offset);
 938		plen += sizeof(*v2_ext);
 939
 940		v2_ext->feature_mask = htons(SMC_FEATURE_MASK);
 941		read_lock(&smc_clc_eid_table.lock);
 942		v2_ext->hdr.eid_cnt = smc_clc_eid_table.ueid_cnt;
 943		plen += smc_clc_eid_table.ueid_cnt * SMC_MAX_EID_LEN;
 944		i = 0;
 945		list_for_each_entry(ueident, &smc_clc_eid_table.list, list) {
 946			memcpy(v2_ext->user_eids[i++], ueident->eid,
 947			       sizeof(ueident->eid));
 948		}
 949		read_unlock(&smc_clc_eid_table.lock);
 950	}
 951	if (smcd_indicated(ini->smc_type_v2)) {
 952		struct smcd_gid smcd_gid;
 953		u8 *eid = NULL;
 954		int entry = 0;
 955
 956		v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled;
 957		v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) -
 958				offsetofend(struct smc_clnt_opts_area_hdr,
 959					    smcd_v2_ext_offset) +
 960				v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN);
 961		smc_ism_get_system_eid(&eid);
 962		if (eid && v2_ext->hdr.flag.seid)
 963			memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN);
 964		plen += sizeof(*smcd_v2_ext);
 965		if (ini->ism_offered_cnt) {
 966			for (i = 1; i <= ini->ism_offered_cnt; i++) {
 967				smcd = ini->ism_dev[i];
 968				smcd->ops->get_local_gid(smcd, &smcd_gid);
 969				gidchids[entry].chid =
 970					htons(smc_ism_get_chid(ini->ism_dev[i]));
 971				gidchids[entry].gid = htonll(smcd_gid.gid);
 972				if (smc_ism_is_emulated(smcd)) {
 973					/* an Emulated-ISM device takes two
 974					 * entries. CHID of the second entry
 975					 * repeats that of the first entry.
 976					 */
 977					gidchids[entry + 1].chid =
 978						gidchids[entry].chid;
 979					gidchids[entry + 1].gid =
 980						htonll(smcd_gid.gid_ext);
 981					entry++;
 982				}
 983				entry++;
 984			}
 985			plen += entry * sizeof(struct smc_clc_smcd_gid_chid);
 986		}
 987		v2_ext->hdr.ism_gid_cnt = entry;
 988	}
 989	if (smcr_indicated(ini->smc_type_v2)) {
 990		memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE);
 991		v2_ext->max_conns = net->smc.sysctl_max_conns_per_lgr;
 992		v2_ext->max_links = net->smc.sysctl_max_links_per_lgr;
 993	}
 
 994
 995	pclc_base->hdr.length = htons(plen);
 996	memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 997
 998	/* send SMC Proposal CLC message */
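	/* the iovec order mirrors the on-the-wire layout: fixed header plus
	 * SMC-D info area, optional v1 prefix area, optional v2 extension
	 * with UEIDs, optional SMC-D v2 extension with the GID/CHID array,
	 * and the trailing eyecatcher
	 */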
 999	memset(&msg, 0, sizeof(msg));
1000	i = 0;
1001	vec[i].iov_base = pclc_base;
1002	vec[i++].iov_len = sizeof(*pclc_base);
1003	vec[i].iov_base = pclc_smcd;
1004	vec[i++].iov_len = sizeof(*pclc_smcd);
1005	if (ini->smc_type_v1 != SMC_TYPE_N) {
1006		vec[i].iov_base = pclc_prfx;
1007		vec[i++].iov_len = sizeof(*pclc_prfx);
1008		if (pclc_prfx->ipv6_prefixes_cnt > 0) {
1009			vec[i].iov_base = ipv6_prfx;
1010			vec[i++].iov_len = pclc_prfx->ipv6_prefixes_cnt *
1011					   sizeof(ipv6_prfx[0]);
1012		}
1013	}
1014	if (ini->smc_type_v2 != SMC_TYPE_N) {
1015		vec[i].iov_base = v2_ext;
1016		vec[i++].iov_len = sizeof(*v2_ext) +
1017				   (v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN);
1018		if (smcd_indicated(ini->smc_type_v2)) {
1019			vec[i].iov_base = smcd_v2_ext;
1020			vec[i++].iov_len = sizeof(*smcd_v2_ext);
1021			if (ini->ism_offered_cnt) {
1022				vec[i].iov_base = gidchids;
1023				vec[i++].iov_len = v2_ext->hdr.ism_gid_cnt *
1024					sizeof(struct smc_clc_smcd_gid_chid);
1025			}
1026		}
1027	}
1028	vec[i].iov_base = trl;
1029	vec[i++].iov_len = sizeof(*trl);
1030	/* due to the few bytes needed for clc-handshake this cannot block */
1031	len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen);
1032	if (len < 0) {
1033		smc->sk.sk_err = smc->clcsock->sk->sk_err;
1034		reason_code = -smc->sk.sk_err;
1035	} else if (len < ntohs(pclc_base->hdr.length)) {
1036		reason_code = -ENETUNREACH;
1037		smc->sk.sk_err = -reason_code;
1038	}
1039
1040	kfree(pclc);
1041	return reason_code;
1042}
1043
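/* fill the SMC-D specific part of an ACCEPT/CONFIRM: ISM GID, DMB token,
 * DMB element size and link group id; for v2 also the CHID, the negotiated
 * EID, the extended GID of an emulated ISM device and, on first contact,
 * the first contact extension
 */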
1044static void
1045smcd_clc_prep_confirm_accept(struct smc_connection *conn,
1046			     struct smc_clc_msg_accept_confirm *clc,
1047			     int first_contact, u8 version,
1048			     u8 *eid, struct smc_init_info *ini,
1049			     int *fce_len,
1050			     struct smc_clc_first_contact_ext_v2x *fce_v2x,
1051			     struct smc_clc_msg_trail *trl)
1052{
1053	struct smcd_dev *smcd = conn->lgr->smcd;
1054	struct smcd_gid smcd_gid;
1055	u16 chid;
1056	int len;
1057
1058	/* SMC-D specific settings */
1059	memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER,
1060	       sizeof(SMCD_EYECATCHER));
1061	smcd->ops->get_local_gid(smcd, &smcd_gid);
1062	clc->hdr.typev1 = SMC_TYPE_D;
1063	clc->d0.gid = htonll(smcd_gid.gid);
1064	clc->d0.token = htonll(conn->rmb_desc->token);
1065	clc->d0.dmbe_size = conn->rmbe_size_comp;
1066	clc->d0.dmbe_idx = 0;
1067	memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
1068	if (version == SMC_V1) {
1069		clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
1070	} else {
1071		chid = smc_ism_get_chid(smcd);
1072		clc->d1.chid = htons(chid);
1073		if (eid && eid[0])
1074			memcpy(clc->d1.eid, eid, SMC_MAX_EID_LEN);
1075		if (__smc_ism_is_emulated(chid))
1076			clc->d1.gid_ext = htonll(smcd_gid.gid_ext);
1077		len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
1078		if (first_contact) {
1079			*fce_len = smc_clc_fill_fce_v2x(fce_v2x, ini);
1080			len += *fce_len;
1081		}
1082		clc->hdr.length = htons(len);
1083	}
1084	memcpy(trl->eyecatcher, SMCD_EYECATCHER,
1085	       sizeof(SMCD_EYECATCHER));
1086}
1087
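/* fill the SMC-R specific part of an ACCEPT/CONFIRM: peer system id, RoCE
 * GID and MAC, QP number, RMB rkey, RMBE index/alert token and size, RMB
 * DMA address and initial PSN; on CONFIRM the QP MTU is reduced to the
 * minimum of the local and the peer's path MTU
 */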
1088static void
1089smcr_clc_prep_confirm_accept(struct smc_connection *conn,
1090			     struct smc_clc_msg_accept_confirm *clc,
1091			     int first_contact, u8 version,
1092			     u8 *eid, struct smc_init_info *ini,
1093			     int *fce_len,
1094			     struct smc_clc_first_contact_ext_v2x *fce_v2x,
1095			     struct smc_clc_fce_gid_ext *gle,
1096			     struct smc_clc_msg_trail *trl)
1097{
1098	struct smc_link *link = conn->lnk;
1099	int len;
1100
1101	/* SMC-R specific settings */
1102	memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER,
1103	       sizeof(SMC_EYECATCHER));
1104	clc->hdr.typev1 = SMC_TYPE_R;
1105	memcpy(clc->r0.lcl.id_for_peer, local_systemid,
1106	       sizeof(local_systemid));
1107	memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE);
1108	memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
1109	       ETH_ALEN);
1110	hton24(clc->r0.qpn, link->roce_qp->qp_num);
1111	clc->r0.rmb_rkey =
1112		htonl(conn->rmb_desc->mr[link->link_idx]->rkey);
1113	clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
1114	clc->r0.rmbe_alert_token = htonl(conn->alert_token_local);
1115	switch (clc->hdr.type) {
1116	case SMC_CLC_ACCEPT:
1117		clc->r0.qp_mtu = link->path_mtu;
1118		break;
1119	case SMC_CLC_CONFIRM:
1120		clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu);
1121		break;
1122	}
1123	clc->r0.rmbe_size = conn->rmbe_size_comp;
1124	clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
1125		cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
1126		cpu_to_be64((u64)sg_dma_address
1127			    (conn->rmb_desc->sgt[link->link_idx].sgl));
1128	hton24(clc->r0.psn, link->psn_initial);
1129	if (version == SMC_V1) {
1130		clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
1131	} else {
1132		if (eid && eid[0])
1133			memcpy(clc->r1.eid, eid, SMC_MAX_EID_LEN);
1134		len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2;
1135		if (first_contact) {
1136			*fce_len = smc_clc_fill_fce_v2x(fce_v2x, ini);
1137			len += *fce_len;
1138			fce_v2x->fce_v2_base.v2_direct =
1139				!link->lgr->uses_gateway;
1140			if (clc->hdr.type == SMC_CLC_CONFIRM) {
1141				memset(gle, 0, sizeof(*gle));
1142				gle->gid_cnt = ini->smcrv2.gidlist.len;
1143				len += sizeof(*gle);
1144				len += gle->gid_cnt * sizeof(gle->gid[0]);
1145			}
1146		}
1147		clc->hdr.length = htons(len);
1148	}
1149	memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
1150}
1151
1152/* build and send CLC CONFIRM / ACCEPT message */
1153static int smc_clc_send_confirm_accept(struct smc_sock *smc,
1154				       struct smc_clc_msg_accept_confirm *clc,
1155				       int first_contact, u8 version,
1156				       u8 *eid, struct smc_init_info *ini)
1157{
1158	struct smc_clc_first_contact_ext_v2x fce_v2x;
1159	struct smc_connection *conn = &smc->conn;
1160	struct smc_clc_fce_gid_ext gle;
1161	struct smc_clc_msg_trail trl;
1162	int i, fce_len;
1163	struct kvec vec[5];
1164	struct msghdr msg;
1165
1166	/* send SMC Confirm CLC msg */
1167	clc->hdr.version = version;	/* SMC version */
1168	if (first_contact)
1169		clc->hdr.typev2 |= SMC_FIRST_CONTACT_MASK;
1170	if (conn->lgr->is_smcd)
1171		smcd_clc_prep_confirm_accept(conn, clc, first_contact,
1172					     version, eid, ini, &fce_len,
1173					     &fce_v2x, &trl);
1174	else
1175		smcr_clc_prep_confirm_accept(conn, clc, first_contact,
1176					     version, eid, ini, &fce_len,
1177					     &fce_v2x, &gle, &trl);
1178	memset(&msg, 0, sizeof(msg));
1179	i = 0;
1180	vec[i].iov_base = clc;
1181	if (version > SMC_V1)
1182		vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ?
1183					SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 :
1184					SMCR_CLC_ACCEPT_CONFIRM_LEN_V2) -
1185				   sizeof(trl);
1186	else
1187		vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ?
1188						SMCD_CLC_ACCEPT_CONFIRM_LEN :
1189						SMCR_CLC_ACCEPT_CONFIRM_LEN) -
1190				   sizeof(trl);
1191	if (version > SMC_V1 && first_contact) {
1192		vec[i].iov_base = &fce_v2x;
1193		vec[i++].iov_len = fce_len;
1194		if (!conn->lgr->is_smcd) {
1195			if (clc->hdr.type == SMC_CLC_CONFIRM) {
1196				vec[i].iov_base = &gle;
1197				vec[i++].iov_len = sizeof(gle);
1198				vec[i].iov_base = &ini->smcrv2.gidlist.list;
1199				vec[i++].iov_len = gle.gid_cnt *
1200						   sizeof(gle.gid[0]);
1201			}
1202		}
1203	}
1204	vec[i].iov_base = &trl;
1205	vec[i++].iov_len = sizeof(trl);
 1206	return kernel_sendmsg(smc->clcsock, &msg, vec, i,
 1207			      ntohs(clc->hdr.length));
1208}
1209
1210/* send CLC CONFIRM message across internal TCP socket */
1211int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
1212			 u8 version, u8 *eid, struct smc_init_info *ini)
 1213{
 1214	struct smc_clc_msg_accept_confirm cclc;
 1215	int reason_code = 0;
1216	int len;
1217
1218	/* send SMC Confirm CLC msg */
1219	memset(&cclc, 0, sizeof(cclc));
1220	cclc.hdr.type = SMC_CLC_CONFIRM;
1221	len = smc_clc_send_confirm_accept(smc, &cclc, clnt_first_contact,
 1222					  version, eid, ini);
1223	if (len < ntohs(cclc.hdr.length)) {
1224		if (len >= 0) {
1225			reason_code = -ENETUNREACH;
1226			smc->sk.sk_err = -reason_code;
1227		} else {
1228			smc->sk.sk_err = smc->clcsock->sk->sk_err;
1229			reason_code = -smc->sk.sk_err;
1230		}
1231	}
1232	return reason_code;
1233}
1234
1235/* send CLC ACCEPT message across internal TCP socket */
1236int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
1237			u8 version, u8 *negotiated_eid, struct smc_init_info *ini)
 1238{
 1239	struct smc_clc_msg_accept_confirm aclc;
1240	int len;
1241
1242	memset(&aclc, 0, sizeof(aclc));
1243	aclc.hdr.type = SMC_CLC_ACCEPT;
1244	len = smc_clc_send_confirm_accept(new_smc, &aclc, srv_first_contact,
 1245					  version, negotiated_eid, ini);
1246	if (len < ntohs(aclc.hdr.length))
1247		len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
1248
1249	return len > 0 ? 0 : len;
1250}
1251
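/* server side: validate the v2.x features offered in the proposal's v2
 * extension and negotiate max_conns/max_links as the minimum of the peer's
 * values and the local sysctl limits (SMC-R v2 only, from release 1 on)
 */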
1252int smc_clc_srv_v2x_features_validate(struct smc_sock *smc,
1253				      struct smc_clc_msg_proposal *pclc,
1254				      struct smc_init_info *ini)
1255{
1256	struct smc_clc_v2_extension *pclc_v2_ext;
1257	struct net *net = sock_net(&smc->sk);
1258
1259	ini->max_conns = SMC_CONN_PER_LGR_MAX;
1260	ini->max_links = SMC_LINKS_ADD_LNK_MAX;
1261	ini->feature_mask = SMC_FEATURE_MASK;
1262
1263	if ((!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) ||
1264	    ini->release_nr < SMC_RELEASE_1)
1265		return 0;
1266
1267	pclc_v2_ext = smc_get_clc_v2_ext(pclc);
1268	if (!pclc_v2_ext)
1269		return SMC_CLC_DECL_NOV2EXT;
1270
1271	if (ini->smcr_version & SMC_V2) {
1272		ini->max_conns = min_t(u8, pclc_v2_ext->max_conns,
1273				       net->smc.sysctl_max_conns_per_lgr);
1274		if (ini->max_conns < SMC_CONN_PER_LGR_MIN)
1275			return SMC_CLC_DECL_MAXCONNERR;
1276
1277		ini->max_links = min_t(u8, pclc_v2_ext->max_links,
1278				       net->smc.sysctl_max_links_per_lgr);
1279		if (ini->max_links < SMC_LINKS_ADD_LNK_MIN)
1280			return SMC_CLC_DECL_MAXLINKERR;
1281	}
1282
1283	return 0;
1284}
1285
1286int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce,
1287				       struct smc_init_info *ini)
1288{
1289	struct smc_clc_first_contact_ext_v2x *fce_v2x =
1290		(struct smc_clc_first_contact_ext_v2x *)fce;
1291
1292	if (ini->release_nr < SMC_RELEASE_1)
1293		return 0;
1294
1295	if (!ini->is_smcd) {
1296		if (fce_v2x->max_conns < SMC_CONN_PER_LGR_MIN)
1297			return SMC_CLC_DECL_MAXCONNERR;
1298		ini->max_conns = fce_v2x->max_conns;
1299
1300		if (fce_v2x->max_links > SMC_LINKS_ADD_LNK_MAX ||
1301		    fce_v2x->max_links < SMC_LINKS_ADD_LNK_MIN)
1302			return SMC_CLC_DECL_MAXLINKERR;
1303		ini->max_links = fce_v2x->max_links;
1304	}
1305	/* common supplemental features of server and client */
1306	ini->feature_mask = ntohs(fce_v2x->feature_mask) & SMC_FEATURE_MASK;
1307
1308	return 0;
1309}
1310
1311int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc,
1312				       struct smc_init_info *ini)
1313{
1314	struct smc_clc_first_contact_ext *fce =
1315		smc_get_clc_first_contact_ext(cclc, ini->is_smcd);
1316	struct smc_clc_first_contact_ext_v2x *fce_v2x =
1317		(struct smc_clc_first_contact_ext_v2x *)fce;
1318
1319	if (cclc->hdr.version == SMC_V1 ||
1320	    !(cclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
1321		return 0;
1322
1323	if (ini->release_nr != fce->release)
1324		return SMC_CLC_DECL_RELEASEERR;
1325
1326	if (fce->release < SMC_RELEASE_1)
1327		return 0;
1328
1329	if (!ini->is_smcd) {
1330		if (fce_v2x->max_conns != ini->max_conns)
1331			return SMC_CLC_DECL_MAXCONNERR;
1332		if (fce_v2x->max_links != ini->max_links)
1333			return SMC_CLC_DECL_MAXLINKERR;
1334	}
1335	/* common supplemental features returned by client */
1336	ini->feature_mask = ntohs(fce_v2x->feature_mask);
1337
1338	return 0;
1339}
1340
1341void smc_clc_get_hostname(u8 **host)
1342{
1343	*host = &smc_hostname[0];
1344}
1345
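/* one-time init: record the blank-padded local hostname used in the first
 * contact extension and set up the UEID table; the SEID is enabled by
 * default only when built for s390 (CONFIG_S390), where ISM devices exist
 */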
1346void __init smc_clc_init(void)
1347{
1348	struct new_utsname *u;
1349
1350	memset(smc_hostname, _S, sizeof(smc_hostname)); /* ASCII blanks */
1351	u = utsname();
1352	memcpy(smc_hostname, u->nodename,
1353	       min_t(size_t, strlen(u->nodename), sizeof(smc_hostname)));
1354
1355	INIT_LIST_HEAD(&smc_clc_eid_table.list);
1356	rwlock_init(&smc_clc_eid_table.lock);
1357	smc_clc_eid_table.ueid_cnt = 0;
1358#if IS_ENABLED(CONFIG_S390)
1359	smc_clc_eid_table.seid_enabled = 1;
1360#else
1361	smc_clc_eid_table.seid_enabled = 0;
1362#endif
1363}
1364
1365void smc_clc_exit(void)
1366{
1367	smc_clc_ueid_remove(NULL);
1368}
v5.4
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
  4 *
  5 *  CLC (connection layer control) handshake over initial TCP socket to
  6 *  prepare for RDMA traffic
  7 *
  8 *  Copyright IBM Corp. 2016, 2018
  9 *
 10 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 11 */
 12
 13#include <linux/in.h>
 14#include <linux/inetdevice.h>
 15#include <linux/if_ether.h>
 16#include <linux/sched/signal.h>
 17
 18#include <net/addrconf.h>
 19#include <net/sock.h>
 20#include <net/tcp.h>
 21
 22#include "smc.h"
 23#include "smc_core.h"
 24#include "smc_clc.h"
 25#include "smc_ib.h"
 26#include "smc_ism.h"
 
 27
 28#define SMCR_CLC_ACCEPT_CONFIRM_LEN 68
 29#define SMCD_CLC_ACCEPT_CONFIRM_LEN 48
 30
 31/* eye catcher "SMCR" EBCDIC for CLC messages */
 32static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
 33/* eye catcher "SMCD" EBCDIC for CLC messages */
 34static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'};
 35
 36/* check if received message has a correct header length and contains valid
 37 * heading and trailing eyecatchers
 38 */
 39static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
 40{
 41	struct smc_clc_msg_proposal_prefix *pclc_prfx;
 42	struct smc_clc_msg_accept_confirm *clc;
 43	struct smc_clc_msg_proposal *pclc;
 44	struct smc_clc_msg_decline *dclc;
 45	struct smc_clc_msg_trail *trl;
 46
 47	if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
 48	    memcmp(clcm->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
 49		return false;
 50	switch (clcm->type) {
 51	case SMC_CLC_PROPOSAL:
 52		if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D &&
 53		    clcm->path != SMC_TYPE_B)
 54			return false;
 55		pclc = (struct smc_clc_msg_proposal *)clcm;
 56		pclc_prfx = smc_clc_proposal_get_prefix(pclc);
 57		if (ntohs(pclc->hdr.length) !=
 58			sizeof(*pclc) + ntohs(pclc->iparea_offset) +
 59			sizeof(*pclc_prfx) +
 60			pclc_prfx->ipv6_prefixes_cnt *
 61				sizeof(struct smc_clc_ipv6_prefix) +
 62			sizeof(*trl))
 63			return false;
 64		trl = (struct smc_clc_msg_trail *)
 65			((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl));
 66		break;
 67	case SMC_CLC_ACCEPT:
 68	case SMC_CLC_CONFIRM:
 69		if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D)
 70			return false;
 71		clc = (struct smc_clc_msg_accept_confirm *)clcm;
 72		if ((clcm->path == SMC_TYPE_R &&
 73		     ntohs(clc->hdr.length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) ||
 74		    (clcm->path == SMC_TYPE_D &&
 75		     ntohs(clc->hdr.length) != SMCD_CLC_ACCEPT_CONFIRM_LEN))
 76			return false;
 77		trl = (struct smc_clc_msg_trail *)
 78			((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl));
 79		break;
 80	case SMC_CLC_DECLINE:
 81		dclc = (struct smc_clc_msg_decline *)clcm;
 82		if (ntohs(dclc->hdr.length) != sizeof(*dclc))
 83			return false;
 84		trl = &dclc->trl;
 85		break;
 86	default:
 87		return false;
 88	}
 89	if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
 90	    memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
 91		return false;
 92	return true;
 93}
 94
 95/* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */
 96static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4,
 97				 struct smc_clc_msg_proposal_prefix *prop)
 98{
 99	struct in_device *in_dev = __in_dev_get_rcu(dst->dev);
100	const struct in_ifaddr *ifa;
101
102	if (!in_dev)
103		return -ENODEV;
104
105	in_dev_for_each_ifa_rcu(ifa, in_dev) {
106		if (!inet_ifa_match(ipv4, ifa))
107			continue;
108		prop->prefix_len = inet_mask_len(ifa->ifa_mask);
109		prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask;
110		/* prop->ipv6_prefixes_cnt = 0; already done by memset before */
111		return 0;
112	}
113	return -ENOENT;
114}
115
116/* fill CLC proposal msg with ipv6 prefixes from device */
117static int smc_clc_prfx_set6_rcu(struct dst_entry *dst,
118				 struct smc_clc_msg_proposal_prefix *prop,
119				 struct smc_clc_ipv6_prefix *ipv6_prfx)
120{
121#if IS_ENABLED(CONFIG_IPV6)
122	struct inet6_dev *in6_dev = __in6_dev_get(dst->dev);
123	struct inet6_ifaddr *ifa;
124	int cnt = 0;
125
126	if (!in6_dev)
127		return -ENODEV;
128	/* use a maximum of 8 IPv6 prefixes from device */
129	list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
130		if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
131			continue;
132		ipv6_addr_prefix(&ipv6_prfx[cnt].prefix,
133				 &ifa->addr, ifa->prefix_len);
134		ipv6_prfx[cnt].prefix_len = ifa->prefix_len;
135		cnt++;
136		if (cnt == SMC_CLC_MAX_V6_PREFIX)
137			break;
138	}
139	prop->ipv6_prefixes_cnt = cnt;
140	if (cnt)
141		return 0;
142#endif
143	return -ENOENT;
144}
145
146/* retrieve and set prefixes in CLC proposal msg */
147static int smc_clc_prfx_set(struct socket *clcsock,
148			    struct smc_clc_msg_proposal_prefix *prop,
149			    struct smc_clc_ipv6_prefix *ipv6_prfx)
150{
151	struct dst_entry *dst = sk_dst_get(clcsock->sk);
152	struct sockaddr_storage addrs;
153	struct sockaddr_in6 *addr6;
154	struct sockaddr_in *addr;
155	int rc = -ENOENT;
156
157	memset(prop, 0, sizeof(*prop));
158	if (!dst) {
159		rc = -ENOTCONN;
160		goto out;
161	}
162	if (!dst->dev) {
163		rc = -ENODEV;
164		goto out_rel;
165	}
166	/* get address to which the internal TCP socket is bound */
167	kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
 
168	/* analyze IP specific data of net_device belonging to TCP socket */
169	addr6 = (struct sockaddr_in6 *)&addrs;
170	rcu_read_lock();
171	if (addrs.ss_family == PF_INET) {
172		/* IPv4 */
173		addr = (struct sockaddr_in *)&addrs;
174		rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop);
175	} else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) {
176		/* mapped IPv4 address - peer is IPv4 only */
177		rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3],
178					   prop);
179	} else {
180		/* IPv6 */
181		rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx);
182	}
183	rcu_read_unlock();
184out_rel:
185	dst_release(dst);
186out:
187	return rc;
188}
189
190/* match ipv4 addrs of dev against addr in CLC proposal */
191static int smc_clc_prfx_match4_rcu(struct net_device *dev,
192				   struct smc_clc_msg_proposal_prefix *prop)
193{
194	struct in_device *in_dev = __in_dev_get_rcu(dev);
195	const struct in_ifaddr *ifa;
196
197	if (!in_dev)
198		return -ENODEV;
199	in_dev_for_each_ifa_rcu(ifa, in_dev) {
200		if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) &&
201		    inet_ifa_match(prop->outgoing_subnet, ifa))
202			return 0;
203	}
204
205	return -ENOENT;
206}
207
208/* match ipv6 addrs of dev against addrs in CLC proposal */
209static int smc_clc_prfx_match6_rcu(struct net_device *dev,
210				   struct smc_clc_msg_proposal_prefix *prop)
211{
212#if IS_ENABLED(CONFIG_IPV6)
213	struct inet6_dev *in6_dev = __in6_dev_get(dev);
214	struct smc_clc_ipv6_prefix *ipv6_prfx;
215	struct inet6_ifaddr *ifa;
216	int i, max;
217
218	if (!in6_dev)
219		return -ENODEV;
220	/* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */
221	ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop));
222	max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX);
223	list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
224		if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
225			continue;
226		for (i = 0; i < max; i++) {
227			if (ifa->prefix_len == ipv6_prfx[i].prefix_len &&
228			    ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix,
229					      ifa->prefix_len))
230				return 0;
231		}
232	}
233#endif
234	return -ENOENT;
235}
236
237/* check if proposed prefixes match one of our device prefixes */
238int smc_clc_prfx_match(struct socket *clcsock,
239		       struct smc_clc_msg_proposal_prefix *prop)
240{
241	struct dst_entry *dst = sk_dst_get(clcsock->sk);
242	int rc;
243
244	if (!dst) {
245		rc = -ENOTCONN;
246		goto out;
247	}
248	if (!dst->dev) {
249		rc = -ENODEV;
250		goto out_rel;
251	}
252	rcu_read_lock();
253	if (!prop->ipv6_prefixes_cnt)
254		rc = smc_clc_prfx_match4_rcu(dst->dev, prop);
255	else
256		rc = smc_clc_prfx_match6_rcu(dst->dev, prop);
257	rcu_read_unlock();
258out_rel:
259	dst_release(dst);
260out:
261	return rc;
262}
263
264/* Wait for data on the tcp-socket, analyze received data
265 * Returns:
266 * 0 if success and it was not a decline that we received.
267 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
268 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
269 */
270int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
271		     u8 expected_type, unsigned long timeout)
272{
273	long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo;
274	struct sock *clc_sk = smc->clcsock->sk;
275	struct smc_clc_msg_hdr *clcm = buf;
276	struct msghdr msg = {NULL, 0};
277	int reason_code = 0;
278	struct kvec vec = {buf, buflen};
279	int len, datlen;
280	int krflags;
281
282	/* peek the first few bytes to determine length of data to receive
283	 * so we don't consume any subsequent CLC message or payload data
284	 * in the TCP byte stream
285	 */
286	/*
287	 * Caller must make sure that buflen is no less than
288	 * sizeof(struct smc_clc_msg_hdr)
289	 */
290	krflags = MSG_PEEK | MSG_WAITALL;
291	clc_sk->sk_rcvtimeo = timeout;
292	iov_iter_kvec(&msg.msg_iter, READ, &vec, 1,
293			sizeof(struct smc_clc_msg_hdr));
294	len = sock_recvmsg(smc->clcsock, &msg, krflags);
295	if (signal_pending(current)) {
296		reason_code = -EINTR;
297		clc_sk->sk_err = EINTR;
298		smc->sk.sk_err = EINTR;
299		goto out;
300	}
301	if (clc_sk->sk_err) {
302		reason_code = -clc_sk->sk_err;
303		if (clc_sk->sk_err == EAGAIN &&
304		    expected_type == SMC_CLC_DECLINE)
305			clc_sk->sk_err = 0; /* reset for fallback usage */
306		else
307			smc->sk.sk_err = clc_sk->sk_err;
308		goto out;
309	}
310	if (!len) { /* peer has performed orderly shutdown */
311		smc->sk.sk_err = ECONNRESET;
312		reason_code = -ECONNRESET;
313		goto out;
314	}
315	if (len < 0) {
316		if (len != -EAGAIN || expected_type != SMC_CLC_DECLINE)
317			smc->sk.sk_err = -len;
318		reason_code = len;
319		goto out;
320	}
321	datlen = ntohs(clcm->length);
322	if ((len < sizeof(struct smc_clc_msg_hdr)) ||
323	    (datlen > buflen) ||
324	    (clcm->version != SMC_CLC_V1) ||
325	    (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D &&
326	     clcm->path != SMC_TYPE_B) ||
327	    ((clcm->type != SMC_CLC_DECLINE) &&
328	     (clcm->type != expected_type))) {
329		smc->sk.sk_err = EPROTO;
330		reason_code = -EPROTO;
331		goto out;
332	}
333
334	/* receive the complete CLC message */
335	memset(&msg, 0, sizeof(struct msghdr));
336	iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, datlen);
337	krflags = MSG_WAITALL;
338	len = sock_recvmsg(smc->clcsock, &msg, krflags);
339	if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
340		smc->sk.sk_err = EPROTO;
341		reason_code = -EPROTO;
342		goto out;
343	}
344	if (clcm->type == SMC_CLC_DECLINE) {
345		struct smc_clc_msg_decline *dclc;
346
347		dclc = (struct smc_clc_msg_decline *)clcm;
348		reason_code = SMC_CLC_DECL_PEERDECL;
349		smc->peer_diagnosis = ntohl(dclc->peer_diagnosis);
350		if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
351			smc->conn.lgr->sync_err = 1;
352			smc_lgr_terminate(smc->conn.lgr);
353		}
354	}
355
356out:
357	clc_sk->sk_rcvtimeo = rcvtimeo;
358	return reason_code;
359}
360
361/* send CLC DECLINE message across internal TCP socket */
362int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
363{
364	struct smc_clc_msg_decline dclc;
365	struct msghdr msg;
366	struct kvec vec;
367	int len;
368
 
369	memset(&dclc, 0, sizeof(dclc));
370	memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
371	dclc.hdr.type = SMC_CLC_DECLINE;
372	dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
373	dclc.hdr.version = SMC_CLC_V1;
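	/* the header flag doubles as the "out of sync" indicator in a
	 * DECLINE; the receive side in smc_clc_wait_msg() terminates the
	 * link group when it sees the flag set
	 */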
374	dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
375	memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
376	dclc.peer_diagnosis = htonl(peer_diag_info);
377	memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
378
379	memset(&msg, 0, sizeof(msg));
380	vec.iov_base = &dclc;
381	vec.iov_len = sizeof(struct smc_clc_msg_decline);
382	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
383			     sizeof(struct smc_clc_msg_decline));
384	if (len < 0 || len < sizeof(struct smc_clc_msg_decline))
385		len = -EPROTO;
386	return len > 0 ? 0 : len;
387}
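
/* Illustrative use of smc_clc_send_decline() above (a sketch; the real
 * callers live in af_smc.c): when SMC setup fails after the handshake has
 * started, e.g. because of a configuration error, a caller sends
 *
 *	smc_clc_send_decline(smc, SMC_CLC_DECL_CNFERR);
 *
 * and then falls back to plain TCP for this connection.
 */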
388
389/* send CLC PROPOSAL message across internal TCP socket */
390int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
391			  struct smc_init_info *ini)
392{
393	struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
394	struct smc_clc_msg_proposal_prefix pclc_prfx;
395	struct smc_clc_msg_smcd pclc_smcd;
396	struct smc_clc_msg_proposal pclc;
397	struct smc_clc_msg_trail trl;
398	int len, i, plen, rc;
399	int reason_code = 0;
400	struct kvec vec[5];
401	struct msghdr msg;
402
403	/* retrieve IP prefixes for the CLC proposal msg */
404	rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx, ipv6_prfx);
405	if (rc)
406		return SMC_CLC_DECL_CNFERR; /* configuration error */
407
408	/* send SMC Proposal CLC message */
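	/* plen is the total on-wire length of the proposal: the fixed header
	 * (pclc), the IP prefix area (pclc_prfx) plus one entry per announced
	 * IPv6 prefix, and the trailing eye catcher (trl); for SMC-D and
	 * "both" proposals sizeof(pclc_smcd) is added further down, and the
	 * result ends up in pclc.hdr.length
	 */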
409	plen = sizeof(pclc) + sizeof(pclc_prfx) +
410	       (pclc_prfx.ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) +
411	       sizeof(trl);
412	memset(&pclc, 0, sizeof(pclc));
413	memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
414	pclc.hdr.type = SMC_CLC_PROPOSAL;
415	pclc.hdr.version = SMC_CLC_V1;		/* SMC version */
416	pclc.hdr.path = smc_type;
417	if (smc_type == SMC_TYPE_R || smc_type == SMC_TYPE_B) {
418		/* add SMC-R specifics */
419		memcpy(pclc.lcl.id_for_peer, local_systemid,
420		       sizeof(local_systemid));
421		memcpy(&pclc.lcl.gid, ini->ib_gid, SMC_GID_SIZE);
422		memcpy(&pclc.lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1],
423		       ETH_ALEN);
424		pclc.iparea_offset = htons(0);
425	}
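	/* for SMC_TYPE_B ("both") proposals the SMC-R block above and the
	 * SMC-D block below are both filled in
	 */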
426	if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
427		/* add SMC-D specifics */
428		memset(&pclc_smcd, 0, sizeof(pclc_smcd));
429		plen += sizeof(pclc_smcd);
430		pclc.iparea_offset = htons(SMC_CLC_PROPOSAL_MAX_OFFSET);
431		pclc_smcd.gid = ini->ism_dev->local_gid;
432	}
433	pclc.hdr.length = htons(plen);
434
435	memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
436	memset(&msg, 0, sizeof(msg));
437	i = 0;
438	vec[i].iov_base = &pclc;
439	vec[i++].iov_len = sizeof(pclc);
440	if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
441		vec[i].iov_base = &pclc_smcd;
442		vec[i++].iov_len = sizeof(pclc_smcd);
443	}
444	vec[i].iov_base = &pclc_prfx;
445	vec[i++].iov_len = sizeof(pclc_prfx);
446	if (pclc_prfx.ipv6_prefixes_cnt > 0) {
447		vec[i].iov_base = &ipv6_prfx[0];
448		vec[i++].iov_len = pclc_prfx.ipv6_prefixes_cnt *
449				   sizeof(ipv6_prfx[0]);
450	}
451	vec[i].iov_base = &trl;
452	vec[i++].iov_len = sizeof(trl);
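	/* the kvec array now mirrors the on-wire layout of the proposal:
	 *
	 *	pclc | [pclc_smcd] | pclc_prfx | [ipv6 prefixes] | trl
	 *
	 * where the bracketed parts are present only for SMC-D/"both"
	 * proposals and for announced IPv6 prefixes, respectively; plen was
	 * computed above to match this total
	 */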
453	/* due to the few bytes needed for the CLC handshake this cannot block */
454	len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen);
455	if (len < 0) {
456		smc->sk.sk_err = smc->clcsock->sk->sk_err;
457		reason_code = -smc->sk.sk_err;
458	} else if (len < (int)sizeof(pclc)) {
459		reason_code = -ENETUNREACH;
460		smc->sk.sk_err = -reason_code;
461	}
462
463	return reason_code;
464}
465
466/* send CLC CONFIRM message across internal TCP socket */
467int smc_clc_send_confirm(struct smc_sock *smc)
468{
469	struct smc_connection *conn = &smc->conn;
470	struct smc_clc_msg_accept_confirm cclc;
471	struct smc_link *link;
472	int reason_code = 0;
473	struct msghdr msg;
474	struct kvec vec;
475	int len;
476
477	/* send SMC Confirm CLC msg */
478	memset(&cclc, 0, sizeof(cclc));
479	cclc.hdr.type = SMC_CLC_CONFIRM;
480	cclc.hdr.version = SMC_CLC_V1;		/* SMC version */
481	if (smc->conn.lgr->is_smcd) {
482		/* SMC-D specific settings */
483		memcpy(cclc.hdr.eyecatcher, SMCD_EYECATCHER,
484		       sizeof(SMCD_EYECATCHER));
485		cclc.hdr.path = SMC_TYPE_D;
486		cclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
487		cclc.gid = conn->lgr->smcd->local_gid;
488		cclc.token = conn->rmb_desc->token;
489		cclc.dmbe_size = conn->rmbe_size_short;
490		cclc.dmbe_idx = 0;
491		memcpy(&cclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
492		memcpy(cclc.smcd_trl.eyecatcher, SMCD_EYECATCHER,
493		       sizeof(SMCD_EYECATCHER));
494	} else {
495		/* SMC-R specific settings */
496		link = &conn->lgr->lnk[SMC_SINGLE_LINK];
497		memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER,
498		       sizeof(SMC_EYECATCHER));
499		cclc.hdr.path = SMC_TYPE_R;
500		cclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
501		memcpy(cclc.lcl.id_for_peer, local_systemid,
502		       sizeof(local_systemid));
503		memcpy(&cclc.lcl.gid, link->gid, SMC_GID_SIZE);
504		memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
505		       ETH_ALEN);
506		hton24(cclc.qpn, link->roce_qp->qp_num);
507		cclc.rmb_rkey =
508			htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
509		cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
510		cclc.rmbe_alert_token = htonl(conn->alert_token_local);
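		/* use the smaller of the local path MTU and the MTU
		 * advertised by the peer for the QP
		 */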
511		cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
512		cclc.rmbe_size = conn->rmbe_size_short;
513		cclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
514				(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
515		hton24(cclc.psn, link->psn_initial);
516		memcpy(cclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
517		       sizeof(SMC_EYECATCHER));
518	}
519
520	memset(&msg, 0, sizeof(msg));
521	vec.iov_base = &cclc;
522	vec.iov_len = ntohs(cclc.hdr.length);
523	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
524			     ntohs(cclc.hdr.length));
525	if (len < ntohs(cclc.hdr.length)) {
526		if (len >= 0) {
527			reason_code = -ENETUNREACH;
528			smc->sk.sk_err = -reason_code;
529		} else {
530			smc->sk.sk_err = smc->clcsock->sk->sk_err;
531			reason_code = -smc->sk.sk_err;
532		}
533	}
534	return reason_code;
535}
536
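/* CLC ACCEPT (sent by the server) and CLC CONFIRM (sent by the client) share
 * struct smc_clc_msg_accept_confirm; smc_clc_send_accept() below fills in
 * largely the same fields as smc_clc_send_confirm() above and additionally
 * sets the first-contact flag in the header.
 */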
537/* send CLC ACCEPT message across internal TCP socket */
538int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
539{
540	struct smc_connection *conn = &new_smc->conn;
541	struct smc_clc_msg_accept_confirm aclc;
542	struct smc_link *link;
543	struct msghdr msg;
544	struct kvec vec;
545	int len;
546
547	memset(&aclc, 0, sizeof(aclc));
548	aclc.hdr.type = SMC_CLC_ACCEPT;
549	aclc.hdr.version = SMC_CLC_V1;		/* SMC version */
550	if (srv_first_contact)
551		aclc.hdr.flag = 1;
552
553	if (new_smc->conn.lgr->is_smcd) {
554		/* SMC-D specific settings */
555		aclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
556		memcpy(aclc.hdr.eyecatcher, SMCD_EYECATCHER,
557		       sizeof(SMCD_EYECATCHER));
558		aclc.hdr.path = SMC_TYPE_D;
559		aclc.gid = conn->lgr->smcd->local_gid;
560		aclc.token = conn->rmb_desc->token;
561		aclc.dmbe_size = conn->rmbe_size_short;
562		aclc.dmbe_idx = 0;
563		memcpy(&aclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
564		memcpy(aclc.smcd_trl.eyecatcher, SMCD_EYECATCHER,
565		       sizeof(SMCD_EYECATCHER));
566	} else {
567		/* SMC-R specific settings */
568		aclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
569		memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER,
570		       sizeof(SMC_EYECATCHER));
571		aclc.hdr.path = SMC_TYPE_R;
572		link = &conn->lgr->lnk[SMC_SINGLE_LINK];
573		memcpy(aclc.lcl.id_for_peer, local_systemid,
574		       sizeof(local_systemid));
575		memcpy(&aclc.lcl.gid, link->gid, SMC_GID_SIZE);
576		memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1],
577		       ETH_ALEN);
578		hton24(aclc.qpn, link->roce_qp->qp_num);
579		aclc.rmb_rkey =
580			htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
581		aclc.rmbe_idx = 1;		/* as long as 1 RMB = 1 RMBE */
582		aclc.rmbe_alert_token = htonl(conn->alert_token_local);
583		aclc.qp_mtu = link->path_mtu;
584		aclc.rmbe_size = conn->rmbe_size_short;
585		aclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
586				(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
587		hton24(aclc.psn, link->psn_initial);
588		memcpy(aclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
589		       sizeof(SMC_EYECATCHER));
590	}
591
592	memset(&msg, 0, sizeof(msg));
593	vec.iov_base = &aclc;
594	vec.iov_len = ntohs(aclc.hdr.length);
595	len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1,
596			     ntohs(aclc.hdr.length));
597	if (len < ntohs(aclc.hdr.length))
598		len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
599
600	return len > 0 ? 0 : len;
601}