net/smc/smc_clc.c (v5.14.15)
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
  4 *
  5 *  CLC (connection layer control) handshake over initial TCP socket to
  6 *  prepare for RDMA traffic
  7 *
  8 *  Copyright IBM Corp. 2016, 2018
  9 *
 10 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 11 */
 12
 13#include <linux/in.h>
 14#include <linux/inetdevice.h>
 15#include <linux/if_ether.h>
 16#include <linux/sched/signal.h>
 17#include <linux/utsname.h>
 18#include <linux/ctype.h>
 19
 20#include <net/addrconf.h>
 21#include <net/sock.h>
 22#include <net/tcp.h>
 23
 24#include "smc.h"
 25#include "smc_core.h"
 26#include "smc_clc.h"
 27#include "smc_ib.h"
 28#include "smc_ism.h"
 29
 30#define SMCR_CLC_ACCEPT_CONFIRM_LEN 68
 31#define SMCD_CLC_ACCEPT_CONFIRM_LEN 48
 32#define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78
 33#define SMC_CLC_RECV_BUF_LEN	100
 34
 35/* eye catcher "SMCR" EBCDIC for CLC messages */
 36static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
 37/* eye catcher "SMCD" EBCDIC for CLC messages */
 38static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'};
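/* In EBCDIC, 'S' = 0xe2, 'M' = 0xd4, 'C' = 0xc3, 'R' = 0xd9 and 'D' = 0xc4,
 * so the two byte arrays above spell "SMCR" and "SMCD" on the wire.
 */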
 39
 40static u8 smc_hostname[SMC_MAX_HOSTNAME_LEN];
 41
 42/* check arriving CLC proposal */
 43static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc)
 44{
 45	struct smc_clc_msg_proposal_prefix *pclc_prfx;
 46	struct smc_clc_smcd_v2_extension *smcd_v2_ext;
 47	struct smc_clc_msg_hdr *hdr = &pclc->hdr;
 48	struct smc_clc_v2_extension *v2_ext;
 49
 50	v2_ext = smc_get_clc_v2_ext(pclc);
 51	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
 52	if (hdr->version == SMC_V1) {
 53		if (hdr->typev1 == SMC_TYPE_N)
 54			return false;
 55		if (ntohs(hdr->length) !=
 56			sizeof(*pclc) + ntohs(pclc->iparea_offset) +
 57			sizeof(*pclc_prfx) +
 58			pclc_prfx->ipv6_prefixes_cnt *
 59				sizeof(struct smc_clc_ipv6_prefix) +
 60			sizeof(struct smc_clc_msg_trail))
 61			return false;
 62	} else {
 63		if (ntohs(hdr->length) !=
 64			sizeof(*pclc) +
 65			sizeof(struct smc_clc_msg_smcd) +
 66			(hdr->typev1 != SMC_TYPE_N ?
 67				sizeof(*pclc_prfx) +
 68				pclc_prfx->ipv6_prefixes_cnt *
 69				sizeof(struct smc_clc_ipv6_prefix) : 0) +
 70			(hdr->typev2 != SMC_TYPE_N ?
 71				sizeof(*v2_ext) +
 72				v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN : 0) +
 73			(smcd_indicated(hdr->typev2) ?
 74				sizeof(*smcd_v2_ext) + v2_ext->hdr.ism_gid_cnt *
 75					sizeof(struct smc_clc_smcd_gid_chid) :
 76				0) +
 77			sizeof(struct smc_clc_msg_trail))
 78			return false;
 79	}
 80	return true;
 81}
 82
 83/* check arriving CLC accept or confirm */
 84static bool
 85smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2)
 86{
 87	struct smc_clc_msg_hdr *hdr = &clc_v2->hdr;
 88
 89	if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D)
 90		return false;
 91	if (hdr->version == SMC_V1) {
 92		if ((hdr->typev1 == SMC_TYPE_R &&
 93		     ntohs(hdr->length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) ||
 94		    (hdr->typev1 == SMC_TYPE_D &&
 95		     ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN))
 96			return false;
 97	} else {
 98		if (hdr->typev1 == SMC_TYPE_D &&
 99		    ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 &&
100		    (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 +
101				sizeof(struct smc_clc_first_contact_ext)))
102			return false;
103	}
104	return true;
105}
106
107static void smc_clc_fill_fce(struct smc_clc_first_contact_ext *fce, int *len)
108{
109	memset(fce, 0, sizeof(*fce));
110	fce->os_type = SMC_CLC_OS_LINUX;
111	fce->release = SMC_RELEASE;
112	memcpy(fce->hostname, smc_hostname, sizeof(smc_hostname));
113	(*len) += sizeof(*fce);
114}
115
116/* check if received message has a correct header length and contains valid
117 * heading and trailing eyecatchers
118 */
119static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl)
120{
121	struct smc_clc_msg_accept_confirm_v2 *clc_v2;
122	struct smc_clc_msg_proposal *pclc;
123	struct smc_clc_msg_decline *dclc;
124	struct smc_clc_msg_trail *trl;
125
126	if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
127	    memcmp(clcm->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
128		return false;
129	switch (clcm->type) {
130	case SMC_CLC_PROPOSAL:
131		pclc = (struct smc_clc_msg_proposal *)clcm;
132		if (!smc_clc_msg_prop_valid(pclc))
133			return false;
134		trl = (struct smc_clc_msg_trail *)
135			((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl));
136		break;
137	case SMC_CLC_ACCEPT:
138	case SMC_CLC_CONFIRM:
139		clc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)clcm;
140		if (!smc_clc_msg_acc_conf_valid(clc_v2))
141			return false;
142		trl = (struct smc_clc_msg_trail *)
143			((u8 *)clc_v2 + ntohs(clc_v2->hdr.length) -
144							sizeof(*trl));
145		break;
146	case SMC_CLC_DECLINE:
147		dclc = (struct smc_clc_msg_decline *)clcm;
148		if (ntohs(dclc->hdr.length) != sizeof(*dclc))
149			return false;
150		trl = &dclc->trl;
151		break;
152	default:
153		return false;
154	}
155	if (check_trl &&
156	    memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
157	    memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
158		return false;
159	return true;
160}
161
162/* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */
163static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4,
164				 struct smc_clc_msg_proposal_prefix *prop)
165{
166	struct in_device *in_dev = __in_dev_get_rcu(dst->dev);
167	const struct in_ifaddr *ifa;
168
169	if (!in_dev)
170		return -ENODEV;
171
172	in_dev_for_each_ifa_rcu(ifa, in_dev) {
173		if (!inet_ifa_match(ipv4, ifa))
174			continue;
175		prop->prefix_len = inet_mask_len(ifa->ifa_mask);
176		prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask;
177		/* prop->ipv6_prefixes_cnt = 0; already done by memset before */
178		return 0;
179	}
180	return -ENOENT;
181}
182
183/* fill CLC proposal msg with ipv6 prefixes from device */
184static int smc_clc_prfx_set6_rcu(struct dst_entry *dst,
185				 struct smc_clc_msg_proposal_prefix *prop,
186				 struct smc_clc_ipv6_prefix *ipv6_prfx)
187{
188#if IS_ENABLED(CONFIG_IPV6)
189	struct inet6_dev *in6_dev = __in6_dev_get(dst->dev);
190	struct inet6_ifaddr *ifa;
191	int cnt = 0;
192
193	if (!in6_dev)
194		return -ENODEV;
195	/* use a maximum of 8 IPv6 prefixes from device */
196	list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
197		if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
198			continue;
199		ipv6_addr_prefix(&ipv6_prfx[cnt].prefix,
200				 &ifa->addr, ifa->prefix_len);
201		ipv6_prfx[cnt].prefix_len = ifa->prefix_len;
202		cnt++;
203		if (cnt == SMC_CLC_MAX_V6_PREFIX)
204			break;
205	}
206	prop->ipv6_prefixes_cnt = cnt;
207	if (cnt)
208		return 0;
209#endif
210	return -ENOENT;
211}
212
213/* retrieve and set prefixes in CLC proposal msg */
214static int smc_clc_prfx_set(struct socket *clcsock,
215			    struct smc_clc_msg_proposal_prefix *prop,
216			    struct smc_clc_ipv6_prefix *ipv6_prfx)
217{
218	struct dst_entry *dst = sk_dst_get(clcsock->sk);
219	struct sockaddr_storage addrs;
220	struct sockaddr_in6 *addr6;
221	struct sockaddr_in *addr;
222	int rc = -ENOENT;
223
224	if (!dst) {
225		rc = -ENOTCONN;
226		goto out;
227	}
228	if (!dst->dev) {
229		rc = -ENODEV;
230		goto out_rel;
231	}
232	/* get address to which the internal TCP socket is bound */
233	if (kernel_getsockname(clcsock, (struct sockaddr *)&addrs) < 0)
234		goto out_rel;
235	/* analyze IP specific data of net_device belonging to TCP socket */
236	addr6 = (struct sockaddr_in6 *)&addrs;
237	rcu_read_lock();
238	if (addrs.ss_family == PF_INET) {
239		/* IPv4 */
240		addr = (struct sockaddr_in *)&addrs;
241		rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop);
242	} else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) {
243		/* mapped IPv4 address - peer is IPv4 only */
244		rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3],
245					   prop);
246	} else {
247		/* IPv6 */
248		rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx);
249	}
250	rcu_read_unlock();
251out_rel:
252	dst_release(dst);
253out:
254	return rc;
255}
256
257/* match ipv4 addrs of dev against addr in CLC proposal */
258static int smc_clc_prfx_match4_rcu(struct net_device *dev,
259				   struct smc_clc_msg_proposal_prefix *prop)
260{
261	struct in_device *in_dev = __in_dev_get_rcu(dev);
262	const struct in_ifaddr *ifa;
263
264	if (!in_dev)
265		return -ENODEV;
266	in_dev_for_each_ifa_rcu(ifa, in_dev) {
267		if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) &&
268		    inet_ifa_match(prop->outgoing_subnet, ifa))
269			return 0;
270	}
271
272	return -ENOENT;
273}
274
275/* match ipv6 addrs of dev against addrs in CLC proposal */
276static int smc_clc_prfx_match6_rcu(struct net_device *dev,
277				   struct smc_clc_msg_proposal_prefix *prop)
278{
279#if IS_ENABLED(CONFIG_IPV6)
280	struct inet6_dev *in6_dev = __in6_dev_get(dev);
281	struct smc_clc_ipv6_prefix *ipv6_prfx;
282	struct inet6_ifaddr *ifa;
283	int i, max;
284
285	if (!in6_dev)
286		return -ENODEV;
287	/* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */
288	ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop));
289	max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX);
290	list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
291		if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
292			continue;
293		for (i = 0; i < max; i++) {
294			if (ifa->prefix_len == ipv6_prfx[i].prefix_len &&
295			    ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix,
296					      ifa->prefix_len))
297				return 0;
298		}
299	}
300#endif
301	return -ENOENT;
302}
303
304/* check if proposed prefixes match one of our device prefixes */
305int smc_clc_prfx_match(struct socket *clcsock,
306		       struct smc_clc_msg_proposal_prefix *prop)
307{
308	struct dst_entry *dst = sk_dst_get(clcsock->sk);
309	int rc;
310
311	if (!dst) {
312		rc = -ENOTCONN;
313		goto out;
314	}
315	if (!dst->dev) {
316		rc = -ENODEV;
317		goto out_rel;
318	}
319	rcu_read_lock();
320	if (!prop->ipv6_prefixes_cnt)
321		rc = smc_clc_prfx_match4_rcu(dst->dev, prop);
322	else
323		rc = smc_clc_prfx_match6_rcu(dst->dev, prop);
324	rcu_read_unlock();
325out_rel:
326	dst_release(dst);
327out:
328	return rc;
329}
330
331/* Wait for data on the tcp-socket, analyze received data
332 * Returns:
333 * 0 if success and it was not a decline that we received.
 334 * SMC_CLC_DECL_PEERDECL if decline received for fallback w/o another decl send.
335 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
336 */
337int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
338		     u8 expected_type, unsigned long timeout)
339{
340	long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo;
341	struct sock *clc_sk = smc->clcsock->sk;
342	struct smc_clc_msg_hdr *clcm = buf;
343	struct msghdr msg = {NULL, 0};
344	int reason_code = 0;
345	struct kvec vec = {buf, buflen};
346	int len, datlen, recvlen;
347	bool check_trl = true;
348	int krflags;
349
350	/* peek the first few bytes to determine length of data to receive
351	 * so we don't consume any subsequent CLC message or payload data
352	 * in the TCP byte stream
353	 */
354	/*
355	 * Caller must make sure that buflen is no less than
356	 * sizeof(struct smc_clc_msg_hdr)
357	 */
358	krflags = MSG_PEEK | MSG_WAITALL;
359	clc_sk->sk_rcvtimeo = timeout;
360	iov_iter_kvec(&msg.msg_iter, READ, &vec, 1,
361			sizeof(struct smc_clc_msg_hdr));
362	len = sock_recvmsg(smc->clcsock, &msg, krflags);
363	if (signal_pending(current)) {
364		reason_code = -EINTR;
365		clc_sk->sk_err = EINTR;
366		smc->sk.sk_err = EINTR;
367		goto out;
368	}
369	if (clc_sk->sk_err) {
370		reason_code = -clc_sk->sk_err;
371		if (clc_sk->sk_err == EAGAIN &&
372		    expected_type == SMC_CLC_DECLINE)
373			clc_sk->sk_err = 0; /* reset for fallback usage */
374		else
375			smc->sk.sk_err = clc_sk->sk_err;
376		goto out;
377	}
378	if (!len) { /* peer has performed orderly shutdown */
379		smc->sk.sk_err = ECONNRESET;
380		reason_code = -ECONNRESET;
381		goto out;
382	}
383	if (len < 0) {
384		if (len != -EAGAIN || expected_type != SMC_CLC_DECLINE)
385			smc->sk.sk_err = -len;
386		reason_code = len;
387		goto out;
388	}
389	datlen = ntohs(clcm->length);
390	if ((len < sizeof(struct smc_clc_msg_hdr)) ||
391	    (clcm->version < SMC_V1) ||
392	    ((clcm->type != SMC_CLC_DECLINE) &&
393	     (clcm->type != expected_type))) {
394		smc->sk.sk_err = EPROTO;
395		reason_code = -EPROTO;
396		goto out;
397	}
398
399	/* receive the complete CLC message */
400	memset(&msg, 0, sizeof(struct msghdr));
401	if (datlen > buflen) {
402		check_trl = false;
403		recvlen = buflen;
404	} else {
405		recvlen = datlen;
406	}
407	iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen);
408	krflags = MSG_WAITALL;
409	len = sock_recvmsg(smc->clcsock, &msg, krflags);
410	if (len < recvlen || !smc_clc_msg_hdr_valid(clcm, check_trl)) {
411		smc->sk.sk_err = EPROTO;
412		reason_code = -EPROTO;
413		goto out;
414	}
415	datlen -= len;
416	while (datlen) {
417		u8 tmp[SMC_CLC_RECV_BUF_LEN];
418
419		vec.iov_base = &tmp;
420		vec.iov_len = SMC_CLC_RECV_BUF_LEN;
421		/* receive remaining proposal message */
422		recvlen = datlen > SMC_CLC_RECV_BUF_LEN ?
423						SMC_CLC_RECV_BUF_LEN : datlen;
424		iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen);
425		len = sock_recvmsg(smc->clcsock, &msg, krflags);
426		datlen -= len;
427	}
428	if (clcm->type == SMC_CLC_DECLINE) {
429		struct smc_clc_msg_decline *dclc;
430
431		dclc = (struct smc_clc_msg_decline *)clcm;
432		reason_code = SMC_CLC_DECL_PEERDECL;
433		smc->peer_diagnosis = ntohl(dclc->peer_diagnosis);
434		if (((struct smc_clc_msg_decline *)buf)->hdr.typev2 &
435						SMC_FIRST_CONTACT_MASK) {
436			smc->conn.lgr->sync_err = 1;
437			smc_lgr_terminate_sched(smc->conn.lgr);
438		}
439	}
440
441out:
442	clc_sk->sk_rcvtimeo = rcvtimeo;
443	return reason_code;
444}
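/* A minimal sketch of the calling pattern, for orientation only (the real
 * callers live in af_smc.c and differ in detail):
 *
 *	struct smc_clc_msg_accept_confirm_v2 aclc;
 *	int rc;
 *
 *	rc = smc_clc_wait_msg(smc, &aclc, sizeof(aclc), SMC_CLC_ACCEPT,
 *			      CLC_WAIT_TIME);
 *	if (rc == SMC_CLC_DECL_PEERDECL)
 *		handle_peer_decline(smc->peer_diagnosis);
 *	else if (rc < 0)
 *		fall_back_to_tcp(smc);
 *
 * handle_peer_decline() and fall_back_to_tcp() are placeholders for the
 * caller's decline handling and TCP fallback, not functions of this file.
 */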
445
446/* send CLC DECLINE message across internal TCP socket */
447int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version)
448{
449	struct smc_clc_msg_decline dclc;
450	struct msghdr msg;
451	struct kvec vec;
452	int len;
453
454	memset(&dclc, 0, sizeof(dclc));
455	memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
456	dclc.hdr.type = SMC_CLC_DECLINE;
457	dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
458	dclc.hdr.version = version;
459	dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX;
460	dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ?
461						SMC_FIRST_CONTACT_MASK : 0;
462	if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) &&
463	    smc_ib_is_valid_local_systemid())
464		memcpy(dclc.id_for_peer, local_systemid,
465		       sizeof(local_systemid));
466	dclc.peer_diagnosis = htonl(peer_diag_info);
467	memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
468
469	memset(&msg, 0, sizeof(msg));
470	vec.iov_base = &dclc;
471	vec.iov_len = sizeof(struct smc_clc_msg_decline);
472	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
473			     sizeof(struct smc_clc_msg_decline));
474	if (len < 0 || len < sizeof(struct smc_clc_msg_decline))
475		len = -EPROTO;
476	return len > 0 ? 0 : len;
477}
478
479/* send CLC PROPOSAL message across internal TCP socket */
480int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
481{
482	struct smc_clc_smcd_v2_extension *smcd_v2_ext;
483	struct smc_clc_msg_proposal_prefix *pclc_prfx;
484	struct smc_clc_msg_proposal *pclc_base;
485	struct smc_clc_smcd_gid_chid *gidchids;
486	struct smc_clc_msg_proposal_area *pclc;
487	struct smc_clc_ipv6_prefix *ipv6_prfx;
488	struct smc_clc_v2_extension *v2_ext;
489	struct smc_clc_msg_smcd *pclc_smcd;
490	struct smc_clc_msg_trail *trl;
491	int len, i, plen, rc;
492	int reason_code = 0;
493	struct kvec vec[8];
494	struct msghdr msg;
495
496	pclc = kzalloc(sizeof(*pclc), GFP_KERNEL);
497	if (!pclc)
498		return -ENOMEM;
499
500	pclc_base = &pclc->pclc_base;
501	pclc_smcd = &pclc->pclc_smcd;
502	pclc_prfx = &pclc->pclc_prfx;
503	ipv6_prfx = pclc->pclc_prfx_ipv6;
504	v2_ext = &pclc->pclc_v2_ext;
505	smcd_v2_ext = &pclc->pclc_smcd_v2_ext;
506	gidchids = pclc->pclc_gidchids;
507	trl = &pclc->pclc_trl;
508
509	pclc_base->hdr.version = SMC_V2;
510	pclc_base->hdr.typev1 = ini->smc_type_v1;
511	pclc_base->hdr.typev2 = ini->smc_type_v2;
512	plen = sizeof(*pclc_base) + sizeof(*pclc_smcd) + sizeof(*trl);
513
514	/* retrieve ip prefixes for CLC proposal msg */
515	if (ini->smc_type_v1 != SMC_TYPE_N) {
516		rc = smc_clc_prfx_set(smc->clcsock, pclc_prfx, ipv6_prfx);
517		if (rc) {
518			if (ini->smc_type_v2 == SMC_TYPE_N) {
519				kfree(pclc);
520				return SMC_CLC_DECL_CNFERR;
521			}
522			pclc_base->hdr.typev1 = SMC_TYPE_N;
523		} else {
524			pclc_base->iparea_offset = htons(sizeof(*pclc_smcd));
525			plen += sizeof(*pclc_prfx) +
526					pclc_prfx->ipv6_prefixes_cnt *
527					sizeof(ipv6_prfx[0]);
528		}
529	}
530
531	/* build SMC Proposal CLC message */
532	memcpy(pclc_base->hdr.eyecatcher, SMC_EYECATCHER,
533	       sizeof(SMC_EYECATCHER));
534	pclc_base->hdr.type = SMC_CLC_PROPOSAL;
535	if (smcr_indicated(ini->smc_type_v1)) {
536		/* add SMC-R specifics */
537		memcpy(pclc_base->lcl.id_for_peer, local_systemid,
538		       sizeof(local_systemid));
539		memcpy(pclc_base->lcl.gid, ini->ib_gid, SMC_GID_SIZE);
540		memcpy(pclc_base->lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1],
541		       ETH_ALEN);
542	}
543	if (smcd_indicated(ini->smc_type_v1)) {
544		/* add SMC-D specifics */
545		if (ini->ism_dev[0]) {
546			pclc_smcd->ism.gid = htonll(ini->ism_dev[0]->local_gid);
547			pclc_smcd->ism.chid =
548				htons(smc_ism_get_chid(ini->ism_dev[0]));
549		}
550	}
551	if (ini->smc_type_v2 == SMC_TYPE_N) {
552		pclc_smcd->v2_ext_offset = 0;
553	} else {
554		u16 v2_ext_offset;
555		u8 *eid = NULL;
556
557		v2_ext_offset = sizeof(*pclc_smcd) -
558			offsetofend(struct smc_clc_msg_smcd, v2_ext_offset);
559		if (ini->smc_type_v1 != SMC_TYPE_N)
560			v2_ext_offset += sizeof(*pclc_prfx) +
561						pclc_prfx->ipv6_prefixes_cnt *
562						sizeof(ipv6_prfx[0]);
563		pclc_smcd->v2_ext_offset = htons(v2_ext_offset);
564		v2_ext->hdr.eid_cnt = 0;
565		v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt;
566		v2_ext->hdr.flag.release = SMC_RELEASE;
567		v2_ext->hdr.flag.seid = 1;
568		v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) -
569				offsetofend(struct smc_clnt_opts_area_hdr,
570					    smcd_v2_ext_offset) +
571				v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN);
572		if (ini->ism_dev[0])
573			smc_ism_get_system_eid(ini->ism_dev[0], &eid);
574		else
575			smc_ism_get_system_eid(ini->ism_dev[1], &eid);
576		if (eid)
577			memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN);
578		plen += sizeof(*v2_ext) + sizeof(*smcd_v2_ext);
579		if (ini->ism_offered_cnt) {
580			for (i = 1; i <= ini->ism_offered_cnt; i++) {
581				gidchids[i - 1].gid =
582					htonll(ini->ism_dev[i]->local_gid);
583				gidchids[i - 1].chid =
584					htons(smc_ism_get_chid(ini->ism_dev[i]));
585			}
586			plen += ini->ism_offered_cnt *
587				sizeof(struct smc_clc_smcd_gid_chid);
588		}
589	}
590	pclc_base->hdr.length = htons(plen);
591	memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
592
593	/* send SMC Proposal CLC message */
594	memset(&msg, 0, sizeof(msg));
595	i = 0;
596	vec[i].iov_base = pclc_base;
597	vec[i++].iov_len = sizeof(*pclc_base);
598	vec[i].iov_base = pclc_smcd;
599	vec[i++].iov_len = sizeof(*pclc_smcd);
600	if (ini->smc_type_v1 != SMC_TYPE_N) {
601		vec[i].iov_base = pclc_prfx;
602		vec[i++].iov_len = sizeof(*pclc_prfx);
603		if (pclc_prfx->ipv6_prefixes_cnt > 0) {
604			vec[i].iov_base = ipv6_prfx;
605			vec[i++].iov_len = pclc_prfx->ipv6_prefixes_cnt *
606					   sizeof(ipv6_prfx[0]);
607		}
608	}
609	if (ini->smc_type_v2 != SMC_TYPE_N) {
610		vec[i].iov_base = v2_ext;
611		vec[i++].iov_len = sizeof(*v2_ext);
612		vec[i].iov_base = smcd_v2_ext;
613		vec[i++].iov_len = sizeof(*smcd_v2_ext);
614		if (ini->ism_offered_cnt) {
615			vec[i].iov_base = gidchids;
616			vec[i++].iov_len = ini->ism_offered_cnt *
617					sizeof(struct smc_clc_smcd_gid_chid);
618		}
619	}
620	vec[i].iov_base = trl;
621	vec[i++].iov_len = sizeof(*trl);
622	/* due to the few bytes needed for clc-handshake this cannot block */
623	len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen);
624	if (len < 0) {
625		smc->sk.sk_err = smc->clcsock->sk->sk_err;
626		reason_code = -smc->sk.sk_err;
627	} else if (len < ntohs(pclc_base->hdr.length)) {
628		reason_code = -ENETUNREACH;
629		smc->sk.sk_err = -reason_code;
630	}
631
632	kfree(pclc);
633	return reason_code;
634}
635
636/* build and send CLC CONFIRM / ACCEPT message */
637static int smc_clc_send_confirm_accept(struct smc_sock *smc,
638				       struct smc_clc_msg_accept_confirm_v2 *clc_v2,
639				       int first_contact, u8 version)
640{
641	struct smc_connection *conn = &smc->conn;
642	struct smc_clc_msg_accept_confirm *clc;
643	struct smc_clc_first_contact_ext fce;
644	struct smc_clc_msg_trail trl;
645	struct kvec vec[3];
646	struct msghdr msg;
647	int i, len;
648
649	/* send SMC Confirm CLC msg */
650	clc = (struct smc_clc_msg_accept_confirm *)clc_v2;
651	clc->hdr.version = version;	/* SMC version */
652	if (first_contact)
653		clc->hdr.typev2 |= SMC_FIRST_CONTACT_MASK;
654	if (conn->lgr->is_smcd) {
655		/* SMC-D specific settings */
656		memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER,
657		       sizeof(SMCD_EYECATCHER));
658		clc->hdr.typev1 = SMC_TYPE_D;
659		clc->d0.gid = conn->lgr->smcd->local_gid;
660		clc->d0.token = conn->rmb_desc->token;
661		clc->d0.dmbe_size = conn->rmbe_size_short;
662		clc->d0.dmbe_idx = 0;
663		memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
664		if (version == SMC_V1) {
665			clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
666		} else {
667			u8 *eid = NULL;
668
669			clc_v2->chid = htons(smc_ism_get_chid(conn->lgr->smcd));
670			smc_ism_get_system_eid(conn->lgr->smcd, &eid);
671			if (eid)
672				memcpy(clc_v2->eid, eid, SMC_MAX_EID_LEN);
673			len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
674			if (first_contact)
675				smc_clc_fill_fce(&fce, &len);
676			clc_v2->hdr.length = htons(len);
677		}
678		memcpy(trl.eyecatcher, SMCD_EYECATCHER,
679		       sizeof(SMCD_EYECATCHER));
680	} else {
681		struct smc_link *link = conn->lnk;
682
683		/* SMC-R specific settings */
684		link = conn->lnk;
685		memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER,
686		       sizeof(SMC_EYECATCHER));
687		clc->hdr.typev1 = SMC_TYPE_R;
688		clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
689		memcpy(clc->r0.lcl.id_for_peer, local_systemid,
690		       sizeof(local_systemid));
691		memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE);
692		memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
693		       ETH_ALEN);
694		hton24(clc->r0.qpn, link->roce_qp->qp_num);
695		clc->r0.rmb_rkey =
696			htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
697		clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
698		clc->r0.rmbe_alert_token = htonl(conn->alert_token_local);
699		switch (clc->hdr.type) {
700		case SMC_CLC_ACCEPT:
701			clc->r0.qp_mtu = link->path_mtu;
702			break;
703		case SMC_CLC_CONFIRM:
704			clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu);
705			break;
706		}
707		clc->r0.rmbe_size = conn->rmbe_size_short;
708		clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
709				(conn->rmb_desc->sgt[link->link_idx].sgl));
710		hton24(clc->r0.psn, link->psn_initial);
711		memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
712	}
713
714	memset(&msg, 0, sizeof(msg));
715	i = 0;
716	vec[i].iov_base = clc_v2;
717	if (version > SMC_V1)
718		vec[i++].iov_len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 - sizeof(trl);
719	else
720		vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ?
721						SMCD_CLC_ACCEPT_CONFIRM_LEN :
722						SMCR_CLC_ACCEPT_CONFIRM_LEN) -
723				   sizeof(trl);
724	if (version > SMC_V1 && first_contact) {
725		vec[i].iov_base = &fce;
726		vec[i++].iov_len = sizeof(fce);
727	}
728	vec[i].iov_base = &trl;
729	vec[i++].iov_len = sizeof(trl);
730		return kernel_sendmsg(smc->clcsock, &msg, vec, i,
731			      ntohs(clc->hdr.length));
732}
733
734/* send CLC CONFIRM message across internal TCP socket */
735int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
736			 u8 version)
737{
738	struct smc_clc_msg_accept_confirm_v2 cclc_v2;
739	int reason_code = 0;
740	int len;
741
742	/* send SMC Confirm CLC msg */
743	memset(&cclc_v2, 0, sizeof(cclc_v2));
744	cclc_v2.hdr.type = SMC_CLC_CONFIRM;
745	len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact,
746					  version);
747	if (len < ntohs(cclc_v2.hdr.length)) {
748		if (len >= 0) {
749			reason_code = -ENETUNREACH;
750			smc->sk.sk_err = -reason_code;
751		} else {
752			smc->sk.sk_err = smc->clcsock->sk->sk_err;
753			reason_code = -smc->sk.sk_err;
754		}
755	}
756	return reason_code;
757}
758
759/* send CLC ACCEPT message across internal TCP socket */
760int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
761			u8 version)
762{
763	struct smc_clc_msg_accept_confirm_v2 aclc_v2;
764	int len;
765
766	memset(&aclc_v2, 0, sizeof(aclc_v2));
767	aclc_v2.hdr.type = SMC_CLC_ACCEPT;
768	len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact,
769					  version);
770	if (len < ntohs(aclc_v2.hdr.length))
771		len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
772
773	return len > 0 ? 0 : len;
774}
775
776void smc_clc_get_hostname(u8 **host)
777{
778	*host = &smc_hostname[0];
779}
780
781void __init smc_clc_init(void)
782{
783	struct new_utsname *u;
784
785	memset(smc_hostname, _S, sizeof(smc_hostname)); /* ASCII blanks */
786	u = utsname();
787	memcpy(smc_hostname, u->nodename,
788	       min_t(size_t, strlen(u->nodename), sizeof(smc_hostname)));
789}
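Relative to the v5.14.15 code above, the v6.2 copy of the same file below adds a user-EID table with generic netlink handlers (smc_nl_add_ueid, smc_nl_remove_ueid, smc_nl_flush_ueid, smc_nl_dump_ueid, smc_nl_dump_seid, smc_nl_enable_seid, smc_nl_disable_seid) and EID negotiation via smc_clc_match_eid(). It also gains a dedicated decline validator and a v2 decline message, the SMCR_CLC_ACCEPT_CONFIRM_LEN_V2 length check, and proposal building that fills the v2 extension from the EID table instead of hard-coding eid_cnt = 0; the iov_iter direction flag READ is replaced by ITER_DEST.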
net/smc/smc_clc.c (v6.2)
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
   4 *
   5 *  CLC (connection layer control) handshake over initial TCP socket to
   6 *  prepare for RDMA traffic
   7 *
   8 *  Copyright IBM Corp. 2016, 2018
   9 *
  10 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
  11 */
  12
  13#include <linux/in.h>
  14#include <linux/inetdevice.h>
  15#include <linux/if_ether.h>
  16#include <linux/sched/signal.h>
  17#include <linux/utsname.h>
  18#include <linux/ctype.h>
  19
  20#include <net/addrconf.h>
  21#include <net/sock.h>
  22#include <net/tcp.h>
  23
  24#include "smc.h"
  25#include "smc_core.h"
  26#include "smc_clc.h"
  27#include "smc_ib.h"
  28#include "smc_ism.h"
  29#include "smc_netlink.h"
  30
  31#define SMCR_CLC_ACCEPT_CONFIRM_LEN 68
  32#define SMCD_CLC_ACCEPT_CONFIRM_LEN 48
  33#define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78
  34#define SMCR_CLC_ACCEPT_CONFIRM_LEN_V2 108
  35#define SMC_CLC_RECV_BUF_LEN	100
  36
  37/* eye catcher "SMCR" EBCDIC for CLC messages */
  38static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
  39/* eye catcher "SMCD" EBCDIC for CLC messages */
  40static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'};
  41
  42static u8 smc_hostname[SMC_MAX_HOSTNAME_LEN];
  43
  44struct smc_clc_eid_table {
  45	rwlock_t lock;
  46	struct list_head list;
  47	u8 ueid_cnt;
  48	u8 seid_enabled;
  49};
  50
  51static struct smc_clc_eid_table smc_clc_eid_table;
  52
  53struct smc_clc_eid_entry {
  54	struct list_head list;
  55	u8 eid[SMC_MAX_EID_LEN];
  56};
  57
  58/* The size of a user EID is 32 characters.
  59 * Valid characters should be (single-byte character set) A-Z, 0-9, '.' and '-'.
  60 * Blanks should only be used to pad to the expected size.
  61 * First character must be alphanumeric.
  62 */
  63static bool smc_clc_ueid_valid(char *ueid)
  64{
  65	char *end = ueid + SMC_MAX_EID_LEN;
  66
  67	while (--end >= ueid && isspace(*end))
  68		;
  69	if (end < ueid)
  70		return false;
  71	if (!isalnum(*ueid) || islower(*ueid))
  72		return false;
  73	while (ueid <= end) {
  74		if ((!isalnum(*ueid) || islower(*ueid)) && *ueid != '.' &&
  75		    *ueid != '-')
  76			return false;
  77		ueid++;
  78	}
  79	return true;
  80}
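/* For illustration: "EXAMPLE-EID-01" padded with trailing blanks to 32
 * characters passes this check, "example-eid-01" fails on the lowercase
 * letters, and an all-blank EID fails because no non-space character
 * remains.
 */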
  81
  82static int smc_clc_ueid_add(char *ueid)
  83{
  84	struct smc_clc_eid_entry *new_ueid, *tmp_ueid;
  85	int rc;
  86
  87	if (!smc_clc_ueid_valid(ueid))
  88		return -EINVAL;
  89
  90	/* add a new ueid entry to the ueid table if there isn't one */
  91	new_ueid = kzalloc(sizeof(*new_ueid), GFP_KERNEL);
  92	if (!new_ueid)
  93		return -ENOMEM;
  94	memcpy(new_ueid->eid, ueid, SMC_MAX_EID_LEN);
  95
  96	write_lock(&smc_clc_eid_table.lock);
  97	if (smc_clc_eid_table.ueid_cnt >= SMC_MAX_UEID) {
  98		rc = -ERANGE;
  99		goto err_out;
 100	}
 101	list_for_each_entry(tmp_ueid, &smc_clc_eid_table.list, list) {
 102		if (!memcmp(tmp_ueid->eid, ueid, SMC_MAX_EID_LEN)) {
 103			rc = -EEXIST;
 104			goto err_out;
 105		}
 106	}
 107	list_add_tail(&new_ueid->list, &smc_clc_eid_table.list);
 108	smc_clc_eid_table.ueid_cnt++;
 109	write_unlock(&smc_clc_eid_table.lock);
 110	return 0;
 111
 112err_out:
 113	write_unlock(&smc_clc_eid_table.lock);
 114	kfree(new_ueid);
 115	return rc;
 116}
 117
 118int smc_clc_ueid_count(void)
 119{
 120	int count;
 121
 122	read_lock(&smc_clc_eid_table.lock);
 123	count = smc_clc_eid_table.ueid_cnt;
 124	read_unlock(&smc_clc_eid_table.lock);
 125
 126	return count;
 127}
 128
 129int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info)
 130{
 131	struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY];
 132	char *ueid;
 133
 134	if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1)
 135		return -EINVAL;
 136	ueid = (char *)nla_data(nla_ueid);
 137
 138	return smc_clc_ueid_add(ueid);
 139}
 140
 141/* remove one or all ueid entries from the table */
 142static int smc_clc_ueid_remove(char *ueid)
 143{
 144	struct smc_clc_eid_entry *lst_ueid, *tmp_ueid;
 145	int rc = -ENOENT;
 146
 147	/* remove table entry */
 148	write_lock(&smc_clc_eid_table.lock);
 149	list_for_each_entry_safe(lst_ueid, tmp_ueid, &smc_clc_eid_table.list,
 150				 list) {
 151		if (!ueid || !memcmp(lst_ueid->eid, ueid, SMC_MAX_EID_LEN)) {
 152			list_del(&lst_ueid->list);
 153			smc_clc_eid_table.ueid_cnt--;
 154			kfree(lst_ueid);
 155			rc = 0;
 156		}
 157	}
 158	if (!rc && !smc_clc_eid_table.ueid_cnt) {
 159		smc_clc_eid_table.seid_enabled = 1;
 160		rc = -EAGAIN;	/* indicate success and enabling of seid */
 161	}
 162	write_unlock(&smc_clc_eid_table.lock);
 163	return rc;
 164}
 165
 166int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info)
 167{
 168	struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY];
 169	char *ueid;
 170
 171	if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1)
 172		return -EINVAL;
 173	ueid = (char *)nla_data(nla_ueid);
 174
 175	return smc_clc_ueid_remove(ueid);
 176}
 177
 178int smc_nl_flush_ueid(struct sk_buff *skb, struct genl_info *info)
 179{
 180	smc_clc_ueid_remove(NULL);
 181	return 0;
 182}
 183
 184static int smc_nl_ueid_dumpinfo(struct sk_buff *skb, u32 portid, u32 seq,
 185				u32 flags, char *ueid)
 186{
 187	char ueid_str[SMC_MAX_EID_LEN + 1];
 188	void *hdr;
 189
 190	hdr = genlmsg_put(skb, portid, seq, &smc_gen_nl_family,
 191			  flags, SMC_NETLINK_DUMP_UEID);
 192	if (!hdr)
 193		return -ENOMEM;
 194	memcpy(ueid_str, ueid, SMC_MAX_EID_LEN);
 195	ueid_str[SMC_MAX_EID_LEN] = 0;
 196	if (nla_put_string(skb, SMC_NLA_EID_TABLE_ENTRY, ueid_str)) {
 197		genlmsg_cancel(skb, hdr);
 198		return -EMSGSIZE;
 199	}
 200	genlmsg_end(skb, hdr);
 201	return 0;
 202}
 203
 204static int _smc_nl_ueid_dump(struct sk_buff *skb, u32 portid, u32 seq,
 205			     int start_idx)
 206{
 207	struct smc_clc_eid_entry *lst_ueid;
 208	int idx = 0;
 209
 210	read_lock(&smc_clc_eid_table.lock);
 211	list_for_each_entry(lst_ueid, &smc_clc_eid_table.list, list) {
 212		if (idx++ < start_idx)
 213			continue;
 214		if (smc_nl_ueid_dumpinfo(skb, portid, seq, NLM_F_MULTI,
 215					 lst_ueid->eid)) {
 216			--idx;
 217			break;
 218		}
 219	}
 220	read_unlock(&smc_clc_eid_table.lock);
 221	return idx;
 222}
 223
 224int smc_nl_dump_ueid(struct sk_buff *skb, struct netlink_callback *cb)
 225{
 226	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
 227	int idx;
 228
 229	idx = _smc_nl_ueid_dump(skb, NETLINK_CB(cb->skb).portid,
 230				cb->nlh->nlmsg_seq, cb_ctx->pos[0]);
 231
 232	cb_ctx->pos[0] = idx;
 233	return skb->len;
 234}
 235
 236int smc_nl_dump_seid(struct sk_buff *skb, struct netlink_callback *cb)
 237{
 238	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
 239	char seid_str[SMC_MAX_EID_LEN + 1];
 240	u8 seid_enabled;
 241	void *hdr;
 242	u8 *seid;
 243
 244	if (cb_ctx->pos[0])
 245		return skb->len;
 246
 247	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 248			  &smc_gen_nl_family, NLM_F_MULTI,
 249			  SMC_NETLINK_DUMP_SEID);
 250	if (!hdr)
 251		return -ENOMEM;
 252	if (!smc_ism_is_v2_capable())
 253		goto end;
 254
 255	smc_ism_get_system_eid(&seid);
 256	memcpy(seid_str, seid, SMC_MAX_EID_LEN);
 257	seid_str[SMC_MAX_EID_LEN] = 0;
 258	if (nla_put_string(skb, SMC_NLA_SEID_ENTRY, seid_str))
 259		goto err;
 260	read_lock(&smc_clc_eid_table.lock);
 261	seid_enabled = smc_clc_eid_table.seid_enabled;
 262	read_unlock(&smc_clc_eid_table.lock);
 263	if (nla_put_u8(skb, SMC_NLA_SEID_ENABLED, seid_enabled))
 264		goto err;
 265end:
 266	genlmsg_end(skb, hdr);
 267	cb_ctx->pos[0]++;
 268	return skb->len;
 269err:
 270	genlmsg_cancel(skb, hdr);
 271	return -EMSGSIZE;
 272}
 273
 274int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info)
 275{
 276	write_lock(&smc_clc_eid_table.lock);
 277	smc_clc_eid_table.seid_enabled = 1;
 278	write_unlock(&smc_clc_eid_table.lock);
 279	return 0;
 280}
 281
 282int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info)
 283{
 284	int rc = 0;
 285
 286	write_lock(&smc_clc_eid_table.lock);
 287	if (!smc_clc_eid_table.ueid_cnt)
 288		rc = -ENOENT;
 289	else
 290		smc_clc_eid_table.seid_enabled = 0;
 291	write_unlock(&smc_clc_eid_table.lock);
 292	return rc;
 293}
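/* The netlink ops above are the kernel side of EID administration over the
 * smc_gen_nl_family generic netlink family; a userspace EID manager (for
 * example the smcd utility from smc-tools, assuming its ueid/seid
 * subcommands) issues these add/remove/dump/enable/disable requests.
 */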
 294
 295static bool _smc_clc_match_ueid(u8 *peer_ueid)
 296{
 297	struct smc_clc_eid_entry *tmp_ueid;
 298
 299	list_for_each_entry(tmp_ueid, &smc_clc_eid_table.list, list) {
 300		if (!memcmp(tmp_ueid->eid, peer_ueid, SMC_MAX_EID_LEN))
 301			return true;
 302	}
 303	return false;
 304}
 305
 306bool smc_clc_match_eid(u8 *negotiated_eid,
 307		       struct smc_clc_v2_extension *smc_v2_ext,
 308		       u8 *peer_eid, u8 *local_eid)
 309{
 310	bool match = false;
 311	int i;
 312
 313	negotiated_eid[0] = 0;
 314	read_lock(&smc_clc_eid_table.lock);
 315	if (peer_eid && local_eid &&
 316	    smc_clc_eid_table.seid_enabled &&
 317	    smc_v2_ext->hdr.flag.seid &&
 318	    !memcmp(peer_eid, local_eid, SMC_MAX_EID_LEN)) {
 319		memcpy(negotiated_eid, peer_eid, SMC_MAX_EID_LEN);
 320		match = true;
 321		goto out;
 322	}
 323
 324	for (i = 0; i < smc_v2_ext->hdr.eid_cnt; i++) {
 325		if (_smc_clc_match_ueid(smc_v2_ext->user_eids[i])) {
 326			memcpy(negotiated_eid, smc_v2_ext->user_eids[i],
 327			       SMC_MAX_EID_LEN);
 328			match = true;
 329			goto out;
 330		}
 331	}
 332out:
 333	read_unlock(&smc_clc_eid_table.lock);
 334	return match;
 335}
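/* Precedence in smc_clc_match_eid(): if both sides have SEID enabled and
 * report identical system EIDs, that SEID becomes the negotiated EID;
 * otherwise the first peer user EID also present in the local table is
 * chosen.
 */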
 336
 337/* check arriving CLC proposal */
 338static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc)
 339{
 340	struct smc_clc_msg_proposal_prefix *pclc_prfx;
 341	struct smc_clc_smcd_v2_extension *smcd_v2_ext;
 342	struct smc_clc_msg_hdr *hdr = &pclc->hdr;
 343	struct smc_clc_v2_extension *v2_ext;
 344
 345	v2_ext = smc_get_clc_v2_ext(pclc);
 346	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
 347	if (hdr->version == SMC_V1) {
 348		if (hdr->typev1 == SMC_TYPE_N)
 349			return false;
 350		if (ntohs(hdr->length) !=
 351			sizeof(*pclc) + ntohs(pclc->iparea_offset) +
 352			sizeof(*pclc_prfx) +
 353			pclc_prfx->ipv6_prefixes_cnt *
 354				sizeof(struct smc_clc_ipv6_prefix) +
 355			sizeof(struct smc_clc_msg_trail))
 356			return false;
 357	} else {
 358		if (ntohs(hdr->length) !=
 359			sizeof(*pclc) +
 360			sizeof(struct smc_clc_msg_smcd) +
 361			(hdr->typev1 != SMC_TYPE_N ?
 362				sizeof(*pclc_prfx) +
 363				pclc_prfx->ipv6_prefixes_cnt *
 364				sizeof(struct smc_clc_ipv6_prefix) : 0) +
 365			(hdr->typev2 != SMC_TYPE_N ?
 366				sizeof(*v2_ext) +
 367				v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN : 0) +
 368			(smcd_indicated(hdr->typev2) ?
 369				sizeof(*smcd_v2_ext) + v2_ext->hdr.ism_gid_cnt *
 370					sizeof(struct smc_clc_smcd_gid_chid) :
 371				0) +
 372			sizeof(struct smc_clc_msg_trail))
 373			return false;
 374	}
 375	return true;
 376}
 377
 378/* check arriving CLC accept or confirm */
 379static bool
 380smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2)
 381{
 382	struct smc_clc_msg_hdr *hdr = &clc_v2->hdr;
 383
 384	if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D)
 385		return false;
 386	if (hdr->version == SMC_V1) {
 387		if ((hdr->typev1 == SMC_TYPE_R &&
 388		     ntohs(hdr->length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) ||
 389		    (hdr->typev1 == SMC_TYPE_D &&
 390		     ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN))
 391			return false;
 392	} else {
 393		if (hdr->typev1 == SMC_TYPE_D &&
 394		    ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 &&
 395		    (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 +
 396				sizeof(struct smc_clc_first_contact_ext)))
 397			return false;
 398		if (hdr->typev1 == SMC_TYPE_R &&
 399		    ntohs(hdr->length) < SMCR_CLC_ACCEPT_CONFIRM_LEN_V2)
 400			return false;
 401	}
 402	return true;
 403}
 404
 405/* check arriving CLC decline */
 406static bool
 407smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc)
 408{
 409	struct smc_clc_msg_hdr *hdr = &dclc->hdr;
 410
 411	if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D)
 412		return false;
 413	if (hdr->version == SMC_V1) {
 414		if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline))
 415			return false;
 416	} else {
 417		if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline_v2))
 418			return false;
 419	}
 420	return true;
 421}
 422
 423static void smc_clc_fill_fce(struct smc_clc_first_contact_ext *fce, int *len)
 424{
 425	memset(fce, 0, sizeof(*fce));
 426	fce->os_type = SMC_CLC_OS_LINUX;
 427	fce->release = SMC_RELEASE;
 428	memcpy(fce->hostname, smc_hostname, sizeof(smc_hostname));
 429	(*len) += sizeof(*fce);
 430}
 431
 432/* check if received message has a correct header length and contains valid
 433 * heading and trailing eyecatchers
 434 */
 435static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl)
 436{
 437	struct smc_clc_msg_accept_confirm_v2 *clc_v2;
 438	struct smc_clc_msg_proposal *pclc;
 439	struct smc_clc_msg_decline *dclc;
 440	struct smc_clc_msg_trail *trl;
 441
 442	if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
 443	    memcmp(clcm->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
 444		return false;
 445	switch (clcm->type) {
 446	case SMC_CLC_PROPOSAL:
 447		pclc = (struct smc_clc_msg_proposal *)clcm;
 448		if (!smc_clc_msg_prop_valid(pclc))
 449			return false;
 450		trl = (struct smc_clc_msg_trail *)
 451			((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl));
 452		break;
 453	case SMC_CLC_ACCEPT:
 454	case SMC_CLC_CONFIRM:
 455		clc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)clcm;
 456		if (!smc_clc_msg_acc_conf_valid(clc_v2))
 457			return false;
 458		trl = (struct smc_clc_msg_trail *)
 459			((u8 *)clc_v2 + ntohs(clc_v2->hdr.length) -
 460							sizeof(*trl));
 461		break;
 462	case SMC_CLC_DECLINE:
 463		dclc = (struct smc_clc_msg_decline *)clcm;
 464		if (!smc_clc_msg_decl_valid(dclc))
 465			return false;
 466		check_trl = false;
 467		break;
 468	default:
 469		return false;
 470	}
 471	if (check_trl &&
 472	    memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
 473	    memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
 474		return false;
 475	return true;
 476}
 477
 478/* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */
 479static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4,
 480				 struct smc_clc_msg_proposal_prefix *prop)
 481{
 482	struct in_device *in_dev = __in_dev_get_rcu(dst->dev);
 483	const struct in_ifaddr *ifa;
 484
 485	if (!in_dev)
 486		return -ENODEV;
 487
 488	in_dev_for_each_ifa_rcu(ifa, in_dev) {
 489		if (!inet_ifa_match(ipv4, ifa))
 490			continue;
 491		prop->prefix_len = inet_mask_len(ifa->ifa_mask);
 492		prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask;
 493		/* prop->ipv6_prefixes_cnt = 0; already done by memset before */
 494		return 0;
 495	}
 496	return -ENOENT;
 497}
 498
 499/* fill CLC proposal msg with ipv6 prefixes from device */
 500static int smc_clc_prfx_set6_rcu(struct dst_entry *dst,
 501				 struct smc_clc_msg_proposal_prefix *prop,
 502				 struct smc_clc_ipv6_prefix *ipv6_prfx)
 503{
 504#if IS_ENABLED(CONFIG_IPV6)
 505	struct inet6_dev *in6_dev = __in6_dev_get(dst->dev);
 506	struct inet6_ifaddr *ifa;
 507	int cnt = 0;
 508
 509	if (!in6_dev)
 510		return -ENODEV;
 511	/* use a maximum of 8 IPv6 prefixes from device */
 512	list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
 513		if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
 514			continue;
 515		ipv6_addr_prefix(&ipv6_prfx[cnt].prefix,
 516				 &ifa->addr, ifa->prefix_len);
 517		ipv6_prfx[cnt].prefix_len = ifa->prefix_len;
 518		cnt++;
 519		if (cnt == SMC_CLC_MAX_V6_PREFIX)
 520			break;
 521	}
 522	prop->ipv6_prefixes_cnt = cnt;
 523	if (cnt)
 524		return 0;
 525#endif
 526	return -ENOENT;
 527}
 528
 529/* retrieve and set prefixes in CLC proposal msg */
 530static int smc_clc_prfx_set(struct socket *clcsock,
 531			    struct smc_clc_msg_proposal_prefix *prop,
 532			    struct smc_clc_ipv6_prefix *ipv6_prfx)
 533{
 534	struct dst_entry *dst = sk_dst_get(clcsock->sk);
 535	struct sockaddr_storage addrs;
 536	struct sockaddr_in6 *addr6;
 537	struct sockaddr_in *addr;
 538	int rc = -ENOENT;
 539
 540	if (!dst) {
 541		rc = -ENOTCONN;
 542		goto out;
 543	}
 544	if (!dst->dev) {
 545		rc = -ENODEV;
 546		goto out_rel;
 547	}
 548	/* get address to which the internal TCP socket is bound */
 549	if (kernel_getsockname(clcsock, (struct sockaddr *)&addrs) < 0)
 550		goto out_rel;
 551	/* analyze IP specific data of net_device belonging to TCP socket */
 552	addr6 = (struct sockaddr_in6 *)&addrs;
 553	rcu_read_lock();
 554	if (addrs.ss_family == PF_INET) {
 555		/* IPv4 */
 556		addr = (struct sockaddr_in *)&addrs;
 557		rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop);
 558	} else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) {
 559		/* mapped IPv4 address - peer is IPv4 only */
 560		rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3],
 561					   prop);
 562	} else {
 563		/* IPv6 */
 564		rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx);
 565	}
 566	rcu_read_unlock();
 567out_rel:
 568	dst_release(dst);
 569out:
 570	return rc;
 571}
 572
 573/* match ipv4 addrs of dev against addr in CLC proposal */
 574static int smc_clc_prfx_match4_rcu(struct net_device *dev,
 575				   struct smc_clc_msg_proposal_prefix *prop)
 576{
 577	struct in_device *in_dev = __in_dev_get_rcu(dev);
 578	const struct in_ifaddr *ifa;
 579
 580	if (!in_dev)
 581		return -ENODEV;
 582	in_dev_for_each_ifa_rcu(ifa, in_dev) {
 583		if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) &&
 584		    inet_ifa_match(prop->outgoing_subnet, ifa))
 585			return 0;
 586	}
 587
 588	return -ENOENT;
 589}
 590
 591/* match ipv6 addrs of dev against addrs in CLC proposal */
 592static int smc_clc_prfx_match6_rcu(struct net_device *dev,
 593				   struct smc_clc_msg_proposal_prefix *prop)
 594{
 595#if IS_ENABLED(CONFIG_IPV6)
 596	struct inet6_dev *in6_dev = __in6_dev_get(dev);
 597	struct smc_clc_ipv6_prefix *ipv6_prfx;
 598	struct inet6_ifaddr *ifa;
 599	int i, max;
 600
 601	if (!in6_dev)
 602		return -ENODEV;
 603	/* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */
 604	ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop));
 605	max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX);
 606	list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
 607		if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
 608			continue;
 609		for (i = 0; i < max; i++) {
 610			if (ifa->prefix_len == ipv6_prfx[i].prefix_len &&
 611			    ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix,
 612					      ifa->prefix_len))
 613				return 0;
 614		}
 615	}
 616#endif
 617	return -ENOENT;
 618}
 619
 620/* check if proposed prefixes match one of our device prefixes */
 621int smc_clc_prfx_match(struct socket *clcsock,
 622		       struct smc_clc_msg_proposal_prefix *prop)
 623{
 624	struct dst_entry *dst = sk_dst_get(clcsock->sk);
 625	int rc;
 626
 627	if (!dst) {
 628		rc = -ENOTCONN;
 629		goto out;
 630	}
 631	if (!dst->dev) {
 632		rc = -ENODEV;
 633		goto out_rel;
 634	}
 635	rcu_read_lock();
 636	if (!prop->ipv6_prefixes_cnt)
 637		rc = smc_clc_prfx_match4_rcu(dst->dev, prop);
 638	else
 639		rc = smc_clc_prfx_match6_rcu(dst->dev, prop);
 640	rcu_read_unlock();
 641out_rel:
 642	dst_release(dst);
 643out:
 644	return rc;
 645}
 646
 647/* Wait for data on the tcp-socket, analyze received data
 648 * Returns:
 649 * 0 if success and it was not a decline that we received.
  650 * SMC_CLC_DECL_PEERDECL if decline received for fallback w/o another decl send.
 651 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
 652 */
 653int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 654		     u8 expected_type, unsigned long timeout)
 655{
 656	long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo;
 657	struct sock *clc_sk = smc->clcsock->sk;
 658	struct smc_clc_msg_hdr *clcm = buf;
 659	struct msghdr msg = {NULL, 0};
 660	int reason_code = 0;
 661	struct kvec vec = {buf, buflen};
 662	int len, datlen, recvlen;
 663	bool check_trl = true;
 664	int krflags;
 665
 666	/* peek the first few bytes to determine length of data to receive
 667	 * so we don't consume any subsequent CLC message or payload data
 668	 * in the TCP byte stream
 669	 */
 670	/*
 671	 * Caller must make sure that buflen is no less than
 672	 * sizeof(struct smc_clc_msg_hdr)
 673	 */
 674	krflags = MSG_PEEK | MSG_WAITALL;
 675	clc_sk->sk_rcvtimeo = timeout;
 676	iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1,
 677			sizeof(struct smc_clc_msg_hdr));
 678	len = sock_recvmsg(smc->clcsock, &msg, krflags);
 679	if (signal_pending(current)) {
 680		reason_code = -EINTR;
 681		clc_sk->sk_err = EINTR;
 682		smc->sk.sk_err = EINTR;
 683		goto out;
 684	}
 685	if (clc_sk->sk_err) {
 686		reason_code = -clc_sk->sk_err;
 687		if (clc_sk->sk_err == EAGAIN &&
 688		    expected_type == SMC_CLC_DECLINE)
 689			clc_sk->sk_err = 0; /* reset for fallback usage */
 690		else
 691			smc->sk.sk_err = clc_sk->sk_err;
 692		goto out;
 693	}
 694	if (!len) { /* peer has performed orderly shutdown */
 695		smc->sk.sk_err = ECONNRESET;
 696		reason_code = -ECONNRESET;
 697		goto out;
 698	}
 699	if (len < 0) {
 700		if (len != -EAGAIN || expected_type != SMC_CLC_DECLINE)
 701			smc->sk.sk_err = -len;
 702		reason_code = len;
 703		goto out;
 704	}
 705	datlen = ntohs(clcm->length);
 706	if ((len < sizeof(struct smc_clc_msg_hdr)) ||
 707	    (clcm->version < SMC_V1) ||
 708	    ((clcm->type != SMC_CLC_DECLINE) &&
 709	     (clcm->type != expected_type))) {
 710		smc->sk.sk_err = EPROTO;
 711		reason_code = -EPROTO;
 712		goto out;
 713	}
 714
 715	/* receive the complete CLC message */
 716	memset(&msg, 0, sizeof(struct msghdr));
 717	if (datlen > buflen) {
 718		check_trl = false;
 719		recvlen = buflen;
 720	} else {
 721		recvlen = datlen;
 722	}
 723	iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, recvlen);
 724	krflags = MSG_WAITALL;
 725	len = sock_recvmsg(smc->clcsock, &msg, krflags);
 726	if (len < recvlen || !smc_clc_msg_hdr_valid(clcm, check_trl)) {
 727		smc->sk.sk_err = EPROTO;
 728		reason_code = -EPROTO;
 729		goto out;
 730	}
 731	datlen -= len;
 732	while (datlen) {
 733		u8 tmp[SMC_CLC_RECV_BUF_LEN];
 734
 735		vec.iov_base = &tmp;
 736		vec.iov_len = SMC_CLC_RECV_BUF_LEN;
 737		/* receive remaining proposal message */
 738		recvlen = datlen > SMC_CLC_RECV_BUF_LEN ?
 739						SMC_CLC_RECV_BUF_LEN : datlen;
 740		iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, recvlen);
 741		len = sock_recvmsg(smc->clcsock, &msg, krflags);
 742		datlen -= len;
 743	}
 744	if (clcm->type == SMC_CLC_DECLINE) {
 745		struct smc_clc_msg_decline *dclc;
 746
 747		dclc = (struct smc_clc_msg_decline *)clcm;
 748		reason_code = SMC_CLC_DECL_PEERDECL;
 749		smc->peer_diagnosis = ntohl(dclc->peer_diagnosis);
 750		if (((struct smc_clc_msg_decline *)buf)->hdr.typev2 &
 751						SMC_FIRST_CONTACT_MASK) {
 752			smc->conn.lgr->sync_err = 1;
 753			smc_lgr_terminate_sched(smc->conn.lgr);
 754		}
 755	}
 756
 757out:
 758	clc_sk->sk_rcvtimeo = rcvtimeo;
 759	return reason_code;
 760}
 761
 762/* send CLC DECLINE message across internal TCP socket */
 763int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version)
 764{
 765	struct smc_clc_msg_decline *dclc_v1;
 766	struct smc_clc_msg_decline_v2 dclc;
 767	struct msghdr msg;
 768	int len, send_len;
 769	struct kvec vec;
 770
 771	dclc_v1 = (struct smc_clc_msg_decline *)&dclc;
 772	memset(&dclc, 0, sizeof(dclc));
 773	memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 774	dclc.hdr.type = SMC_CLC_DECLINE;
 775	dclc.hdr.version = version;
 776	dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX;
 777	dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ?
 778						SMC_FIRST_CONTACT_MASK : 0;
 779	if ((!smc_conn_lgr_valid(&smc->conn) || !smc->conn.lgr->is_smcd) &&
 780	    smc_ib_is_valid_local_systemid())
 781		memcpy(dclc.id_for_peer, local_systemid,
 782		       sizeof(local_systemid));
 783	dclc.peer_diagnosis = htonl(peer_diag_info);
 784	if (version == SMC_V1) {
 785		memcpy(dclc_v1->trl.eyecatcher, SMC_EYECATCHER,
 786		       sizeof(SMC_EYECATCHER));
 787		send_len = sizeof(*dclc_v1);
 788	} else {
 789		memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER,
 790		       sizeof(SMC_EYECATCHER));
 791		send_len = sizeof(dclc);
 792	}
 793	dclc.hdr.length = htons(send_len);
 794
 795	memset(&msg, 0, sizeof(msg));
 796	vec.iov_base = &dclc;
 797	vec.iov_len = send_len;
 798	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, send_len);
 799	if (len < 0 || len < send_len)
 800		len = -EPROTO;
 801	return len > 0 ? 0 : len;
 802}
 803
 804/* send CLC PROPOSAL message across internal TCP socket */
 805int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
 806{
 807	struct smc_clc_smcd_v2_extension *smcd_v2_ext;
 808	struct smc_clc_msg_proposal_prefix *pclc_prfx;
 809	struct smc_clc_msg_proposal *pclc_base;
 810	struct smc_clc_smcd_gid_chid *gidchids;
 811	struct smc_clc_msg_proposal_area *pclc;
 812	struct smc_clc_ipv6_prefix *ipv6_prfx;
 813	struct smc_clc_v2_extension *v2_ext;
 814	struct smc_clc_msg_smcd *pclc_smcd;
 815	struct smc_clc_msg_trail *trl;
 816	int len, i, plen, rc;
 817	int reason_code = 0;
 818	struct kvec vec[8];
 819	struct msghdr msg;
 820
 821	pclc = kzalloc(sizeof(*pclc), GFP_KERNEL);
 822	if (!pclc)
 823		return -ENOMEM;
 824
 825	pclc_base = &pclc->pclc_base;
 826	pclc_smcd = &pclc->pclc_smcd;
 827	pclc_prfx = &pclc->pclc_prfx;
 828	ipv6_prfx = pclc->pclc_prfx_ipv6;
 829	v2_ext = &pclc->pclc_v2_ext;
 830	smcd_v2_ext = &pclc->pclc_smcd_v2_ext;
 831	gidchids = pclc->pclc_gidchids;
 832	trl = &pclc->pclc_trl;
 833
 834	pclc_base->hdr.version = SMC_V2;
 835	pclc_base->hdr.typev1 = ini->smc_type_v1;
 836	pclc_base->hdr.typev2 = ini->smc_type_v2;
 837	plen = sizeof(*pclc_base) + sizeof(*pclc_smcd) + sizeof(*trl);
 838
 839	/* retrieve ip prefixes for CLC proposal msg */
 840	if (ini->smc_type_v1 != SMC_TYPE_N) {
 841		rc = smc_clc_prfx_set(smc->clcsock, pclc_prfx, ipv6_prfx);
 842		if (rc) {
 843			if (ini->smc_type_v2 == SMC_TYPE_N) {
 844				kfree(pclc);
 845				return SMC_CLC_DECL_CNFERR;
 846			}
 847			pclc_base->hdr.typev1 = SMC_TYPE_N;
 848		} else {
 849			pclc_base->iparea_offset = htons(sizeof(*pclc_smcd));
 850			plen += sizeof(*pclc_prfx) +
 851					pclc_prfx->ipv6_prefixes_cnt *
 852					sizeof(ipv6_prfx[0]);
 853		}
 854	}
 855
 856	/* build SMC Proposal CLC message */
 857	memcpy(pclc_base->hdr.eyecatcher, SMC_EYECATCHER,
 858	       sizeof(SMC_EYECATCHER));
 859	pclc_base->hdr.type = SMC_CLC_PROPOSAL;
 860	if (smcr_indicated(ini->smc_type_v1)) {
 861		/* add SMC-R specifics */
 862		memcpy(pclc_base->lcl.id_for_peer, local_systemid,
 863		       sizeof(local_systemid));
 864		memcpy(pclc_base->lcl.gid, ini->ib_gid, SMC_GID_SIZE);
 865		memcpy(pclc_base->lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1],
 866		       ETH_ALEN);
 867	}
 868	if (smcd_indicated(ini->smc_type_v1)) {
 869		/* add SMC-D specifics */
 870		if (ini->ism_dev[0]) {
 871			pclc_smcd->ism.gid = htonll(ini->ism_dev[0]->local_gid);
 872			pclc_smcd->ism.chid =
 873				htons(smc_ism_get_chid(ini->ism_dev[0]));
 874		}
 875	}
 876	if (ini->smc_type_v2 == SMC_TYPE_N) {
 877		pclc_smcd->v2_ext_offset = 0;
 878	} else {
 879		struct smc_clc_eid_entry *ueident;
 880		u16 v2_ext_offset;
 881
 882		v2_ext->hdr.flag.release = SMC_RELEASE;
 883		v2_ext_offset = sizeof(*pclc_smcd) -
 884			offsetofend(struct smc_clc_msg_smcd, v2_ext_offset);
 885		if (ini->smc_type_v1 != SMC_TYPE_N)
 886			v2_ext_offset += sizeof(*pclc_prfx) +
 887						pclc_prfx->ipv6_prefixes_cnt *
 888						sizeof(ipv6_prfx[0]);
 889		pclc_smcd->v2_ext_offset = htons(v2_ext_offset);
 890		plen += sizeof(*v2_ext);
 891
 892		read_lock(&smc_clc_eid_table.lock);
 893		v2_ext->hdr.eid_cnt = smc_clc_eid_table.ueid_cnt;
 894		plen += smc_clc_eid_table.ueid_cnt * SMC_MAX_EID_LEN;
 895		i = 0;
 896		list_for_each_entry(ueident, &smc_clc_eid_table.list, list) {
 897			memcpy(v2_ext->user_eids[i++], ueident->eid,
 898			       sizeof(ueident->eid));
 899		}
 900		read_unlock(&smc_clc_eid_table.lock);
 901	}
 902	if (smcd_indicated(ini->smc_type_v2)) {
 903		u8 *eid = NULL;
 904
 905		v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled;
 906		v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt;
 907		v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) -
 908				offsetofend(struct smc_clnt_opts_area_hdr,
 909					    smcd_v2_ext_offset) +
 910				v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN);
 911		smc_ism_get_system_eid(&eid);
 912		if (eid && v2_ext->hdr.flag.seid)
 913			memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN);
 914		plen += sizeof(*smcd_v2_ext);
 915		if (ini->ism_offered_cnt) {
 916			for (i = 1; i <= ini->ism_offered_cnt; i++) {
 917				gidchids[i - 1].gid =
 918					htonll(ini->ism_dev[i]->local_gid);
 919				gidchids[i - 1].chid =
 920					htons(smc_ism_get_chid(ini->ism_dev[i]));
 921			}
 922			plen += ini->ism_offered_cnt *
 923				sizeof(struct smc_clc_smcd_gid_chid);
 924		}
 925	}
 926	if (smcr_indicated(ini->smc_type_v2))
 927		memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE);
 928
 929	pclc_base->hdr.length = htons(plen);
 930	memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 931
 932	/* send SMC Proposal CLC message */
 933	memset(&msg, 0, sizeof(msg));
 934	i = 0;
 935	vec[i].iov_base = pclc_base;
 936	vec[i++].iov_len = sizeof(*pclc_base);
 937	vec[i].iov_base = pclc_smcd;
 938	vec[i++].iov_len = sizeof(*pclc_smcd);
 939	if (ini->smc_type_v1 != SMC_TYPE_N) {
 940		vec[i].iov_base = pclc_prfx;
 941		vec[i++].iov_len = sizeof(*pclc_prfx);
 942		if (pclc_prfx->ipv6_prefixes_cnt > 0) {
 943			vec[i].iov_base = ipv6_prfx;
 944			vec[i++].iov_len = pclc_prfx->ipv6_prefixes_cnt *
 945					   sizeof(ipv6_prfx[0]);
 946		}
 947	}
 948	if (ini->smc_type_v2 != SMC_TYPE_N) {
 949		vec[i].iov_base = v2_ext;
 950		vec[i++].iov_len = sizeof(*v2_ext) +
 951				   (v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN);
 952		if (smcd_indicated(ini->smc_type_v2)) {
 953			vec[i].iov_base = smcd_v2_ext;
 954			vec[i++].iov_len = sizeof(*smcd_v2_ext);
 955			if (ini->ism_offered_cnt) {
 956				vec[i].iov_base = gidchids;
 957				vec[i++].iov_len = ini->ism_offered_cnt *
 958					sizeof(struct smc_clc_smcd_gid_chid);
 959			}
 960		}
 961	}
 962	vec[i].iov_base = trl;
 963	vec[i++].iov_len = sizeof(*trl);
 964	/* due to the few bytes needed for the CLC handshake this cannot block */
 965	len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen);
 966	if (len < 0) {
 967		smc->sk.sk_err = smc->clcsock->sk->sk_err;
 968		reason_code = -smc->sk.sk_err;
 969	} else if (len < ntohs(pclc_base->hdr.length)) {
 970		reason_code = -ENETUNREACH;
 971		smc->sk.sk_err = -reason_code;
 972	}
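	/*
	 * A send error is mirrored from the internal TCP socket; a short
	 * send is treated like an unreachable peer (-ENETUNREACH).
	 */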
 973
 974	kfree(pclc);
 975	return reason_code;
 976}
 977
 978/* build and send CLC CONFIRM / ACCEPT message */
 979static int smc_clc_send_confirm_accept(struct smc_sock *smc,
 980				       struct smc_clc_msg_accept_confirm_v2 *clc_v2,
 981				       int first_contact, u8 version,
 982				       u8 *eid, struct smc_init_info *ini)
 983{
 984	struct smc_connection *conn = &smc->conn;
 985	struct smc_clc_msg_accept_confirm *clc;
 986	struct smc_clc_first_contact_ext fce;
 987	struct smc_clc_fce_gid_ext gle;
 988	struct smc_clc_msg_trail trl;
 989	struct kvec vec[5];
 990	struct msghdr msg;
 991	int i, len;
 992
 993	/* send SMC Confirm CLC msg */
 994	clc = (struct smc_clc_msg_accept_confirm *)clc_v2;
 995	clc->hdr.version = version;	/* SMC version */
 996	if (first_contact)
 997		clc->hdr.typev2 |= SMC_FIRST_CONTACT_MASK;
 998	if (conn->lgr->is_smcd) {
 999		/* SMC-D specific settings */
1000		memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER,
1001		       sizeof(SMCD_EYECATCHER));
1002		clc->hdr.typev1 = SMC_TYPE_D;
1003		clc->d0.gid = conn->lgr->smcd->local_gid;
1004		clc->d0.token = conn->rmb_desc->token;
1005		clc->d0.dmbe_size = conn->rmbe_size_short;
1006		clc->d0.dmbe_idx = 0;
1007		memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
1008		if (version == SMC_V1) {
1009			clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
1010		} else {
1011			clc_v2->d1.chid =
1012				htons(smc_ism_get_chid(conn->lgr->smcd));
1013			if (eid && eid[0])
1014				memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN);
1015			len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
1016			if (first_contact)
1017				smc_clc_fill_fce(&fce, &len);
1018			clc_v2->hdr.length = htons(len);
1019		}
1020		memcpy(trl.eyecatcher, SMCD_EYECATCHER,
1021		       sizeof(SMCD_EYECATCHER));
1022	} else {
1023		struct smc_link *link = conn->lnk;
1024
1025		/* SMC-R specific settings */
1026		memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER,
1027		       sizeof(SMC_EYECATCHER));
1028		clc->hdr.typev1 = SMC_TYPE_R;
1029		clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
1030		memcpy(clc->r0.lcl.id_for_peer, local_systemid,
1031		       sizeof(local_systemid));
1032		memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE);
1033		memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
1034		       ETH_ALEN);
1035		hton24(clc->r0.qpn, link->roce_qp->qp_num);
1036		clc->r0.rmb_rkey =
1037			htonl(conn->rmb_desc->mr[link->link_idx]->rkey);
1038		clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
1039		clc->r0.rmbe_alert_token = htonl(conn->alert_token_local);
1040		switch (clc->hdr.type) {
1041		case SMC_CLC_ACCEPT:
1042			clc->r0.qp_mtu = link->path_mtu;
1043			break;
1044		case SMC_CLC_CONFIRM:
1045			clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu);
1046			break;
1047		}
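		/*
		 * The switch above negotiates the QP MTU: the ACCEPT
		 * advertises the server's own path MTU, the CONFIRM answers
		 * with the minimum of both sides.
		 */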
1048		clc->r0.rmbe_size = conn->rmbe_size_short;
1049		clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
1050			cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
1051			cpu_to_be64((u64)sg_dma_address
1052				    (conn->rmb_desc->sgt[link->link_idx].sgl));
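		/*
		 * For virtually contiguous RMBs (is_vm) the buffer's kernel
		 * virtual address is advertised, otherwise the DMA address
		 * of the first scatterlist entry; together with the RKEY
		 * above this is the target the peer uses for RDMA writes.
		 */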
1053		hton24(clc->r0.psn, link->psn_initial);
1054		if (version == SMC_V1) {
1055			clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
1056		} else {
1057			if (eid && eid[0])
1058				memcpy(clc_v2->r1.eid, eid, SMC_MAX_EID_LEN);
1059			len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2;
1060			if (first_contact) {
1061				smc_clc_fill_fce(&fce, &len);
1062				fce.v2_direct = !link->lgr->uses_gateway;
1063				memset(&gle, 0, sizeof(gle));
1064				if (ini && clc->hdr.type == SMC_CLC_CONFIRM) {
1065					gle.gid_cnt = ini->smcrv2.gidlist.len;
1066					len += sizeof(gle);
1067					len += gle.gid_cnt * sizeof(gle.gid[0]);
1068				} else {
1069					len += sizeof(gle.reserved);
1070				}
1071			}
1072			clc_v2->hdr.length = htons(len);
1073		}
1074		memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
1075	}
1076
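	/*
	 * Gather the message in wire order: the fixed accept/confirm body
	 * without its trailer, for a V2 first contact the first contact
	 * extension and, for SMC-R, either the GID list extension (CONFIRM)
	 * or just its reserved field (ACCEPT), and finally the trailer.
	 * The pieces add up to the hdr.length set above.
	 */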
1077	memset(&msg, 0, sizeof(msg));
1078	i = 0;
1079	vec[i].iov_base = clc_v2;
1080	if (version > SMC_V1)
1081		vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ?
1082					SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 :
1083					SMCR_CLC_ACCEPT_CONFIRM_LEN_V2) -
1084				   sizeof(trl);
1085	else
1086		vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ?
1087						SMCD_CLC_ACCEPT_CONFIRM_LEN :
1088						SMCR_CLC_ACCEPT_CONFIRM_LEN) -
1089				   sizeof(trl);
1090	if (version > SMC_V1 && first_contact) {
1091		vec[i].iov_base = &fce;
1092		vec[i++].iov_len = sizeof(fce);
1093		if (!conn->lgr->is_smcd) {
1094			if (clc->hdr.type == SMC_CLC_CONFIRM) {
1095				vec[i].iov_base = &gle;
1096				vec[i++].iov_len = sizeof(gle);
1097				vec[i].iov_base = &ini->smcrv2.gidlist.list;
1098				vec[i++].iov_len = gle.gid_cnt *
1099						   sizeof(gle.gid[0]);
1100			} else {
1101				vec[i].iov_base = &gle.reserved;
1102				vec[i++].iov_len = sizeof(gle.reserved);
1103			}
1104		}
1105	}
1106	vec[i].iov_base = &trl;
1107	vec[i++].iov_len = sizeof(trl);
1108	return kernel_sendmsg(smc->clcsock, &msg, vec, i,
1109			      ntohs(clc->hdr.length));
1110}
1111
1112/* send CLC CONFIRM message across internal TCP socket */
1113int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
1114			 u8 version, u8 *eid, struct smc_init_info *ini)
1115{
1116	struct smc_clc_msg_accept_confirm_v2 cclc_v2;
1117	int reason_code = 0;
1118	int len;
1119
1120	/* send SMC Confirm CLC msg */
1121	memset(&cclc_v2, 0, sizeof(cclc_v2));
1122	cclc_v2.hdr.type = SMC_CLC_CONFIRM;
1123	len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact,
1124					  version, eid, ini);
1125	if (len < ntohs(cclc_v2.hdr.length)) {
1126		if (len >= 0) {
1127			reason_code = -ENETUNREACH;
1128			smc->sk.sk_err = -reason_code;
1129		} else {
1130			smc->sk.sk_err = smc->clcsock->sk->sk_err;
1131			reason_code = -smc->sk.sk_err;
1132		}
1133	}
1134	return reason_code;
1135}
1136
1137/* send CLC ACCEPT message across internal TCP socket */
1138int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
1139			u8 version, u8 *negotiated_eid)
1140{
1141	struct smc_clc_msg_accept_confirm_v2 aclc_v2;
1142	int len;
1143
1144	memset(&aclc_v2, 0, sizeof(aclc_v2));
1145	aclc_v2.hdr.type = SMC_CLC_ACCEPT;
1146	len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact,
1147					  version, negotiated_eid, NULL);
1148	if (len < ntohs(aclc_v2.hdr.length))
1149		len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
1150
1151	return len > 0 ? 0 : len;
1152}
1153
1154void smc_clc_get_hostname(u8 **host)
1155{
1156	*host = &smc_hostname[0];
1157}
1158
1159void __init smc_clc_init(void)
1160{
1161	struct new_utsname *u;
1162
1163	memset(smc_hostname, _S, sizeof(smc_hostname)); /* ASCII blanks */
1164	u = utsname();
1165	memcpy(smc_hostname, u->nodename,
1166	       min_t(size_t, strlen(u->nodename), sizeof(smc_hostname)));
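	/* keep the blank padding: the hostname is copied as-is into
	 * fixed-length CLC fields (e.g. the first contact extension)
	 * and is not NUL-terminated
	 */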
1167
1168	INIT_LIST_HEAD(&smc_clc_eid_table.list);
1169	rwlock_init(&smc_clc_eid_table.lock);
1170	smc_clc_eid_table.ueid_cnt = 0;
1171	smc_clc_eid_table.seid_enabled = 1;
1172}
1173
1174void smc_clc_exit(void)
1175{
1176	smc_clc_ueid_remove(NULL);
1177}