net/mptcp/sockopt.c (Linux v6.13.7)
   1// SPDX-License-Identifier: GPL-2.0
   2/* Multipath TCP
   3 *
   4 * Copyright (c) 2021, Red Hat.
   5 */
   6
   7#define pr_fmt(fmt) "MPTCP: " fmt
   8
   9#include <linux/kernel.h>
  10#include <linux/module.h>
  11#include <net/sock.h>
  12#include <net/protocol.h>
  13#include <net/tcp.h>
  14#include <net/mptcp.h>
  15#include "protocol.h"
  16
  17#define MIN_INFO_OPTLEN_SIZE		16
  18#define MIN_FULL_INFO_OPTLEN_SIZE	40
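/* These sizes mirror the fixed headers of the SOL_MPTCP getsockopt() replies
 * below: MIN_INFO_OPTLEN_SIZE is sizeof(struct mptcp_subflow_data) and
 * MIN_FULL_INFO_OPTLEN_SIZE is offsetof(struct mptcp_full_info, mptcp_info),
 * as enforced by the BUILD_BUG_ON()s in mptcp_get_subflow_data() and
 * mptcp_get_full_info().
 */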
  19
  20static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
  21{
  22	msk_owned_by_me(msk);
  23
  24	if (likely(!__mptcp_check_fallback(msk)))
  25		return NULL;
  26
  27	return msk->first;
  28}
  29
  30static u32 sockopt_seq_reset(const struct sock *sk)
  31{
  32	sock_owned_by_me(sk);
  33
   34	/* Highbits contain state.  Allows distinguishing the sockopt_seq
  35	 * of listener and established:
  36	 * s0 = new_listener()
  37	 * sockopt(s0) - seq is 1
  38	 * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0)
  39	 * sockopt(s0) - seq increments to 2 on s0
  40	 * sockopt(s1) // seq increments to 2 on s1 (different option)
  41	 * new ssk completes join, inherits options from s0 // seq 2
  42	 * Needs sync from mptcp join logic, but ssk->seq == msk->seq
  43	 *
  44	 * Set High order bits to sk_state so ssk->seq == msk->seq test
  45	 * will fail.
  46	 */
  47
  48	return (u32)sk->sk_state << 24u;
  49}
  50
  51static void sockopt_seq_inc(struct mptcp_sock *msk)
  52{
  53	u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff;
  54
  55	msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq;
  56}
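/* Worked example (illustrative): with sk_state == TCP_ESTABLISHED (1) and a
 * previous setsockopt_seq of 0x0a000001 (recorded while in TCP_LISTEN, 10),
 * sockopt_seq_inc() yields 0x01000000 + ((0x0a000001 + 1) & 0x00ffffff)
 * == 0x01000002: the low 24 bits keep counting while the high byte tracks
 * the current state, so a subflow seq cached under a different msk state
 * never compares equal to msk->setsockopt_seq.
 */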
  57
  58static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval,
  59				unsigned int optlen, int *val)
  60{
  61	if (optlen < sizeof(int))
  62		return -EINVAL;
  63
  64	if (copy_from_sockptr(val, optval, sizeof(*val)))
  65		return -EFAULT;
  66
  67	return 0;
  68}
  69
  70static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val)
  71{
  72	struct mptcp_subflow_context *subflow;
  73	struct sock *sk = (struct sock *)msk;
  74
  75	lock_sock(sk);
  76	sockopt_seq_inc(msk);
  77
  78	mptcp_for_each_subflow(msk, subflow) {
  79		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
  80		bool slow = lock_sock_fast(ssk);
  81
  82		switch (optname) {
  83		case SO_DEBUG:
  84			sock_valbool_flag(ssk, SOCK_DBG, !!val);
  85			break;
  86		case SO_KEEPALIVE:
  87			if (ssk->sk_prot->keepalive)
  88				ssk->sk_prot->keepalive(ssk, !!val);
  89			sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val);
  90			break;
  91		case SO_PRIORITY:
  92			WRITE_ONCE(ssk->sk_priority, val);
  93			break;
  94		case SO_SNDBUF:
  95		case SO_SNDBUFFORCE:
  96			ssk->sk_userlocks |= SOCK_SNDBUF_LOCK;
  97			WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
  98			mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf;
  99			break;
 100		case SO_RCVBUF:
 101		case SO_RCVBUFFORCE:
 102			ssk->sk_userlocks |= SOCK_RCVBUF_LOCK;
 103			WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
 104			break;
 105		case SO_MARK:
 106			if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) {
 107				WRITE_ONCE(ssk->sk_mark, sk->sk_mark);
 108				sk_dst_reset(ssk);
 109			}
 110			break;
 111		case SO_INCOMING_CPU:
 112			WRITE_ONCE(ssk->sk_incoming_cpu, val);
 113			break;
 114		}
 115
 116		subflow->setsockopt_seq = msk->setsockopt_seq;
 117		unlock_sock_fast(ssk, slow);
 118	}
 119
 120	release_sock(sk);
 121}
 122
 123static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val)
 124{
 125	sockptr_t optval = KERNEL_SOCKPTR(&val);
 126	struct sock *sk = (struct sock *)msk;
 127	int ret;
 128
 129	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
 130			      optval, sizeof(val));
 131	if (ret)
 132		return ret;
 133
 134	mptcp_sol_socket_sync_intval(msk, optname, val);
 135	return 0;
 136}
 137
 138static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val)
 139{
 140	struct sock *sk = (struct sock *)msk;
 141
 142	WRITE_ONCE(sk->sk_incoming_cpu, val);
 143
 144	mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val);
 145}
 146
 147static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val)
 148{
 149	sockptr_t optval = KERNEL_SOCKPTR(&val);
 150	struct mptcp_subflow_context *subflow;
 151	struct sock *sk = (struct sock *)msk;
 152	int ret;
 153
 154	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
 155			      optval, sizeof(val));
 156	if (ret)
 157		return ret;
 158
 159	lock_sock(sk);
 160	mptcp_for_each_subflow(msk, subflow) {
 161		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 162		bool slow = lock_sock_fast(ssk);
 163
 164		sock_set_timestamp(sk, optname, !!val);
 165		unlock_sock_fast(ssk, slow);
 166	}
 167
 168	release_sock(sk);
 169	return 0;
 170}
 171
 172static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
 173					   sockptr_t optval,
 174					   unsigned int optlen)
 175{
 176	int val, ret;
 177
 178	ret = mptcp_get_int_option(msk, optval, optlen, &val);
 179	if (ret)
 180		return ret;
 181
 182	switch (optname) {
 183	case SO_KEEPALIVE:
 184	case SO_DEBUG:
 185	case SO_MARK:
 186	case SO_PRIORITY:
 187	case SO_SNDBUF:
 188	case SO_SNDBUFFORCE:
 189	case SO_RCVBUF:
 190	case SO_RCVBUFFORCE:
 191		return mptcp_sol_socket_intval(msk, optname, val);
 192	case SO_INCOMING_CPU:
 193		mptcp_so_incoming_cpu(msk, val);
 194		return 0;
 195	case SO_TIMESTAMP_OLD:
 196	case SO_TIMESTAMP_NEW:
 197	case SO_TIMESTAMPNS_OLD:
 198	case SO_TIMESTAMPNS_NEW:
 199		return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val);
 200	}
 201
 202	return -ENOPROTOOPT;
 203}
 204
 205static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk,
 206						    int optname,
 207						    sockptr_t optval,
 208						    unsigned int optlen)
 209{
 210	struct mptcp_subflow_context *subflow;
 211	struct sock *sk = (struct sock *)msk;
 212	struct so_timestamping timestamping;
 213	int ret;
 214
 215	if (optlen == sizeof(timestamping)) {
 216		if (copy_from_sockptr(&timestamping, optval,
 217				      sizeof(timestamping)))
 218			return -EFAULT;
 219	} else if (optlen == sizeof(int)) {
 220		memset(&timestamping, 0, sizeof(timestamping));
 221
 222		if (copy_from_sockptr(&timestamping.flags, optval, sizeof(int)))
 223			return -EFAULT;
 224	} else {
 225		return -EINVAL;
 226	}
 227
 228	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
 229			      KERNEL_SOCKPTR(&timestamping),
 230			      sizeof(timestamping));
 231	if (ret)
 232		return ret;
 233
 234	lock_sock(sk);
 235
 236	mptcp_for_each_subflow(msk, subflow) {
 237		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 238		bool slow = lock_sock_fast(ssk);
 239
 240		sock_set_timestamping(sk, optname, timestamping);
 241		unlock_sock_fast(ssk, slow);
 242	}
 243
 244	release_sock(sk);
 245
 246	return 0;
 247}
 248
 249static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval,
 250					      unsigned int optlen)
 251{
 252	struct mptcp_subflow_context *subflow;
 253	struct sock *sk = (struct sock *)msk;
 254	struct linger ling;
 255	sockptr_t kopt;
 256	int ret;
 257
 258	if (optlen < sizeof(ling))
 259		return -EINVAL;
 260
 261	if (copy_from_sockptr(&ling, optval, sizeof(ling)))
 262		return -EFAULT;
 263
 264	kopt = KERNEL_SOCKPTR(&ling);
 265	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling));
 266	if (ret)
 267		return ret;
 268
 269	lock_sock(sk);
 270	sockopt_seq_inc(msk);
 271	mptcp_for_each_subflow(msk, subflow) {
 272		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 273		bool slow = lock_sock_fast(ssk);
 274
 275		if (!ling.l_onoff) {
 276			sock_reset_flag(ssk, SOCK_LINGER);
 277		} else {
 278			ssk->sk_lingertime = sk->sk_lingertime;
 279			sock_set_flag(ssk, SOCK_LINGER);
 280		}
 281
 282		subflow->setsockopt_seq = msk->setsockopt_seq;
 283		unlock_sock_fast(ssk, slow);
 284	}
 285
 286	release_sock(sk);
 287	return 0;
 288}
 289
 290static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 291				       sockptr_t optval, unsigned int optlen)
 292{
 293	struct sock *sk = (struct sock *)msk;
 294	struct sock *ssk;
 295	int ret;
 296
 297	switch (optname) {
 298	case SO_REUSEPORT:
 299	case SO_REUSEADDR:
 300	case SO_BINDTODEVICE:
 301	case SO_BINDTOIFINDEX:
 302		lock_sock(sk);
 303		ssk = __mptcp_nmpc_sk(msk);
 304		if (IS_ERR(ssk)) {
 305			release_sock(sk);
 306			return PTR_ERR(ssk);
 307		}
 308
 309		ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen);
 310		if (ret == 0) {
 311			if (optname == SO_REUSEPORT)
 312				sk->sk_reuseport = ssk->sk_reuseport;
 313			else if (optname == SO_REUSEADDR)
 314				sk->sk_reuse = ssk->sk_reuse;
 315			else if (optname == SO_BINDTODEVICE)
 316				sk->sk_bound_dev_if = ssk->sk_bound_dev_if;
 317			else if (optname == SO_BINDTOIFINDEX)
 318				sk->sk_bound_dev_if = ssk->sk_bound_dev_if;
 319		}
 320		release_sock(sk);
 321		return ret;
 322	case SO_KEEPALIVE:
 323	case SO_PRIORITY:
 324	case SO_SNDBUF:
 325	case SO_SNDBUFFORCE:
 326	case SO_RCVBUF:
 327	case SO_RCVBUFFORCE:
 328	case SO_MARK:
 329	case SO_INCOMING_CPU:
 330	case SO_DEBUG:
 331	case SO_TIMESTAMP_OLD:
 332	case SO_TIMESTAMP_NEW:
 333	case SO_TIMESTAMPNS_OLD:
 334	case SO_TIMESTAMPNS_NEW:
 335		return mptcp_setsockopt_sol_socket_int(msk, optname, optval,
 336						       optlen);
 337	case SO_TIMESTAMPING_OLD:
 338	case SO_TIMESTAMPING_NEW:
 339		return mptcp_setsockopt_sol_socket_timestamping(msk, optname,
 340								optval, optlen);
 341	case SO_LINGER:
 342		return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen);
 343	case SO_RCVLOWAT:
 344	case SO_RCVTIMEO_OLD:
 345	case SO_RCVTIMEO_NEW:
 346	case SO_SNDTIMEO_OLD:
 347	case SO_SNDTIMEO_NEW:
 348	case SO_BUSY_POLL:
 349	case SO_PREFER_BUSY_POLL:
 350	case SO_BUSY_POLL_BUDGET:
 351		/* No need to copy: only relevant for msk */
 352		return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
 353	case SO_NO_CHECK:
 354	case SO_DONTROUTE:
 355	case SO_BROADCAST:
 356	case SO_BSDCOMPAT:
 357	case SO_PASSCRED:
 358	case SO_PASSPIDFD:
 359	case SO_PASSSEC:
 360	case SO_RXQ_OVFL:
 361	case SO_WIFI_STATUS:
 362	case SO_NOFCS:
 363	case SO_SELECT_ERR_QUEUE:
 364		return 0;
 365	}
 366
 367	/* SO_OOBINLINE is not supported, let's avoid the related mess
 368	 * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF,
 369	 * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER,
 370	 * we must be careful with subflows
 371	 *
  372	 * SO_ATTACH_REUSEPORT_EBPF is not supported, as it checks
 373	 * explicitly the sk_protocol field
 374	 *
 375	 * SO_PEEK_OFF is unsupported, as it is for plain TCP
 376	 * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows
 377	 * SO_CNX_ADVICE is currently unsupported, could possibly be relevant,
 378	 * but likely needs careful design
 379	 *
 380	 * SO_ZEROCOPY is currently unsupported, TODO in sndmsg
 381	 * SO_TXTIME is currently unsupported
 382	 */
 383
 384	return -EOPNOTSUPP;
 385}
 386
 387static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
 388			       sockptr_t optval, unsigned int optlen)
 389{
 390	struct sock *sk = (struct sock *)msk;
 391	int ret = -EOPNOTSUPP;
 392	struct sock *ssk;
 393
 394	switch (optname) {
 395	case IPV6_V6ONLY:
 396	case IPV6_TRANSPARENT:
 397	case IPV6_FREEBIND:
 398		lock_sock(sk);
 399		ssk = __mptcp_nmpc_sk(msk);
 400		if (IS_ERR(ssk)) {
 401			release_sock(sk);
 402			return PTR_ERR(ssk);
 403		}
 404
 405		ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen);
 406		if (ret != 0) {
 407			release_sock(sk);
 408			return ret;
 409		}
 410
 411		sockopt_seq_inc(msk);
 412
 413		switch (optname) {
 414		case IPV6_V6ONLY:
 415			sk->sk_ipv6only = ssk->sk_ipv6only;
 416			break;
 417		case IPV6_TRANSPARENT:
 418			inet_assign_bit(TRANSPARENT, sk,
 419					inet_test_bit(TRANSPARENT, ssk));
 420			break;
 421		case IPV6_FREEBIND:
 422			inet_assign_bit(FREEBIND, sk,
 423					inet_test_bit(FREEBIND, ssk));
 424			break;
 425		}
 426
 427		release_sock(sk);
 428		break;
 429	}
 430
 431	return ret;
 432}
 433
 434static bool mptcp_supported_sockopt(int level, int optname)
 435{
 436	if (level == SOL_IP) {
 437		switch (optname) {
 438		/* should work fine */
 439		case IP_FREEBIND:
 440		case IP_TRANSPARENT:
 441		case IP_BIND_ADDRESS_NO_PORT:
 442		case IP_LOCAL_PORT_RANGE:
 443
 444		/* the following are control cmsg related */
 445		case IP_PKTINFO:
 446		case IP_RECVTTL:
 447		case IP_RECVTOS:
 448		case IP_RECVOPTS:
 449		case IP_RETOPTS:
 450		case IP_PASSSEC:
 451		case IP_RECVORIGDSTADDR:
 452		case IP_CHECKSUM:
 453		case IP_RECVFRAGSIZE:
 454
  455		/* common stuff that needs some love */
 456		case IP_TOS:
 457		case IP_TTL:
 458		case IP_MTU_DISCOVER:
 459		case IP_RECVERR:
 460
 461		/* possibly less common may deserve some love */
 462		case IP_MINTTL:
 463
 464		/* the following is apparently a no-op for plain TCP */
 465		case IP_RECVERR_RFC4884:
 466			return true;
 467		}
 468
 469		/* IP_OPTIONS is not supported, needs subflow care */
 470		/* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */
 471		/* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF,
 472		 * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP,
 473		 * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE,
 474		 * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP,
 475		 * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE,
 476		 * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal
 477		 * with mcast stuff
 478		 */
  479		/* IP_IPSEC_POLICY, IP_XFRM_POLICY are not supported, unrelated here */
 480		return false;
 481	}
 482	if (level == SOL_IPV6) {
 483		switch (optname) {
 484		case IPV6_V6ONLY:
 485
 486		/* the following are control cmsg related */
 487		case IPV6_RECVPKTINFO:
 488		case IPV6_2292PKTINFO:
 489		case IPV6_RECVHOPLIMIT:
 490		case IPV6_2292HOPLIMIT:
 491		case IPV6_RECVRTHDR:
 492		case IPV6_2292RTHDR:
 493		case IPV6_RECVHOPOPTS:
 494		case IPV6_2292HOPOPTS:
 495		case IPV6_RECVDSTOPTS:
 496		case IPV6_2292DSTOPTS:
 497		case IPV6_RECVTCLASS:
 498		case IPV6_FLOWINFO:
 499		case IPV6_RECVPATHMTU:
 500		case IPV6_RECVORIGDSTADDR:
 501		case IPV6_RECVFRAGSIZE:
 502
 503		/* the following ones need some love but are quite common */
 504		case IPV6_TCLASS:
 505		case IPV6_TRANSPARENT:
 506		case IPV6_FREEBIND:
 507		case IPV6_PKTINFO:
 508		case IPV6_2292PKTOPTIONS:
 509		case IPV6_UNICAST_HOPS:
 510		case IPV6_MTU_DISCOVER:
 511		case IPV6_MTU:
 512		case IPV6_RECVERR:
 513		case IPV6_FLOWINFO_SEND:
 514		case IPV6_FLOWLABEL_MGR:
 515		case IPV6_MINHOPCOUNT:
 516		case IPV6_DONTFRAG:
 517		case IPV6_AUTOFLOWLABEL:
 518
 519		/* the following one is a no-op for plain TCP */
 520		case IPV6_RECVERR_RFC4884:
 521			return true;
 522		}
 523
 524		/* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are
 525		 * not supported
 526		 */
 527		/* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF,
 528		 * IPV6_MULTICAST_IF, IPV6_ADDRFORM,
 529		 * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST,
 530		 * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP,
 531		 * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP,
 532		 * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER
  533		 * are not supported, better not to deal with mcast
 534		 */
  535		/* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since they are evil */
 536
 537		/* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */
 538		/* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */
 539		return false;
 540	}
 541	if (level == SOL_TCP) {
 542		switch (optname) {
  543		/* the following are no-ops or should work just fine */
 544		case TCP_THIN_DUPACK:
 545		case TCP_DEFER_ACCEPT:
 546
 547		/* the following need some love */
 548		case TCP_MAXSEG:
 549		case TCP_NODELAY:
 550		case TCP_THIN_LINEAR_TIMEOUTS:
 551		case TCP_CONGESTION:
 552		case TCP_CORK:
 553		case TCP_KEEPIDLE:
 554		case TCP_KEEPINTVL:
 555		case TCP_KEEPCNT:
 556		case TCP_SYNCNT:
 557		case TCP_SAVE_SYN:
 558		case TCP_LINGER2:
 559		case TCP_WINDOW_CLAMP:
 560		case TCP_QUICKACK:
 561		case TCP_USER_TIMEOUT:
 562		case TCP_TIMESTAMP:
 563		case TCP_NOTSENT_LOWAT:
 564		case TCP_TX_DELAY:
 565		case TCP_INQ:
 566		case TCP_FASTOPEN:
 567		case TCP_FASTOPEN_CONNECT:
 568		case TCP_FASTOPEN_KEY:
 569		case TCP_FASTOPEN_NO_COOKIE:
 570			return true;
 571		}
 572
 573		/* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */
 574
 575		/* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS,
 576		 * TCP_REPAIR_WINDOW are not supported, better avoid this mess
 577		 */
 578	}
 579	return false;
 580}
 581
 582static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval,
 583					       unsigned int optlen)
 584{
 585	struct mptcp_subflow_context *subflow;
 586	struct sock *sk = (struct sock *)msk;
 587	char name[TCP_CA_NAME_MAX];
 588	bool cap_net_admin;
 589	int ret;
 590
 591	if (optlen < 1)
 592		return -EINVAL;
 593
 594	ret = strncpy_from_sockptr(name, optval,
 595				   min_t(long, TCP_CA_NAME_MAX - 1, optlen));
 596	if (ret < 0)
 597		return -EFAULT;
 598
 599	name[ret] = 0;
 600
 601	cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN);
 602
 603	ret = 0;
 604	lock_sock(sk);
 605	sockopt_seq_inc(msk);
 606	mptcp_for_each_subflow(msk, subflow) {
 607		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 608		int err;
 609
 610		lock_sock(ssk);
 611		err = tcp_set_congestion_control(ssk, name, true, cap_net_admin);
 612		if (err < 0 && ret == 0)
 613			ret = err;
 614		subflow->setsockopt_seq = msk->setsockopt_seq;
 615		release_sock(ssk);
 616	}
 617
 618	if (ret == 0)
 619		strscpy(msk->ca_name, name, sizeof(msk->ca_name));
 620
 621	release_sock(sk);
 622	return ret;
 623}
 624
 625static int __mptcp_setsockopt_set_val(struct mptcp_sock *msk, int max,
 626				      int (*set_val)(struct sock *, int),
 627				      int *msk_val, int val)
 628{
 629	struct mptcp_subflow_context *subflow;
 630	int err = 0;
 631
 632	mptcp_for_each_subflow(msk, subflow) {
 633		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 634		int ret;
 635
 636		lock_sock(ssk);
 637		ret = set_val(ssk, val);
 638		err = err ? : ret;
 639		release_sock(ssk);
 640	}
 641
 642	if (!err) {
 643		*msk_val = val;
 644		sockopt_seq_inc(msk);
 645	}
 646
 647	return err;
 648}
 649
 650static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val)
 651{
 652	struct mptcp_subflow_context *subflow;
 653	struct sock *sk = (struct sock *)msk;
 654
 655	sockopt_seq_inc(msk);
 656	msk->cork = !!val;
 657	mptcp_for_each_subflow(msk, subflow) {
 658		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 659
 660		lock_sock(ssk);
 661		__tcp_sock_set_cork(ssk, !!val);
 662		release_sock(ssk);
 663	}
 664	if (!val)
 665		mptcp_check_and_set_pending(sk);
 666
 667	return 0;
 668}
 669
 670static int __mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, int val)
 671{
 672	struct mptcp_subflow_context *subflow;
 673	struct sock *sk = (struct sock *)msk;
 674
 675	sockopt_seq_inc(msk);
 676	msk->nodelay = !!val;
 677	mptcp_for_each_subflow(msk, subflow) {
 678		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 679
 680		lock_sock(ssk);
 681		__tcp_sock_set_nodelay(ssk, !!val);
 682		release_sock(ssk);
 683	}
 684	if (val)
 685		mptcp_check_and_set_pending(sk);
 686	return 0;
 687}
 688
 689static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname,
 690				       sockptr_t optval, unsigned int optlen)
 691{
 692	struct sock *sk = (struct sock *)msk;
 693	struct sock *ssk;
 694	int err;
 695
 696	err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
 697	if (err != 0)
 698		return err;
 699
 700	lock_sock(sk);
 701
 702	ssk = __mptcp_nmpc_sk(msk);
 703	if (IS_ERR(ssk)) {
 704		release_sock(sk);
 705		return PTR_ERR(ssk);
 706	}
 707
 708	switch (optname) {
 709	case IP_FREEBIND:
 710		inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
 711		break;
 712	case IP_TRANSPARENT:
 713		inet_assign_bit(TRANSPARENT, ssk,
 714				inet_test_bit(TRANSPARENT, sk));
 715		break;
 716	case IP_BIND_ADDRESS_NO_PORT:
 717		inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk,
 718				inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
 719		break;
 720	case IP_LOCAL_PORT_RANGE:
 721		WRITE_ONCE(inet_sk(ssk)->local_port_range,
 722			   READ_ONCE(inet_sk(sk)->local_port_range));
 723		break;
 724	default:
 725		release_sock(sk);
 726		WARN_ON_ONCE(1);
 727		return -EOPNOTSUPP;
 728	}
 729
 730	sockopt_seq_inc(msk);
 731	release_sock(sk);
 732	return 0;
 733}
 734
 735static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
 736				       sockptr_t optval, unsigned int optlen)
 737{
 738	struct mptcp_subflow_context *subflow;
 739	struct sock *sk = (struct sock *)msk;
 740	int err, val;
 741
 742	err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
 743
 744	if (err != 0)
 745		return err;
 746
 747	lock_sock(sk);
 748	sockopt_seq_inc(msk);
 749	val = READ_ONCE(inet_sk(sk)->tos);
 750	mptcp_for_each_subflow(msk, subflow) {
 751		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 752		bool slow;
 753
 754		slow = lock_sock_fast(ssk);
 755		__ip_sock_set_tos(ssk, val);
 756		unlock_sock_fast(ssk, slow);
 757	}
 758	release_sock(sk);
 759
 760	return 0;
 761}
 762
 763static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
 764			       sockptr_t optval, unsigned int optlen)
 765{
 766	switch (optname) {
 767	case IP_FREEBIND:
 768	case IP_TRANSPARENT:
 769	case IP_BIND_ADDRESS_NO_PORT:
 770	case IP_LOCAL_PORT_RANGE:
 771		return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen);
 772	case IP_TOS:
 773		return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen);
 774	}
 775
 776	return -EOPNOTSUPP;
 777}
 778
 779static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
 780					  sockptr_t optval, unsigned int optlen)
 781{
 782	struct sock *sk = (struct sock *)msk;
 783	struct sock *ssk;
 784	int ret;
 785
 786	/* Limit to first subflow, before the connection establishment */
 787	lock_sock(sk);
 788	ssk = __mptcp_nmpc_sk(msk);
 789	if (IS_ERR(ssk)) {
 790		ret = PTR_ERR(ssk);
 791		goto unlock;
 792	}
 793
 794	ret = tcp_setsockopt(ssk, level, optname, optval, optlen);
 795
 796unlock:
 797	release_sock(sk);
 798	return ret;
 799}
 800
 801static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
 802				    sockptr_t optval, unsigned int optlen)
 803{
 804	struct sock *sk = (void *)msk;
 805	int ret, val;
 806
 807	switch (optname) {
 808	case TCP_ULP:
 809		return -EOPNOTSUPP;
 810	case TCP_CONGESTION:
 811		return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
 812	case TCP_DEFER_ACCEPT:
 813		/* See tcp.c: TCP_DEFER_ACCEPT does not fail */
 814		mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen);
 815		return 0;
 816	case TCP_FASTOPEN:
 817	case TCP_FASTOPEN_CONNECT:
 818	case TCP_FASTOPEN_KEY:
 819	case TCP_FASTOPEN_NO_COOKIE:
 820		return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname,
 821						      optval, optlen);
 822	}
 823
 824	ret = mptcp_get_int_option(msk, optval, optlen, &val);
 825	if (ret)
 826		return ret;
 827
 828	lock_sock(sk);
 829	switch (optname) {
 830	case TCP_INQ:
 831		if (val < 0 || val > 1)
 832			ret = -EINVAL;
 833		else
 834			msk->recvmsg_inq = !!val;
 835		break;
 836	case TCP_NOTSENT_LOWAT:
 837		WRITE_ONCE(msk->notsent_lowat, val);
 838		mptcp_write_space(sk);
 839		break;
 840	case TCP_CORK:
 841		ret = __mptcp_setsockopt_sol_tcp_cork(msk, val);
 842		break;
 843	case TCP_NODELAY:
 844		ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val);
 845		break;
 846	case TCP_KEEPIDLE:
 847		ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPIDLE,
 848						 &tcp_sock_set_keepidle_locked,
 849						 &msk->keepalive_idle, val);
 850		break;
 851	case TCP_KEEPINTVL:
 852		ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPINTVL,
 853						 &tcp_sock_set_keepintvl,
 854						 &msk->keepalive_intvl, val);
 855		break;
 856	case TCP_KEEPCNT:
 857		ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPCNT,
 858						 &tcp_sock_set_keepcnt,
 859						 &msk->keepalive_cnt,
 860						 val);
 861		break;
 862	default:
 863		ret = -ENOPROTOOPT;
 864	}
 865
 866	release_sock(sk);
 867	return ret;
 868}
 869
 870int mptcp_setsockopt(struct sock *sk, int level, int optname,
 871		     sockptr_t optval, unsigned int optlen)
 872{
 873	struct mptcp_sock *msk = mptcp_sk(sk);
 874	struct sock *ssk;
 875
 876	pr_debug("msk=%p\n", msk);
 877
 878	if (level == SOL_SOCKET)
 879		return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
 880
 881	if (!mptcp_supported_sockopt(level, optname))
 882		return -ENOPROTOOPT;
 883
 884	/* @@ the meaning of setsockopt() when the socket is connected and
 885	 * there are multiple subflows is not yet defined. It is up to the
 886	 * MPTCP-level socket to configure the subflows until the subflow
 887	 * is in TCP fallback, when TCP socket options are passed through
 888	 * to the one remaining subflow.
 889	 */
 890	lock_sock(sk);
 891	ssk = __mptcp_tcp_fallback(msk);
 892	release_sock(sk);
 893	if (ssk)
 894		return tcp_setsockopt(ssk, level, optname, optval, optlen);
 895
 896	if (level == SOL_IP)
 897		return mptcp_setsockopt_v4(msk, optname, optval, optlen);
 898
 899	if (level == SOL_IPV6)
 900		return mptcp_setsockopt_v6(msk, optname, optval, optlen);
 901
 902	if (level == SOL_TCP)
 903		return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen);
 904
 905	return -EOPNOTSUPP;
 906}
 907
 908static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
 909					  char __user *optval, int __user *optlen)
 910{
 911	struct sock *sk = (struct sock *)msk;
 912	struct sock *ssk;
 913	int ret;
 914
 915	lock_sock(sk);
 916	ssk = msk->first;
 917	if (ssk) {
 918		ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
 919		goto out;
 920	}
 921
 922	ssk = __mptcp_nmpc_sk(msk);
 923	if (IS_ERR(ssk)) {
 924		ret = PTR_ERR(ssk);
 925		goto out;
 926	}
 927
 928	ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
 929
 930out:
 931	release_sock(sk);
 932	return ret;
 933}
 934
 935void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
 936{
 937	struct sock *sk = (struct sock *)msk;
 938	u32 flags = 0;
 939	bool slow;
 940	u32 now;
 941
 942	memset(info, 0, sizeof(*info));
 943
 944	info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
 945	info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
 946	info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
 947	info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
 948
 949	if (inet_sk_state_load(sk) == TCP_LISTEN)
 950		return;
 951
 952	/* The following limits only make sense for the in-kernel PM */
 953	if (mptcp_pm_is_kernel(msk)) {
 954		info->mptcpi_subflows_max =
 955			mptcp_pm_get_subflows_max(msk);
 956		info->mptcpi_add_addr_signal_max =
 957			mptcp_pm_get_add_addr_signal_max(msk);
 958		info->mptcpi_add_addr_accepted_max =
 959			mptcp_pm_get_add_addr_accept_max(msk);
 960		info->mptcpi_local_addr_max =
 961			mptcp_pm_get_local_addr_max(msk);
 962	}
 963
 964	if (__mptcp_check_fallback(msk))
 965		flags |= MPTCP_INFO_FLAG_FALLBACK;
 966	if (READ_ONCE(msk->can_ack))
 967		flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
 968	info->mptcpi_flags = flags;
 969
 970	slow = lock_sock_fast(sk);
 971	info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
 972	info->mptcpi_token = msk->token;
 973	info->mptcpi_write_seq = msk->write_seq;
 974	info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits;
 975	info->mptcpi_bytes_sent = msk->bytes_sent;
 976	info->mptcpi_bytes_received = msk->bytes_received;
 977	info->mptcpi_bytes_retrans = msk->bytes_retrans;
 978	info->mptcpi_subflows_total = info->mptcpi_subflows +
 979		__mptcp_has_initial_subflow(msk);
 980	now = tcp_jiffies32;
 981	info->mptcpi_last_data_sent = jiffies_to_msecs(now - msk->last_data_sent);
 982	info->mptcpi_last_data_recv = jiffies_to_msecs(now - msk->last_data_recv);
 983	unlock_sock_fast(sk, slow);
 984
 985	mptcp_data_lock(sk);
 986	info->mptcpi_last_ack_recv = jiffies_to_msecs(now - msk->last_ack_recv);
 987	info->mptcpi_snd_una = msk->snd_una;
 988	info->mptcpi_rcv_nxt = msk->ack_seq;
 989	info->mptcpi_bytes_acked = msk->bytes_acked;
 990	mptcp_data_unlock(sk);
 991}
 992EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
 993
 994static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen)
 995{
 996	struct mptcp_info m_info;
 997	int len;
 998
 999	if (get_user(len, optlen))
1000		return -EFAULT;
1001
 1002	/* A zero-length query is only used to check if a fallback to TCP happened. */
1003	if (len == 0)
1004		return 0;
1005
1006	len = min_t(unsigned int, len, sizeof(struct mptcp_info));
1007
1008	mptcp_diag_fill_info(msk, &m_info);
1009
1010	if (put_user(len, optlen))
1011		return -EFAULT;
1012
1013	if (copy_to_user(optval, &m_info, len))
1014		return -EFAULT;
1015
1016	return 0;
1017}
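/* Illustrative user-space sketch (editor-added, not part of the kernel file):
 * the zero-length MPTCP_INFO query handled above is the cheap way to detect a
 * fallback to plain TCP, because once the connection has fallen back
 * mptcp_getsockopt() hands SOL_MPTCP straight to tcp_getsockopt(), which does
 * not recognize that level and fails. The helper name below is hypothetical.
 */
#include <sys/socket.h>
#include <linux/mptcp.h>	/* MPTCP_INFO; SOL_MPTCP (284) comes from the libc socket headers */

static int connection_is_mptcp(int fd)
{
	socklen_t len = 0;

	/* succeeds while MPTCP is in use, fails after a fallback to TCP */
	return getsockopt(fd, SOL_MPTCP, MPTCP_INFO, NULL, &len) == 0;
}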
1018
1019static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd,
1020				  char __user *optval,
1021				  u32 copied,
1022				  int __user *optlen)
1023{
1024	u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd));
1025
1026	if (copied)
1027		copied += sfd->size_subflow_data;
1028	else
1029		copied = copylen;
1030
1031	if (put_user(copied, optlen))
1032		return -EFAULT;
1033
1034	if (copy_to_user(optval, sfd, copylen))
1035		return -EFAULT;
1036
1037	return 0;
1038}
1039
1040static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
1041				  char __user *optval,
1042				  int __user *optlen)
1043{
1044	int len, copylen;
1045
1046	if (get_user(len, optlen))
1047		return -EFAULT;
1048
1049	/* if mptcp_subflow_data size is changed, need to adjust
1050	 * this function to deal with programs using old version.
1051	 */
1052	BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE);
1053
1054	if (len < MIN_INFO_OPTLEN_SIZE)
1055		return -EINVAL;
1056
1057	memset(sfd, 0, sizeof(*sfd));
1058
1059	copylen = min_t(unsigned int, len, sizeof(*sfd));
1060	if (copy_from_user(sfd, optval, copylen))
1061		return -EFAULT;
1062
1063	/* size_subflow_data is u32, but len is signed */
1064	if (sfd->size_subflow_data > INT_MAX ||
1065	    sfd->size_user > INT_MAX)
1066		return -EINVAL;
1067
1068	if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE ||
1069	    sfd->size_subflow_data > len)
1070		return -EINVAL;
1071
1072	if (sfd->num_subflows || sfd->size_kernel)
1073		return -EINVAL;
1074
1075	return len - sfd->size_subflow_data;
1076}
1077
1078static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval,
1079				    int __user *optlen)
1080{
1081	struct mptcp_subflow_context *subflow;
1082	struct sock *sk = (struct sock *)msk;
1083	unsigned int sfcount = 0, copied = 0;
1084	struct mptcp_subflow_data sfd;
1085	char __user *infoptr;
1086	int len;
1087
1088	len = mptcp_get_subflow_data(&sfd, optval, optlen);
1089	if (len < 0)
1090		return len;
1091
1092	sfd.size_kernel = sizeof(struct tcp_info);
1093	sfd.size_user = min_t(unsigned int, sfd.size_user,
1094			      sizeof(struct tcp_info));
1095
1096	infoptr = optval + sfd.size_subflow_data;
1097
1098	lock_sock(sk);
1099
1100	mptcp_for_each_subflow(msk, subflow) {
1101		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
1102
1103		++sfcount;
1104
1105		if (len && len >= sfd.size_user) {
1106			struct tcp_info info;
1107
1108			tcp_get_info(ssk, &info);
1109
1110			if (copy_to_user(infoptr, &info, sfd.size_user)) {
1111				release_sock(sk);
1112				return -EFAULT;
1113			}
1114
1115			infoptr += sfd.size_user;
1116			copied += sfd.size_user;
1117			len -= sfd.size_user;
1118		}
1119	}
1120
1121	release_sock(sk);
1122
1123	sfd.num_subflows = sfcount;
1124
1125	if (mptcp_put_subflow_data(&sfd, optval, copied, optlen))
1126		return -EFAULT;
1127
1128	return 0;
1129}
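/* Illustrative user-space sketch (editor-added, not part of the kernel file):
 * MPTCP_TCPINFO takes a struct mptcp_subflow_data header followed by storage
 * for per-subflow struct tcp_info entries, laid out as the code above expects;
 * num_subflows reports the real subflow count even when fewer entries fit.
 * The wrapper struct and function name below are hypothetical; the layout
 * relies on the 16-byte header leaving no padding before the array.
 */
#include <sys/socket.h>
#include <linux/tcp.h>		/* struct tcp_info */
#include <linux/mptcp.h>	/* MPTCP_TCPINFO, struct mptcp_subflow_data */

static int dump_subflow_tcpinfo(int fd)
{
	struct {
		struct mptcp_subflow_data d;
		struct tcp_info ti[2];	/* room for two subflows */
	} buf = { 0 };
	socklen_t olen = sizeof(buf);

	buf.d.size_subflow_data = sizeof(buf.d);
	buf.d.size_user = sizeof(struct tcp_info);

	if (getsockopt(fd, SOL_MPTCP, MPTCP_TCPINFO, &buf, &olen))
		return -1;

	/* buf.d.num_subflows subflows exist; at most two tcp_info slots,
	 * each buf.d.size_user bytes, were filled in
	 */
	return buf.d.num_subflows;
}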
1130
1131static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a)
1132{
1133	const struct inet_sock *inet = inet_sk(sk);
1134
1135	memset(a, 0, sizeof(*a));
1136
1137	if (sk->sk_family == AF_INET) {
1138		a->sin_local.sin_family = AF_INET;
1139		a->sin_local.sin_port = inet->inet_sport;
1140		a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr;
1141
1142		if (!a->sin_local.sin_addr.s_addr)
1143			a->sin_local.sin_addr.s_addr = inet->inet_saddr;
1144
1145		a->sin_remote.sin_family = AF_INET;
1146		a->sin_remote.sin_port = inet->inet_dport;
1147		a->sin_remote.sin_addr.s_addr = inet->inet_daddr;
1148#if IS_ENABLED(CONFIG_IPV6)
1149	} else if (sk->sk_family == AF_INET6) {
1150		const struct ipv6_pinfo *np = inet6_sk(sk);
1151
1152		if (WARN_ON_ONCE(!np))
1153			return;
1154
1155		a->sin6_local.sin6_family = AF_INET6;
1156		a->sin6_local.sin6_port = inet->inet_sport;
1157
1158		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
1159			a->sin6_local.sin6_addr = np->saddr;
1160		else
1161			a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr;
1162
1163		a->sin6_remote.sin6_family = AF_INET6;
1164		a->sin6_remote.sin6_port = inet->inet_dport;
1165		a->sin6_remote.sin6_addr = sk->sk_v6_daddr;
1166#endif
1167	}
1168}
1169
1170static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval,
1171					  int __user *optlen)
1172{
1173	struct mptcp_subflow_context *subflow;
1174	struct sock *sk = (struct sock *)msk;
1175	unsigned int sfcount = 0, copied = 0;
1176	struct mptcp_subflow_data sfd;
1177	char __user *addrptr;
1178	int len;
1179
1180	len = mptcp_get_subflow_data(&sfd, optval, optlen);
1181	if (len < 0)
1182		return len;
1183
1184	sfd.size_kernel = sizeof(struct mptcp_subflow_addrs);
1185	sfd.size_user = min_t(unsigned int, sfd.size_user,
1186			      sizeof(struct mptcp_subflow_addrs));
1187
1188	addrptr = optval + sfd.size_subflow_data;
1189
1190	lock_sock(sk);
1191
1192	mptcp_for_each_subflow(msk, subflow) {
1193		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
1194
1195		++sfcount;
1196
1197		if (len && len >= sfd.size_user) {
1198			struct mptcp_subflow_addrs a;
1199
1200			mptcp_get_sub_addrs(ssk, &a);
1201
1202			if (copy_to_user(addrptr, &a, sfd.size_user)) {
1203				release_sock(sk);
1204				return -EFAULT;
1205			}
1206
1207			addrptr += sfd.size_user;
1208			copied += sfd.size_user;
1209			len -= sfd.size_user;
1210		}
1211	}
1212
1213	release_sock(sk);
1214
1215	sfd.num_subflows = sfcount;
1216
1217	if (mptcp_put_subflow_data(&sfd, optval, copied, optlen))
1218		return -EFAULT;
1219
1220	return 0;
1221}
1222
1223static int mptcp_get_full_info(struct mptcp_full_info *mfi,
1224			       char __user *optval,
1225			       int __user *optlen)
1226{
1227	int len;
1228
1229	BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) !=
1230		     MIN_FULL_INFO_OPTLEN_SIZE);
1231
1232	if (get_user(len, optlen))
1233		return -EFAULT;
1234
1235	if (len < MIN_FULL_INFO_OPTLEN_SIZE)
1236		return -EINVAL;
1237
1238	memset(mfi, 0, sizeof(*mfi));
1239	if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE))
1240		return -EFAULT;
1241
1242	if (mfi->size_tcpinfo_kernel ||
1243	    mfi->size_sfinfo_kernel ||
1244	    mfi->num_subflows)
1245		return -EINVAL;
1246
1247	if (mfi->size_sfinfo_user > INT_MAX ||
1248	    mfi->size_tcpinfo_user > INT_MAX)
1249		return -EINVAL;
1250
1251	return len - MIN_FULL_INFO_OPTLEN_SIZE;
1252}
1253
1254static int mptcp_put_full_info(struct mptcp_full_info *mfi,
1255			       char __user *optval,
1256			       u32 copylen,
1257			       int __user *optlen)
1258{
1259	copylen += MIN_FULL_INFO_OPTLEN_SIZE;
1260	if (put_user(copylen, optlen))
1261		return -EFAULT;
1262
1263	if (copy_to_user(optval, mfi, copylen))
1264		return -EFAULT;
1265	return 0;
1266}
1267
1268static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval,
1269				      int __user *optlen)
1270{
1271	unsigned int sfcount = 0, copylen = 0;
1272	struct mptcp_subflow_context *subflow;
1273	struct sock *sk = (struct sock *)msk;
1274	void __user *tcpinfoptr, *sfinfoptr;
1275	struct mptcp_full_info mfi;
1276	int len;
1277
1278	len = mptcp_get_full_info(&mfi, optval, optlen);
1279	if (len < 0)
1280		return len;
1281
1282	/* don't bother filling the mptcp info if there is not enough
1283	 * user-space-provided storage
1284	 */
1285	if (len > 0) {
1286		mptcp_diag_fill_info(msk, &mfi.mptcp_info);
1287		copylen += min_t(unsigned int, len, sizeof(struct mptcp_info));
1288	}
1289
1290	mfi.size_tcpinfo_kernel = sizeof(struct tcp_info);
1291	mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user,
1292				      sizeof(struct tcp_info));
1293	sfinfoptr = u64_to_user_ptr(mfi.subflow_info);
1294	mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info);
1295	mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user,
1296				     sizeof(struct mptcp_subflow_info));
1297	tcpinfoptr = u64_to_user_ptr(mfi.tcp_info);
1298
1299	lock_sock(sk);
1300	mptcp_for_each_subflow(msk, subflow) {
1301		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
1302		struct mptcp_subflow_info sfinfo;
1303		struct tcp_info tcp_info;
1304
1305		if (sfcount++ >= mfi.size_arrays_user)
1306			continue;
1307
1308		/* fetch addr/tcp_info only if the user space buffers
1309		 * are wide enough
1310		 */
1311		memset(&sfinfo, 0, sizeof(sfinfo));
1312		sfinfo.id = subflow->subflow_id;
1313		if (mfi.size_sfinfo_user >
1314		    offsetof(struct mptcp_subflow_info, addrs))
1315			mptcp_get_sub_addrs(ssk, &sfinfo.addrs);
1316		if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user))
1317			goto fail_release;
1318
1319		if (mfi.size_tcpinfo_user) {
1320			tcp_get_info(ssk, &tcp_info);
1321			if (copy_to_user(tcpinfoptr, &tcp_info,
1322					 mfi.size_tcpinfo_user))
1323				goto fail_release;
1324		}
1325
1326		tcpinfoptr += mfi.size_tcpinfo_user;
1327		sfinfoptr += mfi.size_sfinfo_user;
1328	}
1329	release_sock(sk);
1330
1331	mfi.num_subflows = sfcount;
1332	if (mptcp_put_full_info(&mfi, optval, copylen, optlen))
1333		return -EFAULT;
1334
1335	return 0;
1336
1337fail_release:
1338	release_sock(sk);
1339	return -EFAULT;
1340}
1341
1342static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
1343				int __user *optlen, int val)
1344{
1345	int len;
1346
1347	if (get_user(len, optlen))
1348		return -EFAULT;
1349	if (len < 0)
1350		return -EINVAL;
1351
1352	if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
1353		unsigned char ucval = (unsigned char)val;
1354
1355		len = 1;
1356		if (put_user(len, optlen))
1357			return -EFAULT;
1358		if (copy_to_user(optval, &ucval, 1))
1359			return -EFAULT;
1360	} else {
1361		len = min_t(unsigned int, len, sizeof(int));
1362		if (put_user(len, optlen))
1363			return -EFAULT;
1364		if (copy_to_user(optval, &val, len))
1365			return -EFAULT;
1366	}
1367
1368	return 0;
1369}
1370
1371static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
1372				    char __user *optval, int __user *optlen)
1373{
1374	struct sock *sk = (void *)msk;
1375
1376	switch (optname) {
1377	case TCP_ULP:
1378	case TCP_CONGESTION:
1379	case TCP_INFO:
1380	case TCP_CC_INFO:
1381	case TCP_DEFER_ACCEPT:
1382	case TCP_FASTOPEN:
1383	case TCP_FASTOPEN_CONNECT:
1384	case TCP_FASTOPEN_KEY:
1385	case TCP_FASTOPEN_NO_COOKIE:
1386		return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
1387						      optval, optlen);
1388	case TCP_INQ:
1389		return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq);
1390	case TCP_CORK:
1391		return mptcp_put_int_option(msk, optval, optlen, msk->cork);
1392	case TCP_NODELAY:
1393		return mptcp_put_int_option(msk, optval, optlen, msk->nodelay);
1394	case TCP_KEEPIDLE:
1395		return mptcp_put_int_option(msk, optval, optlen,
1396					    msk->keepalive_idle ? :
1397					    READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_time) / HZ);
1398	case TCP_KEEPINTVL:
1399		return mptcp_put_int_option(msk, optval, optlen,
1400					    msk->keepalive_intvl ? :
1401					    READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_intvl) / HZ);
1402	case TCP_KEEPCNT:
1403		return mptcp_put_int_option(msk, optval, optlen,
1404					    msk->keepalive_cnt ? :
1405					    READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_probes));
1406	case TCP_NOTSENT_LOWAT:
1407		return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat);
1408	case TCP_IS_MPTCP:
1409		return mptcp_put_int_option(msk, optval, optlen, 1);
1410	}
1411	return -EOPNOTSUPP;
1412}
1413
1414static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
1415			       char __user *optval, int __user *optlen)
1416{
1417	struct sock *sk = (void *)msk;
1418
1419	switch (optname) {
1420	case IP_TOS:
1421		return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos));
1422	case IP_BIND_ADDRESS_NO_PORT:
1423		return mptcp_put_int_option(msk, optval, optlen,
1424				inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
1425	case IP_LOCAL_PORT_RANGE:
1426		return mptcp_put_int_option(msk, optval, optlen,
1427				READ_ONCE(inet_sk(sk)->local_port_range));
1428	}
1429
1430	return -EOPNOTSUPP;
1431}
1432
1433static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
1434				      char __user *optval, int __user *optlen)
1435{
1436	switch (optname) {
1437	case MPTCP_INFO:
1438		return mptcp_getsockopt_info(msk, optval, optlen);
1439	case MPTCP_FULL_INFO:
1440		return mptcp_getsockopt_full_info(msk, optval, optlen);
1441	case MPTCP_TCPINFO:
1442		return mptcp_getsockopt_tcpinfo(msk, optval, optlen);
1443	case MPTCP_SUBFLOW_ADDRS:
1444		return mptcp_getsockopt_subflow_addrs(msk, optval, optlen);
1445	}
1446
1447	return -EOPNOTSUPP;
1448}
1449
1450int mptcp_getsockopt(struct sock *sk, int level, int optname,
1451		     char __user *optval, int __user *option)
1452{
1453	struct mptcp_sock *msk = mptcp_sk(sk);
1454	struct sock *ssk;
1455
1456	pr_debug("msk=%p\n", msk);
1457
1458	/* @@ the meaning of setsockopt() when the socket is connected and
1459	 * there are multiple subflows is not yet defined. It is up to the
1460	 * MPTCP-level socket to configure the subflows until the subflow
1461	 * is in TCP fallback, when socket options are passed through
1462	 * to the one remaining subflow.
1463	 */
1464	lock_sock(sk);
1465	ssk = __mptcp_tcp_fallback(msk);
1466	release_sock(sk);
1467	if (ssk)
1468		return tcp_getsockopt(ssk, level, optname, optval, option);
1469
1470	if (level == SOL_IP)
1471		return mptcp_getsockopt_v4(msk, optname, optval, option);
1472	if (level == SOL_TCP)
1473		return mptcp_getsockopt_sol_tcp(msk, optname, optval, option);
1474	if (level == SOL_MPTCP)
1475		return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option);
1476	return -EOPNOTSUPP;
1477}
1478
1479static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
1480{
1481	static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK;
1482	struct sock *sk = (struct sock *)msk;
1483
1484	if (ssk->sk_prot->keepalive) {
1485		if (sock_flag(sk, SOCK_KEEPOPEN))
1486			ssk->sk_prot->keepalive(ssk, 1);
1487		else
1488			ssk->sk_prot->keepalive(ssk, 0);
1489	}
1490
1491	ssk->sk_priority = sk->sk_priority;
1492	ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
1493	ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
1494	ssk->sk_ipv6only = sk->sk_ipv6only;
1495	__ip_sock_set_tos(ssk, inet_sk(sk)->tos);
1496
1497	if (sk->sk_userlocks & tx_rx_locks) {
1498		ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks;
1499		if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) {
1500			WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
1501			mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf;
1502		}
1503		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
1504			WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
1505	}
1506
1507	if (sock_flag(sk, SOCK_LINGER)) {
1508		ssk->sk_lingertime = sk->sk_lingertime;
1509		sock_set_flag(ssk, SOCK_LINGER);
1510	} else {
1511		sock_reset_flag(ssk, SOCK_LINGER);
1512	}
1513
1514	if (sk->sk_mark != ssk->sk_mark) {
1515		ssk->sk_mark = sk->sk_mark;
1516		sk_dst_reset(ssk);
1517	}
1518
1519	sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG));
1520
1521	if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops)
1522		tcp_set_congestion_control(ssk, msk->ca_name, false, true);
1523	__tcp_sock_set_cork(ssk, !!msk->cork);
1524	__tcp_sock_set_nodelay(ssk, !!msk->nodelay);
1525	tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle);
1526	tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl);
1527	tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt);
1528
1529	inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
1530	inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
1531	inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
1532	WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range));
1533}
1534
1535void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
1536{
1537	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
1538
1539	msk_owned_by_me(msk);
1540
1541	ssk->sk_rcvlowat = 0;
1542
1543	/* subflows must ignore any latency-related settings: will not affect
1544	 * the user-space - only the msk is relevant - but will foul the
1545	 * mptcp scheduler
1546	 */
1547	tcp_sk(ssk)->notsent_lowat = UINT_MAX;
1548
1549	if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
1550		sync_socket_options(msk, ssk);
1551
1552		subflow->setsockopt_seq = msk->setsockopt_seq;
1553	}
1554}
1555
1556/* unfortunately this is different enough from the tcp version so
1557 * that we can't factor it out
1558 */
1559int mptcp_set_rcvlowat(struct sock *sk, int val)
1560{
1561	struct mptcp_subflow_context *subflow;
1562	int space, cap;
1563
1564	/* bpf can land here with a wrong sk type */
1565	if (sk->sk_protocol == IPPROTO_TCP)
1566		return -EINVAL;
1567
1568	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
1569		cap = sk->sk_rcvbuf >> 1;
1570	else
1571		cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
1572	val = min(val, cap);
1573	WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
1574
1575	/* Check if we need to signal EPOLLIN right now */
1576	if (mptcp_epollin_ready(sk))
1577		sk->sk_data_ready(sk);
1578
1579	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
1580		return 0;
1581
1582	space = mptcp_space_from_win(sk, val);
1583	if (space <= sk->sk_rcvbuf)
1584		return 0;
1585
1586	/* propagate the rcvbuf changes to all the subflows */
1587	WRITE_ONCE(sk->sk_rcvbuf, space);
1588	mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
1589		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
1590		bool slow;
1591
1592		slow = lock_sock_fast(ssk);
1593		WRITE_ONCE(ssk->sk_rcvbuf, space);
1594		WRITE_ONCE(tcp_sk(ssk)->window_clamp, val);
1595		unlock_sock_fast(ssk, slow);
1596	}
1597	return 0;
1598}
v6.2
   1// SPDX-License-Identifier: GPL-2.0
   2/* Multipath TCP
   3 *
   4 * Copyright (c) 2021, Red Hat.
   5 */
   6
   7#define pr_fmt(fmt) "MPTCP: " fmt
   8
   9#include <linux/kernel.h>
  10#include <linux/module.h>
  11#include <net/sock.h>
  12#include <net/protocol.h>
  13#include <net/tcp.h>
  14#include <net/mptcp.h>
  15#include "protocol.h"
  16
  17#define MIN_INFO_OPTLEN_SIZE	16
 
  18
  19static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
  20{
  21	sock_owned_by_me((const struct sock *)msk);
  22
  23	if (likely(!__mptcp_check_fallback(msk)))
  24		return NULL;
  25
  26	return msk->first;
  27}
  28
  29static u32 sockopt_seq_reset(const struct sock *sk)
  30{
  31	sock_owned_by_me(sk);
  32
  33	/* Highbits contain state.  Allows to distinguish sockopt_seq
  34	 * of listener and established:
  35	 * s0 = new_listener()
  36	 * sockopt(s0) - seq is 1
  37	 * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0)
  38	 * sockopt(s0) - seq increments to 2 on s0
  39	 * sockopt(s1) // seq increments to 2 on s1 (different option)
  40	 * new ssk completes join, inherits options from s0 // seq 2
  41	 * Needs sync from mptcp join logic, but ssk->seq == msk->seq
  42	 *
  43	 * Set High order bits to sk_state so ssk->seq == msk->seq test
  44	 * will fail.
  45	 */
  46
  47	return (u32)sk->sk_state << 24u;
  48}
  49
  50static void sockopt_seq_inc(struct mptcp_sock *msk)
  51{
  52	u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff;
  53
  54	msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq;
  55}
  56
  57static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval,
  58				unsigned int optlen, int *val)
  59{
  60	if (optlen < sizeof(int))
  61		return -EINVAL;
  62
  63	if (copy_from_sockptr(val, optval, sizeof(*val)))
  64		return -EFAULT;
  65
  66	return 0;
  67}
  68
  69static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val)
  70{
  71	struct mptcp_subflow_context *subflow;
  72	struct sock *sk = (struct sock *)msk;
  73
  74	lock_sock(sk);
  75	sockopt_seq_inc(msk);
  76
  77	mptcp_for_each_subflow(msk, subflow) {
  78		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
  79		bool slow = lock_sock_fast(ssk);
  80
  81		switch (optname) {
  82		case SO_DEBUG:
  83			sock_valbool_flag(ssk, SOCK_DBG, !!val);
  84			break;
  85		case SO_KEEPALIVE:
  86			if (ssk->sk_prot->keepalive)
  87				ssk->sk_prot->keepalive(ssk, !!val);
  88			sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val);
  89			break;
  90		case SO_PRIORITY:
  91			ssk->sk_priority = val;
  92			break;
  93		case SO_SNDBUF:
  94		case SO_SNDBUFFORCE:
  95			ssk->sk_userlocks |= SOCK_SNDBUF_LOCK;
  96			WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
 
  97			break;
  98		case SO_RCVBUF:
  99		case SO_RCVBUFFORCE:
 100			ssk->sk_userlocks |= SOCK_RCVBUF_LOCK;
 101			WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
 102			break;
 103		case SO_MARK:
 104			if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) {
 105				ssk->sk_mark = sk->sk_mark;
 106				sk_dst_reset(ssk);
 107			}
 108			break;
 109		case SO_INCOMING_CPU:
 110			WRITE_ONCE(ssk->sk_incoming_cpu, val);
 111			break;
 112		}
 113
 114		subflow->setsockopt_seq = msk->setsockopt_seq;
 115		unlock_sock_fast(ssk, slow);
 116	}
 117
 118	release_sock(sk);
 119}
 120
 121static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val)
 122{
 123	sockptr_t optval = KERNEL_SOCKPTR(&val);
 124	struct sock *sk = (struct sock *)msk;
 125	int ret;
 126
 127	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
 128			      optval, sizeof(val));
 129	if (ret)
 130		return ret;
 131
 132	mptcp_sol_socket_sync_intval(msk, optname, val);
 133	return 0;
 134}
 135
 136static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val)
 137{
 138	struct sock *sk = (struct sock *)msk;
 139
 140	WRITE_ONCE(sk->sk_incoming_cpu, val);
 141
 142	mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val);
 143}
 144
 145static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val)
 146{
 147	sockptr_t optval = KERNEL_SOCKPTR(&val);
 148	struct mptcp_subflow_context *subflow;
 149	struct sock *sk = (struct sock *)msk;
 150	int ret;
 151
 152	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
 153			      optval, sizeof(val));
 154	if (ret)
 155		return ret;
 156
 157	lock_sock(sk);
 158	mptcp_for_each_subflow(msk, subflow) {
 159		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 160		bool slow = lock_sock_fast(ssk);
 161
 162		sock_set_timestamp(sk, optname, !!val);
 163		unlock_sock_fast(ssk, slow);
 164	}
 165
 166	release_sock(sk);
 167	return 0;
 168}
 169
 170static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
 171					   sockptr_t optval,
 172					   unsigned int optlen)
 173{
 174	int val, ret;
 175
 176	ret = mptcp_get_int_option(msk, optval, optlen, &val);
 177	if (ret)
 178		return ret;
 179
 180	switch (optname) {
 181	case SO_KEEPALIVE:
 182		mptcp_sol_socket_sync_intval(msk, optname, val);
 183		return 0;
 184	case SO_DEBUG:
 185	case SO_MARK:
 186	case SO_PRIORITY:
 187	case SO_SNDBUF:
 188	case SO_SNDBUFFORCE:
 189	case SO_RCVBUF:
 190	case SO_RCVBUFFORCE:
 191		return mptcp_sol_socket_intval(msk, optname, val);
 192	case SO_INCOMING_CPU:
 193		mptcp_so_incoming_cpu(msk, val);
 194		return 0;
 195	case SO_TIMESTAMP_OLD:
 196	case SO_TIMESTAMP_NEW:
 197	case SO_TIMESTAMPNS_OLD:
 198	case SO_TIMESTAMPNS_NEW:
 199		return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val);
 200	}
 201
 202	return -ENOPROTOOPT;
 203}
 204
 205static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk,
 206						    int optname,
 207						    sockptr_t optval,
 208						    unsigned int optlen)
 209{
 210	struct mptcp_subflow_context *subflow;
 211	struct sock *sk = (struct sock *)msk;
 212	struct so_timestamping timestamping;
 213	int ret;
 214
 215	if (optlen == sizeof(timestamping)) {
 216		if (copy_from_sockptr(&timestamping, optval,
 217				      sizeof(timestamping)))
 218			return -EFAULT;
 219	} else if (optlen == sizeof(int)) {
 220		memset(&timestamping, 0, sizeof(timestamping));
 221
 222		if (copy_from_sockptr(&timestamping.flags, optval, sizeof(int)))
 223			return -EFAULT;
 224	} else {
 225		return -EINVAL;
 226	}
 227
 228	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
 229			      KERNEL_SOCKPTR(&timestamping),
 230			      sizeof(timestamping));
 231	if (ret)
 232		return ret;
 233
 234	lock_sock(sk);
 235
 236	mptcp_for_each_subflow(msk, subflow) {
 237		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 238		bool slow = lock_sock_fast(ssk);
 239
 240		sock_set_timestamping(sk, optname, timestamping);
 241		unlock_sock_fast(ssk, slow);
 242	}
 243
 244	release_sock(sk);
 245
 246	return 0;
 247}
 248
 249static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval,
 250					      unsigned int optlen)
 251{
 252	struct mptcp_subflow_context *subflow;
 253	struct sock *sk = (struct sock *)msk;
 254	struct linger ling;
 255	sockptr_t kopt;
 256	int ret;
 257
 258	if (optlen < sizeof(ling))
 259		return -EINVAL;
 260
 261	if (copy_from_sockptr(&ling, optval, sizeof(ling)))
 262		return -EFAULT;
 263
 264	kopt = KERNEL_SOCKPTR(&ling);
 265	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling));
 266	if (ret)
 267		return ret;
 268
 269	lock_sock(sk);
 270	sockopt_seq_inc(msk);
 271	mptcp_for_each_subflow(msk, subflow) {
 272		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 273		bool slow = lock_sock_fast(ssk);
 274
 275		if (!ling.l_onoff) {
 276			sock_reset_flag(ssk, SOCK_LINGER);
 277		} else {
 278			ssk->sk_lingertime = sk->sk_lingertime;
 279			sock_set_flag(ssk, SOCK_LINGER);
 280		}
 281
 282		subflow->setsockopt_seq = msk->setsockopt_seq;
 283		unlock_sock_fast(ssk, slow);
 284	}
 285
 286	release_sock(sk);
 287	return 0;
 288}
 289
 290static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 291				       sockptr_t optval, unsigned int optlen)
 292{
 293	struct sock *sk = (struct sock *)msk;
 294	struct socket *ssock;
 295	int ret;
 296
 297	switch (optname) {
 298	case SO_REUSEPORT:
 299	case SO_REUSEADDR:
 300	case SO_BINDTODEVICE:
 301	case SO_BINDTOIFINDEX:
 302		lock_sock(sk);
 303		ssock = __mptcp_nmpc_socket(msk);
 304		if (!ssock) {
 305			release_sock(sk);
 306			return -EINVAL;
 307		}
 308
 309		ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen);
 310		if (ret == 0) {
 311			if (optname == SO_REUSEPORT)
 312				sk->sk_reuseport = ssock->sk->sk_reuseport;
 313			else if (optname == SO_REUSEADDR)
 314				sk->sk_reuse = ssock->sk->sk_reuse;
 315			else if (optname == SO_BINDTODEVICE)
 316				sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if;
 317			else if (optname == SO_BINDTOIFINDEX)
 318				sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if;
 319		}
 320		release_sock(sk);
 321		return ret;
 322	case SO_KEEPALIVE:
 323	case SO_PRIORITY:
 324	case SO_SNDBUF:
 325	case SO_SNDBUFFORCE:
 326	case SO_RCVBUF:
 327	case SO_RCVBUFFORCE:
 328	case SO_MARK:
 329	case SO_INCOMING_CPU:
 330	case SO_DEBUG:
 331	case SO_TIMESTAMP_OLD:
 332	case SO_TIMESTAMP_NEW:
 333	case SO_TIMESTAMPNS_OLD:
 334	case SO_TIMESTAMPNS_NEW:
 335		return mptcp_setsockopt_sol_socket_int(msk, optname, optval,
 336						       optlen);
 337	case SO_TIMESTAMPING_OLD:
 338	case SO_TIMESTAMPING_NEW:
 339		return mptcp_setsockopt_sol_socket_timestamping(msk, optname,
 340								optval, optlen);
 341	case SO_LINGER:
 342		return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen);
 343	case SO_RCVLOWAT:
 344	case SO_RCVTIMEO_OLD:
 345	case SO_RCVTIMEO_NEW:
 346	case SO_SNDTIMEO_OLD:
 347	case SO_SNDTIMEO_NEW:
 348	case SO_BUSY_POLL:
 349	case SO_PREFER_BUSY_POLL:
 350	case SO_BUSY_POLL_BUDGET:
 351		/* No need to copy: only relevant for msk */
 352		return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
 353	case SO_NO_CHECK:
 354	case SO_DONTROUTE:
 355	case SO_BROADCAST:
 356	case SO_BSDCOMPAT:
 357	case SO_PASSCRED:
 358	case SO_PASSSEC:
 359	case SO_RXQ_OVFL:
 360	case SO_WIFI_STATUS:
 361	case SO_NOFCS:
 362	case SO_SELECT_ERR_QUEUE:
 363		return 0;
 364	}
 365
 366	/* SO_OOBINLINE is not supported, let's avoid the related mess.
 367	 * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF,
 368	 * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER:
 369	 * we must be careful with subflows.
 370	 *
 371	 * SO_ATTACH_REUSEPORT_EBPF is not supported, as it checks
 372	 * the sk_protocol field explicitly.
 373	 *
 374	 * SO_PEEK_OFF is unsupported, as it is for plain TCP
 375	 * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows
 376	 * SO_CNX_ADVICE is currently unsupported, could possibly be relevant,
 377	 * but likely needs careful design
 378	 *
 379	 * SO_ZEROCOPY is currently unsupported, TODO in sndmsg
 380	 * SO_TXTIME is currently unsupported
 381	 */
 382
 383	return -EOPNOTSUPP;
 384}
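/* Illustrative userspace sketch (not part of this file): setting a SOL_SOCKET
 * option on an MPTCP socket. Options such as SO_KEEPALIVE are accepted by
 * mptcp_setsockopt_sol_socket() above and applied to the MPTCP-level socket
 * and its TCP subflows. The example_* helper name is made up, and the
 * IPPROTO_MPTCP fallback define is an assumption for older libc headers.
 */
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262	/* assumed fallback if the libc headers lack it */
#endif

static int example_mptcp_keepalive(void)
{
	int one = 1, ret;
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);

	if (fd < 0)
		return -1;

	/* handled by the SOL_SOCKET dispatcher above */
	ret = setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one));
	close(fd);
	return ret;
}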
 385
 386static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
 387			       sockptr_t optval, unsigned int optlen)
 388{
 389	struct sock *sk = (struct sock *)msk;
 390	int ret = -EOPNOTSUPP;
 391	struct socket *ssock;
 392
 393	switch (optname) {
 394	case IPV6_V6ONLY:
 395	case IPV6_TRANSPARENT:
 396	case IPV6_FREEBIND:
 397		lock_sock(sk);
 398		ssock = __mptcp_nmpc_socket(msk);
 399		if (!ssock) {
 400			release_sock(sk);
 401			return -EINVAL;
 402		}
 403
 404		ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
 405		if (ret != 0) {
 406			release_sock(sk);
 407			return ret;
 408		}
 409
 410		sockopt_seq_inc(msk);
 411
 412		switch (optname) {
 413		case IPV6_V6ONLY:
 414			sk->sk_ipv6only = ssock->sk->sk_ipv6only;
 415			break;
 416		case IPV6_TRANSPARENT:
 417			inet_sk(sk)->transparent = inet_sk(ssock->sk)->transparent;
 418			break;
 419		case IPV6_FREEBIND:
 420			inet_sk(sk)->freebind = inet_sk(ssock->sk)->freebind;
 421			break;
 422		}
 423
 424		release_sock(sk);
 425		break;
 426	}
 427
 428	return ret;
 429}
 430
 431static bool mptcp_supported_sockopt(int level, int optname)
 432{
 433	if (level == SOL_IP) {
 434		switch (optname) {
 435		/* should work fine */
 436		case IP_FREEBIND:
 437		case IP_TRANSPARENT:
 438
 439		/* the following are control cmsg related */
 440		case IP_PKTINFO:
 441		case IP_RECVTTL:
 442		case IP_RECVTOS:
 443		case IP_RECVOPTS:
 444		case IP_RETOPTS:
 445		case IP_PASSSEC:
 446		case IP_RECVORIGDSTADDR:
 447		case IP_CHECKSUM:
 448		case IP_RECVFRAGSIZE:
 449
 450		/* common stuff that needs some love */
 451		case IP_TOS:
 452		case IP_TTL:
 453		case IP_BIND_ADDRESS_NO_PORT:
 454		case IP_MTU_DISCOVER:
 455		case IP_RECVERR:
 456
 457		/* possibly less common, may deserve some love */
 458		case IP_MINTTL:
 459
 460		/* the following is apparently a no-op for plain TCP */
 461		case IP_RECVERR_RFC4884:
 462			return true;
 463		}
 464
 465		/* IP_OPTIONS is not supported, needs subflow care */
 466		/* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */
 467		/* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF,
 468		 * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP,
 469		 * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE,
 470		 * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, MCAST_JOIN_SOURCE_GROUP,
 471		 * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE,
 472		 * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal
 473		 * with mcast stuff
 474		 */
 475		/* IP_IPSEC_POLICY, IP_XFRM_POLICY are not supported, unrelated here */
 476		return false;
 477	}
 478	if (level == SOL_IPV6) {
 479		switch (optname) {
 480		case IPV6_V6ONLY:
 481
 482		/* the following are control cmsg related */
 483		case IPV6_RECVPKTINFO:
 484		case IPV6_2292PKTINFO:
 485		case IPV6_RECVHOPLIMIT:
 486		case IPV6_2292HOPLIMIT:
 487		case IPV6_RECVRTHDR:
 488		case IPV6_2292RTHDR:
 489		case IPV6_RECVHOPOPTS:
 490		case IPV6_2292HOPOPTS:
 491		case IPV6_RECVDSTOPTS:
 492		case IPV6_2292DSTOPTS:
 493		case IPV6_RECVTCLASS:
 494		case IPV6_FLOWINFO:
 495		case IPV6_RECVPATHMTU:
 496		case IPV6_RECVORIGDSTADDR:
 497		case IPV6_RECVFRAGSIZE:
 498
 499		/* the following ones need some love but are quite common */
 500		case IPV6_TCLASS:
 501		case IPV6_TRANSPARENT:
 502		case IPV6_FREEBIND:
 503		case IPV6_PKTINFO:
 504		case IPV6_2292PKTOPTIONS:
 505		case IPV6_UNICAST_HOPS:
 506		case IPV6_MTU_DISCOVER:
 507		case IPV6_MTU:
 508		case IPV6_RECVERR:
 509		case IPV6_FLOWINFO_SEND:
 510		case IPV6_FLOWLABEL_MGR:
 511		case IPV6_MINHOPCOUNT:
 512		case IPV6_DONTFRAG:
 513		case IPV6_AUTOFLOWLABEL:
 514
 515		/* the following one is a no-op for plain TCP */
 516		case IPV6_RECVERR_RFC4884:
 517			return true;
 518		}
 519
 520		/* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are
 521		 * not supported
 522		 */
 523		/* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF,
 524		 * IPV6_MULTICAST_IF, IPV6_ADDRFORM,
 525		 * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST,
 526		 * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP,
 527		 * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP,
 528		 * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER
 529		 * are not supported, better not to deal with mcast
 530		 */
 531		/* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since they are evil */
 532
 533		/* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */
 534		/* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */
 535		return false;
 536	}
 537	if (level == SOL_TCP) {
 538		switch (optname) {
 539		/* the following are no-ops or should work just fine */
 540		case TCP_THIN_DUPACK:
 541		case TCP_DEFER_ACCEPT:
 542
 543		/* the following need some love */
 544		case TCP_MAXSEG:
 545		case TCP_NODELAY:
 546		case TCP_THIN_LINEAR_TIMEOUTS:
 547		case TCP_CONGESTION:
 548		case TCP_CORK:
 549		case TCP_KEEPIDLE:
 550		case TCP_KEEPINTVL:
 551		case TCP_KEEPCNT:
 552		case TCP_SYNCNT:
 553		case TCP_SAVE_SYN:
 554		case TCP_LINGER2:
 555		case TCP_WINDOW_CLAMP:
 556		case TCP_QUICKACK:
 557		case TCP_USER_TIMEOUT:
 558		case TCP_TIMESTAMP:
 559		case TCP_NOTSENT_LOWAT:
 560		case TCP_TX_DELAY:
 561		case TCP_INQ:
 562		case TCP_FASTOPEN:
 563		case TCP_FASTOPEN_CONNECT:
 564		case TCP_FASTOPEN_KEY:
 565		case TCP_FASTOPEN_NO_COOKIE:
 566			return true;
 567		}
 568
 569		/* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */
 570
 571		/* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS,
 572		 * TCP_REPAIR_WINDOW are not supported, better avoid this mess
 573		 */
 574	}
 575	return false;
 576}
 577
 578static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval,
 579					       unsigned int optlen)
 580{
 581	struct mptcp_subflow_context *subflow;
 582	struct sock *sk = (struct sock *)msk;
 583	char name[TCP_CA_NAME_MAX];
 584	bool cap_net_admin;
 585	int ret;
 586
 587	if (optlen < 1)
 588		return -EINVAL;
 589
 590	ret = strncpy_from_sockptr(name, optval,
 591				   min_t(long, TCP_CA_NAME_MAX - 1, optlen));
 592	if (ret < 0)
 593		return -EFAULT;
 594
 595	name[ret] = 0;
 596
 597	cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN);
 598
 599	ret = 0;
 600	lock_sock(sk);
 601	sockopt_seq_inc(msk);
 602	mptcp_for_each_subflow(msk, subflow) {
 603		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 604		int err;
 605
 606		lock_sock(ssk);
 607		err = tcp_set_congestion_control(ssk, name, true, cap_net_admin);
 608		if (err < 0 && ret == 0)
 609			ret = err;
 610		subflow->setsockopt_seq = msk->setsockopt_seq;
 611		release_sock(ssk);
 612	}
 613
 614	if (ret == 0)
 615		strcpy(msk->ca_name, name);
 616
 617	release_sock(sk);
 618	return ret;
 619}
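/* Illustrative userspace sketch (not part of this file): selecting a
 * congestion control algorithm on an MPTCP socket. On success
 * mptcp_setsockopt_sol_tcp_congestion() above applies the name to every
 * current subflow and caches it in msk->ca_name. "cubic" is just an example
 * value and the example_* helper name is made up.
 */
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

static int example_mptcp_set_cc(int fd)
{
	static const char name[] = "cubic";

	return setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, name, strlen(name));
}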
 620
 621static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optval,
 622					 unsigned int optlen)
 623{
 624	struct mptcp_subflow_context *subflow;
 625	struct sock *sk = (struct sock *)msk;
 626	int val;
 627
 628	if (optlen < sizeof(int))
 629		return -EINVAL;
 630
 631	if (copy_from_sockptr(&val, optval, sizeof(val)))
 632		return -EFAULT;
 633
 634	lock_sock(sk);
 635	sockopt_seq_inc(msk);
 636	msk->cork = !!val;
 637	mptcp_for_each_subflow(msk, subflow) {
 638		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 639
 640		lock_sock(ssk);
 641		__tcp_sock_set_cork(ssk, !!val);
 642		release_sock(ssk);
 643	}
 644	if (!val)
 645		mptcp_check_and_set_pending(sk);
 646	release_sock(sk);
 647
 648	return 0;
 649}
 650
 651static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval,
 652					    unsigned int optlen)
 653{
 654	struct mptcp_subflow_context *subflow;
 655	struct sock *sk = (struct sock *)msk;
 656	int val;
 657
 658	if (optlen < sizeof(int))
 659		return -EINVAL;
 660
 661	if (copy_from_sockptr(&val, optval, sizeof(val)))
 662		return -EFAULT;
 663
 664	lock_sock(sk);
 665	sockopt_seq_inc(msk);
 666	msk->nodelay = !!val;
 667	mptcp_for_each_subflow(msk, subflow) {
 668		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 669
 670		lock_sock(ssk);
 671		__tcp_sock_set_nodelay(ssk, !!val);
 672		release_sock(ssk);
 673	}
 674	if (val)
 675		mptcp_check_and_set_pending(sk);
 676	release_sock(sk);
 677
 678	return 0;
 679}
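/* Illustrative userspace sketch (not part of this file): the usual
 * cork/uncork batching pattern. TCP_CORK and TCP_NODELAY are tracked at the
 * MPTCP level (msk->cork, msk->nodelay) and pushed to every subflow by the
 * two helpers above; clearing TCP_CORK (or setting TCP_NODELAY) also kicks
 * pending data via mptcp_check_and_set_pending(). The example_* helper name
 * is made up and error handling is omitted.
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

static void example_corked_writes(int fd, const void *hdr, size_t hdr_len,
				  const void *body, size_t body_len)
{
	int on = 1, off = 0;

	setsockopt(fd, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));
	send(fd, hdr, hdr_len, 0);
	send(fd, body, body_len, 0);
	/* uncork: queued data may now be flushed out over the subflows */
	setsockopt(fd, IPPROTO_TCP, TCP_CORK, &off, sizeof(off));
}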
 680
 681static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname,
 682						   sockptr_t optval, unsigned int optlen)
 683{
 684	struct sock *sk = (struct sock *)msk;
 685	struct inet_sock *issk;
 686	struct socket *ssock;
 687	int err;
 688
 689	err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
 690	if (err != 0)
 691		return err;
 692
 693	lock_sock(sk);
 694
 695	ssock = __mptcp_nmpc_socket(msk);
 696	if (!ssock) {
 697		release_sock(sk);
 698		return -EINVAL;
 699	}
 700
 701	issk = inet_sk(ssock->sk);
 702
 703	switch (optname) {
 704	case IP_FREEBIND:
 705		issk->freebind = inet_sk(sk)->freebind;
 706		break;
 707	case IP_TRANSPARENT:
 708		issk->transparent = inet_sk(sk)->transparent;
 709		break;
 710	default:
 711		release_sock(sk);
 712		WARN_ON_ONCE(1);
 713		return -EOPNOTSUPP;
 714	}
 715
 716	sockopt_seq_inc(msk);
 717	release_sock(sk);
 718	return 0;
 719}
 720
 721static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
 722				       sockptr_t optval, unsigned int optlen)
 723{
 724	struct mptcp_subflow_context *subflow;
 725	struct sock *sk = (struct sock *)msk;
 726	int err, val;
 727
 728	err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
 729
 730	if (err != 0)
 731		return err;
 732
 733	lock_sock(sk);
 734	sockopt_seq_inc(msk);
 735	val = inet_sk(sk)->tos;
 736	mptcp_for_each_subflow(msk, subflow) {
 737		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 738
 739		__ip_sock_set_tos(ssk, val);
 740	}
 741	release_sock(sk);
 742
 743	return 0;
 744}
 745
 746static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
 747			       sockptr_t optval, unsigned int optlen)
 748{
 749	switch (optname) {
 750	case IP_FREEBIND:
 751	case IP_TRANSPARENT:
 752		return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen);
 753	case IP_TOS:
 754		return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen);
 755	}
 756
 757	return -EOPNOTSUPP;
 758}
 759
 760static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
 761					  sockptr_t optval, unsigned int optlen)
 762{
 763	struct sock *sk = (struct sock *)msk;
 764	struct socket *sock;
 765	int ret = -EINVAL;
 766
 767	/* Limit to the first subflow, before connection establishment */
 768	lock_sock(sk);
 769	sock = __mptcp_nmpc_socket(msk);
 770	if (!sock)
 771		goto unlock;
 772
 773	ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen);
 774
 775unlock:
 776	release_sock(sk);
 777	return ret;
 778}
 779
 780static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
 781				    sockptr_t optval, unsigned int optlen)
 782{
 783	struct sock *sk = (void *)msk;
 784	int ret, val;
 785
 786	switch (optname) {
 787	case TCP_INQ:
 788		ret = mptcp_get_int_option(msk, optval, optlen, &val);
 789		if (ret)
 790			return ret;
 791		if (val < 0 || val > 1)
 792			return -EINVAL;
 793
 794		lock_sock(sk);
 795		msk->recvmsg_inq = !!val;
 796		release_sock(sk);
 797		return 0;
 798	case TCP_ULP:
 799		return -EOPNOTSUPP;
 800	case TCP_CONGESTION:
 801		return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
 802	case TCP_CORK:
 803		return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen);
 804	case TCP_NODELAY:
 805		return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen);
 806	case TCP_DEFER_ACCEPT:
 807		/* See tcp.c: TCP_DEFER_ACCEPT does not fail */
 808		mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen);
 809		return 0;
 810	case TCP_FASTOPEN:
 811	case TCP_FASTOPEN_CONNECT:
 812	case TCP_FASTOPEN_KEY:
 813	case TCP_FASTOPEN_NO_COOKIE:
 814		return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname,
 815						      optval, optlen);
 816	}
 817
 818	return -EOPNOTSUPP;
 819}
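/* Illustrative userspace sketch (not part of this file): TCP_FASTOPEN_CONNECT
 * only makes sense before the connection is established, so
 * mptcp_setsockopt_sol_tcp() above forwards it to the first subflow only.
 * Address setup and error handling are omitted, the example_* helper name is
 * made up, and the TCP_FASTOPEN_CONNECT define is assumed for old headers.
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#ifndef TCP_FASTOPEN_CONNECT
#define TCP_FASTOPEN_CONNECT 30
#endif

static int example_mptcp_fastopen_connect(int fd, const struct sockaddr_in *dst)
{
	int on = 1;

	if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT, &on, sizeof(on)) < 0)
		return -1;

	/* data from a later send() may then ride in the first subflow's SYN */
	return connect(fd, (const struct sockaddr *)dst, sizeof(*dst));
}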
 820
 821int mptcp_setsockopt(struct sock *sk, int level, int optname,
 822		     sockptr_t optval, unsigned int optlen)
 823{
 824	struct mptcp_sock *msk = mptcp_sk(sk);
 825	struct sock *ssk;
 826
 827	pr_debug("msk=%p", msk);
 828
 829	if (level == SOL_SOCKET)
 830		return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
 831
 832	if (!mptcp_supported_sockopt(level, optname))
 833		return -ENOPROTOOPT;
 834
 835	/* @@ the meaning of setsockopt() when the socket is connected and
 836	 * there are multiple subflows is not yet defined. It is up to the
 837	 * MPTCP-level socket to configure the subflows until the connection
 838	 * is in TCP fallback, at which point TCP socket options are passed
 839	 * through to the one remaining subflow.
 840	 */
 841	lock_sock(sk);
 842	ssk = __mptcp_tcp_fallback(msk);
 843	release_sock(sk);
 844	if (ssk)
 845		return tcp_setsockopt(ssk, level, optname, optval, optlen);
 846
 847	if (level == SOL_IP)
 848		return mptcp_setsockopt_v4(msk, optname, optval, optlen);
 849
 850	if (level == SOL_IPV6)
 851		return mptcp_setsockopt_v6(msk, optname, optval, optlen);
 852
 853	if (level == SOL_TCP)
 854		return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen);
 855
 856	return -EOPNOTSUPP;
 857}
 858
 859static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
 860					  char __user *optval, int __user *optlen)
 861{
 862	struct sock *sk = (struct sock *)msk;
 863	struct socket *ssock;
 864	int ret = -EINVAL;
 865	struct sock *ssk;
 866
 867	lock_sock(sk);
 868	ssk = msk->first;
 869	if (ssk) {
 870		ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
 871		goto out;
 872	}
 873
 874	ssock = __mptcp_nmpc_socket(msk);
 875	if (!ssock)
 876		goto out;
 877
 878	ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen);
 879
 880out:
 881	release_sock(sk);
 882	return ret;
 883}
 884
 885void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
 886{
 887	u32 flags = 0;
 888	u8 val;
 889
 890	memset(info, 0, sizeof(*info));
 891
 892	info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
 893	info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
 894	info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
 895	info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
 896	info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk);
 897	val = mptcp_pm_get_add_addr_signal_max(msk);
 898	info->mptcpi_add_addr_signal_max = val;
 899	val = mptcp_pm_get_add_addr_accept_max(msk);
 900	info->mptcpi_add_addr_accepted_max = val;
 901	info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk);
 902	if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags))
 903		flags |= MPTCP_INFO_FLAG_FALLBACK;
 904	if (READ_ONCE(msk->can_ack))
 905		flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
 906	info->mptcpi_flags = flags;
 907	info->mptcpi_token = READ_ONCE(msk->token);
 908	info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
 909	info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
 910	info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
 911	info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
 912}
 913EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
 914
 915static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen)
 916{
 917	struct mptcp_info m_info;
 918	int len;
 919
 920	if (get_user(len, optlen))
 921		return -EFAULT;
 922
 923	len = min_t(unsigned int, len, sizeof(struct mptcp_info));
 924
 925	mptcp_diag_fill_info(msk, &m_info);
 926
 927	if (put_user(len, optlen))
 928		return -EFAULT;
 929
 930	if (copy_to_user(optval, &m_info, len))
 931		return -EFAULT;
 932
 933	return 0;
 934}
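/* Illustrative userspace sketch (not part of this file): reading the
 * MPTCP_INFO counters filled in by mptcp_diag_fill_info() above. The copy is
 * truncated to min(optlen, sizeof(struct mptcp_info)), so older binaries keep
 * working when the structure grows. The example_* helper name is made up and
 * SOL_MPTCP is assumed to be 284 if the libc headers do not provide it.
 */
#include <stdio.h>
#include <sys/socket.h>
#include <linux/mptcp.h>

#ifndef SOL_MPTCP
#define SOL_MPTCP 284
#endif

static int example_dump_mptcp_info(int fd)
{
	struct mptcp_info info = { 0 };
	socklen_t len = sizeof(info);

	if (getsockopt(fd, SOL_MPTCP, MPTCP_INFO, &info, &len) < 0)
		return -1;

	printf("subflows=%u token=%08x fallback=%d\n",
	       (unsigned int)info.mptcpi_subflows, info.mptcpi_token,
	       !!(info.mptcpi_flags & MPTCP_INFO_FLAG_FALLBACK));
	return 0;
}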
 935
 936static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd,
 937				  char __user *optval,
 938				  u32 copied,
 939				  int __user *optlen)
 940{
 941	u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd));
 942
 943	if (copied)
 944		copied += sfd->size_subflow_data;
 945	else
 946		copied = copylen;
 947
 948	if (put_user(copied, optlen))
 949		return -EFAULT;
 950
 951	if (copy_to_user(optval, sfd, copylen))
 952		return -EFAULT;
 953
 954	return 0;
 955}
 956
 957static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
 958				  char __user *optval, int __user *optlen)
 959{
 960	int len, copylen;
 961
 962	if (get_user(len, optlen))
 963		return -EFAULT;
 964
 965	/* if the mptcp_subflow_data size is changed, this function needs to
 966	 * be adjusted to deal with programs using the old version.
 967	 */
 968	BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE);
 969
 970	if (len < MIN_INFO_OPTLEN_SIZE)
 971		return -EINVAL;
 972
 973	memset(sfd, 0, sizeof(*sfd));
 974
 975	copylen = min_t(unsigned int, len, sizeof(*sfd));
 976	if (copy_from_user(sfd, optval, copylen))
 977		return -EFAULT;
 978
 979	/* size_subflow_data is u32, but len is signed */
 980	if (sfd->size_subflow_data > INT_MAX ||
 981	    sfd->size_user > INT_MAX)
 982		return -EINVAL;
 983
 984	if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE ||
 985	    sfd->size_subflow_data > len)
 986		return -EINVAL;
 987
 988	if (sfd->num_subflows || sfd->size_kernel)
 989		return -EINVAL;
 990
 991	return len - sfd->size_subflow_data;
 992}
 993
 994static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval,
 995				    int __user *optlen)
 996{
 997	struct mptcp_subflow_context *subflow;
 998	struct sock *sk = (struct sock *)msk;
 999	unsigned int sfcount = 0, copied = 0;
1000	struct mptcp_subflow_data sfd;
1001	char __user *infoptr;
1002	int len;
1003
1004	len = mptcp_get_subflow_data(&sfd, optval, optlen);
1005	if (len < 0)
1006		return len;
1007
1008	sfd.size_kernel = sizeof(struct tcp_info);
1009	sfd.size_user = min_t(unsigned int, sfd.size_user,
1010			      sizeof(struct tcp_info));
1011
1012	infoptr = optval + sfd.size_subflow_data;
1013
1014	lock_sock(sk);
1015
1016	mptcp_for_each_subflow(msk, subflow) {
1017		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
1018
1019		++sfcount;
1020
1021		if (len && len >= sfd.size_user) {
1022			struct tcp_info info;
1023
1024			tcp_get_info(ssk, &info);
1025
1026			if (copy_to_user(infoptr, &info, sfd.size_user)) {
1027				release_sock(sk);
1028				return -EFAULT;
1029			}
1030
1031			infoptr += sfd.size_user;
1032			copied += sfd.size_user;
1033			len -= sfd.size_user;
1034		}
1035	}
1036
1037	release_sock(sk);
1038
1039	sfd.num_subflows = sfcount;
1040
1041	if (mptcp_put_subflow_data(&sfd, optval, copied, optlen))
1042		return -EFAULT;
1043
1044	return 0;
1045}
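/* Illustrative userspace sketch (not part of this file): fetching one
 * struct tcp_info per subflow via MPTCP_TCPINFO. The buffer starts with a
 * struct mptcp_subflow_data header describing the per-record size the caller
 * expects; mptcp_getsockopt_tcpinfo() above writes back the subflow count and
 * the record size it actually used. Room for 8 records is an arbitrary
 * choice, the example_* helper name is made up, and SOL_MPTCP is assumed to
 * be 284 if the headers lack it.
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/tcp.h>
#include <linux/mptcp.h>

#ifndef SOL_MPTCP
#define SOL_MPTCP 284
#endif

static int example_dump_subflow_tcpinfo(int fd)
{
	struct {
		struct mptcp_subflow_data d;
		struct tcp_info ti[8];
	} buf;
	socklen_t len = sizeof(buf);
	unsigned int i;

	memset(&buf, 0, sizeof(buf));
	buf.d.size_subflow_data = sizeof(buf.d);	/* header consumed by the kernel */
	buf.d.size_user = sizeof(struct tcp_info);	/* record size we can store */

	if (getsockopt(fd, SOL_MPTCP, MPTCP_TCPINFO, &buf, &len) < 0)
		return -1;

	for (i = 0; i < buf.d.num_subflows && i < 8; i++)
		printf("subflow %u: rtt=%uus\n", i, (unsigned int)buf.ti[i].tcpi_rtt);

	return 0;
}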
1046
1047static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a)
1048{
1049	struct inet_sock *inet = inet_sk(sk);
1050
1051	memset(a, 0, sizeof(*a));
1052
1053	if (sk->sk_family == AF_INET) {
1054		a->sin_local.sin_family = AF_INET;
1055		a->sin_local.sin_port = inet->inet_sport;
1056		a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr;
1057
1058		if (!a->sin_local.sin_addr.s_addr)
1059			a->sin_local.sin_addr.s_addr = inet->inet_saddr;
1060
1061		a->sin_remote.sin_family = AF_INET;
1062		a->sin_remote.sin_port = inet->inet_dport;
1063		a->sin_remote.sin_addr.s_addr = inet->inet_daddr;
1064#if IS_ENABLED(CONFIG_IPV6)
1065	} else if (sk->sk_family == AF_INET6) {
1066		const struct ipv6_pinfo *np = inet6_sk(sk);
1067
1068		if (WARN_ON_ONCE(!np))
1069			return;
1070
1071		a->sin6_local.sin6_family = AF_INET6;
1072		a->sin6_local.sin6_port = inet->inet_sport;
1073
1074		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
1075			a->sin6_local.sin6_addr = np->saddr;
1076		else
1077			a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr;
1078
1079		a->sin6_remote.sin6_family = AF_INET6;
1080		a->sin6_remote.sin6_port = inet->inet_dport;
1081		a->sin6_remote.sin6_addr = sk->sk_v6_daddr;
1082#endif
1083	}
1084}
1085
1086static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval,
1087					  int __user *optlen)
1088{
1089	struct mptcp_subflow_context *subflow;
1090	struct sock *sk = (struct sock *)msk;
1091	unsigned int sfcount = 0, copied = 0;
1092	struct mptcp_subflow_data sfd;
1093	char __user *addrptr;
1094	int len;
1095
1096	len = mptcp_get_subflow_data(&sfd, optval, optlen);
1097	if (len < 0)
1098		return len;
1099
1100	sfd.size_kernel = sizeof(struct mptcp_subflow_addrs);
1101	sfd.size_user = min_t(unsigned int, sfd.size_user,
1102			      sizeof(struct mptcp_subflow_addrs));
1103
1104	addrptr = optval + sfd.size_subflow_data;
1105
1106	lock_sock(sk);
1107
1108	mptcp_for_each_subflow(msk, subflow) {
1109		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
1110
1111		++sfcount;
1112
1113		if (len && len >= sfd.size_user) {
1114			struct mptcp_subflow_addrs a;
1115
1116			mptcp_get_sub_addrs(ssk, &a);
1117
1118			if (copy_to_user(addrptr, &a, sfd.size_user)) {
1119				release_sock(sk);
1120				return -EFAULT;
1121			}
1122
1123			addrptr += sfd.size_user;
1124			copied += sfd.size_user;
1125			len -= sfd.size_user;
1126		}
1127	}
1128
1129	release_sock(sk);
1130
1131	sfd.num_subflows = sfcount;
1132
1133	if (mptcp_put_subflow_data(&sfd, optval, copied, optlen))
1134		return -EFAULT;
1135
1136	return 0;
1137}
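/* Illustrative userspace sketch (not part of this file): retrieving the
 * local/remote endpoints of each subflow via MPTCP_SUBFLOW_ADDRS. The layout
 * mirrors MPTCP_TCPINFO: a struct mptcp_subflow_data header followed by one
 * fixed-size record per subflow, filled by mptcp_getsockopt_subflow_addrs()
 * above. The example_* helper name is made up and SOL_MPTCP is assumed to be
 * 284 if the headers lack it.
 */
#include <string.h>
#include <sys/socket.h>
#include <linux/mptcp.h>

#ifndef SOL_MPTCP
#define SOL_MPTCP 284
#endif

static int example_dump_subflow_addrs(int fd)
{
	struct {
		struct mptcp_subflow_data d;
		struct mptcp_subflow_addrs addr[8];
	} buf;
	socklen_t len = sizeof(buf);

	memset(&buf, 0, sizeof(buf));
	buf.d.size_subflow_data = sizeof(buf.d);
	buf.d.size_user = sizeof(struct mptcp_subflow_addrs);

	if (getsockopt(fd, SOL_MPTCP, MPTCP_SUBFLOW_ADDRS, &buf, &len) < 0)
		return -1;

	/* buf.d.num_subflows records follow the header; buf.addr[i].sa_family
	 * tells whether the sin_local/sin_remote or the sin6_local/sin6_remote
	 * members of each record are valid.
	 */
	return 0;
}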
1138
1139static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
1140				int __user *optlen, int val)
1141{
1142	int len;
1143
1144	if (get_user(len, optlen))
1145		return -EFAULT;
1146	if (len < 0)
1147		return -EINVAL;
1148
1149	if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
1150		unsigned char ucval = (unsigned char)val;
1151
1152		len = 1;
1153		if (put_user(len, optlen))
1154			return -EFAULT;
1155		if (copy_to_user(optval, &ucval, 1))
1156			return -EFAULT;
1157	} else {
1158		len = min_t(unsigned int, len, sizeof(int));
1159		if (put_user(len, optlen))
1160			return -EFAULT;
1161		if (copy_to_user(optval, &val, len))
1162			return -EFAULT;
1163	}
1164
1165	return 0;
1166}
1167
1168static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
1169				    char __user *optval, int __user *optlen)
1170{
1171	switch (optname) {
1172	case TCP_ULP:
1173	case TCP_CONGESTION:
1174	case TCP_INFO:
1175	case TCP_CC_INFO:
1176	case TCP_DEFER_ACCEPT:
1177	case TCP_FASTOPEN:
1178	case TCP_FASTOPEN_CONNECT:
1179	case TCP_FASTOPEN_KEY:
1180	case TCP_FASTOPEN_NO_COOKIE:
1181		return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
1182						      optval, optlen);
1183	case TCP_INQ:
1184		return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq);
1185	case TCP_CORK:
1186		return mptcp_put_int_option(msk, optval, optlen, msk->cork);
1187	case TCP_NODELAY:
1188		return mptcp_put_int_option(msk, optval, optlen, msk->nodelay);
1189	}
1190	return -EOPNOTSUPP;
1191}
1192
1193static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
1194			       char __user *optval, int __user *optlen)
1195{
1196	struct sock *sk = (void *)msk;
1197
1198	switch (optname) {
1199	case IP_TOS:
1200		return mptcp_put_int_option(msk, optval, optlen, inet_sk(sk)->tos);
1201	}
1202
1203	return -EOPNOTSUPP;
1204}
1205
1206static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
1207				      char __user *optval, int __user *optlen)
1208{
1209	switch (optname) {
1210	case MPTCP_INFO:
1211		return mptcp_getsockopt_info(msk, optval, optlen);
1212	case MPTCP_TCPINFO:
1213		return mptcp_getsockopt_tcpinfo(msk, optval, optlen);
1214	case MPTCP_SUBFLOW_ADDRS:
1215		return mptcp_getsockopt_subflow_addrs(msk, optval, optlen);
1216	}
1217
1218	return -EOPNOTSUPP;
1219}
1220
1221int mptcp_getsockopt(struct sock *sk, int level, int optname,
1222		     char __user *optval, int __user *option)
1223{
1224	struct mptcp_sock *msk = mptcp_sk(sk);
1225	struct sock *ssk;
1226
1227	pr_debug("msk=%p", msk);
1228
1229	/* @@ the meaning of getsockopt() when the socket is connected and
1230	 * there are multiple subflows is not yet defined. It is up to the
1231	 * MPTCP-level socket to configure the subflows until the connection
1232	 * is in TCP fallback, at which point socket options are passed through
1233	 * to the one remaining subflow.
1234	 */
1235	lock_sock(sk);
1236	ssk = __mptcp_tcp_fallback(msk);
1237	release_sock(sk);
1238	if (ssk)
1239		return tcp_getsockopt(ssk, level, optname, optval, option);
1240
1241	if (level == SOL_IP)
1242		return mptcp_getsockopt_v4(msk, optname, optval, option);
1243	if (level == SOL_TCP)
1244		return mptcp_getsockopt_sol_tcp(msk, optname, optval, option);
1245	if (level == SOL_MPTCP)
1246		return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option);
1247	return -EOPNOTSUPP;
1248}
1249
1250static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
1251{
1252	static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK;
1253	struct sock *sk = (struct sock *)msk;
1254
1255	if (ssk->sk_prot->keepalive) {
1256		if (sock_flag(sk, SOCK_KEEPOPEN))
1257			ssk->sk_prot->keepalive(ssk, 1);
1258		else
1259			ssk->sk_prot->keepalive(ssk, 0);
1260	}
1261
1262	ssk->sk_priority = sk->sk_priority;
1263	ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
1264	ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
1265	__ip_sock_set_tos(ssk, inet_sk(sk)->tos);
1266
1267	if (sk->sk_userlocks & tx_rx_locks) {
1268		ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks;
1269		if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
1270			WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
1271		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
1272			WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
1273	}
1274
1275	if (sock_flag(sk, SOCK_LINGER)) {
1276		ssk->sk_lingertime = sk->sk_lingertime;
1277		sock_set_flag(ssk, SOCK_LINGER);
1278	} else {
1279		sock_reset_flag(ssk, SOCK_LINGER);
1280	}
1281
1282	if (sk->sk_mark != ssk->sk_mark) {
1283		ssk->sk_mark = sk->sk_mark;
1284		sk_dst_reset(ssk);
1285	}
1286
1287	sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG));
1288
1289	if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops)
1290		tcp_set_congestion_control(ssk, msk->ca_name, false, true);
1291	__tcp_sock_set_cork(ssk, !!msk->cork);
1292	__tcp_sock_set_nodelay(ssk, !!msk->nodelay);
1293
1294	inet_sk(ssk)->transparent = inet_sk(sk)->transparent;
1295	inet_sk(ssk)->freebind = inet_sk(sk)->freebind;
1296}
1297
1298static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
1299{
1300	bool slow = lock_sock_fast(ssk);
1301
1302	sync_socket_options(msk, ssk);
1303
1304	unlock_sock_fast(ssk, slow);
1305}
1306
1307void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
1308{
1309	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
1310
1311	msk_owned_by_me(msk);
1312
1313	if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
1314		__mptcp_sockopt_sync(msk, ssk);
1315
1316		subflow->setsockopt_seq = msk->setsockopt_seq;
1317	}
1318}
1319
1320void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
1321{
1322	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
1323
1324	msk_owned_by_me(msk);
1325
1326	if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
1327		sync_socket_options(msk, ssk);
1328
1329		subflow->setsockopt_seq = msk->setsockopt_seq;
1330	}
1331}