Linux Audio

Check our new training course

Loading...
Note: File does not exist in v5.4.
   1// SPDX-License-Identifier: GPL-2.0
   2/* Multipath TCP
   3 *
   4 * Copyright (c) 2021, Red Hat.
   5 */
   6
   7#define pr_fmt(fmt) "MPTCP: " fmt
   8
   9#include <linux/kernel.h>
  10#include <linux/module.h>
  11#include <net/sock.h>
  12#include <net/protocol.h>
  13#include <net/tcp.h>
  14#include <net/mptcp.h>
  15#include "protocol.h"
  16
  17#define MIN_INFO_OPTLEN_SIZE		16
  18#define MIN_FULL_INFO_OPTLEN_SIZE	40
  19
  20static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
  21{
  22	msk_owned_by_me(msk);
  23
  24	if (likely(!__mptcp_check_fallback(msk)))
  25		return NULL;
  26
  27	return msk->first;
  28}
  29
  30static u32 sockopt_seq_reset(const struct sock *sk)
  31{
  32	sock_owned_by_me(sk);
  33
  34	/* Highbits contain state.  Allows to distinguish sockopt_seq
  35	 * of listener and established:
  36	 * s0 = new_listener()
  37	 * sockopt(s0) - seq is 1
  38	 * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0)
  39	 * sockopt(s0) - seq increments to 2 on s0
  40	 * sockopt(s1) // seq increments to 2 on s1 (different option)
  41	 * new ssk completes join, inherits options from s0 // seq 2
  42	 * Needs sync from mptcp join logic, but ssk->seq == msk->seq
  43	 *
  44	 * Set High order bits to sk_state so ssk->seq == msk->seq test
  45	 * will fail.
  46	 */
  47
  48	return (u32)sk->sk_state << 24u;
  49}
  50
  51static void sockopt_seq_inc(struct mptcp_sock *msk)
  52{
  53	u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff;
  54
  55	msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq;
  56}
  57
  58static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval,
  59				unsigned int optlen, int *val)
  60{
  61	if (optlen < sizeof(int))
  62		return -EINVAL;
  63
  64	if (copy_from_sockptr(val, optval, sizeof(*val)))
  65		return -EFAULT;
  66
  67	return 0;
  68}
  69
  70static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val)
  71{
  72	struct mptcp_subflow_context *subflow;
  73	struct sock *sk = (struct sock *)msk;
  74
  75	lock_sock(sk);
  76	sockopt_seq_inc(msk);
  77
  78	mptcp_for_each_subflow(msk, subflow) {
  79		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
  80		bool slow = lock_sock_fast(ssk);
  81
  82		switch (optname) {
  83		case SO_DEBUG:
  84			sock_valbool_flag(ssk, SOCK_DBG, !!val);
  85			break;
  86		case SO_KEEPALIVE:
  87			if (ssk->sk_prot->keepalive)
  88				ssk->sk_prot->keepalive(ssk, !!val);
  89			sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val);
  90			break;
  91		case SO_PRIORITY:
  92			WRITE_ONCE(ssk->sk_priority, val);
  93			break;
  94		case SO_SNDBUF:
  95		case SO_SNDBUFFORCE:
  96			ssk->sk_userlocks |= SOCK_SNDBUF_LOCK;
  97			WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
  98			mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf;
  99			break;
 100		case SO_RCVBUF:
 101		case SO_RCVBUFFORCE:
 102			ssk->sk_userlocks |= SOCK_RCVBUF_LOCK;
 103			WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
 104			break;
 105		case SO_MARK:
 106			if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) {
 107				WRITE_ONCE(ssk->sk_mark, sk->sk_mark);
 108				sk_dst_reset(ssk);
 109			}
 110			break;
 111		case SO_INCOMING_CPU:
 112			WRITE_ONCE(ssk->sk_incoming_cpu, val);
 113			break;
 114		}
 115
 116		subflow->setsockopt_seq = msk->setsockopt_seq;
 117		unlock_sock_fast(ssk, slow);
 118	}
 119
 120	release_sock(sk);
 121}
 122
 123static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val)
 124{
 125	sockptr_t optval = KERNEL_SOCKPTR(&val);
 126	struct sock *sk = (struct sock *)msk;
 127	int ret;
 128
 129	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
 130			      optval, sizeof(val));
 131	if (ret)
 132		return ret;
 133
 134	mptcp_sol_socket_sync_intval(msk, optname, val);
 135	return 0;
 136}
 137
 138static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val)
 139{
 140	struct sock *sk = (struct sock *)msk;
 141
 142	WRITE_ONCE(sk->sk_incoming_cpu, val);
 143
 144	mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val);
 145}
 146
 147static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val)
 148{
 149	sockptr_t optval = KERNEL_SOCKPTR(&val);
 150	struct mptcp_subflow_context *subflow;
 151	struct sock *sk = (struct sock *)msk;
 152	int ret;
 153
 154	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
 155			      optval, sizeof(val));
 156	if (ret)
 157		return ret;
 158
 159	lock_sock(sk);
 160	mptcp_for_each_subflow(msk, subflow) {
 161		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 162		bool slow = lock_sock_fast(ssk);
 163
 164		sock_set_timestamp(sk, optname, !!val);
 165		unlock_sock_fast(ssk, slow);
 166	}
 167
 168	release_sock(sk);
 169	return 0;
 170}
 171
 172static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
 173					   sockptr_t optval,
 174					   unsigned int optlen)
 175{
 176	int val, ret;
 177
 178	ret = mptcp_get_int_option(msk, optval, optlen, &val);
 179	if (ret)
 180		return ret;
 181
 182	switch (optname) {
 183	case SO_KEEPALIVE:
 184		mptcp_sol_socket_sync_intval(msk, optname, val);
 185		return 0;
 186	case SO_DEBUG:
 187	case SO_MARK:
 188	case SO_PRIORITY:
 189	case SO_SNDBUF:
 190	case SO_SNDBUFFORCE:
 191	case SO_RCVBUF:
 192	case SO_RCVBUFFORCE:
 193		return mptcp_sol_socket_intval(msk, optname, val);
 194	case SO_INCOMING_CPU:
 195		mptcp_so_incoming_cpu(msk, val);
 196		return 0;
 197	case SO_TIMESTAMP_OLD:
 198	case SO_TIMESTAMP_NEW:
 199	case SO_TIMESTAMPNS_OLD:
 200	case SO_TIMESTAMPNS_NEW:
 201		return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val);
 202	}
 203
 204	return -ENOPROTOOPT;
 205}
 206
 207static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk,
 208						    int optname,
 209						    sockptr_t optval,
 210						    unsigned int optlen)
 211{
 212	struct mptcp_subflow_context *subflow;
 213	struct sock *sk = (struct sock *)msk;
 214	struct so_timestamping timestamping;
 215	int ret;
 216
 217	if (optlen == sizeof(timestamping)) {
 218		if (copy_from_sockptr(&timestamping, optval,
 219				      sizeof(timestamping)))
 220			return -EFAULT;
 221	} else if (optlen == sizeof(int)) {
 222		memset(&timestamping, 0, sizeof(timestamping));
 223
 224		if (copy_from_sockptr(&timestamping.flags, optval, sizeof(int)))
 225			return -EFAULT;
 226	} else {
 227		return -EINVAL;
 228	}
 229
 230	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
 231			      KERNEL_SOCKPTR(&timestamping),
 232			      sizeof(timestamping));
 233	if (ret)
 234		return ret;
 235
 236	lock_sock(sk);
 237
 238	mptcp_for_each_subflow(msk, subflow) {
 239		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 240		bool slow = lock_sock_fast(ssk);
 241
 242		sock_set_timestamping(sk, optname, timestamping);
 243		unlock_sock_fast(ssk, slow);
 244	}
 245
 246	release_sock(sk);
 247
 248	return 0;
 249}
 250
 251static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval,
 252					      unsigned int optlen)
 253{
 254	struct mptcp_subflow_context *subflow;
 255	struct sock *sk = (struct sock *)msk;
 256	struct linger ling;
 257	sockptr_t kopt;
 258	int ret;
 259
 260	if (optlen < sizeof(ling))
 261		return -EINVAL;
 262
 263	if (copy_from_sockptr(&ling, optval, sizeof(ling)))
 264		return -EFAULT;
 265
 266	kopt = KERNEL_SOCKPTR(&ling);
 267	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling));
 268	if (ret)
 269		return ret;
 270
 271	lock_sock(sk);
 272	sockopt_seq_inc(msk);
 273	mptcp_for_each_subflow(msk, subflow) {
 274		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 275		bool slow = lock_sock_fast(ssk);
 276
 277		if (!ling.l_onoff) {
 278			sock_reset_flag(ssk, SOCK_LINGER);
 279		} else {
 280			ssk->sk_lingertime = sk->sk_lingertime;
 281			sock_set_flag(ssk, SOCK_LINGER);
 282		}
 283
 284		subflow->setsockopt_seq = msk->setsockopt_seq;
 285		unlock_sock_fast(ssk, slow);
 286	}
 287
 288	release_sock(sk);
 289	return 0;
 290}
 291
 292static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 293				       sockptr_t optval, unsigned int optlen)
 294{
 295	struct sock *sk = (struct sock *)msk;
 296	struct sock *ssk;
 297	int ret;
 298
 299	switch (optname) {
 300	case SO_REUSEPORT:
 301	case SO_REUSEADDR:
 302	case SO_BINDTODEVICE:
 303	case SO_BINDTOIFINDEX:
 304		lock_sock(sk);
 305		ssk = __mptcp_nmpc_sk(msk);
 306		if (IS_ERR(ssk)) {
 307			release_sock(sk);
 308			return PTR_ERR(ssk);
 309		}
 310
 311		ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen);
 312		if (ret == 0) {
 313			if (optname == SO_REUSEPORT)
 314				sk->sk_reuseport = ssk->sk_reuseport;
 315			else if (optname == SO_REUSEADDR)
 316				sk->sk_reuse = ssk->sk_reuse;
 317			else if (optname == SO_BINDTODEVICE)
 318				sk->sk_bound_dev_if = ssk->sk_bound_dev_if;
 319			else if (optname == SO_BINDTOIFINDEX)
 320				sk->sk_bound_dev_if = ssk->sk_bound_dev_if;
 321		}
 322		release_sock(sk);
 323		return ret;
 324	case SO_KEEPALIVE:
 325	case SO_PRIORITY:
 326	case SO_SNDBUF:
 327	case SO_SNDBUFFORCE:
 328	case SO_RCVBUF:
 329	case SO_RCVBUFFORCE:
 330	case SO_MARK:
 331	case SO_INCOMING_CPU:
 332	case SO_DEBUG:
 333	case SO_TIMESTAMP_OLD:
 334	case SO_TIMESTAMP_NEW:
 335	case SO_TIMESTAMPNS_OLD:
 336	case SO_TIMESTAMPNS_NEW:
 337		return mptcp_setsockopt_sol_socket_int(msk, optname, optval,
 338						       optlen);
 339	case SO_TIMESTAMPING_OLD:
 340	case SO_TIMESTAMPING_NEW:
 341		return mptcp_setsockopt_sol_socket_timestamping(msk, optname,
 342								optval, optlen);
 343	case SO_LINGER:
 344		return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen);
 345	case SO_RCVLOWAT:
 346	case SO_RCVTIMEO_OLD:
 347	case SO_RCVTIMEO_NEW:
 348	case SO_SNDTIMEO_OLD:
 349	case SO_SNDTIMEO_NEW:
 350	case SO_BUSY_POLL:
 351	case SO_PREFER_BUSY_POLL:
 352	case SO_BUSY_POLL_BUDGET:
 353		/* No need to copy: only relevant for msk */
 354		return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
 355	case SO_NO_CHECK:
 356	case SO_DONTROUTE:
 357	case SO_BROADCAST:
 358	case SO_BSDCOMPAT:
 359	case SO_PASSCRED:
 360	case SO_PASSPIDFD:
 361	case SO_PASSSEC:
 362	case SO_RXQ_OVFL:
 363	case SO_WIFI_STATUS:
 364	case SO_NOFCS:
 365	case SO_SELECT_ERR_QUEUE:
 366		return 0;
 367	}
 368
 369	/* SO_OOBINLINE is not supported, let's avoid the related mess
 370	 * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF,
 371	 * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER,
 372	 * we must be careful with subflows
 373	 *
 374	 * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks
 375	 * explicitly the sk_protocol field
 376	 *
 377	 * SO_PEEK_OFF is unsupported, as it is for plain TCP
 378	 * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows
 379	 * SO_CNX_ADVICE is currently unsupported, could possibly be relevant,
 380	 * but likely needs careful design
 381	 *
 382	 * SO_ZEROCOPY is currently unsupported, TODO in sndmsg
 383	 * SO_TXTIME is currently unsupported
 384	 */
 385
 386	return -EOPNOTSUPP;
 387}
 388
 389static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
 390			       sockptr_t optval, unsigned int optlen)
 391{
 392	struct sock *sk = (struct sock *)msk;
 393	int ret = -EOPNOTSUPP;
 394	struct sock *ssk;
 395
 396	switch (optname) {
 397	case IPV6_V6ONLY:
 398	case IPV6_TRANSPARENT:
 399	case IPV6_FREEBIND:
 400		lock_sock(sk);
 401		ssk = __mptcp_nmpc_sk(msk);
 402		if (IS_ERR(ssk)) {
 403			release_sock(sk);
 404			return PTR_ERR(ssk);
 405		}
 406
 407		ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen);
 408		if (ret != 0) {
 409			release_sock(sk);
 410			return ret;
 411		}
 412
 413		sockopt_seq_inc(msk);
 414
 415		switch (optname) {
 416		case IPV6_V6ONLY:
 417			sk->sk_ipv6only = ssk->sk_ipv6only;
 418			break;
 419		case IPV6_TRANSPARENT:
 420			inet_assign_bit(TRANSPARENT, sk,
 421					inet_test_bit(TRANSPARENT, ssk));
 422			break;
 423		case IPV6_FREEBIND:
 424			inet_assign_bit(FREEBIND, sk,
 425					inet_test_bit(FREEBIND, ssk));
 426			break;
 427		}
 428
 429		release_sock(sk);
 430		break;
 431	}
 432
 433	return ret;
 434}
 435
 436static bool mptcp_supported_sockopt(int level, int optname)
 437{
 438	if (level == SOL_IP) {
 439		switch (optname) {
 440		/* should work fine */
 441		case IP_FREEBIND:
 442		case IP_TRANSPARENT:
 443		case IP_BIND_ADDRESS_NO_PORT:
 444		case IP_LOCAL_PORT_RANGE:
 445
 446		/* the following are control cmsg related */
 447		case IP_PKTINFO:
 448		case IP_RECVTTL:
 449		case IP_RECVTOS:
 450		case IP_RECVOPTS:
 451		case IP_RETOPTS:
 452		case IP_PASSSEC:
 453		case IP_RECVORIGDSTADDR:
 454		case IP_CHECKSUM:
 455		case IP_RECVFRAGSIZE:
 456
 457		/* common stuff that need some love */
 458		case IP_TOS:
 459		case IP_TTL:
 460		case IP_MTU_DISCOVER:
 461		case IP_RECVERR:
 462
 463		/* possibly less common may deserve some love */
 464		case IP_MINTTL:
 465
 466		/* the following is apparently a no-op for plain TCP */
 467		case IP_RECVERR_RFC4884:
 468			return true;
 469		}
 470
 471		/* IP_OPTIONS is not supported, needs subflow care */
 472		/* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */
 473		/* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF,
 474		 * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP,
 475		 * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE,
 476		 * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP,
 477		 * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE,
 478		 * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal
 479		 * with mcast stuff
 480		 */
 481		/* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */
 482		return false;
 483	}
 484	if (level == SOL_IPV6) {
 485		switch (optname) {
 486		case IPV6_V6ONLY:
 487
 488		/* the following are control cmsg related */
 489		case IPV6_RECVPKTINFO:
 490		case IPV6_2292PKTINFO:
 491		case IPV6_RECVHOPLIMIT:
 492		case IPV6_2292HOPLIMIT:
 493		case IPV6_RECVRTHDR:
 494		case IPV6_2292RTHDR:
 495		case IPV6_RECVHOPOPTS:
 496		case IPV6_2292HOPOPTS:
 497		case IPV6_RECVDSTOPTS:
 498		case IPV6_2292DSTOPTS:
 499		case IPV6_RECVTCLASS:
 500		case IPV6_FLOWINFO:
 501		case IPV6_RECVPATHMTU:
 502		case IPV6_RECVORIGDSTADDR:
 503		case IPV6_RECVFRAGSIZE:
 504
 505		/* the following ones need some love but are quite common */
 506		case IPV6_TCLASS:
 507		case IPV6_TRANSPARENT:
 508		case IPV6_FREEBIND:
 509		case IPV6_PKTINFO:
 510		case IPV6_2292PKTOPTIONS:
 511		case IPV6_UNICAST_HOPS:
 512		case IPV6_MTU_DISCOVER:
 513		case IPV6_MTU:
 514		case IPV6_RECVERR:
 515		case IPV6_FLOWINFO_SEND:
 516		case IPV6_FLOWLABEL_MGR:
 517		case IPV6_MINHOPCOUNT:
 518		case IPV6_DONTFRAG:
 519		case IPV6_AUTOFLOWLABEL:
 520
 521		/* the following one is a no-op for plain TCP */
 522		case IPV6_RECVERR_RFC4884:
 523			return true;
 524		}
 525
 526		/* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are
 527		 * not supported
 528		 */
 529		/* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF,
 530		 * IPV6_MULTICAST_IF, IPV6_ADDRFORM,
 531		 * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST,
 532		 * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP,
 533		 * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP,
 534		 * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER
 535		 * are not supported better not deal with mcast
 536		 */
 537		/* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */
 538
 539		/* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */
 540		/* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */
 541		return false;
 542	}
 543	if (level == SOL_TCP) {
 544		switch (optname) {
 545		/* the following are no-op or should work just fine */
 546		case TCP_THIN_DUPACK:
 547		case TCP_DEFER_ACCEPT:
 548
 549		/* the following need some love */
 550		case TCP_MAXSEG:
 551		case TCP_NODELAY:
 552		case TCP_THIN_LINEAR_TIMEOUTS:
 553		case TCP_CONGESTION:
 554		case TCP_CORK:
 555		case TCP_KEEPIDLE:
 556		case TCP_KEEPINTVL:
 557		case TCP_KEEPCNT:
 558		case TCP_SYNCNT:
 559		case TCP_SAVE_SYN:
 560		case TCP_LINGER2:
 561		case TCP_WINDOW_CLAMP:
 562		case TCP_QUICKACK:
 563		case TCP_USER_TIMEOUT:
 564		case TCP_TIMESTAMP:
 565		case TCP_NOTSENT_LOWAT:
 566		case TCP_TX_DELAY:
 567		case TCP_INQ:
 568		case TCP_FASTOPEN:
 569		case TCP_FASTOPEN_CONNECT:
 570		case TCP_FASTOPEN_KEY:
 571		case TCP_FASTOPEN_NO_COOKIE:
 572			return true;
 573		}
 574
 575		/* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */
 576
 577		/* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS,
 578		 * TCP_REPAIR_WINDOW are not supported, better avoid this mess
 579		 */
 580	}
 581	return false;
 582}
 583
 584static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval,
 585					       unsigned int optlen)
 586{
 587	struct mptcp_subflow_context *subflow;
 588	struct sock *sk = (struct sock *)msk;
 589	char name[TCP_CA_NAME_MAX];
 590	bool cap_net_admin;
 591	int ret;
 592
 593	if (optlen < 1)
 594		return -EINVAL;
 595
 596	ret = strncpy_from_sockptr(name, optval,
 597				   min_t(long, TCP_CA_NAME_MAX - 1, optlen));
 598	if (ret < 0)
 599		return -EFAULT;
 600
 601	name[ret] = 0;
 602
 603	cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN);
 604
 605	ret = 0;
 606	lock_sock(sk);
 607	sockopt_seq_inc(msk);
 608	mptcp_for_each_subflow(msk, subflow) {
 609		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 610		int err;
 611
 612		lock_sock(ssk);
 613		err = tcp_set_congestion_control(ssk, name, true, cap_net_admin);
 614		if (err < 0 && ret == 0)
 615			ret = err;
 616		subflow->setsockopt_seq = msk->setsockopt_seq;
 617		release_sock(ssk);
 618	}
 619
 620	if (ret == 0)
 621		strcpy(msk->ca_name, name);
 622
 623	release_sock(sk);
 624	return ret;
 625}
 626
 627static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optval,
 628					 unsigned int optlen)
 629{
 630	struct mptcp_subflow_context *subflow;
 631	struct sock *sk = (struct sock *)msk;
 632	int val;
 633
 634	if (optlen < sizeof(int))
 635		return -EINVAL;
 636
 637	if (copy_from_sockptr(&val, optval, sizeof(val)))
 638		return -EFAULT;
 639
 640	lock_sock(sk);
 641	sockopt_seq_inc(msk);
 642	msk->cork = !!val;
 643	mptcp_for_each_subflow(msk, subflow) {
 644		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 645
 646		lock_sock(ssk);
 647		__tcp_sock_set_cork(ssk, !!val);
 648		release_sock(ssk);
 649	}
 650	if (!val)
 651		mptcp_check_and_set_pending(sk);
 652	release_sock(sk);
 653
 654	return 0;
 655}
 656
 657static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval,
 658					    unsigned int optlen)
 659{
 660	struct mptcp_subflow_context *subflow;
 661	struct sock *sk = (struct sock *)msk;
 662	int val;
 663
 664	if (optlen < sizeof(int))
 665		return -EINVAL;
 666
 667	if (copy_from_sockptr(&val, optval, sizeof(val)))
 668		return -EFAULT;
 669
 670	lock_sock(sk);
 671	sockopt_seq_inc(msk);
 672	msk->nodelay = !!val;
 673	mptcp_for_each_subflow(msk, subflow) {
 674		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 675
 676		lock_sock(ssk);
 677		__tcp_sock_set_nodelay(ssk, !!val);
 678		release_sock(ssk);
 679	}
 680	if (val)
 681		mptcp_check_and_set_pending(sk);
 682	release_sock(sk);
 683
 684	return 0;
 685}
 686
 687static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname,
 688				       sockptr_t optval, unsigned int optlen)
 689{
 690	struct sock *sk = (struct sock *)msk;
 691	struct sock *ssk;
 692	int err;
 693
 694	err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
 695	if (err != 0)
 696		return err;
 697
 698	lock_sock(sk);
 699
 700	ssk = __mptcp_nmpc_sk(msk);
 701	if (IS_ERR(ssk)) {
 702		release_sock(sk);
 703		return PTR_ERR(ssk);
 704	}
 705
 706	switch (optname) {
 707	case IP_FREEBIND:
 708		inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
 709		break;
 710	case IP_TRANSPARENT:
 711		inet_assign_bit(TRANSPARENT, ssk,
 712				inet_test_bit(TRANSPARENT, sk));
 713		break;
 714	case IP_BIND_ADDRESS_NO_PORT:
 715		inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk,
 716				inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
 717		break;
 718	case IP_LOCAL_PORT_RANGE:
 719		WRITE_ONCE(inet_sk(ssk)->local_port_range,
 720			   READ_ONCE(inet_sk(sk)->local_port_range));
 721		break;
 722	default:
 723		release_sock(sk);
 724		WARN_ON_ONCE(1);
 725		return -EOPNOTSUPP;
 726	}
 727
 728	sockopt_seq_inc(msk);
 729	release_sock(sk);
 730	return 0;
 731}
 732
 733static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
 734				       sockptr_t optval, unsigned int optlen)
 735{
 736	struct mptcp_subflow_context *subflow;
 737	struct sock *sk = (struct sock *)msk;
 738	int err, val;
 739
 740	err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
 741
 742	if (err != 0)
 743		return err;
 744
 745	lock_sock(sk);
 746	sockopt_seq_inc(msk);
 747	val = READ_ONCE(inet_sk(sk)->tos);
 748	mptcp_for_each_subflow(msk, subflow) {
 749		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 750		bool slow;
 751
 752		slow = lock_sock_fast(ssk);
 753		__ip_sock_set_tos(ssk, val);
 754		unlock_sock_fast(ssk, slow);
 755	}
 756	release_sock(sk);
 757
 758	return 0;
 759}
 760
 761static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
 762			       sockptr_t optval, unsigned int optlen)
 763{
 764	switch (optname) {
 765	case IP_FREEBIND:
 766	case IP_TRANSPARENT:
 767	case IP_BIND_ADDRESS_NO_PORT:
 768	case IP_LOCAL_PORT_RANGE:
 769		return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen);
 770	case IP_TOS:
 771		return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen);
 772	}
 773
 774	return -EOPNOTSUPP;
 775}
 776
 777static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
 778					  sockptr_t optval, unsigned int optlen)
 779{
 780	struct sock *sk = (struct sock *)msk;
 781	struct sock *ssk;
 782	int ret;
 783
 784	/* Limit to first subflow, before the connection establishment */
 785	lock_sock(sk);
 786	ssk = __mptcp_nmpc_sk(msk);
 787	if (IS_ERR(ssk)) {
 788		ret = PTR_ERR(ssk);
 789		goto unlock;
 790	}
 791
 792	ret = tcp_setsockopt(ssk, level, optname, optval, optlen);
 793
 794unlock:
 795	release_sock(sk);
 796	return ret;
 797}
 798
 799static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
 800				    sockptr_t optval, unsigned int optlen)
 801{
 802	struct sock *sk = (void *)msk;
 803	int ret, val;
 804
 805	switch (optname) {
 806	case TCP_INQ:
 807		ret = mptcp_get_int_option(msk, optval, optlen, &val);
 808		if (ret)
 809			return ret;
 810		if (val < 0 || val > 1)
 811			return -EINVAL;
 812
 813		lock_sock(sk);
 814		msk->recvmsg_inq = !!val;
 815		release_sock(sk);
 816		return 0;
 817	case TCP_ULP:
 818		return -EOPNOTSUPP;
 819	case TCP_CONGESTION:
 820		return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
 821	case TCP_CORK:
 822		return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen);
 823	case TCP_NODELAY:
 824		return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen);
 825	case TCP_DEFER_ACCEPT:
 826		/* See tcp.c: TCP_DEFER_ACCEPT does not fail */
 827		mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen);
 828		return 0;
 829	case TCP_FASTOPEN:
 830	case TCP_FASTOPEN_CONNECT:
 831	case TCP_FASTOPEN_KEY:
 832	case TCP_FASTOPEN_NO_COOKIE:
 833		return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname,
 834						      optval, optlen);
 835	}
 836
 837	return -EOPNOTSUPP;
 838}
 839
 840int mptcp_setsockopt(struct sock *sk, int level, int optname,
 841		     sockptr_t optval, unsigned int optlen)
 842{
 843	struct mptcp_sock *msk = mptcp_sk(sk);
 844	struct sock *ssk;
 845
 846	pr_debug("msk=%p", msk);
 847
 848	if (level == SOL_SOCKET)
 849		return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
 850
 851	if (!mptcp_supported_sockopt(level, optname))
 852		return -ENOPROTOOPT;
 853
 854	/* @@ the meaning of setsockopt() when the socket is connected and
 855	 * there are multiple subflows is not yet defined. It is up to the
 856	 * MPTCP-level socket to configure the subflows until the subflow
 857	 * is in TCP fallback, when TCP socket options are passed through
 858	 * to the one remaining subflow.
 859	 */
 860	lock_sock(sk);
 861	ssk = __mptcp_tcp_fallback(msk);
 862	release_sock(sk);
 863	if (ssk)
 864		return tcp_setsockopt(ssk, level, optname, optval, optlen);
 865
 866	if (level == SOL_IP)
 867		return mptcp_setsockopt_v4(msk, optname, optval, optlen);
 868
 869	if (level == SOL_IPV6)
 870		return mptcp_setsockopt_v6(msk, optname, optval, optlen);
 871
 872	if (level == SOL_TCP)
 873		return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen);
 874
 875	return -EOPNOTSUPP;
 876}
 877
 878static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
 879					  char __user *optval, int __user *optlen)
 880{
 881	struct sock *sk = (struct sock *)msk;
 882	struct sock *ssk;
 883	int ret;
 884
 885	lock_sock(sk);
 886	ssk = msk->first;
 887	if (ssk) {
 888		ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
 889		goto out;
 890	}
 891
 892	ssk = __mptcp_nmpc_sk(msk);
 893	if (IS_ERR(ssk)) {
 894		ret = PTR_ERR(ssk);
 895		goto out;
 896	}
 897
 898	ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
 899
 900out:
 901	release_sock(sk);
 902	return ret;
 903}
 904
 905void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
 906{
 907	struct sock *sk = (struct sock *)msk;
 908	u32 flags = 0;
 909	bool slow;
 910
 911	memset(info, 0, sizeof(*info));
 912
 913	info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
 914	info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
 915	info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
 916	info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
 917
 918	if (inet_sk_state_load(sk) == TCP_LISTEN)
 919		return;
 920
 921	/* The following limits only make sense for the in-kernel PM */
 922	if (mptcp_pm_is_kernel(msk)) {
 923		info->mptcpi_subflows_max =
 924			mptcp_pm_get_subflows_max(msk);
 925		info->mptcpi_add_addr_signal_max =
 926			mptcp_pm_get_add_addr_signal_max(msk);
 927		info->mptcpi_add_addr_accepted_max =
 928			mptcp_pm_get_add_addr_accept_max(msk);
 929		info->mptcpi_local_addr_max =
 930			mptcp_pm_get_local_addr_max(msk);
 931	}
 932
 933	if (__mptcp_check_fallback(msk))
 934		flags |= MPTCP_INFO_FLAG_FALLBACK;
 935	if (READ_ONCE(msk->can_ack))
 936		flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
 937	info->mptcpi_flags = flags;
 938	mptcp_data_lock(sk);
 939	info->mptcpi_snd_una = msk->snd_una;
 940	info->mptcpi_rcv_nxt = msk->ack_seq;
 941	info->mptcpi_bytes_acked = msk->bytes_acked;
 942	mptcp_data_unlock(sk);
 943
 944	slow = lock_sock_fast(sk);
 945	info->mptcpi_csum_enabled = msk->csum_enabled;
 946	info->mptcpi_token = msk->token;
 947	info->mptcpi_write_seq = msk->write_seq;
 948	info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits;
 949	info->mptcpi_bytes_sent = msk->bytes_sent;
 950	info->mptcpi_bytes_received = msk->bytes_received;
 951	info->mptcpi_bytes_retrans = msk->bytes_retrans;
 952	info->mptcpi_subflows_total = info->mptcpi_subflows +
 953		__mptcp_has_initial_subflow(msk);
 954	unlock_sock_fast(sk, slow);
 955}
 956EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
 957
 958static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen)
 959{
 960	struct mptcp_info m_info;
 961	int len;
 962
 963	if (get_user(len, optlen))
 964		return -EFAULT;
 965
 966	len = min_t(unsigned int, len, sizeof(struct mptcp_info));
 967
 968	mptcp_diag_fill_info(msk, &m_info);
 969
 970	if (put_user(len, optlen))
 971		return -EFAULT;
 972
 973	if (copy_to_user(optval, &m_info, len))
 974		return -EFAULT;
 975
 976	return 0;
 977}
 978
 979static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd,
 980				  char __user *optval,
 981				  u32 copied,
 982				  int __user *optlen)
 983{
 984	u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd));
 985
 986	if (copied)
 987		copied += sfd->size_subflow_data;
 988	else
 989		copied = copylen;
 990
 991	if (put_user(copied, optlen))
 992		return -EFAULT;
 993
 994	if (copy_to_user(optval, sfd, copylen))
 995		return -EFAULT;
 996
 997	return 0;
 998}
 999
1000static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
1001				  char __user *optval,
1002				  int __user *optlen)
1003{
1004	int len, copylen;
1005
1006	if (get_user(len, optlen))
1007		return -EFAULT;
1008
1009	/* if mptcp_subflow_data size is changed, need to adjust
1010	 * this function to deal with programs using old version.
1011	 */
1012	BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE);
1013
1014	if (len < MIN_INFO_OPTLEN_SIZE)
1015		return -EINVAL;
1016
1017	memset(sfd, 0, sizeof(*sfd));
1018
1019	copylen = min_t(unsigned int, len, sizeof(*sfd));
1020	if (copy_from_user(sfd, optval, copylen))
1021		return -EFAULT;
1022
1023	/* size_subflow_data is u32, but len is signed */
1024	if (sfd->size_subflow_data > INT_MAX ||
1025	    sfd->size_user > INT_MAX)
1026		return -EINVAL;
1027
1028	if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE ||
1029	    sfd->size_subflow_data > len)
1030		return -EINVAL;
1031
1032	if (sfd->num_subflows || sfd->size_kernel)
1033		return -EINVAL;
1034
1035	return len - sfd->size_subflow_data;
1036}
1037
1038static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval,
1039				    int __user *optlen)
1040{
1041	struct mptcp_subflow_context *subflow;
1042	struct sock *sk = (struct sock *)msk;
1043	unsigned int sfcount = 0, copied = 0;
1044	struct mptcp_subflow_data sfd;
1045	char __user *infoptr;
1046	int len;
1047
1048	len = mptcp_get_subflow_data(&sfd, optval, optlen);
1049	if (len < 0)
1050		return len;
1051
1052	sfd.size_kernel = sizeof(struct tcp_info);
1053	sfd.size_user = min_t(unsigned int, sfd.size_user,
1054			      sizeof(struct tcp_info));
1055
1056	infoptr = optval + sfd.size_subflow_data;
1057
1058	lock_sock(sk);
1059
1060	mptcp_for_each_subflow(msk, subflow) {
1061		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
1062
1063		++sfcount;
1064
1065		if (len && len >= sfd.size_user) {
1066			struct tcp_info info;
1067
1068			tcp_get_info(ssk, &info);
1069
1070			if (copy_to_user(infoptr, &info, sfd.size_user)) {
1071				release_sock(sk);
1072				return -EFAULT;
1073			}
1074
1075			infoptr += sfd.size_user;
1076			copied += sfd.size_user;
1077			len -= sfd.size_user;
1078		}
1079	}
1080
1081	release_sock(sk);
1082
1083	sfd.num_subflows = sfcount;
1084
1085	if (mptcp_put_subflow_data(&sfd, optval, copied, optlen))
1086		return -EFAULT;
1087
1088	return 0;
1089}
1090
1091static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a)
1092{
1093	const struct inet_sock *inet = inet_sk(sk);
1094
1095	memset(a, 0, sizeof(*a));
1096
1097	if (sk->sk_family == AF_INET) {
1098		a->sin_local.sin_family = AF_INET;
1099		a->sin_local.sin_port = inet->inet_sport;
1100		a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr;
1101
1102		if (!a->sin_local.sin_addr.s_addr)
1103			a->sin_local.sin_addr.s_addr = inet->inet_saddr;
1104
1105		a->sin_remote.sin_family = AF_INET;
1106		a->sin_remote.sin_port = inet->inet_dport;
1107		a->sin_remote.sin_addr.s_addr = inet->inet_daddr;
1108#if IS_ENABLED(CONFIG_IPV6)
1109	} else if (sk->sk_family == AF_INET6) {
1110		const struct ipv6_pinfo *np = inet6_sk(sk);
1111
1112		if (WARN_ON_ONCE(!np))
1113			return;
1114
1115		a->sin6_local.sin6_family = AF_INET6;
1116		a->sin6_local.sin6_port = inet->inet_sport;
1117
1118		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
1119			a->sin6_local.sin6_addr = np->saddr;
1120		else
1121			a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr;
1122
1123		a->sin6_remote.sin6_family = AF_INET6;
1124		a->sin6_remote.sin6_port = inet->inet_dport;
1125		a->sin6_remote.sin6_addr = sk->sk_v6_daddr;
1126#endif
1127	}
1128}
1129
1130static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval,
1131					  int __user *optlen)
1132{
1133	struct mptcp_subflow_context *subflow;
1134	struct sock *sk = (struct sock *)msk;
1135	unsigned int sfcount = 0, copied = 0;
1136	struct mptcp_subflow_data sfd;
1137	char __user *addrptr;
1138	int len;
1139
1140	len = mptcp_get_subflow_data(&sfd, optval, optlen);
1141	if (len < 0)
1142		return len;
1143
1144	sfd.size_kernel = sizeof(struct mptcp_subflow_addrs);
1145	sfd.size_user = min_t(unsigned int, sfd.size_user,
1146			      sizeof(struct mptcp_subflow_addrs));
1147
1148	addrptr = optval + sfd.size_subflow_data;
1149
1150	lock_sock(sk);
1151
1152	mptcp_for_each_subflow(msk, subflow) {
1153		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
1154
1155		++sfcount;
1156
1157		if (len && len >= sfd.size_user) {
1158			struct mptcp_subflow_addrs a;
1159
1160			mptcp_get_sub_addrs(ssk, &a);
1161
1162			if (copy_to_user(addrptr, &a, sfd.size_user)) {
1163				release_sock(sk);
1164				return -EFAULT;
1165			}
1166
1167			addrptr += sfd.size_user;
1168			copied += sfd.size_user;
1169			len -= sfd.size_user;
1170		}
1171	}
1172
1173	release_sock(sk);
1174
1175	sfd.num_subflows = sfcount;
1176
1177	if (mptcp_put_subflow_data(&sfd, optval, copied, optlen))
1178		return -EFAULT;
1179
1180	return 0;
1181}
1182
1183static int mptcp_get_full_info(struct mptcp_full_info *mfi,
1184			       char __user *optval,
1185			       int __user *optlen)
1186{
1187	int len;
1188
1189	BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) !=
1190		     MIN_FULL_INFO_OPTLEN_SIZE);
1191
1192	if (get_user(len, optlen))
1193		return -EFAULT;
1194
1195	if (len < MIN_FULL_INFO_OPTLEN_SIZE)
1196		return -EINVAL;
1197
1198	memset(mfi, 0, sizeof(*mfi));
1199	if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE))
1200		return -EFAULT;
1201
1202	if (mfi->size_tcpinfo_kernel ||
1203	    mfi->size_sfinfo_kernel ||
1204	    mfi->num_subflows)
1205		return -EINVAL;
1206
1207	if (mfi->size_sfinfo_user > INT_MAX ||
1208	    mfi->size_tcpinfo_user > INT_MAX)
1209		return -EINVAL;
1210
1211	return len - MIN_FULL_INFO_OPTLEN_SIZE;
1212}
1213
1214static int mptcp_put_full_info(struct mptcp_full_info *mfi,
1215			       char __user *optval,
1216			       u32 copylen,
1217			       int __user *optlen)
1218{
1219	copylen += MIN_FULL_INFO_OPTLEN_SIZE;
1220	if (put_user(copylen, optlen))
1221		return -EFAULT;
1222
1223	if (copy_to_user(optval, mfi, copylen))
1224		return -EFAULT;
1225	return 0;
1226}
1227
1228static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval,
1229				      int __user *optlen)
1230{
1231	unsigned int sfcount = 0, copylen = 0;
1232	struct mptcp_subflow_context *subflow;
1233	struct sock *sk = (struct sock *)msk;
1234	void __user *tcpinfoptr, *sfinfoptr;
1235	struct mptcp_full_info mfi;
1236	int len;
1237
1238	len = mptcp_get_full_info(&mfi, optval, optlen);
1239	if (len < 0)
1240		return len;
1241
1242	/* don't bother filling the mptcp info if there is not enough
1243	 * user-space-provided storage
1244	 */
1245	if (len > 0) {
1246		mptcp_diag_fill_info(msk, &mfi.mptcp_info);
1247		copylen += min_t(unsigned int, len, sizeof(struct mptcp_info));
1248	}
1249
1250	mfi.size_tcpinfo_kernel = sizeof(struct tcp_info);
1251	mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user,
1252				      sizeof(struct tcp_info));
1253	sfinfoptr = u64_to_user_ptr(mfi.subflow_info);
1254	mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info);
1255	mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user,
1256				     sizeof(struct mptcp_subflow_info));
1257	tcpinfoptr = u64_to_user_ptr(mfi.tcp_info);
1258
1259	lock_sock(sk);
1260	mptcp_for_each_subflow(msk, subflow) {
1261		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
1262		struct mptcp_subflow_info sfinfo;
1263		struct tcp_info tcp_info;
1264
1265		if (sfcount++ >= mfi.size_arrays_user)
1266			continue;
1267
1268		/* fetch addr/tcp_info only if the user space buffers
1269		 * are wide enough
1270		 */
1271		memset(&sfinfo, 0, sizeof(sfinfo));
1272		sfinfo.id = subflow->subflow_id;
1273		if (mfi.size_sfinfo_user >
1274		    offsetof(struct mptcp_subflow_info, addrs))
1275			mptcp_get_sub_addrs(ssk, &sfinfo.addrs);
1276		if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user))
1277			goto fail_release;
1278
1279		if (mfi.size_tcpinfo_user) {
1280			tcp_get_info(ssk, &tcp_info);
1281			if (copy_to_user(tcpinfoptr, &tcp_info,
1282					 mfi.size_tcpinfo_user))
1283				goto fail_release;
1284		}
1285
1286		tcpinfoptr += mfi.size_tcpinfo_user;
1287		sfinfoptr += mfi.size_sfinfo_user;
1288	}
1289	release_sock(sk);
1290
1291	mfi.num_subflows = sfcount;
1292	if (mptcp_put_full_info(&mfi, optval, copylen, optlen))
1293		return -EFAULT;
1294
1295	return 0;
1296
1297fail_release:
1298	release_sock(sk);
1299	return -EFAULT;
1300}
1301
1302static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
1303				int __user *optlen, int val)
1304{
1305	int len;
1306
1307	if (get_user(len, optlen))
1308		return -EFAULT;
1309	if (len < 0)
1310		return -EINVAL;
1311
1312	if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
1313		unsigned char ucval = (unsigned char)val;
1314
1315		len = 1;
1316		if (put_user(len, optlen))
1317			return -EFAULT;
1318		if (copy_to_user(optval, &ucval, 1))
1319			return -EFAULT;
1320	} else {
1321		len = min_t(unsigned int, len, sizeof(int));
1322		if (put_user(len, optlen))
1323			return -EFAULT;
1324		if (copy_to_user(optval, &val, len))
1325			return -EFAULT;
1326	}
1327
1328	return 0;
1329}
1330
1331static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
1332				    char __user *optval, int __user *optlen)
1333{
1334	switch (optname) {
1335	case TCP_ULP:
1336	case TCP_CONGESTION:
1337	case TCP_INFO:
1338	case TCP_CC_INFO:
1339	case TCP_DEFER_ACCEPT:
1340	case TCP_FASTOPEN:
1341	case TCP_FASTOPEN_CONNECT:
1342	case TCP_FASTOPEN_KEY:
1343	case TCP_FASTOPEN_NO_COOKIE:
1344		return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
1345						      optval, optlen);
1346	case TCP_INQ:
1347		return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq);
1348	case TCP_CORK:
1349		return mptcp_put_int_option(msk, optval, optlen, msk->cork);
1350	case TCP_NODELAY:
1351		return mptcp_put_int_option(msk, optval, optlen, msk->nodelay);
1352	}
1353	return -EOPNOTSUPP;
1354}
1355
1356static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
1357			       char __user *optval, int __user *optlen)
1358{
1359	struct sock *sk = (void *)msk;
1360
1361	switch (optname) {
1362	case IP_TOS:
1363		return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos));
1364	case IP_BIND_ADDRESS_NO_PORT:
1365		return mptcp_put_int_option(msk, optval, optlen,
1366				inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
1367	case IP_LOCAL_PORT_RANGE:
1368		return mptcp_put_int_option(msk, optval, optlen,
1369				READ_ONCE(inet_sk(sk)->local_port_range));
1370	}
1371
1372	return -EOPNOTSUPP;
1373}
1374
1375static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
1376				      char __user *optval, int __user *optlen)
1377{
1378	switch (optname) {
1379	case MPTCP_INFO:
1380		return mptcp_getsockopt_info(msk, optval, optlen);
1381	case MPTCP_FULL_INFO:
1382		return mptcp_getsockopt_full_info(msk, optval, optlen);
1383	case MPTCP_TCPINFO:
1384		return mptcp_getsockopt_tcpinfo(msk, optval, optlen);
1385	case MPTCP_SUBFLOW_ADDRS:
1386		return mptcp_getsockopt_subflow_addrs(msk, optval, optlen);
1387	}
1388
1389	return -EOPNOTSUPP;
1390}
1391
1392int mptcp_getsockopt(struct sock *sk, int level, int optname,
1393		     char __user *optval, int __user *option)
1394{
1395	struct mptcp_sock *msk = mptcp_sk(sk);
1396	struct sock *ssk;
1397
1398	pr_debug("msk=%p", msk);
1399
1400	/* @@ the meaning of setsockopt() when the socket is connected and
1401	 * there are multiple subflows is not yet defined. It is up to the
1402	 * MPTCP-level socket to configure the subflows until the subflow
1403	 * is in TCP fallback, when socket options are passed through
1404	 * to the one remaining subflow.
1405	 */
1406	lock_sock(sk);
1407	ssk = __mptcp_tcp_fallback(msk);
1408	release_sock(sk);
1409	if (ssk)
1410		return tcp_getsockopt(ssk, level, optname, optval, option);
1411
1412	if (level == SOL_IP)
1413		return mptcp_getsockopt_v4(msk, optname, optval, option);
1414	if (level == SOL_TCP)
1415		return mptcp_getsockopt_sol_tcp(msk, optname, optval, option);
1416	if (level == SOL_MPTCP)
1417		return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option);
1418	return -EOPNOTSUPP;
1419}
1420
1421static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
1422{
1423	static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK;
1424	struct sock *sk = (struct sock *)msk;
1425
1426	if (ssk->sk_prot->keepalive) {
1427		if (sock_flag(sk, SOCK_KEEPOPEN))
1428			ssk->sk_prot->keepalive(ssk, 1);
1429		else
1430			ssk->sk_prot->keepalive(ssk, 0);
1431	}
1432
1433	ssk->sk_priority = sk->sk_priority;
1434	ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
1435	ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
1436	ssk->sk_ipv6only = sk->sk_ipv6only;
1437	__ip_sock_set_tos(ssk, inet_sk(sk)->tos);
1438
1439	if (sk->sk_userlocks & tx_rx_locks) {
1440		ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks;
1441		if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) {
1442			WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
1443			mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf;
1444		}
1445		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
1446			WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
1447	}
1448
1449	if (sock_flag(sk, SOCK_LINGER)) {
1450		ssk->sk_lingertime = sk->sk_lingertime;
1451		sock_set_flag(ssk, SOCK_LINGER);
1452	} else {
1453		sock_reset_flag(ssk, SOCK_LINGER);
1454	}
1455
1456	if (sk->sk_mark != ssk->sk_mark) {
1457		ssk->sk_mark = sk->sk_mark;
1458		sk_dst_reset(ssk);
1459	}
1460
1461	sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG));
1462
1463	if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops)
1464		tcp_set_congestion_control(ssk, msk->ca_name, false, true);
1465	__tcp_sock_set_cork(ssk, !!msk->cork);
1466	__tcp_sock_set_nodelay(ssk, !!msk->nodelay);
1467
1468	inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
1469	inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
1470	inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
1471	WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range));
1472}
1473
1474void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
1475{
1476	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
1477
1478	msk_owned_by_me(msk);
1479
1480	ssk->sk_rcvlowat = 0;
1481
1482	/* subflows must ignore any latency-related settings: will not affect
1483	 * the user-space - only the msk is relevant - but will foul the
1484	 * mptcp scheduler
1485	 */
1486	tcp_sk(ssk)->notsent_lowat = UINT_MAX;
1487
1488	if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
1489		sync_socket_options(msk, ssk);
1490
1491		subflow->setsockopt_seq = msk->setsockopt_seq;
1492	}
1493}
1494
1495/* unfortunately this is different enough from the tcp version so
1496 * that we can't factor it out
1497 */
1498int mptcp_set_rcvlowat(struct sock *sk, int val)
1499{
1500	struct mptcp_subflow_context *subflow;
1501	int space, cap;
1502
1503	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
1504		cap = sk->sk_rcvbuf >> 1;
1505	else
1506		cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
1507	val = min(val, cap);
1508	WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
1509
1510	/* Check if we need to signal EPOLLIN right now */
1511	if (mptcp_epollin_ready(sk))
1512		sk->sk_data_ready(sk);
1513
1514	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
1515		return 0;
1516
1517	space = __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, val);
1518	if (space <= sk->sk_rcvbuf)
1519		return 0;
1520
1521	/* propagate the rcvbuf changes to all the subflows */
1522	WRITE_ONCE(sk->sk_rcvbuf, space);
1523	mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
1524		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
1525		bool slow;
1526
1527		slow = lock_sock_fast(ssk);
1528		WRITE_ONCE(ssk->sk_rcvbuf, space);
1529		tcp_sk(ssk)->window_clamp = val;
1530		unlock_sock_fast(ssk, slow);
1531	}
1532	return 0;
1533}