Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.15.
   1/* SPDX-License-Identifier: GPL-2.0 */
   2/* Multipath TCP
   3 *
   4 * Copyright (c) 2017 - 2019, Intel Corporation.
   5 */
   6
   7#ifndef __MPTCP_PROTOCOL_H
   8#define __MPTCP_PROTOCOL_H
   9
  10#include <linux/random.h>
  11#include <net/tcp.h>
  12#include <net/inet_connection_sock.h>
  13#include <uapi/linux/mptcp.h>
  14#include <net/genetlink.h>
  15
  16#define MPTCP_SUPPORTED_VERSION	1
  17
  18/* MPTCP option bits */
  19#define OPTION_MPTCP_MPC_SYN	BIT(0)
  20#define OPTION_MPTCP_MPC_SYNACK	BIT(1)
  21#define OPTION_MPTCP_MPC_ACK	BIT(2)
  22#define OPTION_MPTCP_MPJ_SYN	BIT(3)
  23#define OPTION_MPTCP_MPJ_SYNACK	BIT(4)
  24#define OPTION_MPTCP_MPJ_ACK	BIT(5)
  25#define OPTION_MPTCP_ADD_ADDR	BIT(6)
  26#define OPTION_MPTCP_RM_ADDR	BIT(7)
  27#define OPTION_MPTCP_FASTCLOSE	BIT(8)
  28#define OPTION_MPTCP_PRIO	BIT(9)
  29#define OPTION_MPTCP_RST	BIT(10)
  30#define OPTION_MPTCP_DSS	BIT(11)
  31#define OPTION_MPTCP_FAIL	BIT(12)
  32
  33#define OPTION_MPTCP_CSUMREQD	BIT(13)
  34
  35#define OPTIONS_MPTCP_MPC	(OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | \
  36				 OPTION_MPTCP_MPC_ACK)
  37#define OPTIONS_MPTCP_MPJ	(OPTION_MPTCP_MPJ_SYN | OPTION_MPTCP_MPJ_SYNACK | \
  38				 OPTION_MPTCP_MPJ_ACK)
  39
  40/* MPTCP option subtypes */
  41#define MPTCPOPT_MP_CAPABLE	0
  42#define MPTCPOPT_MP_JOIN	1
  43#define MPTCPOPT_DSS		2
  44#define MPTCPOPT_ADD_ADDR	3
  45#define MPTCPOPT_RM_ADDR	4
  46#define MPTCPOPT_MP_PRIO	5
  47#define MPTCPOPT_MP_FAIL	6
  48#define MPTCPOPT_MP_FASTCLOSE	7
  49#define MPTCPOPT_RST		8
  50
  51/* MPTCP suboption lengths */
  52#define TCPOLEN_MPTCP_MPC_SYN		4
  53#define TCPOLEN_MPTCP_MPC_SYNACK	12
  54#define TCPOLEN_MPTCP_MPC_ACK		20
  55#define TCPOLEN_MPTCP_MPC_ACK_DATA	22
  56#define TCPOLEN_MPTCP_MPJ_SYN		12
  57#define TCPOLEN_MPTCP_MPJ_SYNACK	16
  58#define TCPOLEN_MPTCP_MPJ_ACK		24
  59#define TCPOLEN_MPTCP_DSS_BASE		4
  60#define TCPOLEN_MPTCP_DSS_ACK32		4
  61#define TCPOLEN_MPTCP_DSS_ACK64		8
  62#define TCPOLEN_MPTCP_DSS_MAP32		10
  63#define TCPOLEN_MPTCP_DSS_MAP64		14
  64#define TCPOLEN_MPTCP_DSS_CHECKSUM	2
  65#define TCPOLEN_MPTCP_ADD_ADDR		16
  66#define TCPOLEN_MPTCP_ADD_ADDR_PORT	18
  67#define TCPOLEN_MPTCP_ADD_ADDR_BASE	8
  68#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT	10
  69#define TCPOLEN_MPTCP_ADD_ADDR6		28
  70#define TCPOLEN_MPTCP_ADD_ADDR6_PORT	30
  71#define TCPOLEN_MPTCP_ADD_ADDR6_BASE	20
  72#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT	22
  73#define TCPOLEN_MPTCP_PORT_LEN		2
  74#define TCPOLEN_MPTCP_PORT_ALIGN	2
  75#define TCPOLEN_MPTCP_RM_ADDR_BASE	3
  76#define TCPOLEN_MPTCP_PRIO		3
  77#define TCPOLEN_MPTCP_PRIO_ALIGN	4
  78#define TCPOLEN_MPTCP_FASTCLOSE		12
  79#define TCPOLEN_MPTCP_RST		4
  80#define TCPOLEN_MPTCP_FAIL		12
  81
  82#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM	(TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
  83
  84/* MPTCP MP_JOIN flags */
  85#define MPTCPOPT_BACKUP		BIT(0)
  86#define MPTCPOPT_THMAC_LEN	8
  87
  88/* MPTCP MP_CAPABLE flags */
  89#define MPTCP_VERSION_MASK	(0x0F)
  90#define MPTCP_CAP_CHECKSUM_REQD	BIT(7)
  91#define MPTCP_CAP_EXTENSIBILITY	BIT(6)
  92#define MPTCP_CAP_DENY_JOIN_ID0	BIT(5)
  93#define MPTCP_CAP_HMAC_SHA256	BIT(0)
  94#define MPTCP_CAP_FLAG_MASK	(0x1F)
  95
  96/* MPTCP DSS flags */
  97#define MPTCP_DSS_DATA_FIN	BIT(4)
  98#define MPTCP_DSS_DSN64		BIT(3)
  99#define MPTCP_DSS_HAS_MAP	BIT(2)
 100#define MPTCP_DSS_ACK64		BIT(1)
 101#define MPTCP_DSS_HAS_ACK	BIT(0)
 102#define MPTCP_DSS_FLAG_MASK	(0x1F)
 103
 104/* MPTCP ADD_ADDR flags */
 105#define MPTCP_ADDR_ECHO		BIT(0)
 106
 107/* MPTCP MP_PRIO flags */
 108#define MPTCP_PRIO_BKUP		BIT(0)
 109
 110/* MPTCP TCPRST flags */
 111#define MPTCP_RST_TRANSIENT	BIT(0)
 112
 113/* MPTCP socket atomic flags */
 114#define MPTCP_NOSPACE		1
 115#define MPTCP_WORK_RTX		2
 116#define MPTCP_WORK_EOF		3
 117#define MPTCP_FALLBACK_DONE	4
 118#define MPTCP_WORK_CLOSE_SUBFLOW 5
 119
 120/* MPTCP socket release cb flags */
 121#define MPTCP_PUSH_PENDING	1
 122#define MPTCP_CLEAN_UNA		2
 123#define MPTCP_ERROR_REPORT	3
 124#define MPTCP_RETRANSMIT	4
 125#define MPTCP_FLUSH_JOIN_LIST	5
 126#define MPTCP_CONNECTED		6
 127#define MPTCP_RESET_SCHEDULER	7
 128
/* MPTCP private per-skb state. It is reached through MPTCP_SKB_CB(), which
 * overlays this struct on the start of skb->cb[].
 */
struct mptcp_skb_cb {
	u64 map_seq;
	u64 end_seq;
	u32 offset;
	u8  has_rxtstamp:1;
};
 135
 136#define MPTCP_SKB_CB(__skb)	((struct mptcp_skb_cb *)&((__skb)->cb[0]))
 137
 138static inline bool before64(__u64 seq1, __u64 seq2)
 139{
 140	return (__s64)(seq1 - seq2) < 0;
 141}
 142
 143#define after64(seq2, seq1)	before64(seq1, seq2)
 144
/* Decoded MPTCP option values for one packet.
 * NOTE(review): presumably filled in by mptcp_get_options(), which takes a
 * non-const pointer to this struct - confirm against its definition.
 */
struct mptcp_options_received {
	u64	sndr_key;
	u64	rcvr_key;
	u64	data_ack;
	u64	data_seq;
	u32	subflow_seq;
	u16	data_len;
	__sum16	csum;
	u16	suboptions;	/* presumably a mask of OPTION_MPTCP_* bits - TODO confirm */
	u32	token;
	u32	nonce;
	/* the one-bit flags, reset_reason and __unused add up to 16 bits */
	u16	use_map:1,
		dsn64:1,
		data_fin:1,
		use_ack:1,
		ack64:1,
		mpc_map:1,
		reset_reason:4,
		reset_transient:1,
		echo:1,
		backup:1,
		deny_join_id0:1,
		__unused:2;
	u8	join_id;
	u64	thmac;
	u8	hmac[MPTCPOPT_HMAC_LEN];
	struct mptcp_addr_info addr;
	struct mptcp_rm_list rm_list;
	u64	ahmac;
	u64	fail_seq;
};
 176
 177static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
 178{
 179	return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
 180		     ((nib & 0xF) << 8) | field);
 181}
 182
/* Path-manager status values. They are used as bit positions: see
 * MPTCP_PM_WORK_MASK, which is built from 1 << MPTCP_PM_ALREADY_ESTABLISHED.
 */
enum mptcp_pm_status {
	MPTCP_PM_ADD_ADDR_RECEIVED,
	MPTCP_PM_ADD_ADDR_SEND_ACK,
	MPTCP_PM_RM_ADDR_RECEIVED,
	MPTCP_PM_ESTABLISHED,
	MPTCP_PM_SUBFLOW_ESTABLISHED,
	MPTCP_PM_ALREADY_ESTABLISHED,	/* persistent status, set after ESTABLISHED event */
	MPTCP_PM_MPC_ENDPOINT_ACCOUNTED /* persistent status, set after MPC local address is
					 * accounted into id_avail_bitmap
					 */
};
 194
/* Path-manager type. mptcp_pm_is_kernel() and mptcp_pm_is_userspace() compare
 * mptcp_pm_data.pm_type against these values.
 */
enum mptcp_pm_type {
	MPTCP_PM_TYPE_KERNEL = 0,
	MPTCP_PM_TYPE_USERSPACE,

	/* number of types; keep after the last real entry */
	__MPTCP_PM_TYPE_NR,
	/* highest valid type value */
	__MPTCP_PM_TYPE_MAX = __MPTCP_PM_TYPE_NR - 1,
};
 202
 203/* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */
 204#define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1)
 205
/* Bit positions within mptcp_pm_data.addr_signal; the mptcp_pm_should_*()
 * helpers test them with BIT().
 */
enum mptcp_addr_signal_status {
	MPTCP_ADD_ADDR_SIGNAL,
	MPTCP_ADD_ADDR_ECHO,
	MPTCP_RM_ADDR_SIGNAL,
};
 211
 212/* max value of mptcp_addr_info.id */
 213#define MPTCP_PM_MAX_ADDR_ID		U8_MAX
 214
/* Path-manager state, embedded in struct mptcp_sock as the 'pm' member. */
struct mptcp_pm_data {
	struct mptcp_addr_info local;
	struct mptcp_addr_info remote;
	struct list_head anno_list;
	struct list_head userspace_pm_local_addr_list;

	spinlock_t	lock;		/* protects the whole PM data */

	u8		addr_signal;	/* bits indexed by enum mptcp_addr_signal_status */
	bool		server_side;
	bool		work_pending;
	bool		accept_addr;
	bool		accept_subflow;
	bool		remote_deny_join_id0;
	u8		add_addr_signaled;
	u8		add_addr_accepted;
	u8		local_addr_used;
	u8		pm_type;	/* compared against enum mptcp_pm_type values */
	u8		subflows;	/* decremented by __mptcp_pm_close_subflow() */
	u8		status;		/* presumably enum mptcp_pm_status bits - TODO confirm */
	DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
	struct mptcp_rm_list rm_list_tx;
	struct mptcp_rm_list rm_list_rx;
};
 239
/* A list-linkable address entry: an mptcp_addr_info together with flags, an
 * interface index and a socket pointer.
 * NOTE(review): 'lsk' looks like a listening socket for the address - confirm.
 */
struct mptcp_pm_addr_entry {
	struct list_head	list;
	struct mptcp_addr_info	addr;
	u8			flags;
	int			ifindex;
	struct socket		*lsk;
};
 247
/* One entry of mptcp_sock.rtx_queue, linked through 'list'. The queue helpers
 * in this header (mptcp_send_next(), mptcp_pending_tail(), mptcp_rtx_head())
 * walk that list with this type.
 */
struct mptcp_data_frag {
	struct list_head list;
	u64 data_seq;
	u16 data_len;
	u16 offset;
	u16 overhead;
	u16 already_sent;
	struct page *page;
};
 257
/* MPTCP connection sock.
 *
 * mptcp_sk() casts a struct sock pointer straight to this type, so the socket
 * member must stay first.
 */
struct mptcp_sock {
	/* inet_connection_sock must be the first member */
	struct inet_connection_sock sk;
	u64		local_key;
	u64		remote_key;
	u64		write_seq;
	u64		snd_nxt;
	u64		ack_seq;
	atomic64_t	rcv_wnd_sent;
	u64		rcv_data_fin_seq;
	int		rmem_fwd_alloc;
	struct sock	*last_snd;
	int		snd_burst;
	int		old_wspace;
	u64		recovery_snd_nxt;	/* in recovery mode accept up to this seq;
						 * recovery related fields are under data_lock
						 * protection
						 */
	u64		snd_una;
	u64		wnd_end;
	unsigned long	timer_ival;
	u32		token;
	int		rmem_released;		/* subtracted from sk_rmem_alloc by __mptcp_rmem() */
	unsigned long	flags;			/* bit numbers such as MPTCP_NOSPACE, MPTCP_FALLBACK_DONE */
	unsigned long	cb_flags;		/* presumably the "release cb" bit numbers - TODO confirm */
	unsigned long	push_pending;
	bool		recovery;		/* closing subflow write queue reinjected */
	bool		can_ack;
	bool		fully_established;
	bool		rcv_data_fin;
	bool		snd_data_fin_enable;
	bool		rcv_fastclose;
	bool		use_64bit_ack; /* Set when we received a 64-bit DSN */
	bool		csum_enabled;
	bool		allow_infinite_fallback;
	u8		mpc_endpoint_id;
	u8		recvmsg_inq:1,
			cork:1,
			nodelay:1,
			fastopening:1;
	int		connect_flags;
	struct work_struct work;
	struct sk_buff  *ooo_last_skb;
	struct rb_root  out_of_order_queue;
	struct sk_buff_head receive_queue;
	struct list_head conn_list;	/* subflow contexts, linked by their 'node' member */
	struct list_head rtx_queue;	/* struct mptcp_data_frag entries, linked by 'list' */
	struct mptcp_data_frag *first_pending;
	struct list_head join_list;
	struct socket	*subflow; /* outgoing connect/listener/!mp_capable */
	struct sock	*first;
	struct mptcp_pm_data	pm;
	struct {
		u32	space;	/* bytes copied in last measurement window */
		u32	copied; /* bytes copied in this measurement window */
		u64	time;	/* start time of measurement window */
		u64	rtt_us; /* last maximum rtt of subflows */
	} rcvq_space;

	u32 setsockopt_seq;
	char		ca_name[TCP_CA_NAME_MAX];
	struct mptcp_sock	*dl_next;
};
 322
/* The msk "data lock" is the socket's own sk_lock.slock spinlock, taken and
 * released with bottom halves disabled.
 */
#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
#define mptcp_data_unlock(sk) spin_unlock_bh(&(sk)->sk_lock.slock)
 325
/* Iterate over the entries on __msk->conn_list, linked through their 'node'
 * member. The _safe variant takes an extra cursor (__tmp), as required by
 * list_for_each_entry_safe().
 */
#define mptcp_for_each_subflow(__msk, __subflow)			\
	list_for_each_entry(__subflow, &((__msk)->conn_list), node)
#define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp)			\
	list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node)
 330
 331static inline void msk_owned_by_me(const struct mptcp_sock *msk)
 332{
 333	sock_owned_by_me((const struct sock *)msk);
 334}
 335
 336static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
 337{
 338	return (struct mptcp_sock *)sk;
 339}
 340
 341/* the msk socket don't use the backlog, also account for the bulk
 342 * free memory
 343 */
 344static inline int __mptcp_rmem(const struct sock *sk)
 345{
 346	return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released);
 347}
 348
 349static inline int __mptcp_space(const struct sock *sk)
 350{
 351	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
 352}
 353
 354static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
 355{
 356	const struct mptcp_sock *msk = mptcp_sk(sk);
 357
 358	return READ_ONCE(msk->first_pending);
 359}
 360
 361static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk)
 362{
 363	struct mptcp_sock *msk = mptcp_sk(sk);
 364	struct mptcp_data_frag *cur;
 365
 366	cur = msk->first_pending;
 367	return list_is_last(&cur->list, &msk->rtx_queue) ? NULL :
 368						     list_next_entry(cur, list);
 369}
 370
 371static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
 372{
 373	struct mptcp_sock *msk = mptcp_sk(sk);
 374
 375	if (!msk->first_pending)
 376		return NULL;
 377
 378	if (WARN_ON_ONCE(list_empty(&msk->rtx_queue)))
 379		return NULL;
 380
 381	return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
 382}
 383
 384static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
 385{
 386	struct mptcp_sock *msk = mptcp_sk(sk);
 387
 388	if (msk->snd_una == READ_ONCE(msk->snd_nxt))
 389		return NULL;
 390
 391	return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
 392}
 393
/* Pseudo header with big-endian typed sequence and length fields plus a
 * checksum.
 * NOTE(review): presumably the input layout for the DSS checksum - confirm
 * against __mptcp_make_csum().
 */
struct csum_pseudo_header {
	__be64 data_seq;
	__be32 subflow_seq;
	__be16 data_len;
	__sum16 csum;
};
 400
/* MPTCP request socket. mptcp_subflow_rsk() casts a struct request_sock
 * pointer straight to this type, so 'sk' must stay the first member.
 */
struct mptcp_subflow_request_sock {
	struct	tcp_request_sock sk;
	u16	mp_capable : 1,
		mp_join : 1,
		backup : 1,
		csum_reqd : 1,
		allow_join_id0 : 1;
	u8	local_id;
	u8	remote_id;
	u64	local_key;
	u64	idsn;
	u32	token;
	u32	ssn_offset;
	u64	thmac;
	u32	local_nonce;
	u32	remote_nonce;
	struct mptcp_sock	*msk;
	/* pprev is cleared by mptcp_token_init_request() */
	struct hlist_nulls_node token_node;
};
 420
 421static inline struct mptcp_subflow_request_sock *
 422mptcp_subflow_rsk(const struct request_sock *rsk)
 423{
 424	return (struct mptcp_subflow_request_sock *)rsk;
 425}
 426
/* Type of the mptcp_subflow_context.data_avail member. */
enum mptcp_data_avail {
	MPTCP_SUBFLOW_NODATA,
	MPTCP_SUBFLOW_DATA_AVAIL,
};
 431
/* Delegated-action queue. mptcp_subflow_delegate() appends subflow contexts
 * to 'head' through their delegated_node member and schedules 'napi' when the
 * list was empty before the insertion.
 */
struct mptcp_delegated_action {
	struct napi_struct napi;
	struct list_head head;
};
 436
 437DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
 438
 439#define MPTCP_DELEGATE_SEND		0
 440#define MPTCP_DELEGATE_ACK		1
 441
/* MPTCP subflow context.
 *
 * Every member inside struct_group(reset, ...) is zeroed in one memset() by
 * mptcp_subflow_ctx_reset(); members outside the group are left untouched by
 * that helper.
 */
struct mptcp_subflow_context {
	struct	list_head node;/* conn_list of subflows */

	struct_group(reset,

	unsigned long avg_pacing_rate; /* protected by msk socket lock */
	u64	local_key;
	u64	remote_key;
	u64	idsn;
	u64	map_seq;
	u32	snd_isn;
	u32	token;
	u32	rel_write_seq;
	u32	map_subflow_seq;
	u32	ssn_offset;
	u32	map_data_len;
	__wsum	map_data_csum;
	u32	map_csum_len;
	u32	request_mptcp : 1,  /* send MP_CAPABLE */
		request_join : 1,   /* send MP_JOIN */
		request_bkup : 1,
		mp_capable : 1,	    /* remote is MPTCP capable */
		mp_join : 1,	    /* remote is JOINing */
		fully_established : 1,	    /* path validated */
		pm_notified : 1,    /* PM hook called for established status */
		conn_finished : 1,
		map_valid : 1,
		map_csum_reqd : 1,
		map_data_fin : 1,
		mpc_map : 1,
		backup : 1,
		send_mp_prio : 1,
		send_mp_fail : 1,
		send_fastclose : 1,
		send_infinite_map : 1,
		rx_eof : 1,
		remote_key_valid : 1,        /* the peer key has been received */
		disposable : 1,	    /* ctx can be freed at ulp release time */
		stale : 1,	    /* unable to snd/rcv data, do not use for xmit */
		local_id_valid : 1, /* local_id is correctly initialized */
		valid_csum_seen : 1,        /* at least one csum validated */
		is_mptfo : 1,	    /* subflow is doing TFO */
		__unused : 8;
	enum mptcp_data_avail data_avail;
	u32	remote_nonce;
	u64	thmac;
	u32	local_nonce;
	u32	remote_token;
	union {
		u8	hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */
		u64	iasn;	    /* initial ack sequence number, MPC subflows only */
	};
	u8	local_id;
	u8	remote_id;
	u8	reset_seen:1;
	u8	reset_transient:1;
	u8	reset_reason:4;
	u8	stale_count;

	long	delegated_status; /* bitmask of pending MPTCP_DELEGATE_* actions */
	unsigned long	fail_tout;

	);

	struct	list_head delegated_node;   /* link into delegated_action, protected by local BH */

	u32	setsockopt_seq;
	u32	stale_rcv_tstamp;

	struct	sock *tcp_sock;	    /* tcp sk backpointer */
	struct	sock *conn;	    /* parent mptcp_sock */
	const	struct inet_connection_sock_af_ops *icsk_af_ops;
	void	(*tcp_state_change)(struct sock *sk);
	void	(*tcp_error_report)(struct sock *sk);

	struct	rcu_head rcu;
};
 520
 521static inline struct mptcp_subflow_context *
 522mptcp_subflow_ctx(const struct sock *sk)
 523{
 524	struct inet_connection_sock *icsk = inet_csk(sk);
 525
 526	/* Use RCU on icsk_ulp_data only for sock diag code */
 527	return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data;
 528}
 529
 530static inline struct sock *
 531mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
 532{
 533	return subflow->tcp_sock;
 534}
 535
 536static inline void
 537mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
 538{
 539	memset(&subflow->reset, 0, sizeof(subflow->reset));
 540	subflow->request_mptcp = 1;
 541}
 542
 543static inline u64
 544mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow)
 545{
 546	return tcp_sk(mptcp_subflow_tcp_sock(subflow))->copied_seq -
 547		      subflow->ssn_offset -
 548		      subflow->map_subflow_seq;
 549}
 550
 551static inline u64
 552mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
 553{
 554	return subflow->map_seq + mptcp_subflow_get_map_offset(subflow);
 555}
 556
 557void mptcp_subflow_process_delegated(struct sock *ssk);
 558
 559static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
 560{
 561	struct mptcp_delegated_action *delegated;
 562	bool schedule;
 563
 564	/* the caller held the subflow bh socket lock */
 565	lockdep_assert_in_softirq();
 566
 567	/* The implied barrier pairs with mptcp_subflow_delegated_done(), and
 568	 * ensures the below list check sees list updates done prior to status
 569	 * bit changes
 570	 */
 571	if (!test_and_set_bit(action, &subflow->delegated_status)) {
 572		/* still on delegated list from previous scheduling */
 573		if (!list_empty(&subflow->delegated_node))
 574			return;
 575
 576		delegated = this_cpu_ptr(&mptcp_delegated_actions);
 577		schedule = list_empty(&delegated->head);
 578		list_add_tail(&subflow->delegated_node, &delegated->head);
 579		sock_hold(mptcp_subflow_tcp_sock(subflow));
 580		if (schedule)
 581			napi_schedule(&delegated->napi);
 582	}
 583}
 584
 585static inline struct mptcp_subflow_context *
 586mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)
 587{
 588	struct mptcp_subflow_context *ret;
 589
 590	if (list_empty(&delegated->head))
 591		return NULL;
 592
 593	ret = list_first_entry(&delegated->head, struct mptcp_subflow_context, delegated_node);
 594	list_del_init(&ret->delegated_node);
 595	return ret;
 596}
 597
 598static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow)
 599{
 600	return !!READ_ONCE(subflow->delegated_status);
 601}
 602
/* Clear the pending bit for @action in @subflow's delegated_status. */
static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow, int action)
{
	/* pairs with mptcp_subflow_delegate, ensures delegated_node is updated before
	 * touching the status bit
	 */
	smp_wmb();
	clear_bit(action, &subflow->delegated_status);
}
 611
 612int mptcp_is_enabled(const struct net *net);
 613unsigned int mptcp_get_add_addr_timeout(const struct net *net);
 614int mptcp_is_checksum_enabled(const struct net *net);
 615int mptcp_allow_join_id0(const struct net *net);
 616unsigned int mptcp_stale_loss_cnt(const struct net *net);
 617int mptcp_get_pm_type(const struct net *net);
 618void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk);
 619void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
 620				     const struct mptcp_options_received *mp_opt);
 621bool __mptcp_retransmit_pending_data(struct sock *sk);
 622void mptcp_check_and_set_pending(struct sock *sk);
 623void __mptcp_push_pending(struct sock *sk, unsigned int flags);
 624bool mptcp_subflow_data_available(struct sock *sk);
 625void __init mptcp_subflow_init(void);
 626void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
 627void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 628		     struct mptcp_subflow_context *subflow);
 629void __mptcp_subflow_send_ack(struct sock *ssk);
 630void mptcp_subflow_reset(struct sock *ssk);
 631void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
 632void mptcp_sock_graft(struct sock *sk, struct socket *parent);
 633struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
 634bool __mptcp_close(struct sock *sk, long timeout);
 635void mptcp_cancel_work(struct sock *sk);
 636void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
 637
 638bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
 639			   const struct mptcp_addr_info *b, bool use_port);
 640
 641/* called with sk socket lock held */
 642int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
 643			    const struct mptcp_addr_info *remote);
 644int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
 645				struct socket **new_sock);
 646void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
 647			 struct sockaddr_storage *addr,
 648			 unsigned short family);
 649
 650static inline bool __tcp_can_send(const struct sock *ssk)
 651{
 652	/* only send if our side has not closed yet */
 653	return ((1 << inet_sk_state_load(ssk)) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
 654}
 655
 656static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
 657{
 658	/* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */
 659	if (subflow->request_join && !subflow->fully_established)
 660		return false;
 661
 662	return __tcp_can_send(mptcp_subflow_tcp_sock(subflow));
 663}
 664
 665void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow);
 666
 667bool mptcp_subflow_active(struct mptcp_subflow_context *subflow);
 668
 669static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
 670					      struct mptcp_subflow_context *ctx)
 671{
 672	sk->sk_data_ready = sock_def_readable;
 673	sk->sk_state_change = ctx->tcp_state_change;
 674	sk->sk_write_space = sk_stream_write_space;
 675	sk->sk_error_report = ctx->tcp_error_report;
 676
 677	inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
 678}
 679
 680void __init mptcp_proto_init(void);
 681#if IS_ENABLED(CONFIG_MPTCP_IPV6)
 682int __init mptcp_proto_v6_init(void);
 683#endif
 684
 685struct sock *mptcp_sk_clone(const struct sock *sk,
 686			    const struct mptcp_options_received *mp_opt,
 687			    struct request_sock *req);
 688void mptcp_get_options(const struct sk_buff *skb,
 689		       struct mptcp_options_received *mp_opt);
 690
 691void mptcp_finish_connect(struct sock *sk);
 692void __mptcp_set_connected(struct sock *sk);
 693void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout);
 694static inline bool mptcp_is_fully_established(struct sock *sk)
 695{
 696	return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
 697	       READ_ONCE(mptcp_sk(sk)->fully_established);
 698}
 699void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
 700void mptcp_data_ready(struct sock *sk, struct sock *ssk);
 701bool mptcp_finish_join(struct sock *sk);
 702bool mptcp_schedule_work(struct sock *sk);
 703int mptcp_setsockopt(struct sock *sk, int level, int optname,
 704		     sockptr_t optval, unsigned int optlen);
 705int mptcp_getsockopt(struct sock *sk, int level, int optname,
 706		     char __user *optval, int __user *option);
 707
 708u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq);
 709static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit)
 710{
 711	if (use_64bit)
 712		return cur_seq;
 713
 714	return __mptcp_expand_seq(old_seq, cur_seq);
 715}
 716void __mptcp_check_push(struct sock *sk, struct sock *ssk);
 717void __mptcp_data_acked(struct sock *sk);
 718void __mptcp_error_report(struct sock *sk);
 719void mptcp_subflow_eof(struct sock *sk);
 720bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
 721static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
 722{
 723	return READ_ONCE(msk->snd_data_fin_enable) &&
 724	       READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
 725}
 726
 727static inline bool mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
 728{
 729	if ((sk->sk_userlocks & SOCK_SNDBUF_LOCK) || ssk->sk_sndbuf <= READ_ONCE(sk->sk_sndbuf))
 730		return false;
 731
 732	WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf);
 733	return true;
 734}
 735
 736static inline void mptcp_write_space(struct sock *sk)
 737{
 738	if (sk_stream_is_writeable(sk)) {
 739		/* pairs with memory barrier in mptcp_poll */
 740		smp_mb();
 741		if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
 742			sk_stream_write_space(sk);
 743	}
 744}
 745
 746void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);
 747
 748#define MPTCP_TOKEN_MAX_RETRIES	4
 749
 750void __init mptcp_token_init(void);
 751static inline void mptcp_token_init_request(struct request_sock *req)
 752{
 753	mptcp_subflow_rsk(req)->token_node.pprev = NULL;
 754}
 755
 756int mptcp_token_new_request(struct request_sock *req);
 757void mptcp_token_destroy_request(struct request_sock *req);
 758int mptcp_token_new_connect(struct sock *sk);
 759void mptcp_token_accept(struct mptcp_subflow_request_sock *r,
 760			struct mptcp_sock *msk);
 761bool mptcp_token_exists(u32 token);
 762struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token);
 763struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot,
 764					 long *s_num);
 765void mptcp_token_destroy(struct mptcp_sock *msk);
 766
 767void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
 768
 769void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
 770__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum);
 771
 772void __init mptcp_pm_init(void);
 773void mptcp_pm_data_init(struct mptcp_sock *msk);
 774void mptcp_pm_data_reset(struct mptcp_sock *msk);
 775int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
 776			struct mptcp_addr_info *addr);
 777int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
 778			 bool require_family,
 779			 struct mptcp_pm_addr_entry *entry);
 780bool mptcp_pm_addr_families_match(const struct sock *sk,
 781				  const struct mptcp_addr_info *loc,
 782				  const struct mptcp_addr_info *rem);
 783void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
 784void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
 785void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
 786void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
 787bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
 788void mptcp_pm_connection_closed(struct mptcp_sock *msk);
 789void mptcp_pm_subflow_established(struct mptcp_sock *msk);
 790bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk);
 791void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
 792				 const struct mptcp_subflow_context *subflow);
 793void mptcp_pm_add_addr_received(const struct sock *ssk,
 794				const struct mptcp_addr_info *addr);
 795void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
 796			      const struct mptcp_addr_info *addr);
 797void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk);
 798void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk);
 799void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
 800			       const struct mptcp_rm_list *rm_list);
 801void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
 802void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
 803int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
 804				 struct mptcp_addr_info *addr,
 805				 struct mptcp_addr_info *rem,
 806				 u8 bkup);
 807bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
 808			      const struct mptcp_pm_addr_entry *entry);
 809void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
 810bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
 811struct mptcp_pm_add_entry *
 812mptcp_pm_del_add_timer(struct mptcp_sock *msk,
 813		       const struct mptcp_addr_info *addr, bool check_id);
 814struct mptcp_pm_add_entry *
 815mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
 816				const struct mptcp_addr_info *addr);
 817int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
 818					 unsigned int id,
 819					 u8 *flags, int *ifindex);
 820int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
 821						   unsigned int id,
 822						   u8 *flags, int *ifindex);
 823int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
 824				 struct mptcp_pm_addr_entry *loc,
 825				 struct mptcp_pm_addr_entry *rem, u8 bkup);
 826int mptcp_pm_announce_addr(struct mptcp_sock *msk,
 827			   const struct mptcp_addr_info *addr,
 828			   bool echo);
 829int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
 830int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
 831void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
 832					struct list_head *rm_list);
 833
 834int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
 835					     struct mptcp_pm_addr_entry *entry);
 836void mptcp_free_local_addr_list(struct mptcp_sock *msk);
 837int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info);
 838int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info);
 839int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info);
 840int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info);
 841
 842void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
 843		 const struct sock *ssk, gfp_t gfp);
 844void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info);
 845void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
 846void mptcp_event_pm_listener(const struct sock *ssk,
 847			     enum mptcp_event_type event);
 848bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
 849
 850void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
 851				   const struct mptcp_options_received *mp_opt);
 852void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
 853					      struct request_sock *req);
 854
 855static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
 856{
 857	return READ_ONCE(msk->pm.addr_signal) &
 858		(BIT(MPTCP_ADD_ADDR_SIGNAL) | BIT(MPTCP_ADD_ADDR_ECHO));
 859}
 860
 861static inline bool mptcp_pm_should_add_signal_addr(struct mptcp_sock *msk)
 862{
 863	return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL);
 864}
 865
 866static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk)
 867{
 868	return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO);
 869}
 870
 871static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk)
 872{
 873	return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_RM_ADDR_SIGNAL);
 874}
 875
 876static inline bool mptcp_pm_is_userspace(const struct mptcp_sock *msk)
 877{
 878	return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_USERSPACE;
 879}
 880
 881static inline bool mptcp_pm_is_kernel(const struct mptcp_sock *msk)
 882{
 883	return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_KERNEL;
 884}
 885
 886static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port)
 887{
 888	u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
 889
 890	if (family == AF_INET6)
 891		len = TCPOLEN_MPTCP_ADD_ADDR6_BASE;
 892	if (!echo)
 893		len += MPTCPOPT_THMAC_LEN;
 894	/* account for 2 trailing 'nop' options */
 895	if (port)
 896		len += TCPOLEN_MPTCP_PORT_LEN + TCPOLEN_MPTCP_PORT_ALIGN;
 897
 898	return len;
 899}
 900
 901static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list)
 902{
 903	if (rm_list->nr == 0 || rm_list->nr > MPTCP_RM_IDS_MAX)
 904		return -EINVAL;
 905
 906	return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1;
 907}
 908
 909bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
 910			      unsigned int opt_size, unsigned int remaining,
 911			      struct mptcp_addr_info *addr, bool *echo,
 912			      bool *drop_other_suboptions);
 913bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
 914			     struct mptcp_rm_list *rm_list);
 915int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
 916int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
 917
 918void __init mptcp_pm_nl_init(void);
 919void mptcp_pm_nl_work(struct mptcp_sock *msk);
 920void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
 921				     const struct mptcp_rm_list *rm_list);
 922int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
 923unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk);
 924unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk);
 925unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk);
 926unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk);
 927
 928/* called under PM lock */
 929static inline void __mptcp_pm_close_subflow(struct mptcp_sock *msk)
 930{
 931	if (--msk->pm.subflows < mptcp_pm_get_subflows_max(msk))
 932		WRITE_ONCE(msk->pm.accept_subflow, true);
 933}
 934
 935static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk)
 936{
 937	spin_lock_bh(&msk->pm.lock);
 938	__mptcp_pm_close_subflow(msk);
 939	spin_unlock_bh(&msk->pm.lock);
 940}
 941
 942void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
 943void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);
 944
 945static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
 946{
 947	return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
 948}
 949
 950void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops);
 951
 952static inline bool __mptcp_check_fallback(const struct mptcp_sock *msk)
 953{
 954	return test_bit(MPTCP_FALLBACK_DONE, &msk->flags);
 955}
 956
 957static inline bool mptcp_check_fallback(const struct sock *sk)
 958{
 959	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 960	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
 961
 962	return __mptcp_check_fallback(msk);
 963}
 964
 965static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
 966{
 967	if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) {
 968		pr_debug("TCP fallback already done (msk=%p)", msk);
 969		return;
 970	}
 971	set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
 972}
 973
 974static inline void mptcp_do_fallback(struct sock *ssk)
 975{
 976	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
 977	struct sock *sk = subflow->conn;
 978	struct mptcp_sock *msk;
 979
 980	msk = mptcp_sk(sk);
 981	__mptcp_do_fallback(msk);
 982	if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) {
 983		gfp_t saved_allocation = ssk->sk_allocation;
 984
 985		/* we are in a atomic (BH) scope, override ssk default for data
 986		 * fin allocation
 987		 */
 988		ssk->sk_allocation = GFP_ATOMIC;
 989		ssk->sk_shutdown |= SEND_SHUTDOWN;
 990		tcp_shutdown(ssk, SEND_SHUTDOWN);
 991		ssk->sk_allocation = saved_allocation;
 992	}
 993}
 994
 995#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a)
 996
 997static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
 998{
 999	struct mptcp_ext *mpext;
1000
1001	mpext = skb ? mptcp_get_ext(skb) : NULL;
1002	if (mpext && mpext->infinite_map)
1003		return true;
1004
1005	return false;
1006}
1007
1008static inline bool is_active_ssk(struct mptcp_subflow_context *subflow)
1009{
1010	return (subflow->request_mptcp || subflow->request_join);
1011}
1012
1013static inline bool subflow_simultaneous_connect(struct sock *sk)
1014{
1015	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
1016
1017	return sk->sk_state == TCP_ESTABLISHED &&
1018	       is_active_ssk(subflow) &&
1019	       !subflow->conn_finished;
1020}
1021
1022#ifdef CONFIG_SYN_COOKIES
1023void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req,
1024				       struct sk_buff *skb);
1025bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req,
1026					struct sk_buff *skb);
1027void __init mptcp_join_cookie_init(void);
1028#else
/* Stub for builds without CONFIG_SYN_COOKIES: nothing is saved. */
static inline void
subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req,
				  struct sk_buff *skb)
{
}
/* Stub for builds without CONFIG_SYN_COOKIES: always reports false and
 * leaves both arguments untouched.
 */
static inline bool
mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req,
				   struct sk_buff *skb)
{
	const bool initialized = false;

	return initialized;
}
1038
/* Stub for builds without CONFIG_SYN_COOKIES: no initialization needed. */
static inline void mptcp_join_cookie_init(void)
{
}
1040#endif
1041
1042#endif /* __MPTCP_PROTOCOL_H */