Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.15.
  1// SPDX-License-Identifier: GPL-2.0
  2/* Multipath TCP
  3 *
  4 * Copyright (c) 2019, Tessares SA.
  5 */
  6
  7#ifdef CONFIG_SYSCTL
  8#include <linux/sysctl.h>
  9#endif
 10
 11#include <net/net_namespace.h>
 12#include <net/netns/generic.h>
 13
 14#include "protocol.h"
 15#include "mib.h"
 16
 17#define MPTCP_SYSCTL_PATH "net/mptcp"
 18
 19static int mptcp_pernet_id;
 20
 21#ifdef CONFIG_SYSCTL
 22static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX;
 23#endif
 24
 25struct mptcp_pernet {
 26#ifdef CONFIG_SYSCTL
 27	struct ctl_table_header *ctl_table_hdr;
 28#endif
 29
 30	unsigned int add_addr_timeout;
 31	unsigned int blackhole_timeout;
 32	unsigned int close_timeout;
 33	unsigned int stale_loss_cnt;
 34	atomic_t active_disable_times;
 35	unsigned long active_disable_stamp;
 36	u8 mptcp_enabled;
 37	u8 checksum_enabled;
 38	u8 allow_join_initial_addr_port;
 39	u8 pm_type;
 40	char scheduler[MPTCP_SCHED_NAME_MAX];
 41};
 42
 43static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
 44{
 45	return net_generic(net, mptcp_pernet_id);
 46}
 47
 48int mptcp_is_enabled(const struct net *net)
 49{
 50	return mptcp_get_pernet(net)->mptcp_enabled;
 51}
 52
 53unsigned int mptcp_get_add_addr_timeout(const struct net *net)
 54{
 55	return mptcp_get_pernet(net)->add_addr_timeout;
 56}
 57
 58int mptcp_is_checksum_enabled(const struct net *net)
 59{
 60	return mptcp_get_pernet(net)->checksum_enabled;
 61}
 62
 63int mptcp_allow_join_id0(const struct net *net)
 64{
 65	return mptcp_get_pernet(net)->allow_join_initial_addr_port;
 66}
 67
 68unsigned int mptcp_stale_loss_cnt(const struct net *net)
 69{
 70	return mptcp_get_pernet(net)->stale_loss_cnt;
 71}
 72
 73unsigned int mptcp_close_timeout(const struct sock *sk)
 74{
 75	if (sock_flag(sk, SOCK_DEAD))
 76		return TCP_TIMEWAIT_LEN;
 77	return mptcp_get_pernet(sock_net(sk))->close_timeout;
 78}
 79
 80int mptcp_get_pm_type(const struct net *net)
 81{
 82	return mptcp_get_pernet(net)->pm_type;
 83}
 84
 85const char *mptcp_get_scheduler(const struct net *net)
 86{
 87	return mptcp_get_pernet(net)->scheduler;
 88}
 89
 90static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
 91{
 92	pernet->mptcp_enabled = 1;
 93	pernet->add_addr_timeout = TCP_RTO_MAX;
 94	pernet->blackhole_timeout = 3600;
 95	atomic_set(&pernet->active_disable_times, 0);
 96	pernet->close_timeout = TCP_TIMEWAIT_LEN;
 97	pernet->checksum_enabled = 0;
 98	pernet->allow_join_initial_addr_port = 1;
 99	pernet->stale_loss_cnt = 4;
100	pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
101	strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler));
102}
103
104#ifdef CONFIG_SYSCTL
105static int mptcp_set_scheduler(char *scheduler, const char *name)
106{
107	struct mptcp_sched_ops *sched;
108	int ret = 0;
109
110	rcu_read_lock();
111	sched = mptcp_sched_find(name);
112	if (sched)
113		strscpy(scheduler, name, MPTCP_SCHED_NAME_MAX);
114	else
115		ret = -ENOENT;
116	rcu_read_unlock();
117
118	return ret;
119}
120
121static int proc_scheduler(const struct ctl_table *ctl, int write,
122			  void *buffer, size_t *lenp, loff_t *ppos)
123{
124	char (*scheduler)[MPTCP_SCHED_NAME_MAX] = ctl->data;
125	char val[MPTCP_SCHED_NAME_MAX];
126	struct ctl_table tbl = {
127		.data = val,
128		.maxlen = MPTCP_SCHED_NAME_MAX,
129	};
130	int ret;
131
132	strscpy(val, *scheduler, MPTCP_SCHED_NAME_MAX);
133
134	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
135	if (write && ret == 0)
136		ret = mptcp_set_scheduler(*scheduler, val);
137
138	return ret;
139}
140
141static int proc_available_schedulers(const struct ctl_table *ctl,
142				     int write, void *buffer,
143				     size_t *lenp, loff_t *ppos)
144{
145	struct ctl_table tbl = { .maxlen = MPTCP_SCHED_BUF_MAX, };
146	int ret;
147
148	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
149	if (!tbl.data)
150		return -ENOMEM;
151
152	mptcp_get_available_schedulers(tbl.data, MPTCP_SCHED_BUF_MAX);
153	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
154	kfree(tbl.data);
155
156	return ret;
157}
158
159static int proc_blackhole_detect_timeout(const struct ctl_table *table,
160					 int write, void *buffer, size_t *lenp,
161					 loff_t *ppos)
162{
163	struct mptcp_pernet *pernet = container_of(table->data,
164						   struct mptcp_pernet,
165						   blackhole_timeout);
166	int ret;
167
168	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
169	if (write && ret == 0)
170		atomic_set(&pernet->active_disable_times, 0);
171
172	return ret;
173}
174
175static struct ctl_table mptcp_sysctl_table[] = {
176	{
177		.procname = "enabled",
178		.maxlen = sizeof(u8),
179		.mode = 0644,
180		/* users with CAP_NET_ADMIN or root (not and) can change this
181		 * value, same as other sysctl or the 'net' tree.
182		 */
183		.proc_handler = proc_dou8vec_minmax,
184		.extra1       = SYSCTL_ZERO,
185		.extra2       = SYSCTL_ONE
186	},
187	{
188		.procname = "add_addr_timeout",
189		.maxlen = sizeof(unsigned int),
190		.mode = 0644,
191		.proc_handler = proc_dointvec_jiffies,
192	},
193	{
194		.procname = "checksum_enabled",
195		.maxlen = sizeof(u8),
196		.mode = 0644,
197		.proc_handler = proc_dou8vec_minmax,
198		.extra1       = SYSCTL_ZERO,
199		.extra2       = SYSCTL_ONE
200	},
201	{
202		.procname = "allow_join_initial_addr_port",
203		.maxlen = sizeof(u8),
204		.mode = 0644,
205		.proc_handler = proc_dou8vec_minmax,
206		.extra1       = SYSCTL_ZERO,
207		.extra2       = SYSCTL_ONE
208	},
209	{
210		.procname = "stale_loss_cnt",
211		.maxlen = sizeof(unsigned int),
212		.mode = 0644,
213		.proc_handler = proc_douintvec_minmax,
214	},
215	{
216		.procname = "pm_type",
217		.maxlen = sizeof(u8),
218		.mode = 0644,
219		.proc_handler = proc_dou8vec_minmax,
220		.extra1       = SYSCTL_ZERO,
221		.extra2       = &mptcp_pm_type_max
222	},
223	{
224		.procname = "scheduler",
225		.maxlen	= MPTCP_SCHED_NAME_MAX,
226		.mode = 0644,
227		.proc_handler = proc_scheduler,
228	},
229	{
230		.procname = "available_schedulers",
231		.maxlen	= MPTCP_SCHED_BUF_MAX,
232		.mode = 0444,
233		.proc_handler = proc_available_schedulers,
234	},
235	{
236		.procname = "close_timeout",
237		.maxlen = sizeof(unsigned int),
238		.mode = 0644,
239		.proc_handler = proc_dointvec_jiffies,
240	},
241	{
242		.procname = "blackhole_timeout",
243		.maxlen = sizeof(unsigned int),
244		.mode = 0644,
245		.proc_handler = proc_blackhole_detect_timeout,
246		.extra1 = SYSCTL_ZERO,
247	},
248};
249
250static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
251{
252	struct ctl_table_header *hdr;
253	struct ctl_table *table;
254
255	table = mptcp_sysctl_table;
256	if (!net_eq(net, &init_net)) {
257		table = kmemdup(table, sizeof(mptcp_sysctl_table), GFP_KERNEL);
258		if (!table)
259			goto err_alloc;
260	}
261
262	table[0].data = &pernet->mptcp_enabled;
263	table[1].data = &pernet->add_addr_timeout;
264	table[2].data = &pernet->checksum_enabled;
265	table[3].data = &pernet->allow_join_initial_addr_port;
266	table[4].data = &pernet->stale_loss_cnt;
267	table[5].data = &pernet->pm_type;
268	table[6].data = &pernet->scheduler;
269	/* table[7] is for available_schedulers which is read-only info */
270	table[8].data = &pernet->close_timeout;
271	table[9].data = &pernet->blackhole_timeout;
272
273	hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
274				     ARRAY_SIZE(mptcp_sysctl_table));
275	if (!hdr)
276		goto err_reg;
277
278	pernet->ctl_table_hdr = hdr;
279
280	return 0;
281
282err_reg:
283	if (!net_eq(net, &init_net))
284		kfree(table);
285err_alloc:
286	return -ENOMEM;
287}
288
289static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
290{
291	const struct ctl_table *table = pernet->ctl_table_hdr->ctl_table_arg;
292
293	unregister_net_sysctl_table(pernet->ctl_table_hdr);
294
295	kfree(table);
296}
297
298#else
299
/* Without CONFIG_SYSCTL there is nothing to register: always succeeds. */
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
{
	return 0;
}
304
/* Without CONFIG_SYSCTL there is nothing to unregister. */
static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
{
}
306
307#endif /* CONFIG_SYSCTL */
308
309/* The following code block is to deal with middle box issues with MPTCP,
310 * similar to what is done with TFO.
311 * The proposed solution is to disable active MPTCP globally when SYN+MPC are
312 * dropped, while SYN without MPC aren't. In this case, active side MPTCP is
313 * disabled globally for 1hr at first. Then if it happens again, it is disabled
314 * for 2h, then 4h, 8h, ...
315 * The timeout is reset back to 1hr when a successful active MPTCP connection is
316 * fully established.
317 */
318
319/* Disable active MPTCP and record current jiffies and active_disable_times */
320void mptcp_active_disable(struct sock *sk)
321{
322	struct net *net = sock_net(sk);
323	struct mptcp_pernet *pernet;
324
325	pernet = mptcp_get_pernet(net);
326
327	if (!READ_ONCE(pernet->blackhole_timeout))
328		return;
329
330	/* Paired with READ_ONCE() in mptcp_active_should_disable() */
331	WRITE_ONCE(pernet->active_disable_stamp, jiffies);
332
333	/* Paired with smp_rmb() in mptcp_active_should_disable().
334	 * We want pernet->active_disable_stamp to be updated first.
335	 */
336	smp_mb__before_atomic();
337	atomic_inc(&pernet->active_disable_times);
338
339	MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE);
340}
341
342/* Calculate timeout for MPTCP active disable
343 * Return true if we are still in the active MPTCP disable period
344 * Return false if timeout already expired and we should use active MPTCP
345 */
346bool mptcp_active_should_disable(struct sock *ssk)
347{
348	struct net *net = sock_net(ssk);
349	unsigned int blackhole_timeout;
350	struct mptcp_pernet *pernet;
351	unsigned long timeout;
352	int disable_times;
353	int multiplier;
354
355	pernet = mptcp_get_pernet(net);
356	blackhole_timeout = READ_ONCE(pernet->blackhole_timeout);
357
358	if (!blackhole_timeout)
359		return false;
360
361	disable_times = atomic_read(&pernet->active_disable_times);
362	if (!disable_times)
363		return false;
364
365	/* Paired with smp_mb__before_atomic() in mptcp_active_disable() */
366	smp_rmb();
367
368	/* Limit timeout to max: 2^6 * initial timeout */
369	multiplier = 1 << min(disable_times - 1, 6);
370
371	/* Paired with the WRITE_ONCE() in mptcp_active_disable(). */
372	timeout = READ_ONCE(pernet->active_disable_stamp) +
373		  multiplier * blackhole_timeout * HZ;
374
375	return time_before(jiffies, timeout);
376}
377
378/* Enable active MPTCP and reset active_disable_times if needed */
379void mptcp_active_enable(struct sock *sk)
380{
381	struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk));
382
383	if (atomic_read(&pernet->active_disable_times)) {
384		struct dst_entry *dst = sk_dst_get(sk);
385
386		if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))
387			atomic_set(&pernet->active_disable_times, 0);
388	}
389}
390
391/* Check the number of retransmissions, and fallback to TCP if needed */
392void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
393{
394	struct mptcp_subflow_context *subflow;
395	u32 timeouts;
396
397	if (!sk_is_mptcp(ssk))
398		return;
399
400	timeouts = inet_csk(ssk)->icsk_retransmits;
401	subflow = mptcp_subflow_ctx(ssk);
402
403	if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) {
404		if (timeouts == 2 || (timeouts < 2 && expired)) {
405			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
406			subflow->mpc_drop = 1;
407			mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
408		}
409	} else if (ssk->sk_state == TCP_SYN_SENT) {
410		subflow->mpc_drop = 0;
411	}
412}
413
414static int __net_init mptcp_net_init(struct net *net)
415{
416	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
417
418	mptcp_pernet_set_defaults(pernet);
419
420	return mptcp_pernet_new_table(net, pernet);
421}
422
423/* Note: the callback will only be called per extra netns */
424static void __net_exit mptcp_net_exit(struct net *net)
425{
426	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
427
428	mptcp_pernet_del_table(pernet);
429}
430
431static struct pernet_operations mptcp_pernet_ops = {
432	.init = mptcp_net_init,
433	.exit = mptcp_net_exit,
434	.id = &mptcp_pernet_id,
435	.size = sizeof(struct mptcp_pernet),
436};
437
438void __init mptcp_init(void)
439{
440	mptcp_join_cookie_init();
441	mptcp_proto_init();
442
443	if (register_pernet_subsys(&mptcp_pernet_ops) < 0)
444		panic("Failed to register MPTCP pernet subsystem.\n");
445}
446
447#if IS_ENABLED(CONFIG_MPTCP_IPV6)
448int __init mptcpv6_init(void)
449{
450	int err;
451
452	err = mptcp_proto_v6_init();
453
454	return err;
455}
456#endif