Linux Audio

Check our new training course

Loading...
v5.14.15
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */
  21
  22#include <linux/bottom_half.h>
  23#include <linux/module.h>
  24#include <linux/errno.h>
  25#include <linux/types.h>
  26#include <linux/socket.h>
  27#include <linux/sockios.h>
  28#include <linux/net.h>
  29#include <linux/jiffies.h>
  30#include <linux/in.h>
  31#include <linux/in6.h>
  32#include <linux/netdevice.h>
  33#include <linux/init.h>
  34#include <linux/jhash.h>
  35#include <linux/ipsec.h>
  36#include <linux/times.h>
  37#include <linux/slab.h>
  38#include <linux/uaccess.h>
  39#include <linux/ipv6.h>
  40#include <linux/icmpv6.h>
  41#include <linux/random.h>
  42#include <linux/indirect_call_wrapper.h>
  43
  44#include <net/tcp.h>
  45#include <net/ndisc.h>
  46#include <net/inet6_hashtables.h>
  47#include <net/inet6_connection_sock.h>
  48#include <net/ipv6.h>
  49#include <net/transp_v6.h>
  50#include <net/addrconf.h>
  51#include <net/ip6_route.h>
  52#include <net/ip6_checksum.h>
  53#include <net/inet_ecn.h>
  54#include <net/protocol.h>
  55#include <net/xfrm.h>
  56#include <net/snmp.h>
  57#include <net/dsfield.h>
  58#include <net/timewait_sock.h>
  59#include <net/inet_common.h>
  60#include <net/secure_seq.h>
  61#include <net/busy_poll.h>
  62
  63#include <linux/proc_fs.h>
  64#include <linux/seq_file.h>
  65
  66#include <crypto/hash.h>
  67#include <linux/scatterlist.h>
  68
  69#include <trace/events/tcp.h>
  70
  71static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
  72static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
  73				      struct request_sock *req);
  74
  75static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
  76
  77static const struct inet_connection_sock_af_ops ipv6_mapped;
  78const struct inet_connection_sock_af_ops ipv6_specific;
  79#ifdef CONFIG_TCP_MD5SIG
  80static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
  81static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
  82#else
  83static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
  84						   const struct in6_addr *addr,
  85						   int l3index)
  86{
  87	return NULL;
  88}
  89#endif
  90
  91/* Helper returning the inet6 address from a given tcp socket.
  92 * It can be used in TCP stack instead of inet6_sk(sk).
  93 * This avoids a dereference and allow compiler optimizations.
  94 * It is a specialized version of inet6_sk_generic().
  95 */
  96static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
  97{
  98	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
  99
 100	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
 101}
 102
 103static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 104{
 105	struct dst_entry *dst = skb_dst(skb);
 106
 107	if (dst && dst_hold_safe(dst)) {
 108		const struct rt6_info *rt = (const struct rt6_info *)dst;
 109
 110		sk->sk_rx_dst = dst;
 111		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
 112		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
 113	}
 114}
 115
 116static u32 tcp_v6_init_seq(const struct sk_buff *skb)
 117{
 118	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
 119				ipv6_hdr(skb)->saddr.s6_addr32,
 120				tcp_hdr(skb)->dest,
 121				tcp_hdr(skb)->source);
 122}
 123
 124static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
 125{
 126	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
 127				   ipv6_hdr(skb)->saddr.s6_addr32);
 128}
 129
 130static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
 131			      int addr_len)
 132{
 133	/* This check is replicated from tcp_v6_connect() and intended to
 134	 * prevent BPF program called below from accessing bytes that are out
 135	 * of the bound specified by user in addr_len.
 136	 */
 137	if (addr_len < SIN6_LEN_RFC2133)
 138		return -EINVAL;
 139
 140	sock_owned_by_me(sk);
 141
 142	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
 143}
 144
 145static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 146			  int addr_len)
 147{
 148	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
 149	struct inet_sock *inet = inet_sk(sk);
 150	struct inet_connection_sock *icsk = inet_csk(sk);
 151	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
 152	struct tcp_sock *tp = tcp_sk(sk);
 153	struct in6_addr *saddr = NULL, *final_p, final;
 154	struct ipv6_txoptions *opt;
 155	struct flowi6 fl6;
 156	struct dst_entry *dst;
 157	int addr_type;
 158	int err;
 159	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
 160
 161	if (addr_len < SIN6_LEN_RFC2133)
 162		return -EINVAL;
 163
 164	if (usin->sin6_family != AF_INET6)
 165		return -EAFNOSUPPORT;
 166
 167	memset(&fl6, 0, sizeof(fl6));
 168
 169	if (np->sndflow) {
 170		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
 171		IP6_ECN_flow_init(fl6.flowlabel);
 172		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
 173			struct ip6_flowlabel *flowlabel;
 174			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
 175			if (IS_ERR(flowlabel))
 176				return -EINVAL;
 177			fl6_sock_release(flowlabel);
 178		}
 179	}
 180
 181	/*
 182	 *	connect() to INADDR_ANY means loopback (BSD'ism).
 183	 */
 184
 185	if (ipv6_addr_any(&usin->sin6_addr)) {
 186		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
 187			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
 188					       &usin->sin6_addr);
 189		else
 190			usin->sin6_addr = in6addr_loopback;
 191	}
 192
 193	addr_type = ipv6_addr_type(&usin->sin6_addr);
 194
 195	if (addr_type & IPV6_ADDR_MULTICAST)
 196		return -ENETUNREACH;
 197
 198	if (addr_type&IPV6_ADDR_LINKLOCAL) {
 199		if (addr_len >= sizeof(struct sockaddr_in6) &&
 200		    usin->sin6_scope_id) {
 201			/* If interface is set while binding, indices
 202			 * must coincide.
 203			 */
 204			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
 205				return -EINVAL;
 206
 207			sk->sk_bound_dev_if = usin->sin6_scope_id;
 208		}
 209
 210		/* Connect to link-local address requires an interface */
 211		if (!sk->sk_bound_dev_if)
 212			return -EINVAL;
 213	}
 214
 215	if (tp->rx_opt.ts_recent_stamp &&
 216	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
 217		tp->rx_opt.ts_recent = 0;
 218		tp->rx_opt.ts_recent_stamp = 0;
 219		WRITE_ONCE(tp->write_seq, 0);
 220	}
 221
 222	sk->sk_v6_daddr = usin->sin6_addr;
 223	np->flow_label = fl6.flowlabel;
 224
 225	/*
 226	 *	TCP over IPv4
 227	 */
 228
 229	if (addr_type & IPV6_ADDR_MAPPED) {
 230		u32 exthdrlen = icsk->icsk_ext_hdr_len;
 231		struct sockaddr_in sin;
 232
 233		if (__ipv6_only_sock(sk))
 234			return -ENETUNREACH;
 235
 236		sin.sin_family = AF_INET;
 237		sin.sin_port = usin->sin6_port;
 238		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
 239
 240		icsk->icsk_af_ops = &ipv6_mapped;
 241		if (sk_is_mptcp(sk))
 242			mptcpv6_handle_mapped(sk, true);
 243		sk->sk_backlog_rcv = tcp_v4_do_rcv;
 244#ifdef CONFIG_TCP_MD5SIG
 245		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
 246#endif
 247
 248		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
 249
 250		if (err) {
 251			icsk->icsk_ext_hdr_len = exthdrlen;
 252			icsk->icsk_af_ops = &ipv6_specific;
 253			if (sk_is_mptcp(sk))
 254				mptcpv6_handle_mapped(sk, false);
 255			sk->sk_backlog_rcv = tcp_v6_do_rcv;
 256#ifdef CONFIG_TCP_MD5SIG
 257			tp->af_specific = &tcp_sock_ipv6_specific;
 258#endif
 259			goto failure;
 260		}
 261		np->saddr = sk->sk_v6_rcv_saddr;
 262
 263		return err;
 264	}
 265
 266	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
 267		saddr = &sk->sk_v6_rcv_saddr;
 268
 269	fl6.flowi6_proto = IPPROTO_TCP;
 270	fl6.daddr = sk->sk_v6_daddr;
 271	fl6.saddr = saddr ? *saddr : np->saddr;
 272	fl6.flowi6_oif = sk->sk_bound_dev_if;
 273	fl6.flowi6_mark = sk->sk_mark;
 274	fl6.fl6_dport = usin->sin6_port;
 275	fl6.fl6_sport = inet->inet_sport;
 276	fl6.flowi6_uid = sk->sk_uid;
 277
 278	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
 279	final_p = fl6_update_dst(&fl6, opt, &final);
 280
 281	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
 282
 283	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
 284	if (IS_ERR(dst)) {
 285		err = PTR_ERR(dst);
 286		goto failure;
 287	}
 288
 289	if (!saddr) {
 290		saddr = &fl6.saddr;
 291		sk->sk_v6_rcv_saddr = *saddr;
 292	}
 293
 294	/* set the source address */
 295	np->saddr = *saddr;
 296	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 297
 298	sk->sk_gso_type = SKB_GSO_TCPV6;
 299	ip6_dst_store(sk, dst, NULL, NULL);
 300
 301	icsk->icsk_ext_hdr_len = 0;
 302	if (opt)
 303		icsk->icsk_ext_hdr_len = opt->opt_flen +
 304					 opt->opt_nflen;
 305
 306	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 307
 308	inet->inet_dport = usin->sin6_port;
 309
 310	tcp_set_state(sk, TCP_SYN_SENT);
 311	err = inet6_hash_connect(tcp_death_row, sk);
 312	if (err)
 313		goto late_failure;
 314
 315	sk_set_txhash(sk);
 316
 317	if (likely(!tp->repair)) {
 318		if (!tp->write_seq)
 319			WRITE_ONCE(tp->write_seq,
 320				   secure_tcpv6_seq(np->saddr.s6_addr32,
 321						    sk->sk_v6_daddr.s6_addr32,
 322						    inet->inet_sport,
 323						    inet->inet_dport));
 324		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
 325						   np->saddr.s6_addr32,
 326						   sk->sk_v6_daddr.s6_addr32);
 327	}
 328
 329	if (tcp_fastopen_defer_connect(sk, &err))
 330		return err;
 331	if (err)
 332		goto late_failure;
 333
 334	err = tcp_connect(sk);
 335	if (err)
 336		goto late_failure;
 337
 338	return 0;
 339
 340late_failure:
 341	tcp_set_state(sk, TCP_CLOSE);
 342failure:
 343	inet->inet_dport = 0;
 344	sk->sk_route_caps = 0;
 345	return err;
 346}
 347
 348static void tcp_v6_mtu_reduced(struct sock *sk)
 349{
 350	struct dst_entry *dst;
 351	u32 mtu;
 352
 353	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
 354		return;
 355
 356	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
 357
 358	/* Drop requests trying to increase our current mss.
 359	 * Check done in __ip6_rt_update_pmtu() is too late.
 360	 */
 361	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
 362		return;
 363
 364	dst = inet6_csk_update_pmtu(sk, mtu);
 365	if (!dst)
 366		return;
 367
 368	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
 369		tcp_sync_mss(sk, dst_mtu(dst));
 370		tcp_simple_retransmit(sk);
 371	}
 372}
 373
 374static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 375		u8 type, u8 code, int offset, __be32 info)
 376{
 377	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
 378	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
 379	struct net *net = dev_net(skb->dev);
 380	struct request_sock *fastopen;
 381	struct ipv6_pinfo *np;
 382	struct tcp_sock *tp;
 383	__u32 seq, snd_una;
 384	struct sock *sk;
 385	bool fatal;
 386	int err;
 387
 388	sk = __inet6_lookup_established(net, &tcp_hashinfo,
 389					&hdr->daddr, th->dest,
 390					&hdr->saddr, ntohs(th->source),
 391					skb->dev->ifindex, inet6_sdif(skb));
 392
 393	if (!sk) {
 394		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
 395				  ICMP6_MIB_INERRORS);
 396		return -ENOENT;
 397	}
 398
 399	if (sk->sk_state == TCP_TIME_WAIT) {
 400		inet_twsk_put(inet_twsk(sk));
 401		return 0;
 402	}
 403	seq = ntohl(th->seq);
 404	fatal = icmpv6_err_convert(type, code, &err);
 405	if (sk->sk_state == TCP_NEW_SYN_RECV) {
 406		tcp_req_err(sk, seq, fatal);
 407		return 0;
 408	}
 409
 410	bh_lock_sock(sk);
 411	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
 412		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
 413
 414	if (sk->sk_state == TCP_CLOSE)
 415		goto out;
 416
 417	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
 418		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
 419		goto out;
 420	}
 421
 422	tp = tcp_sk(sk);
 423	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
 424	fastopen = rcu_dereference(tp->fastopen_rsk);
 425	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
 426	if (sk->sk_state != TCP_LISTEN &&
 427	    !between(seq, snd_una, tp->snd_nxt)) {
 428		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
 429		goto out;
 430	}
 431
 432	np = tcp_inet6_sk(sk);
 433
 434	if (type == NDISC_REDIRECT) {
 435		if (!sock_owned_by_user(sk)) {
 436			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
 437
 438			if (dst)
 439				dst->ops->redirect(dst, sk, skb);
 440		}
 441		goto out;
 442	}
 443
 444	if (type == ICMPV6_PKT_TOOBIG) {
 445		u32 mtu = ntohl(info);
 446
 447		/* We are not interested in TCP_LISTEN and open_requests
 448		 * (SYN-ACKs send out by Linux are always <576bytes so
 449		 * they should go through unfragmented).
 450		 */
 451		if (sk->sk_state == TCP_LISTEN)
 452			goto out;
 453
 454		if (!ip6_sk_accept_pmtu(sk))
 455			goto out;
 456
 457		if (mtu < IPV6_MIN_MTU)
 458			goto out;
 459
 460		WRITE_ONCE(tp->mtu_info, mtu);
 461
 462		if (!sock_owned_by_user(sk))
 463			tcp_v6_mtu_reduced(sk);
 464		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
 465					   &sk->sk_tsq_flags))
 466			sock_hold(sk);
 467		goto out;
 468	}
 469
 470
 471	/* Might be for an request_sock */
 472	switch (sk->sk_state) {
 473	case TCP_SYN_SENT:
 474	case TCP_SYN_RECV:
 475		/* Only in fast or simultaneous open. If a fast open socket is
 476		 * already accepted it is treated as a connected one below.
 477		 */
 478		if (fastopen && !fastopen->sk)
 479			break;
 480
 481		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
 482
 483		if (!sock_owned_by_user(sk)) {
 484			sk->sk_err = err;
 485			sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
 486
 487			tcp_done(sk);
 488		} else
 489			sk->sk_err_soft = err;
 490		goto out;
 491	case TCP_LISTEN:
 492		break;
 493	default:
 494		/* check if this ICMP message allows revert of backoff.
 495		 * (see RFC 6069)
 496		 */
 497		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
 498		    code == ICMPV6_NOROUTE)
 499			tcp_ld_RTO_revert(sk, seq);
 500	}
 501
 502	if (!sock_owned_by_user(sk) && np->recverr) {
 503		sk->sk_err = err;
 504		sk_error_report(sk);
 505	} else
 506		sk->sk_err_soft = err;
 507
 508out:
 509	bh_unlock_sock(sk);
 510	sock_put(sk);
 511	return 0;
 512}
 513
 514
 515static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
 516			      struct flowi *fl,
 517			      struct request_sock *req,
 518			      struct tcp_fastopen_cookie *foc,
 519			      enum tcp_synack_type synack_type,
 520			      struct sk_buff *syn_skb)
 521{
 522	struct inet_request_sock *ireq = inet_rsk(req);
 523	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
 524	struct ipv6_txoptions *opt;
 525	struct flowi6 *fl6 = &fl->u.ip6;
 526	struct sk_buff *skb;
 527	int err = -ENOMEM;
 528	u8 tclass;
 529
 530	/* First, grab a route. */
 531	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
 532					       IPPROTO_TCP)) == NULL)
 533		goto done;
 534
 535	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
 536
 537	if (skb) {
 538		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
 539				    &ireq->ir_v6_rmt_addr);
 540
 541		fl6->daddr = ireq->ir_v6_rmt_addr;
 542		if (np->repflow && ireq->pktopts)
 543			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
 544
 545		tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
 546				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
 547				(np->tclass & INET_ECN_MASK) :
 548				np->tclass;
 549
 550		if (!INET_ECN_is_capable(tclass) &&
 551		    tcp_bpf_ca_needs_ecn((struct sock *)req))
 552			tclass |= INET_ECN_ECT_0;
 553
 554		rcu_read_lock();
 555		opt = ireq->ipv6_opt;
 556		if (!opt)
 557			opt = rcu_dereference(np->opt);
 558		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
 559			       tclass, sk->sk_priority);
 560		rcu_read_unlock();
 561		err = net_xmit_eval(err);
 562	}
 563
 564done:
 565	return err;
 566}
 567
 568
 569static void tcp_v6_reqsk_destructor(struct request_sock *req)
 570{
 571	kfree(inet_rsk(req)->ipv6_opt);
 572	kfree_skb(inet_rsk(req)->pktopts);
 573}
 574
 575#ifdef CONFIG_TCP_MD5SIG
 576static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
 577						   const struct in6_addr *addr,
 578						   int l3index)
 579{
 580	return tcp_md5_do_lookup(sk, l3index,
 581				 (union tcp_md5_addr *)addr, AF_INET6);
 582}
 583
 584static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
 585						const struct sock *addr_sk)
 586{
 587	int l3index;
 588
 589	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
 590						 addr_sk->sk_bound_dev_if);
 591	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
 592				    l3index);
 593}
 594
 595static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
 596				 sockptr_t optval, int optlen)
 597{
 598	struct tcp_md5sig cmd;
 599	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
 600	int l3index = 0;
 601	u8 prefixlen;
 602
 603	if (optlen < sizeof(cmd))
 604		return -EINVAL;
 605
 606	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
 607		return -EFAULT;
 608
 609	if (sin6->sin6_family != AF_INET6)
 610		return -EINVAL;
 611
 612	if (optname == TCP_MD5SIG_EXT &&
 613	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
 614		prefixlen = cmd.tcpm_prefixlen;
 615		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
 616					prefixlen > 32))
 617			return -EINVAL;
 618	} else {
 619		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
 620	}
 621
 622	if (optname == TCP_MD5SIG_EXT &&
 623	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
 624		struct net_device *dev;
 625
 626		rcu_read_lock();
 627		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
 628		if (dev && netif_is_l3_master(dev))
 629			l3index = dev->ifindex;
 630		rcu_read_unlock();
 631
 632		/* ok to reference set/not set outside of rcu;
 633		 * right now device MUST be an L3 master
 634		 */
 635		if (!dev || !l3index)
 636			return -EINVAL;
 637	}
 638
 639	if (!cmd.tcpm_keylen) {
 640		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
 641			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
 642					      AF_INET, prefixlen,
 643					      l3index);
 644		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
 645				      AF_INET6, prefixlen, l3index);
 646	}
 647
 648	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
 649		return -EINVAL;
 650
 651	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
 652		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
 653				      AF_INET, prefixlen, l3index,
 654				      cmd.tcpm_key, cmd.tcpm_keylen,
 655				      GFP_KERNEL);
 656
 657	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
 658			      AF_INET6, prefixlen, l3index,
 659			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
 660}
 661
 662static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
 663				   const struct in6_addr *daddr,
 664				   const struct in6_addr *saddr,
 665				   const struct tcphdr *th, int nbytes)
 666{
 667	struct tcp6_pseudohdr *bp;
 668	struct scatterlist sg;
 669	struct tcphdr *_th;
 670
 671	bp = hp->scratch;
 672	/* 1. TCP pseudo-header (RFC2460) */
 673	bp->saddr = *saddr;
 674	bp->daddr = *daddr;
 675	bp->protocol = cpu_to_be32(IPPROTO_TCP);
 676	bp->len = cpu_to_be32(nbytes);
 677
 678	_th = (struct tcphdr *)(bp + 1);
 679	memcpy(_th, th, sizeof(*th));
 680	_th->check = 0;
 681
 682	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
 683	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
 684				sizeof(*bp) + sizeof(*th));
 685	return crypto_ahash_update(hp->md5_req);
 686}
 687
 688static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 689			       const struct in6_addr *daddr, struct in6_addr *saddr,
 690			       const struct tcphdr *th)
 691{
 692	struct tcp_md5sig_pool *hp;
 693	struct ahash_request *req;
 694
 695	hp = tcp_get_md5sig_pool();
 696	if (!hp)
 697		goto clear_hash_noput;
 698	req = hp->md5_req;
 699
 700	if (crypto_ahash_init(req))
 701		goto clear_hash;
 702	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
 703		goto clear_hash;
 704	if (tcp_md5_hash_key(hp, key))
 705		goto clear_hash;
 706	ahash_request_set_crypt(req, NULL, md5_hash, 0);
 707	if (crypto_ahash_final(req))
 708		goto clear_hash;
 709
 710	tcp_put_md5sig_pool();
 711	return 0;
 712
 713clear_hash:
 714	tcp_put_md5sig_pool();
 715clear_hash_noput:
 716	memset(md5_hash, 0, 16);
 717	return 1;
 718}
 719
 720static int tcp_v6_md5_hash_skb(char *md5_hash,
 721			       const struct tcp_md5sig_key *key,
 722			       const struct sock *sk,
 723			       const struct sk_buff *skb)
 724{
 725	const struct in6_addr *saddr, *daddr;
 726	struct tcp_md5sig_pool *hp;
 727	struct ahash_request *req;
 728	const struct tcphdr *th = tcp_hdr(skb);
 729
 730	if (sk) { /* valid for establish/request sockets */
 731		saddr = &sk->sk_v6_rcv_saddr;
 732		daddr = &sk->sk_v6_daddr;
 733	} else {
 734		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 735		saddr = &ip6h->saddr;
 736		daddr = &ip6h->daddr;
 737	}
 738
 739	hp = tcp_get_md5sig_pool();
 740	if (!hp)
 741		goto clear_hash_noput;
 742	req = hp->md5_req;
 743
 744	if (crypto_ahash_init(req))
 745		goto clear_hash;
 746
 747	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
 748		goto clear_hash;
 749	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
 750		goto clear_hash;
 751	if (tcp_md5_hash_key(hp, key))
 752		goto clear_hash;
 753	ahash_request_set_crypt(req, NULL, md5_hash, 0);
 754	if (crypto_ahash_final(req))
 755		goto clear_hash;
 756
 757	tcp_put_md5sig_pool();
 758	return 0;
 759
 760clear_hash:
 761	tcp_put_md5sig_pool();
 762clear_hash_noput:
 763	memset(md5_hash, 0, 16);
 764	return 1;
 765}
 766
 767#endif
 768
 769static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
 770				    const struct sk_buff *skb,
 771				    int dif, int sdif)
 772{
 773#ifdef CONFIG_TCP_MD5SIG
 774	const __u8 *hash_location = NULL;
 775	struct tcp_md5sig_key *hash_expected;
 776	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 777	const struct tcphdr *th = tcp_hdr(skb);
 778	int genhash, l3index;
 779	u8 newhash[16];
 780
 781	/* sdif set, means packet ingressed via a device
 782	 * in an L3 domain and dif is set to the l3mdev
 783	 */
 784	l3index = sdif ? dif : 0;
 785
 786	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
 787	hash_location = tcp_parse_md5sig_option(th);
 788
 789	/* We've parsed the options - do we have a hash? */
 790	if (!hash_expected && !hash_location)
 791		return false;
 792
 793	if (hash_expected && !hash_location) {
 794		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
 795		return true;
 796	}
 797
 798	if (!hash_expected && hash_location) {
 799		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
 800		return true;
 801	}
 802
 803	/* check the signature */
 804	genhash = tcp_v6_md5_hash_skb(newhash,
 805				      hash_expected,
 806				      NULL, skb);
 807
 808	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
 809		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
 810		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
 811				     genhash ? "failed" : "mismatch",
 812				     &ip6h->saddr, ntohs(th->source),
 813				     &ip6h->daddr, ntohs(th->dest), l3index);
 814		return true;
 815	}
 816#endif
 817	return false;
 818}
 819
 820static void tcp_v6_init_req(struct request_sock *req,
 821			    const struct sock *sk_listener,
 822			    struct sk_buff *skb)
 823{
 824	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
 825	struct inet_request_sock *ireq = inet_rsk(req);
 826	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
 827
 828	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
 829	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
 830
 831	/* So that link locals have meaning */
 832	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
 833	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
 834		ireq->ir_iif = tcp_v6_iif(skb);
 835
 836	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
 837	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
 838	     np->rxopt.bits.rxinfo ||
 839	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
 840	     np->rxopt.bits.rxohlim || np->repflow)) {
 841		refcount_inc(&skb->users);
 842		ireq->pktopts = skb;
 843	}
 844}
 845
 846static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
 847					  struct sk_buff *skb,
 848					  struct flowi *fl,
 849					  struct request_sock *req)
 850{
 851	tcp_v6_init_req(req, sk, skb);
 852
 853	if (security_inet_conn_request(sk, skb, req))
 854		return NULL;
 855
 856	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
 857}
 858
 859struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
 860	.family		=	AF_INET6,
 861	.obj_size	=	sizeof(struct tcp6_request_sock),
 862	.rtx_syn_ack	=	tcp_rtx_synack,
 863	.send_ack	=	tcp_v6_reqsk_send_ack,
 864	.destructor	=	tcp_v6_reqsk_destructor,
 865	.send_reset	=	tcp_v6_send_reset,
 866	.syn_ack_timeout =	tcp_syn_ack_timeout,
 867};
 868
 869const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
 870	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
 871				sizeof(struct ipv6hdr),
 872#ifdef CONFIG_TCP_MD5SIG
 873	.req_md5_lookup	=	tcp_v6_md5_lookup,
 874	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
 875#endif
 
 876#ifdef CONFIG_SYN_COOKIES
 877	.cookie_init_seq =	cookie_v6_init_sequence,
 878#endif
 879	.route_req	=	tcp_v6_route_req,
 880	.init_seq	=	tcp_v6_init_seq,
 881	.init_ts_off	=	tcp_v6_init_ts_off,
 882	.send_synack	=	tcp_v6_send_synack,
 883};
 884
 885static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
 886				 u32 ack, u32 win, u32 tsval, u32 tsecr,
 887				 int oif, struct tcp_md5sig_key *key, int rst,
 888				 u8 tclass, __be32 label, u32 priority)
 889{
 890	const struct tcphdr *th = tcp_hdr(skb);
 891	struct tcphdr *t1;
 892	struct sk_buff *buff;
 893	struct flowi6 fl6;
 894	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
 895	struct sock *ctl_sk = net->ipv6.tcp_sk;
 896	unsigned int tot_len = sizeof(struct tcphdr);
 897	__be32 mrst = 0, *topt;
 898	struct dst_entry *dst;
 
 899	__u32 mark = 0;
 900
 901	if (tsecr)
 902		tot_len += TCPOLEN_TSTAMP_ALIGNED;
 903#ifdef CONFIG_TCP_MD5SIG
 904	if (key)
 905		tot_len += TCPOLEN_MD5SIG_ALIGNED;
 906#endif
 907
 908#ifdef CONFIG_MPTCP
 909	if (rst && !key) {
 910		mrst = mptcp_reset_option(skb);
 911
 912		if (mrst)
 913			tot_len += sizeof(__be32);
 914	}
 915#endif
 916
 917	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
 918			 GFP_ATOMIC);
 919	if (!buff)
 920		return;
 921
 922	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
 923
 924	t1 = skb_push(buff, tot_len);
 925	skb_reset_transport_header(buff);
 926
 927	/* Swap the send and the receive. */
 928	memset(t1, 0, sizeof(*t1));
 929	t1->dest = th->source;
 930	t1->source = th->dest;
 931	t1->doff = tot_len / 4;
 932	t1->seq = htonl(seq);
 933	t1->ack_seq = htonl(ack);
 934	t1->ack = !rst || !th->ack;
 935	t1->rst = rst;
 936	t1->window = htons(win);
 937
 938	topt = (__be32 *)(t1 + 1);
 939
 940	if (tsecr) {
 941		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 942				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
 943		*topt++ = htonl(tsval);
 944		*topt++ = htonl(tsecr);
 945	}
 946
 947	if (mrst)
 948		*topt++ = mrst;
 949
 950#ifdef CONFIG_TCP_MD5SIG
 951	if (key) {
 952		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 953				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
 954		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
 955				    &ipv6_hdr(skb)->saddr,
 956				    &ipv6_hdr(skb)->daddr, t1);
 957	}
 958#endif
 959
 960	memset(&fl6, 0, sizeof(fl6));
 961	fl6.daddr = ipv6_hdr(skb)->saddr;
 962	fl6.saddr = ipv6_hdr(skb)->daddr;
 963	fl6.flowlabel = label;
 964
 965	buff->ip_summed = CHECKSUM_PARTIAL;
 966	buff->csum = 0;
 967
 968	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
 969
 970	fl6.flowi6_proto = IPPROTO_TCP;
 971	if (rt6_need_strict(&fl6.daddr) && !oif)
 972		fl6.flowi6_oif = tcp_v6_iif(skb);
 973	else {
 974		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
 975			oif = skb->skb_iif;
 976
 977		fl6.flowi6_oif = oif;
 978	}
 979
 980	if (sk) {
 981		if (sk->sk_state == TCP_TIME_WAIT) {
 982			mark = inet_twsk(sk)->tw_mark;
 983			/* autoflowlabel relies on buff->hash */
 984			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
 985				     PKT_HASH_TYPE_L4);
 986		} else {
 987			mark = sk->sk_mark;
 988		}
 989		buff->tstamp = tcp_transmit_time(sk);
 990	}
 991	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
 992	fl6.fl6_dport = t1->dest;
 993	fl6.fl6_sport = t1->source;
 994	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
 995	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
 996
 997	/* Pass a socket to ip6_dst_lookup either it is for RST
 998	 * Underlying function will use this to retrieve the network
 999	 * namespace
1000	 */
1001	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
1002	if (!IS_ERR(dst)) {
1003		skb_dst_set(buff, dst);
1004		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
1005			 tclass & ~INET_ECN_MASK, priority);
1006		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
1007		if (rst)
1008			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
1009		return;
1010	}
1011
1012	kfree_skb(buff);
1013}
1014
/*
 * tcp_v6_send_reset() - answer the received segment @skb with a TCP RST.
 * @sk:  socket the segment was matched to, or NULL when no socket was
 *       found.  It may be a non-full socket (e.g. a timewait socket).
 * @skb: the received segment being answered.
 *
 * No reset is generated when the incoming segment itself has RST set, or
 * when there is no socket and the destination is not a unicast address.
 * With CONFIG_TCP_MD5SIG and no full socket, a segment carrying an MD5
 * option is only answered if a listener and a key are found and the
 * segment's signature verifies against that key.
 */
1015static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
1016{
1017	const struct tcphdr *th = tcp_hdr(skb);
1018	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1019	u32 seq = 0, ack_seq = 0;
1020	struct tcp_md5sig_key *key = NULL;
1021#ifdef CONFIG_TCP_MD5SIG
1022	const __u8 *hash_location = NULL;
1023	unsigned char newhash[16];
1024	int genhash;
1025	struct sock *sk1 = NULL;
1026#endif
1027	__be32 label = 0;
1028	u32 priority = 0;
1029	struct net *net;
1030	int oif = 0;
1031
	/* Never answer a RST with another RST. */
1032	if (th->rst)
1033		return;
1034
1035	/* If sk not NULL, it means we did a successful lookup and incoming
1036	 * route had to be correct. prequeue might have dropped our dst.
1037	 */
1038	if (!sk && !ipv6_unicast_destination(skb))
1039		return;
1040
1041	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1042#ifdef CONFIG_TCP_MD5SIG
	/* The RCU read section opened here is closed at the out: label. */
1043	rcu_read_lock();
1044	hash_location = tcp_parse_md5sig_option(th);
1045	if (sk && sk_fullsock(sk)) {
1046		int l3index;
1047
1048		/* sdif set, means packet ingressed via a device
1049		 * in an L3 domain and inet_iif is set to it.
1050		 */
1051		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1052		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1053	} else if (hash_location) {
1054		int dif = tcp_v6_iif_l3_slave(skb);
1055		int sdif = tcp_v6_sdif(skb);
1056		int l3index;
1057
1058		/*
1059		 * active side is lost. Try to find listening socket through
1060		 * source port, and then find md5 key through listening socket.
1061		 * we are not loose security here:
1062		 * Incoming packet is checked with md5 hash with finding key,
1063		 * no RST generated if md5 hash doesn't match.
1064		 */
1065		sk1 = inet6_lookup_listener(net,
1066					   &tcp_hashinfo, NULL, 0,
1067					   &ipv6h->saddr,
1068					   th->source, &ipv6h->daddr,
1069					   ntohs(th->source), dif, sdif);


1070		if (!sk1)
1071			goto out;
1072
1073		/* sdif set, means packet ingressed via a device
1074		 * in an L3 domain and dif is set to it.
1075		 */
1076		l3index = tcp_v6_sdif(skb) ? dif : 0;
1077
1078		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1079		if (!key)
1080			goto out;
1081
		/* Recompute the signature over @skb and compare it with the
		 * 16 bytes carried in the option; stay silent on mismatch.
		 */
1082		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1083		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1084			goto out;
1085	}
1086#endif
1087
	/* Choose the reply's sequence numbers: if the peer sent an ACK, its
	 * ack number becomes our sequence number; otherwise acknowledge
	 * everything the segment occupied (payload plus one each for SYN
	 * and FIN).
	 */
1088	if (th->ack)
1089		seq = ntohl(th->ack_seq);
1090	else
1091		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1092			  (th->doff << 2);
1093
	/* Flow label and priority come from the socket when there is one;
	 * without a socket the label is reflected only if the
	 * flowlabel_reflect sysctl has FLOWLABEL_REFLECT_TCP_RESET set.
	 */
1094	if (sk) {
1095		oif = sk->sk_bound_dev_if;
1096		if (sk_fullsock(sk)) {
1097			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1098
1099			trace_tcp_send_reset(sk, skb);
1100			if (np->repflow)
1101				label = ip6_flowlabel(ipv6h);
1102			priority = sk->sk_priority;
1103		}
1104		if (sk->sk_state == TCP_TIME_WAIT) {
1105			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1106			priority = inet_twsk(sk)->tw_priority;
1107		}
1108	} else {
1109		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1110			label = ip6_flowlabel(ipv6h);
1111	}
1112
	/* The literal 1 is the rst flag (tcp_v6_send_ack() passes 0 there). */
1113	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1114			     ipv6_get_dsfield(ipv6h), label, priority);
1115
1116#ifdef CONFIG_TCP_MD5SIG
1117out:
1118	rcu_read_unlock();
1119#endif
1120}
1121
1122static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1123			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1124			    struct tcp_md5sig_key *key, u8 tclass,
1125			    __be32 label, u32 priority)
1126{
1127	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1128			     tclass, label, priority);
1129}
1130
1131static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1132{
1133	struct inet_timewait_sock *tw = inet_twsk(sk);
1134	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1135
1136	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1137			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1138			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1139			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1140			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1141
1142	inet_twsk_put(tw);
1143}
1144
1145static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1146				  struct request_sock *req)
1147{
1148	int l3index;
1149
1150	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1151
1152	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1153	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1154	 */
1155	/* RFC 7323 2.3
1156	 * The window field (SEG.WND) of every outgoing segment, with the
1157	 * exception of <SYN> segments, MUST be right-shifted by
1158	 * Rcv.Wind.Shift bits:
1159	 */
1160	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1161			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1162			tcp_rsk(req)->rcv_nxt,
1163			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1164			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1165			req->ts_recent, sk->sk_bound_dev_if,
1166			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1167			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1168}
1169
1170
/*
 * tcp_v6_cookie_check() - run the syncookie check for a non-SYN segment.
 *
 * With CONFIG_SYN_COOKIES, a segment without the SYN flag is passed to
 * cookie_v6_check() and its result is returned.  In every other case @sk
 * is returned unchanged.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (tcp_hdr(skb)->syn)
		return sk;

	return cookie_v6_check(sk, skb);
#else
	return sk;
#endif
}
1181
1182u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1183			 struct tcphdr *th, u32 *cookie)
1184{
1185	u16 mss = 0;
1186#ifdef CONFIG_SYN_COOKIES
1187	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1188				    &tcp_request_sock_ipv6_ops, sk, th);
1189	if (mss) {
1190		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1191		tcp_synq_overflow(sk);
1192	}
1193#endif
1194	return mss;
1195}
1196
1197static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1198{
1199	if (skb->protocol == htons(ETH_P_IP))
1200		return tcp_v4_conn_request(sk, skb);
1201
1202	if (!ipv6_unicast_destination(skb))
1203		goto drop;
1204
1205	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1206		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1207		return 0;
1208	}
1209
1210	return tcp_conn_request(&tcp6_request_sock_ops,
1211				&tcp_request_sock_ipv6_ops, sk, skb);
1212
1213drop:
1214	tcp_listendrop(sk);
1215	return 0; /* don't send reset */
1216}
1217
1218static void tcp_v6_restore_cb(struct sk_buff *skb)
1219{
1220	/* We need to move header back to the beginning if xfrm6_policy_check()
1221	 * and tcp_v6_fill_cb() are going to be called again.
1222	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1223	 */
1224	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1225		sizeof(struct inet6_skb_parm));
1226}
1227
/*
 * tcp_v6_syn_recv_sock() - create the child socket for request @req.
 * @sk:         listening socket.
 * @skb:        segment that triggered the creation.
 * @req:        request socket the child is built from.
 * @dst:        route for the child, or NULL to have one looked up with
 *              inet6_csk_route_req().
 * @req_unhash: handed to inet_ehash_nolisten() (or to
 *              tcp_v4_syn_recv_sock() for IPv4 packets).
 * @own_req:    set to the result of inet_ehash_nolisten().
 *
 * IPv4 packets (v4-mapped case) are delegated to tcp_v4_syn_recv_sock()
 * and the resulting socket is then switched to the ipv6_mapped operations.
 *
 * Returns the new socket, or NULL on failure (accept queue full, no route,
 * allocation failure, port inheritance failure, or a duplicate socket found
 * in the no-@req_unhash case).
 */
1228static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1229					 struct request_sock *req,
1230					 struct dst_entry *dst,
1231					 struct request_sock *req_unhash,
1232					 bool *own_req)
1233{
1234	struct inet_request_sock *ireq;
1235	struct ipv6_pinfo *newnp;
1236	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1237	struct ipv6_txoptions *opt;
1238	struct inet_sock *newinet;
1239	bool found_dup_sk = false;
1240	struct tcp_sock *newtp;
1241	struct sock *newsk;
1242#ifdef CONFIG_TCP_MD5SIG
1243	struct tcp_md5sig_key *key;
1244	int l3index;
1245#endif
1246	struct flowi6 fl6;
1247
1248	if (skb->protocol == htons(ETH_P_IP)) {
1249		/*
1250		 *	v6 mapped
1251		 */
1252
1253		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1254					     req_unhash, own_req);
1255
1256		if (!newsk)
1257			return NULL;
1258
1259		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1260
1261		newinet = inet_sk(newsk);
1262		newnp = tcp_inet6_sk(newsk);
1263		newtp = tcp_sk(newsk);
1264
		/* Start from a copy of the listener's IPv6 state, then reset
		 * the fields below that must not be shared with it.
		 */
1265		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1266
1267		newnp->saddr = newsk->sk_v6_rcv_saddr;
1268
1269		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1270		if (sk_is_mptcp(newsk))
1271			mptcpv6_handle_mapped(newsk, true);
1272		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1273#ifdef CONFIG_TCP_MD5SIG
1274		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1275#endif
1276
1277		newnp->ipv6_mc_list = NULL;
1278		newnp->ipv6_ac_list = NULL;
1279		newnp->ipv6_fl_list = NULL;
1280		newnp->pktoptions  = NULL;
1281		newnp->opt	   = NULL;
1282		newnp->mcast_oif   = inet_iif(skb);
1283		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1284		newnp->rcv_flowinfo = 0;
1285		if (np->repflow)
1286			newnp->flow_label = 0;
1287
1288		/*
1289		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1290		 * here, tcp_create_openreq_child now does this for us, see the comment in
1291		 * that function for the gory details. -acme
1292		 */
1293
1294		/* It is tricky place. Until this moment IPv4 tcp
1295		   worked with IPv6 icsk.icsk_af_ops.
1296		   Sync it now.
1297		 */
1298		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1299
1300		return newsk;
1301	}
1302
	/* Native IPv6 path from here on. */
1303	ireq = inet_rsk(req);
1304
1305	if (sk_acceptq_is_full(sk))
1306		goto out_overflow;
1307
1308	if (!dst) {
1309		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1310		if (!dst)
1311			goto out;
1312	}
1313
1314	newsk = tcp_create_openreq_child(sk, req, skb);
1315	if (!newsk)
1316		goto out_nonewsk;
1317
1318	/*
1319	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1320	 * count here, tcp_create_openreq_child now does this for us, see the
1321	 * comment in that function for the gory details. -acme
1322	 */
1323
1324	newsk->sk_gso_type = SKB_GSO_TCPV6;
1325	ip6_dst_store(newsk, dst, NULL, NULL);
1326	inet6_sk_rx_dst_set(newsk, skb);
1327
1328	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1329
1330	newtp = tcp_sk(newsk);
1331	newinet = inet_sk(newsk);
1332	newnp = tcp_inet6_sk(newsk);
1333
1334	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1335
	/* Addresses and bound interface come from the request. */
1336	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1337	newnp->saddr = ireq->ir_v6_loc_addr;
1338	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1339	newsk->sk_bound_dev_if = ireq->ir_iif;
1340
1341	/* Now IPv6 options...
1342
1343	   First: no IPv4 options.
1344	 */
1345	newinet->inet_opt = NULL;
1346	newnp->ipv6_mc_list = NULL;
1347	newnp->ipv6_ac_list = NULL;
1348	newnp->ipv6_fl_list = NULL;
1349
1350	/* Clone RX bits */
1351	newnp->rxopt.all = np->rxopt.all;
1352
1353	newnp->pktoptions = NULL;
1354	newnp->opt	  = NULL;
1355	newnp->mcast_oif  = tcp_v6_iif(skb);
1356	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1357	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1358	if (np->repflow)
1359		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1360
1361	/* Set ToS of the new socket based upon the value of incoming SYN.
1362	 * ECT bits are set later in tcp_init_transfer().
1363	 */
1364	if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
1365		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1366
1367	/* Clone native IPv6 options from listening socket (if any)
1368
1369	   Yes, keeping reference count would be much more clever,
1370	   but we do one more thing there: reattach optmem
1371	   to newsk.
1372	 */
1373	opt = ireq->ipv6_opt;
1374	if (!opt)
1375		opt = rcu_dereference(np->opt);
1376	if (opt) {
1377		opt = ipv6_dup_options(newsk, opt);
1378		RCU_INIT_POINTER(newnp->opt, opt);
1379	}
	/* opt is re-tested because ipv6_dup_options() reassigned it above. */
1380	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1381	if (opt)
1382		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1383						    opt->opt_flen;
1384
1385	tcp_ca_openreq_child(newsk, dst);
1386
1387	tcp_sync_mss(newsk, dst_mtu(dst));
1388	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1389
1390	tcp_initialize_rcv_mss(newsk);
1391
1392	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1393	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1394
1395#ifdef CONFIG_TCP_MD5SIG
1396	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1397
1398	/* Copy over the MD5 key from the original socket */
1399	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1400	if (key) {
1401		/* We're using one, so create a matching key
1402		 * on the newsk structure. If we fail to get
1403		 * memory, then we end up not copying the key
1404		 * across. Shucks.
1405		 */
1406		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1407			       AF_INET6, 128, l3index, key->key, key->keylen,
1408			       sk_gfp_mask(sk, GFP_ATOMIC));
1409	}
1410#endif
1411
1412	if (__inet_inherit_port(sk, newsk) < 0) {
1413		inet_csk_prepare_forced_close(newsk);
1414		tcp_done(newsk);
1415		goto out;
1416	}
1417	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1418				       &found_dup_sk);
1419	if (*own_req) {
1420		tcp_move_syn(newtp, req);
1421
1422		/* Clone pktoptions received with SYN, if we own the req */
1423		if (ireq->pktopts) {
1424			newnp->pktoptions = skb_clone(ireq->pktopts,
1425						      sk_gfp_mask(sk, GFP_ATOMIC));
1426			consume_skb(ireq->pktopts);
1427			ireq->pktopts = NULL;
1428			if (newnp->pktoptions) {
1429				tcp_v6_restore_cb(newnp->pktoptions);
1430				skb_set_owner_r(newnp->pktoptions, newsk);
1431			}
1432		}
1433	} else {
1434		if (!req_unhash && found_dup_sk) {
1435			/* This code path should only be executed in the
1436			 * syncookie case only
1437			 */
1438			bh_unlock_sock(newsk);
1439			sock_put(newsk);
1440			newsk = NULL;
1441		}
1442	}
1443
1444	return newsk;
1445
	/* Error unwinding: each label falls through into the next one. */
1446out_overflow:
1447	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1448out_nonewsk:
1449	dst_release(dst);
1450out:
1451	tcp_listendrop(sk);
1452	return NULL;
1453}
1454
1455INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1456							   u32));
/*
 * tcp_v6_do_rcv() - process one segment @skb for socket @sk.
 *
 * IPv4 packets are handed to tcp_v4_do_rcv() and its result is returned.
 * Every IPv6 path returns 0.  When the socket has any rxopt bit set, a
 * clone of @skb is taken up front (opt_skb) so that the options of the
 * last in-order segment can be latched into np->pktoptions at the
 * ipv6_pktoptions label; every other exit path frees that clone.
 */
1457/* The socket must have its spinlock held when we get
1458 * here, unless it is a TCP_LISTEN socket.
1459 *
1460 * We have a potential double-lock case here, so even when
1461 * doing backlog processing we use the BH locking scheme.
1462 * This is because we cannot sleep with the original spinlock
1463 * held.
1464 */
1465static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1466{
1467	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1468	struct sk_buff *opt_skb = NULL;
1469	struct tcp_sock *tp;
1470
1471	/* Imagine: socket is IPv6. IPv4 packet arrives,
1472	   goes to IPv4 receive handler and backlogged.
1473	   From backlog it always goes here. Kerboom...
1474	   Fortunately, tcp_rcv_established and rcv_established
1475	   handle them correctly, but it is not case with
1476	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1477	 */
1478
1479	if (skb->protocol == htons(ETH_P_IP))
1480		return tcp_v4_do_rcv(sk, skb);
1481
1482	/*
1483	 *	socket locking is here for SMP purposes as backlog rcv
1484	 *	is currently called with bh processing disabled.
1485	 */
1486
1487	/* Do Stevens' IPV6_PKTOPTIONS.
1488
1489	   Yes, guys, it is the only place in our code, where we
1490	   may make it not affecting IPv4.
1491	   The rest of code is protocol independent,
1492	   and I do not like idea to uglify IPv4.
1493
1494	   Actually, all the idea behind IPV6_PKTOPTIONS
1495	   looks not very well thought. For now we latch
1496	   options, received in the last packet, enqueued
1497	   by tcp. Feel free to propose better solution.
1498					       --ANK (980728)
1499	 */
1500	if (np->rxopt.all)
1501		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1502
1503	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1504		struct dst_entry *dst = sk->sk_rx_dst;
1505
1506		sock_rps_save_rxhash(sk, skb);
1507		sk_mark_napi_id(sk, skb);
		/* Drop the cached input route if the segment arrived on a
		 * different interface or the route fails its validity check.
		 */
1508		if (dst) {
1509			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1510			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1511					    dst, np->rx_dst_cookie) == NULL) {
1512				dst_release(dst);
1513				sk->sk_rx_dst = NULL;
1514			}
1515		}
1516
1517		tcp_rcv_established(sk, skb);
1518		if (opt_skb)
1519			goto ipv6_pktoptions;
1520		return 0;
1521	}
1522
1523	if (tcp_checksum_complete(skb))
1524		goto csum_err;
1525
1526	if (sk->sk_state == TCP_LISTEN) {
1527		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1528
1529		if (!nsk)
1530			goto discard;
1531
		/* A different socket came back from the cookie check: let it
		 * process the segment as a child of the listener.
		 */
1532		if (nsk != sk) {
1533			if (tcp_child_process(sk, nsk, skb))
1534				goto reset;
1535			if (opt_skb)
1536				__kfree_skb(opt_skb);
1537			return 0;
1538		}
1539	} else
1540		sock_rps_save_rxhash(sk, skb);
1541
1542	if (tcp_rcv_state_process(sk, skb))
1543		goto reset;
1544	if (opt_skb)
1545		goto ipv6_pktoptions;
1546	return 0;
1547
	/* reset falls through into discard, which frees both the clone (if
	 * any) and the segment itself.
	 */
1548reset:
1549	tcp_v6_send_reset(sk, skb);
1550discard:
1551	if (opt_skb)
1552		__kfree_skb(opt_skb);
1553	kfree_skb(skb);
1554	return 0;
1555csum_err:
1556	trace_tcp_bad_csum(skb);
1557	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1558	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1559	goto discard;
1560
1561
1562ipv6_pktoptions:
1563	/* Do you ask, what is it?
1564
1565	   1. skb was enqueued by tcp.
1566	   2. skb is added to tail of read queue, rather than out of order.
1567	   3. socket is not in passive state.
1568	   4. Finally, it really contains options, which user wants to receive.
1569	 */
1570	tp = tcp_sk(sk);
1571	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1572	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1573		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1574			np->mcast_oif = tcp_v6_iif(opt_skb);
1575		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1576			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1577		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1578			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1579		if (np->repflow)
1580			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		/* Either install the clone as the new pktoptions or clear
		 * pktoptions; in both cases opt_skb ends up holding the
		 * previous pktoptions skb, which is freed below.
		 */
1581		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1582			skb_set_owner_r(opt_skb, sk);
1583			tcp_v6_restore_cb(opt_skb);
1584			opt_skb = xchg(&np->pktoptions, opt_skb);
1585		} else {
1586			__kfree_skb(opt_skb);
1587			opt_skb = xchg(&np->pktoptions, NULL);
1588		}
1589	}
1590
1591	kfree_skb(opt_skb);
1592	return 0;
1593}
1594
1595static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1596			   const struct tcphdr *th)
1597{
1598	/* This is tricky: we move IP6CB at its correct location into
1599	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1600	 * _decode_session6() uses IP6CB().
1601	 * barrier() makes sure compiler won't play aliasing games.
1602	 */
1603	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1604		sizeof(struct inet6_skb_parm));
1605	barrier();
1606
1607	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1608	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1609				    skb->len - th->doff*4);
1610	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1611	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1612	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1613	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1614	TCP_SKB_CB(skb)->sacked = 0;
1615	TCP_SKB_CB(skb)->has_rxtstamp =
1616			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1617}
1618
/*
 * tcp_v6_rcv() - receive entry point for a TCP segment carried over IPv6.
 *
 * Validates the TCP header and checksum state, looks up the owning socket
 * and dispatches on its state: timewait sockets are handled at
 * do_time_wait, request sockets (TCP_NEW_SYN_RECV) are resolved to their
 * listener, listeners are processed directly, and all other sockets are
 * processed under bh_lock_sock_nested() or queued to the backlog when the
 * socket is owned by user context.
 *
 * Returns 0, or -1 when tcp_v6_do_rcv() returned non-zero.
 */
1619INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1620{
1621	struct sk_buff *skb_to_free;
1622	int sdif = inet6_sdif(skb);
1623	int dif = inet6_iif(skb);
1624	const struct tcphdr *th;
1625	const struct ipv6hdr *hdr;
1626	bool refcounted;
1627	struct sock *sk;
1628	int ret;
1629	struct net *net = dev_net(skb->dev);
1630
1631	if (skb->pkt_type != PACKET_HOST)
1632		goto discard_it;
1633
1634	/*
1635	 *	Count it even if it's bad.
1636	 */
1637	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1638
1639	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1640		goto discard_it;
1641
1642	th = (const struct tcphdr *)skb->data;
1643
	/* doff counts 32-bit words; reject headers shorter than the minimum. */
1644	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1645		goto bad_packet;
1646	if (!pskb_may_pull(skb, th->doff*4))
1647		goto discard_it;
1648
1649	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1650		goto csum_error;
1651
	/* Re-read the header pointers after the pulls above. */
1652	th = (const struct tcphdr *)skb->data;
1653	hdr = ipv6_hdr(skb);
1654
1655lookup:
1656	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1657				th->source, th->dest, inet6_iif(skb), sdif,
1658				&refcounted);
1659	if (!sk)
1660		goto no_tcp_socket;
1661
1662process:
1663	if (sk->sk_state == TCP_TIME_WAIT)
1664		goto do_time_wait;
1665
	/* The lookup returned a request socket: continue on behalf of its
	 * listener (or a socket it was migrated to).
	 */
1666	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1667		struct request_sock *req = inet_reqsk(sk);
1668		bool req_stolen = false;
1669		struct sock *nsk;
1670
1671		sk = req->rsk_listener;
1672		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1673			sk_drops_add(sk, skb);
1674			reqsk_put(req);
1675			goto discard_it;
1676		}
1677		if (tcp_checksum_complete(skb)) {
1678			reqsk_put(req);
1679			goto csum_error;
1680		}
1681		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1682			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1683			if (!nsk) {
1684				inet_csk_reqsk_queue_drop_and_put(sk, req);
1685				goto lookup;
1686			}
1687			sk = nsk;
1688			/* reuseport_migrate_sock() has already held one sk_refcnt
1689			 * before returning.
1690			 */
1691		} else {
1692			sock_hold(sk);
1693		}

		/* Either branch above left us holding a reference on sk. */
1694		refcounted = true;
1695		nsk = NULL;
1696		if (!tcp_filter(sk, skb)) {
1697			th = (const struct tcphdr *)skb->data;
1698			hdr = ipv6_hdr(skb);
1699			tcp_v6_fill_cb(skb, hdr, th);
1700			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1701		}
1702		if (!nsk) {
1703			reqsk_put(req);
1704			if (req_stolen) {
1705				/* Another cpu got exclusive access to req
1706				 * and created a full blown socket.
1707				 * Try to feed this packet to this socket
1708				 * instead of discarding it.
1709				 */
1710				tcp_v6_restore_cb(skb);
1711				sock_put(sk);
1712				goto lookup;
1713			}
1714			goto discard_and_relse;
1715		}
		/* nsk == sk: fall out of this block and process the segment
		 * on sk itself; otherwise nsk is a child socket.
		 */
1716		if (nsk == sk) {
1717			reqsk_put(req);
1718			tcp_v6_restore_cb(skb);
1719		} else if (tcp_child_process(sk, nsk, skb)) {
1720			tcp_v6_send_reset(nsk, skb);
1721			goto discard_and_relse;
1722		} else {
1723			sock_put(sk);
1724			return 0;
1725		}
1726	}
1727	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1728		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1729		goto discard_and_relse;
1730	}
1731
1732	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1733		goto discard_and_relse;
1734
1735	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1736		goto discard_and_relse;
1737
1738	if (tcp_filter(sk, skb))
1739		goto discard_and_relse;
1740	th = (const struct tcphdr *)skb->data;
1741	hdr = ipv6_hdr(skb);
1742	tcp_v6_fill_cb(skb, hdr, th);
1743
1744	skb->dev = NULL;
1745
	/* Listeners are processed without taking the socket lock. */
1746	if (sk->sk_state == TCP_LISTEN) {
1747		ret = tcp_v6_do_rcv(sk, skb);
1748		goto put_and_return;
1749	}
1750
1751	sk_incoming_cpu_update(sk);
1752
1753	bh_lock_sock_nested(sk);
1754	tcp_segs_in(tcp_sk(sk), skb);
1755	ret = 0;
	/* Process now if no user context owns the socket, else backlog. */
1756	if (!sock_owned_by_user(sk)) {
1757		skb_to_free = sk->sk_rx_skb_cache;
1758		sk->sk_rx_skb_cache = NULL;
1759		ret = tcp_v6_do_rcv(sk, skb);
1760	} else {
1761		if (tcp_add_backlog(sk, skb))
1762			goto discard_and_relse;
1763		skb_to_free = NULL;
1764	}
1765	bh_unlock_sock(sk);
1766	if (skb_to_free)
1767		__kfree_skb(skb_to_free);
1768put_and_return:
1769	if (refcounted)
1770		sock_put(sk);
1771	return ret ? -1 : 0;
1772
1773no_tcp_socket:
1774	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1775		goto discard_it;
1776
1777	tcp_v6_fill_cb(skb, hdr, th);
1778
	/* csum_error and bad_packet are jump targets inside this if-body:
	 * csum_error falls through into bad_packet, so a checksum failure
	 * bumps both counters.  A segment with a good checksum and no
	 * socket is answered with a reset instead.
	 */
1779	if (tcp_checksum_complete(skb)) {
1780csum_error:
1781		trace_tcp_bad_csum(skb);
1782		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1783bad_packet:
1784		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1785	} else {
1786		tcp_v6_send_reset(NULL, skb);
1787	}
1788
1789discard_it:
1790	kfree_skb(skb);
1791	return 0;
1792
1793discard_and_relse:
1794	sk_drops_add(sk, skb);
1795	if (refcounted)
1796		sock_put(sk);
1797	goto discard_it;
1798
1799do_time_wait:
1800	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1801		inet_twsk_put(inet_twsk(sk));
1802		goto discard_it;
1803	}
1804
1805	tcp_v6_fill_cb(skb, hdr, th);
1806
1807	if (tcp_checksum_complete(skb)) {
1808		inet_twsk_put(inet_twsk(sk));
1809		goto csum_error;
1810	}
1811
1812	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1813	case TCP_TW_SYN:
1814	{
1815		struct sock *sk2;
1816
		/* If a listener exists for this SYN, retire the timewait
		 * socket and restart processing on the listener; sk2 is
		 * used with refcounted = false.  Otherwise fall through
		 * and just ACK.
		 */
1817		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1818					    skb, __tcp_hdrlen(th),
1819					    &ipv6_hdr(skb)->saddr, th->source,
1820					    &ipv6_hdr(skb)->daddr,
1821					    ntohs(th->dest),
1822					    tcp_v6_iif_l3_slave(skb),
1823					    sdif);
1824		if (sk2) {
1825			struct inet_timewait_sock *tw = inet_twsk(sk);
1826			inet_twsk_deschedule_put(tw);
1827			sk = sk2;
1828			tcp_v6_restore_cb(skb);
1829			refcounted = false;
1830			goto process;
1831		}
1832	}
1833		/* to ACK */
1834		fallthrough;
1835	case TCP_TW_ACK:
1836		tcp_v6_timewait_ack(sk, skb);
1837		break;
1838	case TCP_TW_RST:
1839		tcp_v6_send_reset(sk, skb);
1840		inet_twsk_deschedule_put(inet_twsk(sk));
1841		goto discard_it;
1842	case TCP_TW_SUCCESS:
1843		;
1844	}
1845	goto discard_it;
1846}
1847
1848INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1849{
1850	const struct ipv6hdr *hdr;
1851	const struct tcphdr *th;
1852	struct sock *sk;
1853
1854	if (skb->pkt_type != PACKET_HOST)
1855		return;
1856
1857	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1858		return;
1859
1860	hdr = ipv6_hdr(skb);
1861	th = tcp_hdr(skb);
1862
1863	if (th->doff < sizeof(struct tcphdr) / 4)
1864		return;
1865
1866	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
1867	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1868					&hdr->saddr, th->source,
1869					&hdr->daddr, ntohs(th->dest),
1870					inet6_iif(skb), inet6_sdif(skb));
1871	if (sk) {
1872		skb->sk = sk;
1873		skb->destructor = sock_edemux;
1874		if (sk_fullsock(sk)) {
1875			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1876
1877			if (dst)
1878				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1879			if (dst &&
1880			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1881				skb_dst_set_noref(skb, dst);
1882		}
1883	}
1884}
1885
1886static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1887	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1888	.twsk_unique	= tcp_twsk_unique,
1889	.twsk_destructor = tcp_twsk_destructor,
1890};
1891
1892INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1893{
1894	struct ipv6_pinfo *np = inet6_sk(sk);
1895
1896	__tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1897}
1898
1899const struct inet_connection_sock_af_ops ipv6_specific = {
1900	.queue_xmit	   = inet6_csk_xmit,
1901	.send_check	   = tcp_v6_send_check,
1902	.rebuild_header	   = inet6_sk_rebuild_header,
1903	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1904	.conn_request	   = tcp_v6_conn_request,
1905	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1906	.net_header_len	   = sizeof(struct ipv6hdr),
1907	.net_frag_header_len = sizeof(struct frag_hdr),
1908	.setsockopt	   = ipv6_setsockopt,
1909	.getsockopt	   = ipv6_getsockopt,
1910	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1911	.sockaddr_len	   = sizeof(struct sockaddr_in6),
 
 
 
 
1912	.mtu_reduced	   = tcp_v6_mtu_reduced,
1913};
1914
#ifdef CONFIG_TCP_MD5SIG
/* MD5 signature hooks for native IPv6 TCP sockets. */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	= tcp_v6_md5_lookup,
	.md5_parse	= tcp_v6_parse_md5_keys,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
};
#endif
1922
1923/*
1924 *	TCP over IPv4 via INET6 API
1925 */
1926static const struct inet_connection_sock_af_ops ipv6_mapped = {
1927	.queue_xmit	   = ip_queue_xmit,
1928	.send_check	   = tcp_v4_send_check,
1929	.rebuild_header	   = inet_sk_rebuild_header,
1930	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1931	.conn_request	   = tcp_v6_conn_request,
1932	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1933	.net_header_len	   = sizeof(struct iphdr),
1934	.setsockopt	   = ipv6_setsockopt,
1935	.getsockopt	   = ipv6_getsockopt,
1936	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1937	.sockaddr_len	   = sizeof(struct sockaddr_in6),
 
 
 
 
1938	.mtu_reduced	   = tcp_v4_mtu_reduced,
1939};
1940
#ifdef CONFIG_TCP_MD5SIG
/* MD5 signature hooks for v4-mapped sockets: IPv4 lookup and hashing,
 * IPv6 key parsing.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.md5_parse	= tcp_v6_parse_md5_keys,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
};
#endif
1948
1949/* NOTE: A lot of things set to zero explicitly by call to
1950 *       sk_alloc() so need not be done here.
1951 */
1952static int tcp_v6_init_sock(struct sock *sk)
1953{
1954	struct inet_connection_sock *icsk = inet_csk(sk);
1955
1956	tcp_init_sock(sk);
1957
1958	icsk->icsk_af_ops = &ipv6_specific;
1959
1960#ifdef CONFIG_TCP_MD5SIG
1961	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1962#endif
1963
1964	return 0;
1965}
1966
/*
 * tcp_v6_destroy_sock() - protocol destroy hook for a TCPv6 socket:
 * tear down the TCP state first, then the IPv6 socket state.
 */
static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);	/* TCP-level teardown */
	inet6_destroy_sock(sk);		/* IPv6-level teardown */
}
1972
1973#ifdef CONFIG_PROC_FS
1974/* Proc filesystem TCPv6 sock list dumping. */
1975static void get_openreq6(struct seq_file *seq,
1976			 const struct request_sock *req, int i)
1977{
1978	long ttd = req->rsk_timer.expires - jiffies;
1979	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1980	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1981
1982	if (ttd < 0)
1983		ttd = 0;
1984
1985	seq_printf(seq,
1986		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1987		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1988		   i,
1989		   src->s6_addr32[0], src->s6_addr32[1],
1990		   src->s6_addr32[2], src->s6_addr32[3],
1991		   inet_rsk(req)->ir_num,
1992		   dest->s6_addr32[0], dest->s6_addr32[1],
1993		   dest->s6_addr32[2], dest->s6_addr32[3],
1994		   ntohs(inet_rsk(req)->ir_rmt_port),
1995		   TCP_SYN_RECV,
1996		   0, 0, /* could print option size, but that is af dependent. */
1997		   1,   /* timers active (only the expire timer) */
1998		   jiffies_to_clock_t(ttd),
1999		   req->num_timeout,
2000		   from_kuid_munged(seq_user_ns(seq),
2001				    sock_i_uid(req->rsk_listener)),
2002		   0,  /* non standard timer */
2003		   0, /* open_requests have no inode */
2004		   0, req);
2005}
2006
2007static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2008{
2009	const struct in6_addr *dest, *src;
2010	__u16 destp, srcp;
2011	int timer_active;
2012	unsigned long timer_expires;
2013	const struct inet_sock *inet = inet_sk(sp);
2014	const struct tcp_sock *tp = tcp_sk(sp);
2015	const struct inet_connection_sock *icsk = inet_csk(sp);
2016	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2017	int rx_queue;
2018	int state;
2019
2020	dest  = &sp->sk_v6_daddr;
2021	src   = &sp->sk_v6_rcv_saddr;
2022	destp = ntohs(inet->inet_dport);
2023	srcp  = ntohs(inet->inet_sport);
2024
2025	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2026	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2027	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2028		timer_active	= 1;
2029		timer_expires	= icsk->icsk_timeout;
2030	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2031		timer_active	= 4;
2032		timer_expires	= icsk->icsk_timeout;
2033	} else if (timer_pending(&sp->sk_timer)) {
2034		timer_active	= 2;
2035		timer_expires	= sp->sk_timer.expires;
2036	} else {
2037		timer_active	= 0;
2038		timer_expires = jiffies;
2039	}
2040
2041	state = inet_sk_state_load(sp);
2042	if (state == TCP_LISTEN)
2043		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2044	else
2045		/* Because we don't lock the socket,
2046		 * we might find a transient negative value.
2047		 */
2048		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2049				      READ_ONCE(tp->copied_seq), 0);
2050
2051	seq_printf(seq,
2052		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2053		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2054		   i,
2055		   src->s6_addr32[0], src->s6_addr32[1],
2056		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2057		   dest->s6_addr32[0], dest->s6_addr32[1],
2058		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2059		   state,
2060		   READ_ONCE(tp->write_seq) - tp->snd_una,
2061		   rx_queue,
2062		   timer_active,
2063		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2064		   icsk->icsk_retransmits,
2065		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2066		   icsk->icsk_probes_out,
2067		   sock_i_ino(sp),
2068		   refcount_read(&sp->sk_refcnt), sp,
2069		   jiffies_to_clock_t(icsk->icsk_rto),
2070		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2071		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2072		   tp->snd_cwnd,
2073		   state == TCP_LISTEN ?
2074			fastopenq->max_qlen :
2075			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2076		   );
2077}
2078
2079static void get_timewait6_sock(struct seq_file *seq,
2080			       struct inet_timewait_sock *tw, int i)
2081{
2082	long delta = tw->tw_timer.expires - jiffies;
2083	const struct in6_addr *dest, *src;
2084	__u16 destp, srcp;
2085
2086	dest = &tw->tw_v6_daddr;
2087	src  = &tw->tw_v6_rcv_saddr;
2088	destp = ntohs(tw->tw_dport);
2089	srcp  = ntohs(tw->tw_sport);
2090
2091	seq_printf(seq,
2092		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2093		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2094		   i,
2095		   src->s6_addr32[0], src->s6_addr32[1],
2096		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2097		   dest->s6_addr32[0], dest->s6_addr32[1],
2098		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2099		   tw->tw_substate, 0, 0,
2100		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2101		   refcount_read(&tw->tw_refcnt), tw);
2102}
2103
2104static int tcp6_seq_show(struct seq_file *seq, void *v)
2105{
2106	struct tcp_iter_state *st;
2107	struct sock *sk = v;
2108
2109	if (v == SEQ_START_TOKEN) {
2110		seq_puts(seq,
2111			 "  sl  "
2112			 "local_address                         "
2113			 "remote_address                        "
2114			 "st tx_queue rx_queue tr tm->when retrnsmt"
2115			 "   uid  timeout inode\n");
2116		goto out;
2117	}
2118	st = seq->private;
2119
2120	if (sk->sk_state == TCP_TIME_WAIT)
2121		get_timewait6_sock(seq, v, st->num);
2122	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2123		get_openreq6(seq, v, st->num);
2124	else
2125		get_tcp6_sock(seq, v, st->num);
2126out:
2127	return 0;
2128}
2129
/*
 * seq_file callbacks registered for the "tcp6" proc entry by
 * tcp6_proc_init().  Only ->show is IPv6 specific; start/next/stop are
 * the tcp_seq_* iterators shared with the rest of TCP.
 */
2130static const struct seq_operations tcp6_seq_ops = {
2131	.show		= tcp6_seq_show,
2132	.start		= tcp_seq_start,
2133	.next		= tcp_seq_next,
2134	.stop		= tcp_seq_stop,
2135};
2136
/*
 * Per-family data handed to proc_create_net_data() for the "tcp6" entry;
 * it only records that this entry is for AF_INET6.
 */
2137static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2138	.family		= AF_INET6,
2139};
2140
2141int __net_init tcp6_proc_init(struct net *net)
2142{
2143	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2144			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2145		return -ENOMEM;
2146	return 0;
2147}
2148
/* Remove the "tcp6" entry that tcp6_proc_init() created for @net. */
2149void tcp6_proc_exit(struct net *net)
2150{
2151	remove_proc_entry("tcp6", net->proc_net);
2152}
2153#endif
2154
/*
 * struct proto for TCP over IPv6 sockets ("TCPv6").
 *
 * Most callbacks are the address-family independent tcp_* / inet_csk_*
 * helpers; the IPv6-specific entries are pre_connect/connect, init,
 * destroy, backlog_rcv and hash.  The object size is that of
 * struct tcp6_sock.  The symbol is exported GPL-only.
 */
2155struct proto tcpv6_prot = {
2156	.name			= "TCPv6",
2157	.owner			= THIS_MODULE,
2158	.close			= tcp_close,
2159	.pre_connect		= tcp_v6_pre_connect,
2160	.connect		= tcp_v6_connect,
2161	.disconnect		= tcp_disconnect,
2162	.accept			= inet_csk_accept,
2163	.ioctl			= tcp_ioctl,
2164	.init			= tcp_v6_init_sock,
2165	.destroy		= tcp_v6_destroy_sock,
2166	.shutdown		= tcp_shutdown,
2167	.setsockopt		= tcp_setsockopt,
2168	.getsockopt		= tcp_getsockopt,
2169	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
2170	.keepalive		= tcp_set_keepalive,
2171	.recvmsg		= tcp_recvmsg,
2172	.sendmsg		= tcp_sendmsg,
2173	.sendpage		= tcp_sendpage,
2174	.backlog_rcv		= tcp_v6_do_rcv,
2175	.release_cb		= tcp_release_cb,
2176	.hash			= inet6_hash,
2177	.unhash			= inet_unhash,
2178	.get_port		= inet_csk_get_port,
2179#ifdef CONFIG_BPF_SYSCALL
2180	.psock_update_sk_prot	= tcp_bpf_update_proto,
2181#endif
2182	.enter_memory_pressure	= tcp_enter_memory_pressure,
2183	.leave_memory_pressure	= tcp_leave_memory_pressure,
2184	.stream_memory_free	= tcp_stream_memory_free,
2185	.sockets_allocated	= &tcp_sockets_allocated,
2186	.memory_allocated	= &tcp_memory_allocated,
2187	.memory_pressure	= &tcp_memory_pressure,
2188	.orphan_count		= &tcp_orphan_count,
2189	.sysctl_mem		= sysctl_tcp_mem,
	/* wmem/rmem limits are looked up per-netns via these offsets */
2190	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2191	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2192	.max_header		= MAX_TCP_HEADER,
2193	.obj_size		= sizeof(struct tcp6_sock),
2194	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2195	.twsk_prot		= &tcp6_timewait_sock_ops,
2196	.rsk_prot		= &tcp6_request_sock_ops,
2197	.h.hashinfo		= &tcp_hashinfo,
2198	.no_autobind		= true,




2199	.diag_destroy		= tcp_abort,
2200};
2201EXPORT_SYMBOL_GPL(tcpv6_prot);
2202
/*
 * inet6 protocol handler registered for IPPROTO_TCP by tcpv6_init():
 * receive, ICMPv6 error and early-demux entry points.
 */
2203/* thinking of making this const? Don't.
2204 * early_demux can change based on sysctl.
2205 */
2206static struct inet6_protocol tcpv6_protocol = {
2207	.early_demux	=	tcp_v6_early_demux,
2208	.early_demux_handler =  tcp_v6_early_demux,
2209	.handler	=	tcp_v6_rcv,
2210	.err_handler	=	tcp_v6_err,
2211	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2212};
2213
/*
 * Socket-layer switch entry: SOCK_STREAM / IPPROTO_TCP sockets use
 * tcpv6_prot with the inet6 stream socket operations.  Registered by
 * tcpv6_init() and flagged permanent and connection-oriented (ICSK).
 */
2214static struct inet_protosw tcpv6_protosw = {
2215	.type		=	SOCK_STREAM,
2216	.protocol	=	IPPROTO_TCP,
2217	.prot		=	&tcpv6_prot,
2218	.ops		=	&inet6_stream_ops,
2219	.flags		=	INET_PROTOSW_PERMANENT |
2220				INET_PROTOSW_ICSK,
2221};
2222
2223static int __net_init tcpv6_net_init(struct net *net)
2224{
2225	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2226				    SOCK_RAW, IPPROTO_TCP, net);
2227}
2228
/* Per-netns exit: destroy the control socket created by tcpv6_net_init(). */
2229static void __net_exit tcpv6_net_exit(struct net *net)
2230{
2231	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2232}
2233
/*
 * Batched netns exit: purge AF_INET6 timewait sockets from tcp_hashinfo.
 * @net_exit_list is not consulted; the purge is a single call over the
 * shared hash table.
 */
2234static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2235{
2236	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2237}
2238
/* Per-network-namespace hooks registered by tcpv6_init(). */
2239static struct pernet_operations tcpv6_net_ops = {
2240	.init	    = tcpv6_net_init,
2241	.exit	    = tcpv6_net_exit,
2242	.exit_batch = tcpv6_net_exit_batch,
2243};
2244
2245int __init tcpv6_init(void)
2246{
2247	int ret;
2248
2249	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2250	if (ret)
2251		goto out;
2252
2253	/* register inet6 protocol */
2254	ret = inet6_register_protosw(&tcpv6_protosw);
2255	if (ret)
2256		goto out_tcpv6_protocol;
2257
2258	ret = register_pernet_subsys(&tcpv6_net_ops);
2259	if (ret)
2260		goto out_tcpv6_protosw;
2261
2262	ret = mptcpv6_init();
2263	if (ret)
2264		goto out_tcpv6_pernet_subsys;
2265
2266out:
2267	return ret;
2268
2269out_tcpv6_pernet_subsys:
2270	unregister_pernet_subsys(&tcpv6_net_ops);
2271out_tcpv6_protosw:
2272	inet6_unregister_protosw(&tcpv6_protosw);
2273out_tcpv6_protocol:
2274	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2275	goto out;
2276}
2277
/*
 * Undo the registrations made by tcpv6_init(), in reverse order:
 * per-netns operations, protosw entry, then the inet6 protocol handler.
 */
2278void tcpv6_exit(void)
2279{
2280	unregister_pernet_subsys(&tcpv6_net_ops);
2281	inet6_unregister_protosw(&tcpv6_protosw);
2282	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2283}
v5.4
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *	TCP over IPv6
   4 *	Linux INET6 implementation
   5 *
   6 *	Authors:
   7 *	Pedro Roque		<roque@di.fc.ul.pt>
   8 *
   9 *	Based on:
  10 *	linux/net/ipv4/tcp.c
  11 *	linux/net/ipv4/tcp_input.c
  12 *	linux/net/ipv4/tcp_output.c
  13 *
  14 *	Fixes:
  15 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
  16 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
  17 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
  18 *					a single port at the same time.
  19 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
  20 */
  21
  22#include <linux/bottom_half.h>
  23#include <linux/module.h>
  24#include <linux/errno.h>
  25#include <linux/types.h>
  26#include <linux/socket.h>
  27#include <linux/sockios.h>
  28#include <linux/net.h>
  29#include <linux/jiffies.h>
  30#include <linux/in.h>
  31#include <linux/in6.h>
  32#include <linux/netdevice.h>
  33#include <linux/init.h>
  34#include <linux/jhash.h>
  35#include <linux/ipsec.h>
  36#include <linux/times.h>
  37#include <linux/slab.h>
  38#include <linux/uaccess.h>
  39#include <linux/ipv6.h>
  40#include <linux/icmpv6.h>
  41#include <linux/random.h>
  42#include <linux/indirect_call_wrapper.h>
  43
  44#include <net/tcp.h>
  45#include <net/ndisc.h>
  46#include <net/inet6_hashtables.h>
  47#include <net/inet6_connection_sock.h>
  48#include <net/ipv6.h>
  49#include <net/transp_v6.h>
  50#include <net/addrconf.h>
  51#include <net/ip6_route.h>
  52#include <net/ip6_checksum.h>
  53#include <net/inet_ecn.h>
  54#include <net/protocol.h>
  55#include <net/xfrm.h>
  56#include <net/snmp.h>
  57#include <net/dsfield.h>
  58#include <net/timewait_sock.h>
  59#include <net/inet_common.h>
  60#include <net/secure_seq.h>
  61#include <net/busy_poll.h>
  62
  63#include <linux/proc_fs.h>
  64#include <linux/seq_file.h>
  65
  66#include <crypto/hash.h>
  67#include <linux/scatterlist.h>
  68
  69#include <trace/events/tcp.h>
  70
  71static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
  72static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
  73				      struct request_sock *req);
  74
  75static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
  76
  77static const struct inet_connection_sock_af_ops ipv6_mapped;
  78static const struct inet_connection_sock_af_ops ipv6_specific;
  79#ifdef CONFIG_TCP_MD5SIG
  80static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
  81static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
  82#else
/*
 * Stub used when CONFIG_TCP_MD5SIG is not set: there are never any MD5
 * keys, so every lookup returns NULL.
 */
  83static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
  84						   const struct in6_addr *addr)

  85{
  86	return NULL;
  87}
  88#endif
  89
  90/* Helper returning the inet6 address from a given tcp socket.
  91 * It can be used in TCP stack instead of inet6_sk(sk).
  92 * This avoids a dereference and allow compiler optimizations.
  93 * It is a specialized version of inet6_sk_generic().
  94 */
  95static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
  96{
  97	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
  98
  99	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
 100}
 101
 102static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 103{
 104	struct dst_entry *dst = skb_dst(skb);
 105
 106	if (dst && dst_hold_safe(dst)) {
 107		const struct rt6_info *rt = (const struct rt6_info *)dst;
 108
 109		sk->sk_rx_dst = dst;
 110		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
 111		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
 112	}
 113}
 114
 115static u32 tcp_v6_init_seq(const struct sk_buff *skb)
 116{
 117	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
 118				ipv6_hdr(skb)->saddr.s6_addr32,
 119				tcp_hdr(skb)->dest,
 120				tcp_hdr(skb)->source);
 121}
 122
 123static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
 124{
 125	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
 126				   ipv6_hdr(skb)->saddr.s6_addr32);
 127}
 128
 129static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
 130			      int addr_len)
 131{
 132	/* This check is replicated from tcp_v6_connect() and intended to
 133	 * prevent BPF program called below from accessing bytes that are out
 134	 * of the bound specified by user in addr_len.
 135	 */
 136	if (addr_len < SIN6_LEN_RFC2133)
 137		return -EINVAL;
 138
 139	sock_owned_by_me(sk);
 140
 141	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
 142}
 143
/*
 * Start an active open on a TCPv6 socket.
 *
 * @sk:       socket to connect
 * @uaddr:    destination, interpreted as struct sockaddr_in6
 * @addr_len: length of @uaddr supplied by the caller
 *
 * Validates the address, resolves an optional flow label, and either
 * hands a v4-mapped destination over to tcp_v4_connect() or performs a
 * native IPv6 connect: route lookup, source address selection, hashing
 * into the connection table, initial sequence/timestamp selection and
 * finally tcp_connect().
 *
 * Returns 0 on success or a negative errno.  On failure the destination
 * port and route capabilities are cleared; failures after the state was
 * moved to SYN_SENT also put the socket back into TCP_CLOSE.
 */
 144static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 145			  int addr_len)
 146{
 147	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
 148	struct inet_sock *inet = inet_sk(sk);
 149	struct inet_connection_sock *icsk = inet_csk(sk);
 150	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
 151	struct tcp_sock *tp = tcp_sk(sk);
 152	struct in6_addr *saddr = NULL, *final_p, final;
 153	struct ipv6_txoptions *opt;
 154	struct flowi6 fl6;
 155	struct dst_entry *dst;
 156	int addr_type;
 157	int err;
 158	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
 159
 160	if (addr_len < SIN6_LEN_RFC2133)
 161		return -EINVAL;
 162
 163	if (usin->sin6_family != AF_INET6)
 164		return -EAFNOSUPPORT;
 165
 166	memset(&fl6, 0, sizeof(fl6));
 167
	/* Flow labels are only honoured when the socket opted in (sndflow);
	 * a non-zero label must be one this socket can look up.
	 */
 168	if (np->sndflow) {
 169		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
 170		IP6_ECN_flow_init(fl6.flowlabel);
 171		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
 172			struct ip6_flowlabel *flowlabel;
 173			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
 174			if (IS_ERR(flowlabel))
 175				return -EINVAL;
 176			fl6_sock_release(flowlabel);
 177		}
 178	}
 179
 180	/*
 181	 *	connect() to INADDR_ANY means loopback (BSD'ism).
 182	 */
 183
 184	if (ipv6_addr_any(&usin->sin6_addr)) {
 185		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
 186			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
 187					       &usin->sin6_addr);
 188		else
 189			usin->sin6_addr = in6addr_loopback;
 190	}
 191
 192	addr_type = ipv6_addr_type(&usin->sin6_addr);
 193
 194	if (addr_type & IPV6_ADDR_MULTICAST)
 195		return -ENETUNREACH;
 196
 197	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		/* sin6_scope_id is only present in the full-size sockaddr */
 198		if (addr_len >= sizeof(struct sockaddr_in6) &&
 199		    usin->sin6_scope_id) {
 200			/* If interface is set while binding, indices
 201			 * must coincide.
 202			 */
 203			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
 204				return -EINVAL;
 205
 206			sk->sk_bound_dev_if = usin->sin6_scope_id;
 207		}
 208
 209		/* Connect to link-local address requires an interface */
 210		if (!sk->sk_bound_dev_if)
 211			return -EINVAL;
 212	}
 213
	/* Connecting to a different peer than last time: drop the cached
	 * timestamp state and the previous write sequence.
	 */
 214	if (tp->rx_opt.ts_recent_stamp &&
 215	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
 216		tp->rx_opt.ts_recent = 0;
 217		tp->rx_opt.ts_recent_stamp = 0;
 218		WRITE_ONCE(tp->write_seq, 0);
 219	}
 220
 221	sk->sk_v6_daddr = usin->sin6_addr;
 222	np->flow_label = fl6.flowlabel;
 223
 224	/*
 225	 *	TCP over IPv4
 226	 */
 227
 228	if (addr_type & IPV6_ADDR_MAPPED) {
 229		u32 exthdrlen = icsk->icsk_ext_hdr_len;
 230		struct sockaddr_in sin;
 231
 232		if (__ipv6_only_sock(sk))
 233			return -ENETUNREACH;
 234
 235		sin.sin_family = AF_INET;
 236		sin.sin_port = usin->sin6_port;
 237		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
 238
		/* Switch the socket to the v4-mapped operations before
		 * delegating; every switch is reverted below on failure.
		 */
 239		icsk->icsk_af_ops = &ipv6_mapped;


 240		sk->sk_backlog_rcv = tcp_v4_do_rcv;
 241#ifdef CONFIG_TCP_MD5SIG
 242		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
 243#endif
 244
 245		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
 246
 247		if (err) {
 248			icsk->icsk_ext_hdr_len = exthdrlen;
 249			icsk->icsk_af_ops = &ipv6_specific;


 250			sk->sk_backlog_rcv = tcp_v6_do_rcv;
 251#ifdef CONFIG_TCP_MD5SIG
 252			tp->af_specific = &tcp_sock_ipv6_specific;
 253#endif
 254			goto failure;
 255		}
 256		np->saddr = sk->sk_v6_rcv_saddr;
 257
 258		return err;
 259	}
 260
	/* Native IPv6 path: use the bound source address if there is one */
 261	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
 262		saddr = &sk->sk_v6_rcv_saddr;
 263
 264	fl6.flowi6_proto = IPPROTO_TCP;
 265	fl6.daddr = sk->sk_v6_daddr;
 266	fl6.saddr = saddr ? *saddr : np->saddr;
 267	fl6.flowi6_oif = sk->sk_bound_dev_if;
 268	fl6.flowi6_mark = sk->sk_mark;
 269	fl6.fl6_dport = usin->sin6_port;
 270	fl6.fl6_sport = inet->inet_sport;
 271	fl6.flowi6_uid = sk->sk_uid;
 272
 273	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
 274	final_p = fl6_update_dst(&fl6, opt, &final);
 275
 276	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 277
 278	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
 279	if (IS_ERR(dst)) {
 280		err = PTR_ERR(dst);
 281		goto failure;
 282	}
 283
	/* No bound source address: adopt the one left in the flow by the
	 * route lookup.
	 */
 284	if (!saddr) {
 285		saddr = &fl6.saddr;
 286		sk->sk_v6_rcv_saddr = *saddr;
 287	}
 288
 289	/* set the source address */
 290	np->saddr = *saddr;
 291	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 292
 293	sk->sk_gso_type = SKB_GSO_TCPV6;
 294	ip6_dst_store(sk, dst, NULL, NULL);
 295
 296	icsk->icsk_ext_hdr_len = 0;
 297	if (opt)
 298		icsk->icsk_ext_hdr_len = opt->opt_flen +
 299					 opt->opt_nflen;
 300
 301	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 302
 303	inet->inet_dport = usin->sin6_port;
 304
 305	tcp_set_state(sk, TCP_SYN_SENT);
 306	err = inet6_hash_connect(tcp_death_row, sk);
 307	if (err)
 308		goto late_failure;
 309
 310	sk_set_txhash(sk);
 311
	/* In repair mode the sequence number and timestamp offset are left
	 * untouched; otherwise pick them now, keeping a write_seq that is
	 * already non-zero.
	 */
 312	if (likely(!tp->repair)) {
 313		if (!tp->write_seq)
 314			WRITE_ONCE(tp->write_seq,
 315				   secure_tcpv6_seq(np->saddr.s6_addr32,
 316						    sk->sk_v6_daddr.s6_addr32,
 317						    inet->inet_sport,
 318						    inet->inet_dport));
 319		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
 320						   np->saddr.s6_addr32,
 321						   sk->sk_v6_daddr.s6_addr32);
 322	}
 323
 324	if (tcp_fastopen_defer_connect(sk, &err))
 325		return err;
 326	if (err)
 327		goto late_failure;
 328
 329	err = tcp_connect(sk);
 330	if (err)
 331		goto late_failure;
 332
 333	return 0;
 334
 335late_failure:
 336	tcp_set_state(sk, TCP_CLOSE);
 337failure:
 338	inet->inet_dport = 0;
 339	sk->sk_route_caps = 0;
 340	return err;
 341}
 342
 343static void tcp_v6_mtu_reduced(struct sock *sk)
 344{
 345	struct dst_entry *dst;
 
 346
 347	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
 348		return;
 349
 350	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
 
 
 
 
 
 
 
 
 351	if (!dst)
 352		return;
 353
 354	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
 355		tcp_sync_mss(sk, dst_mtu(dst));
 356		tcp_simple_retransmit(sk);
 357	}
 358}
 359
 360static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 361		u8 type, u8 code, int offset, __be32 info)
 362{
 363	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
 364	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
 365	struct net *net = dev_net(skb->dev);
 366	struct request_sock *fastopen;
 367	struct ipv6_pinfo *np;
 368	struct tcp_sock *tp;
 369	__u32 seq, snd_una;
 370	struct sock *sk;
 371	bool fatal;
 372	int err;
 373
 374	sk = __inet6_lookup_established(net, &tcp_hashinfo,
 375					&hdr->daddr, th->dest,
 376					&hdr->saddr, ntohs(th->source),
 377					skb->dev->ifindex, inet6_sdif(skb));
 378
 379	if (!sk) {
 380		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
 381				  ICMP6_MIB_INERRORS);
 382		return -ENOENT;
 383	}
 384
 385	if (sk->sk_state == TCP_TIME_WAIT) {
 386		inet_twsk_put(inet_twsk(sk));
 387		return 0;
 388	}
 389	seq = ntohl(th->seq);
 390	fatal = icmpv6_err_convert(type, code, &err);
 391	if (sk->sk_state == TCP_NEW_SYN_RECV) {
 392		tcp_req_err(sk, seq, fatal);
 393		return 0;
 394	}
 395
 396	bh_lock_sock(sk);
 397	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
 398		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
 399
 400	if (sk->sk_state == TCP_CLOSE)
 401		goto out;
 402
 403	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
 404		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
 405		goto out;
 406	}
 407
 408	tp = tcp_sk(sk);
 409	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
 410	fastopen = rcu_dereference(tp->fastopen_rsk);
 411	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
 412	if (sk->sk_state != TCP_LISTEN &&
 413	    !between(seq, snd_una, tp->snd_nxt)) {
 414		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
 415		goto out;
 416	}
 417
 418	np = tcp_inet6_sk(sk);
 419
 420	if (type == NDISC_REDIRECT) {
 421		if (!sock_owned_by_user(sk)) {
 422			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
 423
 424			if (dst)
 425				dst->ops->redirect(dst, sk, skb);
 426		}
 427		goto out;
 428	}
 429
 430	if (type == ICMPV6_PKT_TOOBIG) {
 
 
 431		/* We are not interested in TCP_LISTEN and open_requests
 432		 * (SYN-ACKs send out by Linux are always <576bytes so
 433		 * they should go through unfragmented).
 434		 */
 435		if (sk->sk_state == TCP_LISTEN)
 436			goto out;
 437
 438		if (!ip6_sk_accept_pmtu(sk))
 439			goto out;
 440
 441		tp->mtu_info = ntohl(info);
 
 
 
 
 442		if (!sock_owned_by_user(sk))
 443			tcp_v6_mtu_reduced(sk);
 444		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
 445					   &sk->sk_tsq_flags))
 446			sock_hold(sk);
 447		goto out;
 448	}
 449
 450
 451	/* Might be for an request_sock */
 452	switch (sk->sk_state) {
 453	case TCP_SYN_SENT:
 454	case TCP_SYN_RECV:
 455		/* Only in fast or simultaneous open. If a fast open socket is
 456		 * is already accepted it is treated as a connected one below.
 457		 */
 458		if (fastopen && !fastopen->sk)
 459			break;
 460
 
 
 461		if (!sock_owned_by_user(sk)) {
 462			sk->sk_err = err;
 463			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
 464
 465			tcp_done(sk);
 466		} else
 467			sk->sk_err_soft = err;
 468		goto out;
 
 
 
 
 
 
 
 
 
 469	}
 470
 471	if (!sock_owned_by_user(sk) && np->recverr) {
 472		sk->sk_err = err;
 473		sk->sk_error_report(sk);
 474	} else
 475		sk->sk_err_soft = err;
 476
 477out:
 478	bh_unlock_sock(sk);
 479	sock_put(sk);
 480	return 0;
 481}
 482
 483
 484static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
 485			      struct flowi *fl,
 486			      struct request_sock *req,
 487			      struct tcp_fastopen_cookie *foc,
 488			      enum tcp_synack_type synack_type)
 
 489{
 490	struct inet_request_sock *ireq = inet_rsk(req);
 491	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
 492	struct ipv6_txoptions *opt;
 493	struct flowi6 *fl6 = &fl->u.ip6;
 494	struct sk_buff *skb;
 495	int err = -ENOMEM;
 
 496
 497	/* First, grab a route. */
 498	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
 499					       IPPROTO_TCP)) == NULL)
 500		goto done;
 501
 502	skb = tcp_make_synack(sk, dst, req, foc, synack_type);
 503
 504	if (skb) {
 505		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
 506				    &ireq->ir_v6_rmt_addr);
 507
 508		fl6->daddr = ireq->ir_v6_rmt_addr;
 509		if (np->repflow && ireq->pktopts)
 510			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
 511
 
 
 
 
 
 
 
 
 
 512		rcu_read_lock();
 513		opt = ireq->ipv6_opt;
 514		if (!opt)
 515			opt = rcu_dereference(np->opt);
 516		err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass,
 517			       sk->sk_priority);
 518		rcu_read_unlock();
 519		err = net_xmit_eval(err);
 520	}
 521
 522done:
 523	return err;
 524}
 525
 526
 527static void tcp_v6_reqsk_destructor(struct request_sock *req)
 528{
 529	kfree(inet_rsk(req)->ipv6_opt);
 530	kfree_skb(inet_rsk(req)->pktopts);
 531}
 532
 533#ifdef CONFIG_TCP_MD5SIG
 534static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
 535						   const struct in6_addr *addr)
 
 536{
 537	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
 
 538}
 539
 540static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
 541						const struct sock *addr_sk)
 542{
 543	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
 
 
 
 
 
 544}
 545
/*
 * Handle the TCP_MD5SIG / TCP_MD5SIG_EXT socket options for an IPv6
 * socket: copy a struct tcp_md5sig from user space and add or delete
 * the key it describes.
 *
 * @sk:      socket the key applies to
 * @optname: option being set; prefix matching is only honoured for
 *           TCP_MD5SIG_EXT with TCP_MD5SIG_FLAG_PREFIX
 * @optval:  user pointer to a struct tcp_md5sig
 * @optlen:  size of the user buffer
 *
 * A zero key length deletes the key, otherwise the key is added.
 * V4-mapped peers are stored as AF_INET keys on the embedded IPv4
 * address.
 *
 * Returns -EINVAL for a short buffer, wrong family, bad prefix length
 * or over-long key, -EFAULT if the copy fails, otherwise the result of
 * tcp_md5_do_add()/tcp_md5_do_del().
 */
 546static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
 547				 char __user *optval, int optlen)
 548{
 549	struct tcp_md5sig cmd;
 550	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;

 551	u8 prefixlen;
 552
 553	if (optlen < sizeof(cmd))
 554		return -EINVAL;
 555
 556	if (copy_from_user(&cmd, optval, sizeof(cmd)))
 557		return -EFAULT;
 558
 559	if (sin6->sin6_family != AF_INET6)
 560		return -EINVAL;
 561
	/* Explicit prefix: at most 128 bits, or 32 for a v4-mapped peer.
	 * Without one, match the full address.
	 */
 562	if (optname == TCP_MD5SIG_EXT &&
 563	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
 564		prefixlen = cmd.tcpm_prefixlen;
 565		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
 566					prefixlen > 32))
 567			return -EINVAL;
 568	} else {
 569		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
 570	}
 571

















	/* Zero key length means "delete this key" */
 572	if (!cmd.tcpm_keylen) {
 573		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
 574			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
 575					      AF_INET, prefixlen);

 576		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
 577				      AF_INET6, prefixlen);
 578	}
 579
 580	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
 581		return -EINVAL;
 582
 583	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
 584		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
 585				      AF_INET, prefixlen, cmd.tcpm_key,
 586				      cmd.tcpm_keylen, GFP_KERNEL);

 587
 588	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
 589			      AF_INET6, prefixlen, cmd.tcpm_key,
 590			      cmd.tcpm_keylen, GFP_KERNEL);
 591}
 592
 593static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
 594				   const struct in6_addr *daddr,
 595				   const struct in6_addr *saddr,
 596				   const struct tcphdr *th, int nbytes)
 597{
 598	struct tcp6_pseudohdr *bp;
 599	struct scatterlist sg;
 600	struct tcphdr *_th;
 601
 602	bp = hp->scratch;
 603	/* 1. TCP pseudo-header (RFC2460) */
 604	bp->saddr = *saddr;
 605	bp->daddr = *daddr;
 606	bp->protocol = cpu_to_be32(IPPROTO_TCP);
 607	bp->len = cpu_to_be32(nbytes);
 608
 609	_th = (struct tcphdr *)(bp + 1);
 610	memcpy(_th, th, sizeof(*th));
 611	_th->check = 0;
 612
 613	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
 614	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
 615				sizeof(*bp) + sizeof(*th));
 616	return crypto_ahash_update(hp->md5_req);
 617}
 618
 619static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 620			       const struct in6_addr *daddr, struct in6_addr *saddr,
 621			       const struct tcphdr *th)
 622{
 623	struct tcp_md5sig_pool *hp;
 624	struct ahash_request *req;
 625
 626	hp = tcp_get_md5sig_pool();
 627	if (!hp)
 628		goto clear_hash_noput;
 629	req = hp->md5_req;
 630
 631	if (crypto_ahash_init(req))
 632		goto clear_hash;
 633	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
 634		goto clear_hash;
 635	if (tcp_md5_hash_key(hp, key))
 636		goto clear_hash;
 637	ahash_request_set_crypt(req, NULL, md5_hash, 0);
 638	if (crypto_ahash_final(req))
 639		goto clear_hash;
 640
 641	tcp_put_md5sig_pool();
 642	return 0;
 643
 644clear_hash:
 645	tcp_put_md5sig_pool();
 646clear_hash_noput:
 647	memset(md5_hash, 0, 16);
 648	return 1;
 649}
 650
 651static int tcp_v6_md5_hash_skb(char *md5_hash,
 652			       const struct tcp_md5sig_key *key,
 653			       const struct sock *sk,
 654			       const struct sk_buff *skb)
 655{
 656	const struct in6_addr *saddr, *daddr;
 657	struct tcp_md5sig_pool *hp;
 658	struct ahash_request *req;
 659	const struct tcphdr *th = tcp_hdr(skb);
 660
 661	if (sk) { /* valid for establish/request sockets */
 662		saddr = &sk->sk_v6_rcv_saddr;
 663		daddr = &sk->sk_v6_daddr;
 664	} else {
 665		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 666		saddr = &ip6h->saddr;
 667		daddr = &ip6h->daddr;
 668	}
 669
 670	hp = tcp_get_md5sig_pool();
 671	if (!hp)
 672		goto clear_hash_noput;
 673	req = hp->md5_req;
 674
 675	if (crypto_ahash_init(req))
 676		goto clear_hash;
 677
 678	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
 679		goto clear_hash;
 680	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
 681		goto clear_hash;
 682	if (tcp_md5_hash_key(hp, key))
 683		goto clear_hash;
 684	ahash_request_set_crypt(req, NULL, md5_hash, 0);
 685	if (crypto_ahash_final(req))
 686		goto clear_hash;
 687
 688	tcp_put_md5sig_pool();
 689	return 0;
 690
 691clear_hash:
 692	tcp_put_md5sig_pool();
 693clear_hash_noput:
 694	memset(md5_hash, 0, 16);
 695	return 1;
 696}
 697
 698#endif
 699
/*
 * Check the TCP-MD5 option of an incoming segment against the key
 * configured on @sk for the packet's source address.
 *
 * Returns true when the segment must be dropped: a key is configured
 * but the option is missing, the option is present but no key is
 * configured, or the signature cannot be computed or does not match.
 * Returns false when the segment is acceptable, which is always the
 * case when CONFIG_TCP_MD5SIG is not set.
 */
 700static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
 701				    const struct sk_buff *skb)

 702{
 703#ifdef CONFIG_TCP_MD5SIG
 704	const __u8 *hash_location = NULL;
 705	struct tcp_md5sig_key *hash_expected;
 706	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 707	const struct tcphdr *th = tcp_hdr(skb);
 708	int genhash;
 709	u8 newhash[16];
 710
 711	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);





 712	hash_location = tcp_parse_md5sig_option(th);
 713
 714	/* We've parsed the options - do we have a hash? */
 715	if (!hash_expected && !hash_location)
 716		return false;
 717
 718	if (hash_expected && !hash_location) {
 719		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
 720		return true;
 721	}
 722
 723	if (!hash_expected && hash_location) {
 724		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
 725		return true;
 726	}
 727
 728	/* check the signature */
	/* sk is passed as NULL so the addresses are taken from the packet */
 729	genhash = tcp_v6_md5_hash_skb(newhash,
 730				      hash_expected,
 731				      NULL, skb);
 732
 733	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
 734		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
 735		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
 736				     genhash ? "failed" : "mismatch",
 737				     &ip6h->saddr, ntohs(th->source),
 738				     &ip6h->daddr, ntohs(th->dest));
 739		return true;
 740	}
 741#endif
 742	return false;
 743}
 744
 745static void tcp_v6_init_req(struct request_sock *req,
 746			    const struct sock *sk_listener,
 747			    struct sk_buff *skb)
 748{
 749	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
 750	struct inet_request_sock *ireq = inet_rsk(req);
 751	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
 752
 753	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
 754	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
 755
 756	/* So that link locals have meaning */
 757	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
 758	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
 759		ireq->ir_iif = tcp_v6_iif(skb);
 760
 761	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
 762	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
 763	     np->rxopt.bits.rxinfo ||
 764	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
 765	     np->rxopt.bits.rxohlim || np->repflow)) {
 766		refcount_inc(&skb->users);
 767		ireq->pktopts = skb;
 768	}
 769}
 770
 771static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
 
 772					  struct flowi *fl,
 773					  const struct request_sock *req)
 774{
 
 
 
 
 
 775	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
 776}
 777
/*
 * Generic request-sock operations for TCPv6: object size plus the
 * SYN-ACK retransmit, ACK, reset, timeout and destructor callbacks.
 */
 778struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
 779	.family		=	AF_INET6,
 780	.obj_size	=	sizeof(struct tcp6_request_sock),
 781	.rtx_syn_ack	=	tcp_rtx_synack,
 782	.send_ack	=	tcp_v6_reqsk_send_ack,
 783	.destructor	=	tcp_v6_reqsk_destructor,
 784	.send_reset	=	tcp_v6_send_reset,
 785	.syn_ack_timeout =	tcp_syn_ack_timeout,
 786};
 787
/*
 * TCP-specific request-sock operations for IPv6.  The MSS clamp is the
 * IPv6 minimum MTU less the IPv6 and TCP header sizes; the MD5 and
 * syncookie hooks are only present when the matching option is built in.
 */
 788static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
 789	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
 790				sizeof(struct ipv6hdr),
 791#ifdef CONFIG_TCP_MD5SIG
 792	.req_md5_lookup	=	tcp_v6_md5_lookup,
 793	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
 794#endif
 795	.init_req	=	tcp_v6_init_req,
 796#ifdef CONFIG_SYN_COOKIES
 797	.cookie_init_seq =	cookie_v6_init_sequence,
 798#endif
 799	.route_req	=	tcp_v6_route_req,
 800	.init_seq	=	tcp_v6_init_seq,
 801	.init_ts_off	=	tcp_v6_init_ts_off,
 802	.send_synack	=	tcp_v6_send_synack,
 803};
 804
/*
 * Build and send a bare TCP segment (RST or ACK) in reply to @skb,
 * using the per-netns control socket rather than @sk for transmission.
 *
 * @sk:       socket the reply relates to, may be NULL
 * @skb:      packet being answered; addresses and ports are swapped
 * @seq:      sequence number for the reply
 * @ack:      acknowledgment number for the reply
 * @win:      advertised window
 * @tsval:    timestamp value, used only when @tsecr is non-zero
 * @tsecr:    timestamp echo; non-zero adds a timestamp option
 * @oif:      output interface, 0 to derive it from @skb
 * @key:      MD5 key; non-NULL adds a signature option (MD5SIG builds)
 * @rst:      non-zero to set the RST flag
 * @tclass:   traffic class for the IPv6 header
 * @label:    flow label for the reply
 * @priority: priority passed to ip6_xmit()
 *
 * Allocation or routing failures are silent: the reply is simply not
 * sent.
 */
 805static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
 806				 u32 ack, u32 win, u32 tsval, u32 tsecr,
 807				 int oif, struct tcp_md5sig_key *key, int rst,
 808				 u8 tclass, __be32 label, u32 priority)
 809{
 810	const struct tcphdr *th = tcp_hdr(skb);
 811	struct tcphdr *t1;
 812	struct sk_buff *buff;
 813	struct flowi6 fl6;
 814	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
 815	struct sock *ctl_sk = net->ipv6.tcp_sk;
 816	unsigned int tot_len = sizeof(struct tcphdr);

 817	struct dst_entry *dst;
 818	__be32 *topt;
 819	__u32 mark = 0;
 820
	/* Work out the TCP header length including the options added below */
 821	if (tsecr)
 822		tot_len += TCPOLEN_TSTAMP_ALIGNED;
 823#ifdef CONFIG_TCP_MD5SIG
 824	if (key)
 825		tot_len += TCPOLEN_MD5SIG_ALIGNED;
 826#endif
 827









 828	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
 829			 GFP_ATOMIC);
 830	if (!buff)
 831		return;
 832
 833	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
 834
 835	t1 = skb_push(buff, tot_len);
 836	skb_reset_transport_header(buff);
 837
 838	/* Swap the send and the receive. */
 839	memset(t1, 0, sizeof(*t1));
 840	t1->dest = th->source;
 841	t1->source = th->dest;
 842	t1->doff = tot_len / 4;
 843	t1->seq = htonl(seq);
 844	t1->ack_seq = htonl(ack);
	/* ACK is set on every non-RST reply, and on a RST only when the
	 * packet being answered did not itself carry ACK.
	 */
 845	t1->ack = !rst || !th->ack;
 846	t1->rst = rst;
 847	t1->window = htons(win);
 848
 849	topt = (__be32 *)(t1 + 1);
 850
 851	if (tsecr) {
 852		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 853				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
 854		*topt++ = htonl(tsval);
 855		*topt++ = htonl(tsecr);
 856	}
 857



 858#ifdef CONFIG_TCP_MD5SIG
 859	if (key) {
 860		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 861				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
 862		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
 863				    &ipv6_hdr(skb)->saddr,
 864				    &ipv6_hdr(skb)->daddr, t1);
 865	}
 866#endif
 867
	/* The reply flows in the opposite direction of the received packet */
 868	memset(&fl6, 0, sizeof(fl6));
 869	fl6.daddr = ipv6_hdr(skb)->saddr;
 870	fl6.saddr = ipv6_hdr(skb)->daddr;
 871	fl6.flowlabel = label;
 872
 873	buff->ip_summed = CHECKSUM_PARTIAL;
 874	buff->csum = 0;
 875
 876	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
 877
 878	fl6.flowi6_proto = IPPROTO_TCP;
	/* Destinations that need a strict route go back out the interface
	 * the packet arrived on unless the caller chose one.
	 */
 879	if (rt6_need_strict(&fl6.daddr) && !oif)
 880		fl6.flowi6_oif = tcp_v6_iif(skb);
 881	else {
 882		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
 883			oif = skb->skb_iif;
 884
 885		fl6.flowi6_oif = oif;
 886	}
 887
 888	if (sk) {
 889		if (sk->sk_state == TCP_TIME_WAIT) {
 890			mark = inet_twsk(sk)->tw_mark;
 891			/* autoflowlabel relies on buff->hash */
 892			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
 893				     PKT_HASH_TYPE_L4);
 894		} else {
 895			mark = sk->sk_mark;
 896		}
 897		buff->tstamp = tcp_transmit_time(sk);
 898	}
	/* A reply mark derived from the incoming packet wins over the
	 * socket's mark when it is non-zero.
	 */
 899	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
 900	fl6.fl6_dport = t1->dest;
 901	fl6.fl6_sport = t1->source;
 902	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
 903	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 904
 905	/* Pass a socket to ip6_dst_lookup either it is for RST
 906	 * Underlying function will use this to retrieve the network
 907	 * namespace
 908	 */
 909	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
 910	if (!IS_ERR(dst)) {
 911		skb_dst_set(buff, dst);
 912		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
 913			 priority);
 914		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 915		if (rst)
 916			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
 917		return;
 918	}
 919
	/* No route: drop the reply we built */
 920	kfree_skb(buff);
 921}
 922
 923static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
 924{
 925	const struct tcphdr *th = tcp_hdr(skb);
 926	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 927	u32 seq = 0, ack_seq = 0;
 928	struct tcp_md5sig_key *key = NULL;
 929#ifdef CONFIG_TCP_MD5SIG
 930	const __u8 *hash_location = NULL;
 931	unsigned char newhash[16];
 932	int genhash;
 933	struct sock *sk1 = NULL;
 934#endif
 935	__be32 label = 0;
 936	u32 priority = 0;
 937	struct net *net;
 938	int oif = 0;
 939
 940	if (th->rst)
 941		return;
 942
 943	/* If sk not NULL, it means we did a successful lookup and incoming
 944	 * route had to be correct. prequeue might have dropped our dst.
 945	 */
 946	if (!sk && !ipv6_unicast_destination(skb))
 947		return;
 948
 949	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
 950#ifdef CONFIG_TCP_MD5SIG
 951	rcu_read_lock();
 952	hash_location = tcp_parse_md5sig_option(th);
 953	if (sk && sk_fullsock(sk)) {
 954		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
 
 
 
 
 
 
 955	} else if (hash_location) {
 
 
 
 
 956		/*
 957		 * active side is lost. Try to find listening socket through
 958		 * source port, and then find md5 key through listening socket.
 959		 * we are not loose security here:
 960		 * Incoming packet is checked with md5 hash with finding key,
 961		 * no RST generated if md5 hash doesn't match.
 962		 */
 963		sk1 = inet6_lookup_listener(net,
 964					   &tcp_hashinfo, NULL, 0,
 965					   &ipv6h->saddr,
 966					   th->source, &ipv6h->daddr,
 967					   ntohs(th->source),
 968					   tcp_v6_iif_l3_slave(skb),
 969					   tcp_v6_sdif(skb));
 970		if (!sk1)
 971			goto out;
 972
 973		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
 
 
 
 
 
 974		if (!key)
 975			goto out;
 976
 977		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
 978		if (genhash || memcmp(hash_location, newhash, 16) != 0)
 979			goto out;
 980	}
 981#endif
 982
 983	if (th->ack)
 984		seq = ntohl(th->ack_seq);
 985	else
 986		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
 987			  (th->doff << 2);
 988
 989	if (sk) {
 990		oif = sk->sk_bound_dev_if;
 991		if (sk_fullsock(sk)) {
 992			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
 993
 994			trace_tcp_send_reset(sk, skb);
 995			if (np->repflow)
 996				label = ip6_flowlabel(ipv6h);
 997			priority = sk->sk_priority;
 998		}
 999		if (sk->sk_state == TCP_TIME_WAIT) {
1000			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1001			priority = inet_twsk(sk)->tw_priority;
1002		}
1003	} else {
1004		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1005			label = ip6_flowlabel(ipv6h);
1006	}
1007
1008	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
1009			     label, priority);
1010
1011#ifdef CONFIG_TCP_MD5SIG
1012out:
1013	rcu_read_unlock();
1014#endif
1015}
1016
1017static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1018			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1019			    struct tcp_md5sig_key *key, u8 tclass,
1020			    __be32 label, u32 priority)
1021{
1022	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1023			     tclass, label, priority);
1024}
1025
1026static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1027{
1028	struct inet_timewait_sock *tw = inet_twsk(sk);
1029	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1030
1031	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1032			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1033			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1034			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1035			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1036
1037	inet_twsk_put(tw);
1038}
1039
1040static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1041				  struct request_sock *req)
1042{
 
 
 
 
1043	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1044	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1045	 */
1046	/* RFC 7323 2.3
1047	 * The window field (SEG.WND) of every outgoing segment, with the
1048	 * exception of <SYN> segments, MUST be right-shifted by
1049	 * Rcv.Wind.Shift bits:
1050	 */
1051	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1052			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1053			tcp_rsk(req)->rcv_nxt,
1054			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1055			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1056			req->ts_recent, sk->sk_bound_dev_if,
1057			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
1058			0, 0, sk->sk_priority);
1059}
1060
1061
/*
 * With CONFIG_SYN_COOKIES, pass a non-SYN segment to cookie_v6_check()
 * and return whatever it returns.  SYN segments, and every segment when
 * SYN cookies are compiled out, yield @sk unchanged.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (!tcp_hdr(skb)->syn)
		return cookie_v6_check(sk, skb);
#endif
	return sk;
}
1072
1073u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1074			 struct tcphdr *th, u32 *cookie)
1075{
1076	u16 mss = 0;
1077#ifdef CONFIG_SYN_COOKIES
1078	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1079				    &tcp_request_sock_ipv6_ops, sk, th);
1080	if (mss) {
1081		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1082		tcp_synq_overflow(sk);
1083	}
1084#endif
1085	return mss;
1086}
1087
1088static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1089{
1090	if (skb->protocol == htons(ETH_P_IP))
1091		return tcp_v4_conn_request(sk, skb);
1092
1093	if (!ipv6_unicast_destination(skb))
1094		goto drop;
1095
 
 
 
 
 
1096	return tcp_conn_request(&tcp6_request_sock_ops,
1097				&tcp_request_sock_ipv6_ops, sk, skb);
1098
1099drop:
1100	tcp_listendrop(sk);
1101	return 0; /* don't send reset */
1102}
1103
1104static void tcp_v6_restore_cb(struct sk_buff *skb)
1105{
1106	/* We need to move header back to the beginning if xfrm6_policy_check()
1107	 * and tcp_v6_fill_cb() are going to be called again.
1108	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1109	 */
1110	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1111		sizeof(struct inet6_skb_parm));
1112}
1113
/*
 * Create the child socket for connection request @req on listener @sk.
 *
 * @sk:         listening socket.
 * @skb:        segment that triggers the creation.
 * @req:        the connection request being promoted.
 * @dst:        route to use, or NULL to have one looked up here.
 * @req_unhash: request passed to inet_ehash_nolisten() together with the
 *              new socket.
 * @own_req:    set from the result of inet_ehash_nolisten().
 *
 * For an IPv4 packet (v4-mapped case) the work is delegated to
 * tcp_v4_syn_recv_sock() and the result is then given an IPv6 view:
 * ipv6_pinfo copied from the listener, ipv6_mapped operations and
 * tcp_v4_do_rcv as backlog handler.
 *
 * Returns the new socket, or NULL on failure.  The native IPv6 failure
 * paths all end in tcp_listendrop(); an accept-queue overflow is also
 * counted in LINUX_MIB_LISTENOVERFLOWS.
 */
1114static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1115					 struct request_sock *req,
1116					 struct dst_entry *dst,
1117					 struct request_sock *req_unhash,
1118					 bool *own_req)
1119{
1120	struct inet_request_sock *ireq;
1121	struct ipv6_pinfo *newnp;
1122	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1123	struct ipv6_txoptions *opt;
1124	struct inet_sock *newinet;

1125	struct tcp_sock *newtp;
1126	struct sock *newsk;
1127#ifdef CONFIG_TCP_MD5SIG
1128	struct tcp_md5sig_key *key;

1129#endif
1130	struct flowi6 fl6;
1131
1132	if (skb->protocol == htons(ETH_P_IP)) {
1133		/*
1134		 *	v6 mapped
1135		 */
1136
1137		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1138					     req_unhash, own_req);
1139
1140		if (!newsk)
1141			return NULL;
1142
1143		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1144
1145		newinet = inet_sk(newsk);
1146		newnp = tcp_inet6_sk(newsk);
1147		newtp = tcp_sk(newsk);
1148
1149		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1150
1151		newnp->saddr = newsk->sk_v6_rcv_saddr;
1152
1153		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;


1154		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1155#ifdef CONFIG_TCP_MD5SIG
1156		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1157#endif
1158
		/* The memcpy() above copied the listener's pointers; the
		 * child must not share them, so reset them here.
		 */
1159		newnp->ipv6_mc_list = NULL;
1160		newnp->ipv6_ac_list = NULL;
1161		newnp->ipv6_fl_list = NULL;
1162		newnp->pktoptions  = NULL;
1163		newnp->opt	   = NULL;
1164		newnp->mcast_oif   = inet_iif(skb);
1165		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1166		newnp->rcv_flowinfo = 0;
1167		if (np->repflow)
1168			newnp->flow_label = 0;
1169
1170		/*
1171		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1172		 * here, tcp_create_openreq_child now does this for us, see the comment in
1173		 * that function for the gory details. -acme
1174		 */
1175
1176		/* It is tricky place. Until this moment IPv4 tcp
1177		   worked with IPv6 icsk.icsk_af_ops.
1178		   Sync it now.
1179		 */
1180		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1181
1182		return newsk;
1183	}
1184
1185	ireq = inet_rsk(req);
1186
1187	if (sk_acceptq_is_full(sk))
1188		goto out_overflow;
1189
1190	if (!dst) {
1191		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1192		if (!dst)
1193			goto out;
1194	}
1195
1196	newsk = tcp_create_openreq_child(sk, req, skb);
1197	if (!newsk)
1198		goto out_nonewsk;
1199
1200	/*
1201	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1202	 * count here, tcp_create_openreq_child now does this for us, see the
1203	 * comment in that function for the gory details. -acme
1204	 */
1205
1206	newsk->sk_gso_type = SKB_GSO_TCPV6;
1207	ip6_dst_store(newsk, dst, NULL, NULL);
1208	inet6_sk_rx_dst_set(newsk, skb);
1209
1210	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1211
1212	newtp = tcp_sk(newsk);
1213	newinet = inet_sk(newsk);
1214	newnp = tcp_inet6_sk(newsk);
1215
1216	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1217
	/* Addresses and bound interface come from the request, not the listener. */
1218	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1219	newnp->saddr = ireq->ir_v6_loc_addr;
1220	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1221	newsk->sk_bound_dev_if = ireq->ir_iif;
1222
1223	/* Now IPv6 options...
1224
1225	   First: no IPv4 options.
1226	 */
1227	newinet->inet_opt = NULL;
1228	newnp->ipv6_mc_list = NULL;
1229	newnp->ipv6_ac_list = NULL;
1230	newnp->ipv6_fl_list = NULL;
1231
1232	/* Clone RX bits */
1233	newnp->rxopt.all = np->rxopt.all;
1234
1235	newnp->pktoptions = NULL;
1236	newnp->opt	  = NULL;
1237	newnp->mcast_oif  = tcp_v6_iif(skb);
1238	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1239	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1240	if (np->repflow)
1241		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1242






1243	/* Clone native IPv6 options from listening socket (if any)
1244
1245	   Yes, keeping reference count would be much more clever,
1246	   but we make one more one thing there: reattach optmem
1247	   to newsk.
1248	 */
1249	opt = ireq->ipv6_opt;
1250	if (!opt)
1251		opt = rcu_dereference(np->opt);
1252	if (opt) {
1253		opt = ipv6_dup_options(newsk, opt);
1254		RCU_INIT_POINTER(newnp->opt, opt);
1255	}
	/* opt is re-tested below because it now holds the result of
	 * ipv6_dup_options(), not the pointer that was duplicated.
	 */
1256	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1257	if (opt)
1258		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1259						    opt->opt_flen;
1260
1261	tcp_ca_openreq_child(newsk, dst);
1262
1263	tcp_sync_mss(newsk, dst_mtu(dst));
1264	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1265
1266	tcp_initialize_rcv_mss(newsk);
1267
1268	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1269	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1270
1271#ifdef CONFIG_TCP_MD5SIG


1272	/* Copy over the MD5 key from the original socket */
1273	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1274	if (key) {
1275		/* We're using one, so create a matching key
1276		 * on the newsk structure. If we fail to get
1277		 * memory, then we end up not copying the key
1278		 * across. Shucks.
1279		 */
1280		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1281			       AF_INET6, 128, key->key, key->keylen,
1282			       sk_gfp_mask(sk, GFP_ATOMIC));
1283	}
1284#endif
1285
	/* Port inheritance failed: tear the half-built child down. */
1286	if (__inet_inherit_port(sk, newsk) < 0) {
1287		inet_csk_prepare_forced_close(newsk);
1288		tcp_done(newsk);
1289		goto out;
1290	}
1291	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));

1292	if (*own_req) {
1293		tcp_move_syn(newtp, req);
1294
1295		/* Clone pktoptions received with SYN, if we own the req */
1296		if (ireq->pktopts) {
1297			newnp->pktoptions = skb_clone(ireq->pktopts,
1298						      sk_gfp_mask(sk, GFP_ATOMIC));
1299			consume_skb(ireq->pktopts);
1300			ireq->pktopts = NULL;
			/* skb_clone() may have failed; only touch a real clone. */
1301			if (newnp->pktoptions) {
1302				tcp_v6_restore_cb(newnp->pktoptions);
1303				skb_set_owner_r(newnp->pktoptions, newsk);
1304			}
1305		}









1306	}
1307
1308	return newsk;
1309
	/* The error labels fall through: an overflow is counted, then dst
	 * is released, then the drop is counted on the listener.
	 */
1310out_overflow:
1311	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1312out_nonewsk:
1313	dst_release(dst);
1314out:
1315	tcp_listendrop(sk);
1316	return NULL;
1317}
1318
 
 
/*
 * Process one segment for socket @sk.
 *
 * IPv4 packets are handed to tcp_v4_do_rcv().  For IPv6 the segment goes
 * through tcp_rcv_established() when the socket is TCP_ESTABLISHED, and
 * otherwise through checksum validation, the SYN-cookie check (listeners
 * only) and tcp_rcv_state_process().  When any rxopt bit is set on the
 * socket, a clone of the segment is kept so that its packet options can
 * be latched into np->pktoptions afterwards.
 *
 * Every IPv6 path visible here returns 0.
 */
1319/* The socket must have its spinlock held when we get
1320 * here, unless it is a TCP_LISTEN socket.
1321 *
1322 * We have a potential double-lock case here, so even when
1323 * doing backlog processing we use the BH locking scheme.
1324 * This is because we cannot sleep with the original spinlock
1325 * held.
1326 */
1327static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1328{
1329	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1330	struct sk_buff *opt_skb = NULL;
1331	struct tcp_sock *tp;
1332
1333	/* Imagine: socket is IPv6. IPv4 packet arrives,
1334	   goes to IPv4 receive handler and backlogged.
1335	   From backlog it always goes here. Kerboom...
1336	   Fortunately, tcp_rcv_established and rcv_established
1337	   handle them correctly, but it is not case with
1338	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1339	 */
1340
1341	if (skb->protocol == htons(ETH_P_IP))
1342		return tcp_v4_do_rcv(sk, skb);
1343
1344	/*
1345	 *	socket locking is here for SMP purposes as backlog rcv
1346	 *	is currently called with bh processing disabled.
1347	 */
1348
1349	/* Do Stevens' IPV6_PKTOPTIONS.
1350
1351	   Yes, guys, it is the only place in our code, where we
1352	   may make it not affecting IPv4.
1353	   The rest of code is protocol independent,
1354	   and I do not like idea to uglify IPv4.
1355
1356	   Actually, all the idea behind IPV6_PKTOPTIONS
1357	   looks not very well thought. For now we latch
1358	   options, received in the last packet, enqueued
1359	   by tcp. Feel free to propose better solution.
1360					       --ANK (980728)
1361	 */
1362	if (np->rxopt.all)
1363		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1364
1365	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1366		struct dst_entry *dst = sk->sk_rx_dst;
1367
1368		sock_rps_save_rxhash(sk, skb);
1369		sk_mark_napi_id(sk, skb);
		/* Drop the cached rx dst if the segment arrived on a
		 * different interface or the dst fails its check().
		 */
1370		if (dst) {
1371			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1372			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {

1373				dst_release(dst);
1374				sk->sk_rx_dst = NULL;
1375			}
1376		}
1377
1378		tcp_rcv_established(sk, skb);
1379		if (opt_skb)
1380			goto ipv6_pktoptions;
1381		return 0;
1382	}
1383
1384	if (tcp_checksum_complete(skb))
1385		goto csum_err;
1386
1387	if (sk->sk_state == TCP_LISTEN) {
1388		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1389
1390		if (!nsk)
1391			goto discard;
1392
		/* The cookie check returned a different socket: let that
		 * socket process the segment; the option clone is not kept.
		 */
1393		if (nsk != sk) {
1394			if (tcp_child_process(sk, nsk, skb))
1395				goto reset;
1396			if (opt_skb)
1397				__kfree_skb(opt_skb);
1398			return 0;
1399		}
1400	} else
1401		sock_rps_save_rxhash(sk, skb);
1402
1403	if (tcp_rcv_state_process(sk, skb))
1404		goto reset;
1405	if (opt_skb)
1406		goto ipv6_pktoptions;
1407	return 0;
1408
	/* reset falls through to discard, which frees both the option
	 * clone (if any) and the segment itself.
	 */
1409reset:
1410	tcp_v6_send_reset(sk, skb);
1411discard:
1412	if (opt_skb)
1413		__kfree_skb(opt_skb);
1414	kfree_skb(skb);
1415	return 0;
1416csum_err:

1417	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1418	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1419	goto discard;
1420
1421
1422ipv6_pktoptions:
1423	/* Do you ask, what is it?
1424
1425	   1. skb was enqueued by tcp.
1426	   2. skb is added to tail of read queue, rather than out of order.
1427	   3. socket is not in passive state.
1428	   4. Finally, it really contains options, which user wants to receive.
1429	 */
1430	tp = tcp_sk(sk);
1431	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1432	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1433		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1434			np->mcast_oif = tcp_v6_iif(opt_skb);
1435		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1436			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1437		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1438			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1439		if (np->repflow)
1440			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		/* In both branches opt_skb ends up holding the previous
		 * np->pktoptions, which is freed below.
		 */
1441		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1442			skb_set_owner_r(opt_skb, sk);
1443			tcp_v6_restore_cb(opt_skb);
1444			opt_skb = xchg(&np->pktoptions, opt_skb);
1445		} else {
1446			__kfree_skb(opt_skb);
1447			opt_skb = xchg(&np->pktoptions, NULL);
1448		}
1449	}
1450
1451	kfree_skb(opt_skb);
1452	return 0;
1453}
1454
1455static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1456			   const struct tcphdr *th)
1457{
1458	/* This is tricky: we move IP6CB at its correct location into
1459	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1460	 * _decode_session6() uses IP6CB().
1461	 * barrier() makes sure compiler won't play aliasing games.
1462	 */
1463	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1464		sizeof(struct inet6_skb_parm));
1465	barrier();
1466
1467	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1468	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1469				    skb->len - th->doff*4);
1470	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1471	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1472	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1473	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1474	TCP_SKB_CB(skb)->sacked = 0;
1475	TCP_SKB_CB(skb)->has_rxtstamp =
1476			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1477}
1478
/*
 * Receive one TCP segment carried over IPv6.
 *
 * Validates the header length and checksum, looks up the matching
 * socket and dispatches on its state: timewait sockets are handled at
 * do_time_wait, request sockets (TCP_NEW_SYN_RECV) are processed on
 * behalf of their listener, listeners go straight to tcp_v6_do_rcv(),
 * and other sockets are processed under bh_lock_sock_nested() or queued
 * to the backlog when the socket is owned by user context.  With no
 * matching socket a reset is sent unless the checksum is bad.
 *
 * Returns -1 when the processing path reached put_and_return with a
 * non-zero ret, and 0 in every other case.
 */
1479INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1480{
1481	struct sk_buff *skb_to_free;
1482	int sdif = inet6_sdif(skb);

1483	const struct tcphdr *th;
1484	const struct ipv6hdr *hdr;
1485	bool refcounted;
1486	struct sock *sk;
1487	int ret;
1488	struct net *net = dev_net(skb->dev);
1489
1490	if (skb->pkt_type != PACKET_HOST)
1491		goto discard_it;
1492
1493	/*
1494	 *	Count it even if it's bad.
1495	 */
1496	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1497
1498	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1499		goto discard_it;
1500
1501	th = (const struct tcphdr *)skb->data;
1502
1503	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1504		goto bad_packet;
1505	if (!pskb_may_pull(skb, th->doff*4))
1506		goto discard_it;
1507
1508	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1509		goto csum_error;
1510
	/* Header pointers are loaded again after the pulls above. */
1511	th = (const struct tcphdr *)skb->data;
1512	hdr = ipv6_hdr(skb);
1513
1514lookup:
1515	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1516				th->source, th->dest, inet6_iif(skb), sdif,
1517				&refcounted);
1518	if (!sk)
1519		goto no_tcp_socket;
1520
1521process:
1522	if (sk->sk_state == TCP_TIME_WAIT)
1523		goto do_time_wait;
1524
1525	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1526		struct request_sock *req = inet_reqsk(sk);
1527		bool req_stolen = false;
1528		struct sock *nsk;
1529
		/* The lookup returned a request socket; from here on sk is
		 * the listener it belongs to.
		 */
1530		sk = req->rsk_listener;
1531		if (tcp_v6_inbound_md5_hash(sk, skb)) {
1532			sk_drops_add(sk, skb);
1533			reqsk_put(req);
1534			goto discard_it;
1535		}
1536		if (tcp_checksum_complete(skb)) {
1537			reqsk_put(req);
1538			goto csum_error;
1539		}
		/* Listener no longer listening: drop the request and redo
		 * the lookup.
		 */
1540		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1541			inet_csk_reqsk_queue_drop_and_put(sk, req);
1542			goto lookup;









1543		}
1544		sock_hold(sk);
1545		refcounted = true;
1546		nsk = NULL;
1547		if (!tcp_filter(sk, skb)) {
1548			th = (const struct tcphdr *)skb->data;
1549			hdr = ipv6_hdr(skb);
1550			tcp_v6_fill_cb(skb, hdr, th);
1551			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1552		}
1553		if (!nsk) {
1554			reqsk_put(req);
1555			if (req_stolen) {
1556				/* Another cpu got exclusive access to req
1557				 * and created a full blown socket.
1558				 * Try to feed this packet to this socket
1559				 * instead of discarding it.
1560				 */
1561				tcp_v6_restore_cb(skb);
1562				sock_put(sk);
1563				goto lookup;
1564			}
1565			goto discard_and_relse;
1566		}
		/* nsk == sk: keep processing on the listener below.
		 * Otherwise the segment is handed to the child socket.
		 */
1567		if (nsk == sk) {
1568			reqsk_put(req);
1569			tcp_v6_restore_cb(skb);
1570		} else if (tcp_child_process(sk, nsk, skb)) {
1571			tcp_v6_send_reset(nsk, skb);
1572			goto discard_and_relse;
1573		} else {
1574			sock_put(sk);
1575			return 0;
1576		}
1577	}
1578	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1579		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1580		goto discard_and_relse;
1581	}
1582
1583	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1584		goto discard_and_relse;
1585
1586	if (tcp_v6_inbound_md5_hash(sk, skb))
1587		goto discard_and_relse;
1588
1589	if (tcp_filter(sk, skb))
1590		goto discard_and_relse;
1591	th = (const struct tcphdr *)skb->data;
1592	hdr = ipv6_hdr(skb);
1593	tcp_v6_fill_cb(skb, hdr, th);
1594
1595	skb->dev = NULL;
1596
1597	if (sk->sk_state == TCP_LISTEN) {
1598		ret = tcp_v6_do_rcv(sk, skb);
1599		goto put_and_return;
1600	}
1601
1602	sk_incoming_cpu_update(sk);
1603
1604	bh_lock_sock_nested(sk);
1605	tcp_segs_in(tcp_sk(sk), skb);
1606	ret = 0;
	/* Process immediately unless the socket is owned by user
	 * context, in which case the segment is queued to the backlog.
	 */
1607	if (!sock_owned_by_user(sk)) {
1608		skb_to_free = sk->sk_rx_skb_cache;
1609		sk->sk_rx_skb_cache = NULL;
1610		ret = tcp_v6_do_rcv(sk, skb);
1611	} else {
1612		if (tcp_add_backlog(sk, skb))
1613			goto discard_and_relse;
1614		skb_to_free = NULL;
1615	}
1616	bh_unlock_sock(sk);
1617	if (skb_to_free)
1618		__kfree_skb(skb_to_free);
1619put_and_return:
1620	if (refcounted)
1621		sock_put(sk);
1622	return ret ? -1 : 0;
1623
1624no_tcp_socket:
1625	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1626		goto discard_it;
1627
1628	tcp_v6_fill_cb(skb, hdr, th);
1629
	/* csum_error and bad_packet are jump targets inside this branch:
	 * a checksum error bumps both counters, a bad packet only
	 * TCP_MIB_INERRS.
	 */
1630	if (tcp_checksum_complete(skb)) {
1631csum_error:

1632		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1633bad_packet:
1634		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1635	} else {
1636		tcp_v6_send_reset(NULL, skb);
1637	}
1638
1639discard_it:
1640	kfree_skb(skb);
1641	return 0;
1642
1643discard_and_relse:
1644	sk_drops_add(sk, skb);
1645	if (refcounted)
1646		sock_put(sk);
1647	goto discard_it;
1648
1649do_time_wait:
1650	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1651		inet_twsk_put(inet_twsk(sk));
1652		goto discard_it;
1653	}
1654
1655	tcp_v6_fill_cb(skb, hdr, th);
1656
1657	if (tcp_checksum_complete(skb)) {
1658		inet_twsk_put(inet_twsk(sk));
1659		goto csum_error;
1660	}
1661
1662	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1663	case TCP_TW_SYN:
1664	{
1665		struct sock *sk2;
1666
1667		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1668					    skb, __tcp_hdrlen(th),
1669					    &ipv6_hdr(skb)->saddr, th->source,
1670					    &ipv6_hdr(skb)->daddr,
1671					    ntohs(th->dest),
1672					    tcp_v6_iif_l3_slave(skb),
1673					    sdif);
		/* A listener exists: retire the timewait socket and restart
		 * processing with the listener as sk.
		 */
1674		if (sk2) {
1675			struct inet_timewait_sock *tw = inet_twsk(sk);
1676			inet_twsk_deschedule_put(tw);
1677			sk = sk2;
1678			tcp_v6_restore_cb(skb);
1679			refcounted = false;
1680			goto process;
1681		}
1682	}
1683		/* to ACK */
1684		/* fall through */
1685	case TCP_TW_ACK:
1686		tcp_v6_timewait_ack(sk, skb);
1687		break;
1688	case TCP_TW_RST:
1689		tcp_v6_send_reset(sk, skb);
1690		inet_twsk_deschedule_put(inet_twsk(sk));
1691		goto discard_it;
1692	case TCP_TW_SUCCESS:
1693		;
1694	}
1695	goto discard_it;
1696}
1697
1698INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1699{
1700	const struct ipv6hdr *hdr;
1701	const struct tcphdr *th;
1702	struct sock *sk;
1703
1704	if (skb->pkt_type != PACKET_HOST)
1705		return;
1706
1707	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1708		return;
1709
1710	hdr = ipv6_hdr(skb);
1711	th = tcp_hdr(skb);
1712
1713	if (th->doff < sizeof(struct tcphdr) / 4)
1714		return;
1715
1716	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
1717	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1718					&hdr->saddr, th->source,
1719					&hdr->daddr, ntohs(th->dest),
1720					inet6_iif(skb), inet6_sdif(skb));
1721	if (sk) {
1722		skb->sk = sk;
1723		skb->destructor = sock_edemux;
1724		if (sk_fullsock(sk)) {
1725			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1726
1727			if (dst)
1728				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1729			if (dst &&
1730			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1731				skb_dst_set_noref(skb, dst);
1732		}
1733	}
1734}
1735
1736static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1737	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1738	.twsk_unique	= tcp_twsk_unique,
1739	.twsk_destructor = tcp_twsk_destructor,
1740};
1741
1742static const struct inet_connection_sock_af_ops ipv6_specific = {
 
 
 
 
 
 
 
1743	.queue_xmit	   = inet6_csk_xmit,
1744	.send_check	   = tcp_v6_send_check,
1745	.rebuild_header	   = inet6_sk_rebuild_header,
1746	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1747	.conn_request	   = tcp_v6_conn_request,
1748	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1749	.net_header_len	   = sizeof(struct ipv6hdr),
1750	.net_frag_header_len = sizeof(struct frag_hdr),
1751	.setsockopt	   = ipv6_setsockopt,
1752	.getsockopt	   = ipv6_getsockopt,
1753	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1754	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1755#ifdef CONFIG_COMPAT
1756	.compat_setsockopt = compat_ipv6_setsockopt,
1757	.compat_getsockopt = compat_ipv6_getsockopt,
1758#endif
1759	.mtu_reduced	   = tcp_v6_mtu_reduced,
1760};
1761
1762#ifdef CONFIG_TCP_MD5SIG
1763static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1764	.md5_lookup	=	tcp_v6_md5_lookup,
1765	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
1766	.md5_parse	=	tcp_v6_parse_md5_keys,
1767};
1768#endif
1769
1770/*
1771 *	TCP over IPv4 via INET6 API
1772 */
1773static const struct inet_connection_sock_af_ops ipv6_mapped = {
1774	.queue_xmit	   = ip_queue_xmit,
1775	.send_check	   = tcp_v4_send_check,
1776	.rebuild_header	   = inet_sk_rebuild_header,
1777	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1778	.conn_request	   = tcp_v6_conn_request,
1779	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1780	.net_header_len	   = sizeof(struct iphdr),
1781	.setsockopt	   = ipv6_setsockopt,
1782	.getsockopt	   = ipv6_getsockopt,
1783	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1784	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1785#ifdef CONFIG_COMPAT
1786	.compat_setsockopt = compat_ipv6_setsockopt,
1787	.compat_getsockopt = compat_ipv6_getsockopt,
1788#endif
1789	.mtu_reduced	   = tcp_v4_mtu_reduced,
1790};
1791
1792#ifdef CONFIG_TCP_MD5SIG
1793static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1794	.md5_lookup	=	tcp_v4_md5_lookup,
1795	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1796	.md5_parse	=	tcp_v6_parse_md5_keys,
1797};
1798#endif
1799
1800/* NOTE: A lot of things set to zero explicitly by call to
1801 *       sk_alloc() so need not be done here.
1802 */
1803static int tcp_v6_init_sock(struct sock *sk)
1804{
1805	struct inet_connection_sock *icsk = inet_csk(sk);
1806
1807	tcp_init_sock(sk);
1808
1809	icsk->icsk_af_ops = &ipv6_specific;
1810
1811#ifdef CONFIG_TCP_MD5SIG
1812	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1813#endif
1814
1815	return 0;
1816}
1817
/*
 * Destroy a TCP/IPv6 socket: the TCP teardown shared with IPv4 runs
 * first, followed by the IPv6-specific one.
 */
static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);	/* TCP state */
	inet6_destroy_sock(sk);		/* IPv6 state */
}
1823
1824#ifdef CONFIG_PROC_FS
1825/* Proc filesystem TCPv6 sock list dumping. */
1826static void get_openreq6(struct seq_file *seq,
1827			 const struct request_sock *req, int i)
1828{
1829	long ttd = req->rsk_timer.expires - jiffies;
1830	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1831	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1832
1833	if (ttd < 0)
1834		ttd = 0;
1835
1836	seq_printf(seq,
1837		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1838		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1839		   i,
1840		   src->s6_addr32[0], src->s6_addr32[1],
1841		   src->s6_addr32[2], src->s6_addr32[3],
1842		   inet_rsk(req)->ir_num,
1843		   dest->s6_addr32[0], dest->s6_addr32[1],
1844		   dest->s6_addr32[2], dest->s6_addr32[3],
1845		   ntohs(inet_rsk(req)->ir_rmt_port),
1846		   TCP_SYN_RECV,
1847		   0, 0, /* could print option size, but that is af dependent. */
1848		   1,   /* timers active (only the expire timer) */
1849		   jiffies_to_clock_t(ttd),
1850		   req->num_timeout,
1851		   from_kuid_munged(seq_user_ns(seq),
1852				    sock_i_uid(req->rsk_listener)),
1853		   0,  /* non standard timer */
1854		   0, /* open_requests have no inode */
1855		   0, req);
1856}
1857
1858static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1859{
1860	const struct in6_addr *dest, *src;
1861	__u16 destp, srcp;
1862	int timer_active;
1863	unsigned long timer_expires;
1864	const struct inet_sock *inet = inet_sk(sp);
1865	const struct tcp_sock *tp = tcp_sk(sp);
1866	const struct inet_connection_sock *icsk = inet_csk(sp);
1867	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1868	int rx_queue;
1869	int state;
1870
1871	dest  = &sp->sk_v6_daddr;
1872	src   = &sp->sk_v6_rcv_saddr;
1873	destp = ntohs(inet->inet_dport);
1874	srcp  = ntohs(inet->inet_sport);
1875
1876	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1877	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1878	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1879		timer_active	= 1;
1880		timer_expires	= icsk->icsk_timeout;
1881	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1882		timer_active	= 4;
1883		timer_expires	= icsk->icsk_timeout;
1884	} else if (timer_pending(&sp->sk_timer)) {
1885		timer_active	= 2;
1886		timer_expires	= sp->sk_timer.expires;
1887	} else {
1888		timer_active	= 0;
1889		timer_expires = jiffies;
1890	}
1891
1892	state = inet_sk_state_load(sp);
1893	if (state == TCP_LISTEN)
1894		rx_queue = sp->sk_ack_backlog;
1895	else
1896		/* Because we don't lock the socket,
1897		 * we might find a transient negative value.
1898		 */
1899		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
1900				      READ_ONCE(tp->copied_seq), 0);
1901
1902	seq_printf(seq,
1903		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1904		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1905		   i,
1906		   src->s6_addr32[0], src->s6_addr32[1],
1907		   src->s6_addr32[2], src->s6_addr32[3], srcp,
1908		   dest->s6_addr32[0], dest->s6_addr32[1],
1909		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
1910		   state,
1911		   READ_ONCE(tp->write_seq) - tp->snd_una,
1912		   rx_queue,
1913		   timer_active,
1914		   jiffies_delta_to_clock_t(timer_expires - jiffies),
1915		   icsk->icsk_retransmits,
1916		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1917		   icsk->icsk_probes_out,
1918		   sock_i_ino(sp),
1919		   refcount_read(&sp->sk_refcnt), sp,
1920		   jiffies_to_clock_t(icsk->icsk_rto),
1921		   jiffies_to_clock_t(icsk->icsk_ack.ato),
1922		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
1923		   tp->snd_cwnd,
1924		   state == TCP_LISTEN ?
1925			fastopenq->max_qlen :
1926			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
1927		   );
1928}
1929
1930static void get_timewait6_sock(struct seq_file *seq,
1931			       struct inet_timewait_sock *tw, int i)
1932{
1933	long delta = tw->tw_timer.expires - jiffies;
1934	const struct in6_addr *dest, *src;
1935	__u16 destp, srcp;
1936
1937	dest = &tw->tw_v6_daddr;
1938	src  = &tw->tw_v6_rcv_saddr;
1939	destp = ntohs(tw->tw_dport);
1940	srcp  = ntohs(tw->tw_sport);
1941
1942	seq_printf(seq,
1943		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1944		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1945		   i,
1946		   src->s6_addr32[0], src->s6_addr32[1],
1947		   src->s6_addr32[2], src->s6_addr32[3], srcp,
1948		   dest->s6_addr32[0], dest->s6_addr32[1],
1949		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
1950		   tw->tw_substate, 0, 0,
1951		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1952		   refcount_read(&tw->tw_refcnt), tw);
1953}
1954
1955static int tcp6_seq_show(struct seq_file *seq, void *v)
1956{
1957	struct tcp_iter_state *st;
1958	struct sock *sk = v;
1959
1960	if (v == SEQ_START_TOKEN) {
1961		seq_puts(seq,
1962			 "  sl  "
1963			 "local_address                         "
1964			 "remote_address                        "
1965			 "st tx_queue rx_queue tr tm->when retrnsmt"
1966			 "   uid  timeout inode\n");
1967		goto out;
1968	}
1969	st = seq->private;
1970
1971	if (sk->sk_state == TCP_TIME_WAIT)
1972		get_timewait6_sock(seq, v, st->num);
1973	else if (sk->sk_state == TCP_NEW_SYN_RECV)
1974		get_openreq6(seq, v, st->num);
1975	else
1976		get_tcp6_sock(seq, v, st->num);
1977out:
1978	return 0;
1979}
1980
1981static const struct seq_operations tcp6_seq_ops = {
1982	.show		= tcp6_seq_show,
1983	.start		= tcp_seq_start,
1984	.next		= tcp_seq_next,
1985	.stop		= tcp_seq_stop,
1986};
1987
1988static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1989	.family		= AF_INET6,
1990};
1991
1992int __net_init tcp6_proc_init(struct net *net)
1993{
1994	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
1995			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
1996		return -ENOMEM;
1997	return 0;
1998}
1999
2000void tcp6_proc_exit(struct net *net)
2001{
2002	remove_proc_entry("tcp6", net->proc_net);
2003}
2004#endif
2005
2006struct proto tcpv6_prot = {
2007	.name			= "TCPv6",
2008	.owner			= THIS_MODULE,
2009	.close			= tcp_close,
2010	.pre_connect		= tcp_v6_pre_connect,
2011	.connect		= tcp_v6_connect,
2012	.disconnect		= tcp_disconnect,
2013	.accept			= inet_csk_accept,
2014	.ioctl			= tcp_ioctl,
2015	.init			= tcp_v6_init_sock,
2016	.destroy		= tcp_v6_destroy_sock,
2017	.shutdown		= tcp_shutdown,
2018	.setsockopt		= tcp_setsockopt,
2019	.getsockopt		= tcp_getsockopt,
 
2020	.keepalive		= tcp_set_keepalive,
2021	.recvmsg		= tcp_recvmsg,
2022	.sendmsg		= tcp_sendmsg,
2023	.sendpage		= tcp_sendpage,
2024	.backlog_rcv		= tcp_v6_do_rcv,
2025	.release_cb		= tcp_release_cb,
2026	.hash			= inet6_hash,
2027	.unhash			= inet_unhash,
2028	.get_port		= inet_csk_get_port,
 
 
 
2029	.enter_memory_pressure	= tcp_enter_memory_pressure,
2030	.leave_memory_pressure	= tcp_leave_memory_pressure,
2031	.stream_memory_free	= tcp_stream_memory_free,
2032	.sockets_allocated	= &tcp_sockets_allocated,
2033	.memory_allocated	= &tcp_memory_allocated,
2034	.memory_pressure	= &tcp_memory_pressure,
2035	.orphan_count		= &tcp_orphan_count,
2036	.sysctl_mem		= sysctl_tcp_mem,
2037	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2038	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2039	.max_header		= MAX_TCP_HEADER,
2040	.obj_size		= sizeof(struct tcp6_sock),
2041	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2042	.twsk_prot		= &tcp6_timewait_sock_ops,
2043	.rsk_prot		= &tcp6_request_sock_ops,
2044	.h.hashinfo		= &tcp_hashinfo,
2045	.no_autobind		= true,
2046#ifdef CONFIG_COMPAT
2047	.compat_setsockopt	= compat_tcp_setsockopt,
2048	.compat_getsockopt	= compat_tcp_getsockopt,
2049#endif
2050	.diag_destroy		= tcp_abort,
2051};
 
2052
2053/* thinking of making this const? Don't.
2054 * early_demux can change based on sysctl.
2055 */
2056static struct inet6_protocol tcpv6_protocol = {
2057	.early_demux	=	tcp_v6_early_demux,
2058	.early_demux_handler =  tcp_v6_early_demux,
2059	.handler	=	tcp_v6_rcv,
2060	.err_handler	=	tcp_v6_err,
2061	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2062};
2063
2064static struct inet_protosw tcpv6_protosw = {
2065	.type		=	SOCK_STREAM,
2066	.protocol	=	IPPROTO_TCP,
2067	.prot		=	&tcpv6_prot,
2068	.ops		=	&inet6_stream_ops,
2069	.flags		=	INET_PROTOSW_PERMANENT |
2070				INET_PROTOSW_ICSK,
2071};
2072
2073static int __net_init tcpv6_net_init(struct net *net)
2074{
2075	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2076				    SOCK_RAW, IPPROTO_TCP, net);
2077}
2078
2079static void __net_exit tcpv6_net_exit(struct net *net)
2080{
2081	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2082}
2083
2084static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2085{
2086	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2087}
2088
2089static struct pernet_operations tcpv6_net_ops = {
2090	.init	    = tcpv6_net_init,
2091	.exit	    = tcpv6_net_exit,
2092	.exit_batch = tcpv6_net_exit_batch,
2093};
2094
2095int __init tcpv6_init(void)
2096{
2097	int ret;
2098
2099	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2100	if (ret)
2101		goto out;
2102
2103	/* register inet6 protocol */
2104	ret = inet6_register_protosw(&tcpv6_protosw);
2105	if (ret)
2106		goto out_tcpv6_protocol;
2107
2108	ret = register_pernet_subsys(&tcpv6_net_ops);
2109	if (ret)
2110		goto out_tcpv6_protosw;
 
 
 
 
 
2111out:
2112	return ret;
2113
 
 
2114out_tcpv6_protosw:
2115	inet6_unregister_protosw(&tcpv6_protosw);
2116out_tcpv6_protocol:
2117	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2118	goto out;
2119}
2120
2121void tcpv6_exit(void)
2122{
2123	unregister_pernet_subsys(&tcpv6_net_ops);
2124	inet6_unregister_protosw(&tcpv6_protosw);
2125	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2126}