ip6_output.c - net/ipv6/ip6_output.c - Linux diff v4.17

   1/*
   2 *	IPv6 output functions
   3 *	Linux INET6 implementation
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	Based on linux/net/ipv4/ip_output.c
   9 *
  10 *	This program is free software; you can redistribute it and/or
  11 *      modify it under the terms of the GNU General Public License
  12 *      as published by the Free Software Foundation; either version
  13 *      2 of the License, or (at your option) any later version.
  14 *
  15 *	Changes:
   16 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
  17 *				extension headers are implemented.
  18 *				route changes now work.
  19 *				ip6_forward does not confuse sniffers.
  20 *				etc.
  21 *
  22 *      H. von Brand    :       Added missing #include <linux/string.h>
  23 *	Imran Patel	:	frag id should be in NBO
  24 *      Kazunori MIYAZAWA @USAGI
  25 *			:       add ip6_append_data and related functions
  26 *				for datagram xmit
  27 */
  28
  29#include <linux/errno.h>
  30#include <linux/kernel.h>
  31#include <linux/string.h>
  32#include <linux/socket.h>
  33#include <linux/net.h>
  34#include <linux/netdevice.h>
  35#include <linux/if_arp.h>
  36#include <linux/in6.h>
  37#include <linux/tcp.h>
  38#include <linux/route.h>
  39#include <linux/module.h>
  40#include <linux/slab.h>
  41
  42#include <linux/bpf-cgroup.h>
  43#include <linux/netfilter.h>
  44#include <linux/netfilter_ipv6.h>
  45
  46#include <net/sock.h>
  47#include <net/snmp.h>
  48
  49#include <net/ipv6.h>
  50#include <net/ndisc.h>
  51#include <net/protocol.h>
  52#include <net/ip6_route.h>
  53#include <net/addrconf.h>
  54#include <net/rawv6.h>
  55#include <net/icmp.h>
  56#include <net/xfrm.h>
  57#include <net/checksum.h>
  58#include <linux/mroute6.h>
  59#include <net/l3mdev.h>
  60#include <net/lwtunnel.h>
  61
  62static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
  63{
  64	struct dst_entry *dst = skb_dst(skb);
  65	struct net_device *dev = dst->dev;
  66	struct neighbour *neigh;
  67	struct in6_addr *nexthop;
  68	int ret;
  69
 
 
 
  70	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
  71		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
  72
  73		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
  74		    ((mroute6_is_socket(net, skb) &&
  75		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
  76		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
  77					 &ipv6_hdr(skb)->saddr))) {
  78			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
  79
  80			/* Do not check for IFF_ALLMULTI; multicast routing
  81			   is not supported in any case.
  82			 */
  83			if (newskb)
  84				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
  85					net, sk, newskb, NULL, newskb->dev,
  86					dev_loopback_xmit);
  87
  88			if (ipv6_hdr(skb)->hop_limit == 0) {
  89				IP6_INC_STATS(net, idev,
  90					      IPSTATS_MIB_OUTDISCARDS);
  91				kfree_skb(skb);
  92				return 0;
  93			}
  94		}
  95
  96		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
  97
  98		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
  99		    IPV6_ADDR_SCOPE_NODELOCAL &&
 100		    !(dev->flags & IFF_LOOPBACK)) {
 101			kfree_skb(skb);
 102			return 0;
 103		}
 104	}
 105
 106	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
 107		int res = lwtunnel_xmit(skb);
 108
 109		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
 110			return res;
 111	}
 112
 113	rcu_read_lock_bh();
 114	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
 115	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
 116	if (unlikely(!neigh))
 117		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
 118	if (!IS_ERR(neigh)) {
 119		sock_confirm_neigh(skb, neigh);
 120		ret = neigh_output(neigh, skb);
 121		rcu_read_unlock_bh();
 122		return ret;
 123	}
 124	rcu_read_unlock_bh();
 125
 126	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 127	kfree_skb(skb);
 128	return -EINVAL;
 129}
 130
 131static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 132{
 133	int ret;
 134
 135	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
 136	if (ret) {
 137		kfree_skb(skb);
 138		return ret;
 139	}
 140
 141#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 142	/* Policy lookup after SNAT yielded a new policy */
 143	if (skb_dst(skb)->xfrm) {
 144		IPCB(skb)->flags |= IPSKB_REROUTED;
 145		return dst_output(net, sk, skb);
 146	}
 147#endif
 148
 149	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 150	    dst_allfrag(skb_dst(skb)) ||
 151	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
 152		return ip6_fragment(net, sk, skb, ip6_finish_output2);
 153	else
 154		return ip6_finish_output2(net, sk, skb);
 155}
 156
 157int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 158{
 159	struct net_device *dev = skb_dst(skb)->dev;
 160	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 161
 162	skb->protocol = htons(ETH_P_IPV6);
 163	skb->dev = dev;
 164
 165	if (unlikely(idev->cnf.disable_ipv6)) {
 166		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 167		kfree_skb(skb);
 168		return 0;
 169	}
 170
 171	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
 172			    net, sk, skb, NULL, dev,
 173			    ip6_finish_output,
 174			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 175}
 176
 177bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
 178{
 179	if (!np->autoflowlabel_set)
 180		return ip6_default_np_autolabel(net);
 181	else
 182		return np->autoflowlabel;
 183}
 184
 185/*
 186 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 187 * Note : socket lock is not held for SYNACK packets, but might be modified
 188 * by calls to skb_set_owner_w() and ipv6_local_error(),
 189 * which are using proper atomic operations or spinlocks.
 190 */
 191int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 192	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
 193{
 194	struct net *net = sock_net(sk);
 195	const struct ipv6_pinfo *np = inet6_sk(sk);
 196	struct in6_addr *first_hop = &fl6->daddr;
 197	struct dst_entry *dst = skb_dst(skb);
 198	struct ipv6hdr *hdr;
 199	u8  proto = fl6->flowi6_proto;
 200	int seg_len = skb->len;
 201	int hlimit = -1;
 202	u32 mtu;
 203
 204	if (opt) {
 205		unsigned int head_room;
 206
 207		/* First: exthdrs may take lots of space (~8K for now)
 208		   MAX_HEADER is not enough.
 209		 */
 210		head_room = opt->opt_nflen + opt->opt_flen;
 211		seg_len += head_room;
 212		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
 213
 214		if (skb_headroom(skb) < head_room) {
 215			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 216			if (!skb2) {
 217				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 218					      IPSTATS_MIB_OUTDISCARDS);
 219				kfree_skb(skb);
 220				return -ENOBUFS;
 221			}
 222			consume_skb(skb);
 223			skb = skb2;
 224			/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
 225			 * it is safe to call in our context (socket lock not held)
 226			 */
 227			skb_set_owner_w(skb, (struct sock *)sk);
 228		}
 229		if (opt->opt_flen)
 230			ipv6_push_frag_opts(skb, opt, &proto);
 231		if (opt->opt_nflen)
 232			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
 233					     &fl6->saddr);
 234	}
 235
 236	skb_push(skb, sizeof(struct ipv6hdr));
 237	skb_reset_network_header(skb);
 238	hdr = ipv6_hdr(skb);
 239
 240	/*
 241	 *	Fill in the IPv6 header
 242	 */
 243	if (np)
 244		hlimit = np->hop_limit;
 245	if (hlimit < 0)
 246		hlimit = ip6_dst_hoplimit(dst);
 247
 248	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
 249				ip6_autoflowlabel(net, np), fl6));
 250
 251	hdr->payload_len = htons(seg_len);
 252	hdr->nexthdr = proto;
 253	hdr->hop_limit = hlimit;
 254
 255	hdr->saddr = fl6->saddr;
 256	hdr->daddr = *first_hop;
 257
 258	skb->protocol = htons(ETH_P_IPV6);
 259	skb->priority = sk->sk_priority;
 260	skb->mark = mark;
 261
 262	mtu = dst_mtu(dst);
 263	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
 264		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
 265			      IPSTATS_MIB_OUT, skb->len);
 266
 267		/* if egress device is enslaved to an L3 master device pass the
 268		 * skb to its handler for processing
 269		 */
 270		skb = l3mdev_ip6_out((struct sock *)sk, skb);
 271		if (unlikely(!skb))
 272			return 0;
 273
 274		/* hooks should never assume socket lock is held.
 275		 * we promote our socket to non const
 276		 */
 277		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 278			       net, (struct sock *)sk, skb, NULL, dst->dev,
 279			       dst_output);
 280	}
 281
 282	skb->dev = dst->dev;
 283	/* ipv6_local_error() does not require socket lock,
 284	 * we promote our socket to non const
 285	 */
 286	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
 287
 288	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
 289	kfree_skb(skb);
 290	return -EMSGSIZE;
 291}
 292EXPORT_SYMBOL(ip6_xmit);
 293
 294static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 295{
 296	struct ip6_ra_chain *ra;
 297	struct sock *last = NULL;
 298
 299	read_lock(&ip6_ra_lock);
 300	for (ra = ip6_ra_chain; ra; ra = ra->next) {
 301		struct sock *sk = ra->sk;
 302		if (sk && ra->sel == sel &&
 303		    (!sk->sk_bound_dev_if ||
 304		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
 305			if (last) {
 306				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 307				if (skb2)
 308					rawv6_rcv(last, skb2);
 309			}
 310			last = sk;
 311		}
 312	}
 313
 314	if (last) {
 315		rawv6_rcv(last, skb);
 316		read_unlock(&ip6_ra_lock);
 317		return 1;
 318	}
 319	read_unlock(&ip6_ra_lock);
 320	return 0;
 321}
 322
 323static int ip6_forward_proxy_check(struct sk_buff *skb)
 324{
 325	struct ipv6hdr *hdr = ipv6_hdr(skb);
 326	u8 nexthdr = hdr->nexthdr;
 327	__be16 frag_off;
 328	int offset;
 329
 330	if (ipv6_ext_hdr(nexthdr)) {
 331		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
 332		if (offset < 0)
 333			return 0;
 334	} else
 335		offset = sizeof(struct ipv6hdr);
 336
 337	if (nexthdr == IPPROTO_ICMPV6) {
 338		struct icmp6hdr *icmp6;
 339
 340		if (!pskb_may_pull(skb, (skb_network_header(skb) +
 341					 offset + 1 - skb->data)))
 342			return 0;
 343
 344		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 345
 346		switch (icmp6->icmp6_type) {
 347		case NDISC_ROUTER_SOLICITATION:
 348		case NDISC_ROUTER_ADVERTISEMENT:
 349		case NDISC_NEIGHBOUR_SOLICITATION:
 350		case NDISC_NEIGHBOUR_ADVERTISEMENT:
 351		case NDISC_REDIRECT:
 352			/* For reaction involving unicast neighbor discovery
 353			 * message destined to the proxied address, pass it to
 354			 * input function.
 355			 */
 356			return 1;
 357		default:
 358			break;
 359		}
 360	}
 361
 362	/*
 363	 * The proxying router can't forward traffic sent to a link-local
 364	 * address, so signal the sender and discard the packet. This
 365	 * behavior is clarified by the MIPv6 specification.
 366	 */
 367	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
 368		dst_link_failure(skb);
 369		return -1;
 370	}
 371
 372	return 0;
 373}
 374
 375static inline int ip6_forward_finish(struct net *net, struct sock *sk,
 376				     struct sk_buff *skb)
 377{
 378	struct dst_entry *dst = skb_dst(skb);
 379
 380	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 381	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
 382
 383	return dst_output(net, sk, skb);
 384}
 385
 386unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
 387{
 388	unsigned int mtu;
 389	struct inet6_dev *idev;
 390
 391	if (dst_metric_locked(dst, RTAX_MTU)) {
 392		mtu = dst_metric_raw(dst, RTAX_MTU);
 393		if (mtu)
 394			return mtu;
 395	}
 396
 397	mtu = IPV6_MIN_MTU;
 398	rcu_read_lock();
 399	idev = __in6_dev_get(dst->dev);
 400	if (idev)
 401		mtu = idev->cnf.mtu6;
 402	rcu_read_unlock();
 403
 404	return mtu;
 405}
 406EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
 407
 408static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 409{
 410	if (skb->len <= mtu)
 411		return false;
 412
 413	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
 414	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
 415		return true;
 416
 417	if (skb->ignore_df)
 418		return false;
 419
 420	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
 421		return false;
 422
 423	return true;
 424}
 425
 426int ip6_forward(struct sk_buff *skb)
 427{
 428	struct dst_entry *dst = skb_dst(skb);
 429	struct ipv6hdr *hdr = ipv6_hdr(skb);
 430	struct inet6_skb_parm *opt = IP6CB(skb);
 431	struct net *net = dev_net(dst->dev);
 432	u32 mtu;
 433
 434	if (net->ipv6.devconf_all->forwarding == 0)
 435		goto error;
 436
 437	if (skb->pkt_type != PACKET_HOST)
 438		goto drop;
 439
 440	if (unlikely(skb->sk))
 441		goto drop;
 442
 443	if (skb_warn_if_lro(skb))
 444		goto drop;
 445
 446	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
 447		__IP6_INC_STATS(net, ip6_dst_idev(dst),
 448				IPSTATS_MIB_INDISCARDS);
 449		goto drop;
 450	}
 451
 452	skb_forward_csum(skb);
 453
 454	/*
 455	 *	We DO NOT make any processing on
 456	 *	RA packets, pushing them to user level AS IS
 457	 *	without ane WARRANTY that application will be able
 458	 *	to interpret them. The reason is that we
 459	 *	cannot make anything clever here.
 460	 *
 461	 *	We are not end-node, so that if packet contains
 462	 *	AH/ESP, we cannot make anything.
 463	 *	Defragmentation also would be mistake, RA packets
 464	 *	cannot be fragmented, because there is no warranty
 465	 *	that different fragments will go along one path. --ANK
 466	 */
 467	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
 468		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
 469			return 0;
 470	}
 471
 472	/*
 473	 *	check and decrement ttl
 474	 */
 475	if (hdr->hop_limit <= 1) {
 476		/* Force OUTPUT device used as source address */
 477		skb->dev = dst->dev;
 478		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
 479		__IP6_INC_STATS(net, ip6_dst_idev(dst),
 480				IPSTATS_MIB_INHDRERRORS);
 481
 482		kfree_skb(skb);
 483		return -ETIMEDOUT;
 484	}
 485
 486	/* XXX: idev->cnf.proxy_ndp? */
 487	if (net->ipv6.devconf_all->proxy_ndp &&
 488	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
 489		int proxied = ip6_forward_proxy_check(skb);
 490		if (proxied > 0)
 491			return ip6_input(skb);
 492		else if (proxied < 0) {
 493			__IP6_INC_STATS(net, ip6_dst_idev(dst),
 494					IPSTATS_MIB_INDISCARDS);
 495			goto drop;
 496		}
 497	}
 498
 499	if (!xfrm6_route_forward(skb)) {
 500		__IP6_INC_STATS(net, ip6_dst_idev(dst),
 501				IPSTATS_MIB_INDISCARDS);
 502		goto drop;
 503	}
 504	dst = skb_dst(skb);
 505
 506	/* IPv6 specs say nothing about it, but it is clear that we cannot
 507	   send redirects to source routed frames.
 508	   We don't send redirects to frames decapsulated from IPsec.
 509	 */
 510	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
 511		struct in6_addr *target = NULL;
 512		struct inet_peer *peer;
 513		struct rt6_info *rt;
 514
 515		/*
 516		 *	incoming and outgoing devices are the same
 517		 *	send a redirect.
 518		 */
 519
 520		rt = (struct rt6_info *) dst;
 521		if (rt->rt6i_flags & RTF_GATEWAY)
 522			target = &rt->rt6i_gateway;
 523		else
 524			target = &hdr->daddr;
 525
 526		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
 527
 528		/* Limit redirects both by destination (here)
 529		   and by source (inside ndisc_send_redirect)
 530		 */
 531		if (inet_peer_xrlim_allow(peer, 1*HZ))
 532			ndisc_send_redirect(skb, target);
 533		if (peer)
 534			inet_putpeer(peer);
 535	} else {
 536		int addrtype = ipv6_addr_type(&hdr->saddr);
 537
 538		/* This check is security critical. */
 539		if (addrtype == IPV6_ADDR_ANY ||
 540		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
 541			goto error;
 542		if (addrtype & IPV6_ADDR_LINKLOCAL) {
 543			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
 544				    ICMPV6_NOT_NEIGHBOUR, 0);
 545			goto error;
 546		}
 547	}
 548
 549	mtu = ip6_dst_mtu_forward(dst);
 550	if (mtu < IPV6_MIN_MTU)
 551		mtu = IPV6_MIN_MTU;
 552
 553	if (ip6_pkt_too_big(skb, mtu)) {
 554		/* Again, force OUTPUT device used as source address */
 555		skb->dev = dst->dev;
 556		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 557		__IP6_INC_STATS(net, ip6_dst_idev(dst),
 558				IPSTATS_MIB_INTOOBIGERRORS);
 559		__IP6_INC_STATS(net, ip6_dst_idev(dst),
 560				IPSTATS_MIB_FRAGFAILS);
 561		kfree_skb(skb);
 562		return -EMSGSIZE;
 563	}
 564
 565	if (skb_cow(skb, dst->dev->hard_header_len)) {
 566		__IP6_INC_STATS(net, ip6_dst_idev(dst),
 567				IPSTATS_MIB_OUTDISCARDS);
 568		goto drop;
 569	}
 570
 571	hdr = ipv6_hdr(skb);
 572
 573	/* Mangling hops number delayed to point after skb COW */
 574
 575	hdr->hop_limit--;
 576
 
 
 577	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
 578		       net, NULL, skb, skb->dev, dst->dev,
 579		       ip6_forward_finish);
 580
 581error:
 582	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
 583drop:
 584	kfree_skb(skb);
 585	return -EINVAL;
 586}
 587
 588static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 589{
 590	to->pkt_type = from->pkt_type;
 591	to->priority = from->priority;
 592	to->protocol = from->protocol;
 593	skb_dst_drop(to);
 594	skb_dst_set(to, dst_clone(skb_dst(from)));
 595	to->dev = from->dev;
 596	to->mark = from->mark;
 597
 598#ifdef CONFIG_NET_SCHED
 599	to->tc_index = from->tc_index;
 600#endif
 601	nf_copy(to, from);
 602	skb_copy_secmark(to, from);
 603}
 604
 605int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 606		 int (*output)(struct net *, struct sock *, struct sk_buff *))
 607{
 608	struct sk_buff *frag;
 609	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 610	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
 611				inet6_sk(skb->sk) : NULL;
 612	struct ipv6hdr *tmp_hdr;
 613	struct frag_hdr *fh;
 614	unsigned int mtu, hlen, left, len;
 615	int hroom, troom;
 616	__be32 frag_id;
 617	int ptr, offset = 0, err = 0;
 618	u8 *prevhdr, nexthdr = 0;
 619
 620	err = ip6_find_1stfragopt(skb, &prevhdr);
 621	if (err < 0)
 622		goto fail;
 623	hlen = err;
 624	nexthdr = *prevhdr;
 625
 626	mtu = ip6_skb_dst_mtu(skb);
 627
 628	/* We must not fragment if the socket is set to force MTU discovery
 629	 * or if the skb it not generated by a local socket.
 630	 */
 631	if (unlikely(!skb->ignore_df && skb->len > mtu))
 632		goto fail_toobig;
 633
 634	if (IP6CB(skb)->frag_max_size) {
 635		if (IP6CB(skb)->frag_max_size > mtu)
 636			goto fail_toobig;
 637
 638		/* don't send fragments larger than what we received */
 639		mtu = IP6CB(skb)->frag_max_size;
 640		if (mtu < IPV6_MIN_MTU)
 641			mtu = IPV6_MIN_MTU;
 642	}
 643
 644	if (np && np->frag_size < mtu) {
 645		if (np->frag_size)
 646			mtu = np->frag_size;
 647	}
 648	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
 649		goto fail_toobig;
 650	mtu -= hlen + sizeof(struct frag_hdr);
 651
 652	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
 653				    &ipv6_hdr(skb)->saddr);
 654
 655	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 656	    (err = skb_checksum_help(skb)))
 657		goto fail;
 658
 659	hroom = LL_RESERVED_SPACE(rt->dst.dev);
 660	if (skb_has_frag_list(skb)) {
 661		unsigned int first_len = skb_pagelen(skb);
 662		struct sk_buff *frag2;
 663
 664		if (first_len - hlen > mtu ||
 665		    ((first_len - hlen) & 7) ||
 666		    skb_cloned(skb) ||
 667		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
 668			goto slow_path;
 669
 670		skb_walk_frags(skb, frag) {
 671			/* Correct geometry. */
 672			if (frag->len > mtu ||
 673			    ((frag->len & 7) && frag->next) ||
 674			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
 675				goto slow_path_clean;
 676
 677			/* Partially cloned skb? */
 678			if (skb_shared(frag))
 679				goto slow_path_clean;
 680
 681			BUG_ON(frag->sk);
 682			if (skb->sk) {
 683				frag->sk = skb->sk;
 684				frag->destructor = sock_wfree;
 685			}
 686			skb->truesize -= frag->truesize;
 687		}
 688
 689		err = 0;
 690		offset = 0;
 691		/* BUILD HEADER */
 692
 693		*prevhdr = NEXTHDR_FRAGMENT;
 694		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 695		if (!tmp_hdr) {
 
 
 696			err = -ENOMEM;
 697			goto fail;
 698		}
 699		frag = skb_shinfo(skb)->frag_list;
 700		skb_frag_list_init(skb);
 701
 702		__skb_pull(skb, hlen);
 703		fh = __skb_push(skb, sizeof(struct frag_hdr));
 704		__skb_push(skb, hlen);
 705		skb_reset_network_header(skb);
 706		memcpy(skb_network_header(skb), tmp_hdr, hlen);
 707
 708		fh->nexthdr = nexthdr;
 709		fh->reserved = 0;
 710		fh->frag_off = htons(IP6_MF);
 711		fh->identification = frag_id;
 712
 713		first_len = skb_pagelen(skb);
 714		skb->data_len = first_len - skb_headlen(skb);
 715		skb->len = first_len;
 716		ipv6_hdr(skb)->payload_len = htons(first_len -
 717						   sizeof(struct ipv6hdr));
 718
 
 
 719		for (;;) {
 720			/* Prepare header of the next frame,
 721			 * before previous one went down. */
 722			if (frag) {
 723				frag->ip_summed = CHECKSUM_NONE;
 724				skb_reset_transport_header(frag);
 725				fh = __skb_push(frag, sizeof(struct frag_hdr));
 726				__skb_push(frag, hlen);
 727				skb_reset_network_header(frag);
 728				memcpy(skb_network_header(frag), tmp_hdr,
 729				       hlen);
 730				offset += skb->len - hlen - sizeof(struct frag_hdr);
 731				fh->nexthdr = nexthdr;
 732				fh->reserved = 0;
 733				fh->frag_off = htons(offset);
 734				if (frag->next)
 735					fh->frag_off |= htons(IP6_MF);
 736				fh->identification = frag_id;
 737				ipv6_hdr(frag)->payload_len =
 738						htons(frag->len -
 739						      sizeof(struct ipv6hdr));
 740				ip6_copy_metadata(frag, skb);
 741			}
 742
 743			err = output(net, sk, skb);
 744			if (!err)
 745				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 746					      IPSTATS_MIB_FRAGCREATES);
 747
 748			if (err || !frag)
 749				break;
 750
 751			skb = frag;
 752			frag = skb->next;
 753			skb->next = NULL;
 754		}
 755
 756		kfree(tmp_hdr);
 757
 758		if (err == 0) {
 759			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 760				      IPSTATS_MIB_FRAGOKS);
 
 761			return 0;
 762		}
 763
 764		kfree_skb_list(frag);
 765
 766		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 767			      IPSTATS_MIB_FRAGFAILS);
 
 768		return err;
 769
 770slow_path_clean:
 771		skb_walk_frags(skb, frag2) {
 772			if (frag2 == frag)
 773				break;
 774			frag2->sk = NULL;
 775			frag2->destructor = NULL;
 776			skb->truesize += frag2->truesize;
 777		}
 778	}
 779
 780slow_path:
 781	left = skb->len - hlen;		/* Space per frame */
 782	ptr = hlen;			/* Where to start from */
 783
 784	/*
 785	 *	Fragment the datagram.
 786	 */
 787
 788	troom = rt->dst.dev->needed_tailroom;
 789
 790	/*
 791	 *	Keep copying data until we run out.
 792	 */
 793	while (left > 0)	{
 794		u8 *fragnexthdr_offset;
 795
 796		len = left;
 797		/* IF: it doesn't fit, use 'mtu' - the data space left */
 798		if (len > mtu)
 799			len = mtu;
 800		/* IF: we are not sending up to and including the packet end
 801		   then align the next start on an eight byte boundary */
 802		if (len < left)	{
 803			len &= ~7;
 804		}
 805
 806		/* Allocate buffer */
 807		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
 808				 hroom + troom, GFP_ATOMIC);
 809		if (!frag) {
 
 
 810			err = -ENOMEM;
 811			goto fail;
 812		}
 813
 814		/*
 815		 *	Set up data on packet
 816		 */
 817
 818		ip6_copy_metadata(frag, skb);
 819		skb_reserve(frag, hroom);
 820		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 821		skb_reset_network_header(frag);
 822		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
 823		frag->transport_header = (frag->network_header + hlen +
 824					  sizeof(struct frag_hdr));
 825
 826		/*
 827		 *	Charge the memory for the fragment to any owner
 828		 *	it might possess
 829		 */
 830		if (skb->sk)
 831			skb_set_owner_w(frag, skb->sk);
 832
 833		/*
 834		 *	Copy the packet header into the new buffer.
 835		 */
 836		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 837
 838		fragnexthdr_offset = skb_network_header(frag);
 839		fragnexthdr_offset += prevhdr - skb_network_header(skb);
 840		*fragnexthdr_offset = NEXTHDR_FRAGMENT;
 841
 842		/*
 843		 *	Build fragment header.
 844		 */
 845		fh->nexthdr = nexthdr;
 846		fh->reserved = 0;
 847		fh->identification = frag_id;
 848
 849		/*
 850		 *	Copy a block of the IP datagram.
 851		 */
 852		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
 853				     len));
 854		left -= len;
 855
 856		fh->frag_off = htons(offset);
 857		if (left > 0)
 858			fh->frag_off |= htons(IP6_MF);
 859		ipv6_hdr(frag)->payload_len = htons(frag->len -
 860						    sizeof(struct ipv6hdr));
 861
 862		ptr += len;
 863		offset += len;
 864
 865		/*
 866		 *	Put this fragment into the sending queue.
 867		 */
 868		err = output(net, sk, frag);
 869		if (err)
 870			goto fail;
 871
 872		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 873			      IPSTATS_MIB_FRAGCREATES);
 874	}
 875	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 876		      IPSTATS_MIB_FRAGOKS);
 877	consume_skb(skb);
 878	return err;
 879
 880fail_toobig:
 881	if (skb->sk && dst_allfrag(skb_dst(skb)))
 882		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
 883
 
 884	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 885	err = -EMSGSIZE;
 886
 887fail:
 888	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 889		      IPSTATS_MIB_FRAGFAILS);
 890	kfree_skb(skb);
 891	return err;
 892}
 893
 894static inline int ip6_rt_check(const struct rt6key *rt_key,
 895			       const struct in6_addr *fl_addr,
 896			       const struct in6_addr *addr_cache)
 897{
 898	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 899		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
 900}
 901
 902static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 903					  struct dst_entry *dst,
 904					  const struct flowi6 *fl6)
 905{
 906	struct ipv6_pinfo *np = inet6_sk(sk);
 907	struct rt6_info *rt;
 908
 909	if (!dst)
 910		goto out;
 911
 912	if (dst->ops->family != AF_INET6) {
 913		dst_release(dst);
 914		return NULL;
 915	}
 916
 917	rt = (struct rt6_info *)dst;
 918	/* Yes, checking route validity in not connected
 919	 * case is not very simple. Take into account,
 920	 * that we do not support routing by source, TOS,
 921	 * and MSG_DONTROUTE		--ANK (980726)
 922	 *
 923	 * 1. ip6_rt_check(): If route was host route,
 924	 *    check that cached destination is current.
 925	 *    If it is network route, we still may
 926	 *    check its validity using saved pointer
 927	 *    to the last used address: daddr_cache.
 928	 *    We do not want to save whole address now,
 929	 *    (because main consumer of this service
 930	 *    is tcp, which has not this problem),
 931	 *    so that the last trick works only on connected
 932	 *    sockets.
 933	 * 2. oif also should be the same.
 934	 */
 935	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
 936#ifdef CONFIG_IPV6_SUBTREES
 937	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
 938#endif
 939	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
 940	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
 941		dst_release(dst);
 942		dst = NULL;
 943	}
 944
 945out:
 946	return dst;
 947}
 948
 949static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
 950			       struct dst_entry **dst, struct flowi6 *fl6)
 951{
 952#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 953	struct neighbour *n;
 954	struct rt6_info *rt;
 955#endif
 956	int err;
 957	int flags = 0;
 958
 959	/* The correct way to handle this would be to do
 960	 * ip6_route_get_saddr, and then ip6_route_output; however,
 961	 * the route-specific preferred source forces the
 962	 * ip6_route_output call _before_ ip6_route_get_saddr.
 963	 *
 964	 * In source specific routing (no src=any default route),
 965	 * ip6_route_output will fail given src=any saddr, though, so
 966	 * that's why we try it again later.
 967	 */
 968	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
 969		struct rt6_info *rt;
 970		bool had_dst = *dst != NULL;
 971
 972		if (!had_dst)
 973			*dst = ip6_route_output(net, sk, fl6);
 974		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
 975		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
 976					  sk ? inet6_sk(sk)->srcprefs : 0,
 977					  &fl6->saddr);
 978		if (err)
 979			goto out_err_release;
 980
 981		/* If we had an erroneous initial result, pretend it
 982		 * never existed and let the SA-enabled version take
 983		 * over.
 984		 */
 985		if (!had_dst && (*dst)->error) {
 986			dst_release(*dst);
 987			*dst = NULL;
 988		}
 989
 990		if (fl6->flowi6_oif)
 991			flags |= RT6_LOOKUP_F_IFACE;
 992	}
 993
 994	if (!*dst)
 995		*dst = ip6_route_output_flags(net, sk, fl6, flags);
 996
 997	err = (*dst)->error;
 998	if (err)
 999		goto out_err_release;
1000
1001#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1002	/*
1003	 * Here if the dst entry we've looked up
1004	 * has a neighbour entry that is in the INCOMPLETE
1005	 * state and the src address from the flow is
1006	 * marked as OPTIMISTIC, we release the found
1007	 * dst entry and replace it instead with the
1008	 * dst entry of the nexthop router
1009	 */
1010	rt = (struct rt6_info *) *dst;
1011	rcu_read_lock_bh();
1012	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1013				      rt6_nexthop(rt, &fl6->daddr));
1014	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1015	rcu_read_unlock_bh();
1016
1017	if (err) {
1018		struct inet6_ifaddr *ifp;
1019		struct flowi6 fl_gw6;
1020		int redirect;
1021
1022		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1023				      (*dst)->dev, 1);
1024
1025		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1026		if (ifp)
1027			in6_ifa_put(ifp);
1028
1029		if (redirect) {
1030			/*
1031			 * We need to get the dst entry for the
1032			 * default router instead
1033			 */
1034			dst_release(*dst);
1035			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1036			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1037			*dst = ip6_route_output(net, sk, &fl_gw6);
1038			err = (*dst)->error;
1039			if (err)
1040				goto out_err_release;
1041		}
1042	}
1043#endif
1044	if (ipv6_addr_v4mapped(&fl6->saddr) &&
1045	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1046		err = -EAFNOSUPPORT;
1047		goto out_err_release;
1048	}
1049
1050	return 0;
1051
1052out_err_release:
1053	dst_release(*dst);
1054	*dst = NULL;
1055
1056	if (err == -ENETUNREACH)
1057		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1058	return err;
1059}
1060
1061/**
1062 *	ip6_dst_lookup - perform route lookup on flow
1063 *	@sk: socket which provides route info
1064 *	@dst: pointer to dst_entry * for result
1065 *	@fl6: flow to lookup
1066 *
1067 *	This function performs a route lookup on the given flow.
1068 *
1069 *	It returns zero on success, or a standard errno code on error.
1070 */
1071int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1072		   struct flowi6 *fl6)
1073{
1074	*dst = NULL;
1075	return ip6_dst_lookup_tail(net, sk, dst, fl6);
1076}
1077EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1078
1079/**
1080 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1081 *	@sk: socket which provides route info
1082 *	@fl6: flow to lookup
1083 *	@final_dst: final destination address for ipsec lookup
1084 *
1085 *	This function performs a route lookup on the given flow.
1086 *
1087 *	It returns a valid dst pointer on success, or a pointer encoded
1088 *	error code.
1089 */
1090struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
1091				      const struct in6_addr *final_dst)
1092{
1093	struct dst_entry *dst = NULL;
1094	int err;
1095
1096	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
1097	if (err)
1098		return ERR_PTR(err);
1099	if (final_dst)
1100		fl6->daddr = *final_dst;
1101
1102	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1103}
1104EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1105
1106/**
1107 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1108 *	@sk: socket which provides the dst cache and route info
1109 *	@fl6: flow to lookup
1110 *	@final_dst: final destination address for ipsec lookup
1111 *	@connected: whether @sk is connected or not
1112 *
1113 *	This function performs a route lookup on the given flow with the
1114 *	possibility of using the cached route in the socket if it is valid.
1115 *	It will take the socket dst lock when operating on the dst cache.
1116 *	As a result, this function can only be used in process context.
1117 *
1118 *	In addition, for a connected socket, cache the dst in the socket
1119 *	if the current cache is not valid.
1120 *
1121 *	It returns a valid dst pointer on success, or a pointer encoded
1122 *	error code.
1123 */
1124struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1125					 const struct in6_addr *final_dst,
1126					 bool connected)
1127{
1128	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1129
1130	dst = ip6_sk_dst_check(sk, dst, fl6);
1131	if (dst)
1132		return dst;
1133
1134	dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
1135	if (connected && !IS_ERR(dst))
1136		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1137
1138	return dst;
1139}
1140EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1142static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1143					       gfp_t gfp)
1144{
1145	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1146}
1147
1148static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1149						gfp_t gfp)
1150{
1151	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1152}
1153
1154static void ip6_append_data_mtu(unsigned int *mtu,
1155				int *maxfraglen,
1156				unsigned int fragheaderlen,
1157				struct sk_buff *skb,
1158				struct rt6_info *rt,
1159				unsigned int orig_mtu)
1160{
1161	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1162		if (!skb) {
1163			/* first fragment, reserve header_len */
1164			*mtu = orig_mtu - rt->dst.header_len;
1165
1166		} else {
1167			/*
1168			 * this fragment is not first, the headers
1169			 * space is regarded as data space.
1170			 */
1171			*mtu = orig_mtu;
1172		}
1173		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
1174			      + fragheaderlen - sizeof(struct frag_hdr);
1175	}
1176}
1177
1178static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1179			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1180			  struct rt6_info *rt, struct flowi6 *fl6)
1181{
1182	struct ipv6_pinfo *np = inet6_sk(sk);
1183	unsigned int mtu;
1184	struct ipv6_txoptions *opt = ipc6->opt;
1185
1186	/*
1187	 * setup for corking
1188	 */
1189	if (opt) {
1190		if (WARN_ON(v6_cork->opt))
1191			return -EINVAL;
1192
1193		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1194		if (unlikely(!v6_cork->opt))
1195			return -ENOBUFS;
1196
1197		v6_cork->opt->tot_len = sizeof(*opt);
1198		v6_cork->opt->opt_flen = opt->opt_flen;
1199		v6_cork->opt->opt_nflen = opt->opt_nflen;
1200
1201		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1202						    sk->sk_allocation);
1203		if (opt->dst0opt && !v6_cork->opt->dst0opt)
1204			return -ENOBUFS;
1205
1206		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1207						    sk->sk_allocation);
1208		if (opt->dst1opt && !v6_cork->opt->dst1opt)
1209			return -ENOBUFS;
1210
1211		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1212						   sk->sk_allocation);
1213		if (opt->hopopt && !v6_cork->opt->hopopt)
1214			return -ENOBUFS;
1215
1216		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1217						    sk->sk_allocation);
1218		if (opt->srcrt && !v6_cork->opt->srcrt)
1219			return -ENOBUFS;
1220
1221		/* need source address above miyazawa*/
1222	}
1223	dst_hold(&rt->dst);
1224	cork->base.dst = &rt->dst;
1225	cork->fl.u.ip6 = *fl6;
1226	v6_cork->hop_limit = ipc6->hlimit;
1227	v6_cork->tclass = ipc6->tclass;
1228	if (rt->dst.flags & DST_XFRM_TUNNEL)
1229		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1230		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1231	else
1232		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1233			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1234	if (np->frag_size < mtu) {
1235		if (np->frag_size)
1236			mtu = np->frag_size;
1237	}
1238	if (mtu < IPV6_MIN_MTU)
1239		return -EINVAL;
1240	cork->base.fragsize = mtu;
1241	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1242		cork->base.flags |= IPCORK_ALLFRAG;
1243	cork->base.length = 0;
1244
1245	return 0;
1246}
1247
/*
 * __ip6_append_data - append @length bytes to the pending skbs on @queue
 *
 * Data is pulled from the caller through @getfrag(@from, ...) and is either
 * added to the last skb on @queue or placed into newly allocated skbs when
 * the current one has no room left.  Sizes are derived from cork->fragsize,
 * the route in cork->dst and the tx options in @v6_cork->opt.
 *
 * Returns 0 on success.  Returns -EMSGSIZE (after reporting through
 * ipv6_local_error(), and through ipv6_local_rxpmtu() in the dontfrag case)
 * when the data cannot be sent.  Other failures (-EINVAL, -ENOBUFS,
 * -ENOMEM, -EFAULT, or the error set by sock_alloc_send_skb()) go through
 * the "error" label, which takes the not-yet-appended remainder back out
 * of cork->length and bumps IPSTATS_MIB_OUTDISCARDS.
 */
1248static int __ip6_append_data(struct sock *sk,
1249			     struct flowi6 *fl6,
1250			     struct sk_buff_head *queue,
1251			     struct inet_cork *cork,
1252			     struct inet6_cork *v6_cork,
1253			     struct page_frag *pfrag,
1254			     int getfrag(void *from, char *to, int offset,
1255					 int len, int odd, struct sk_buff *skb),
1256			     void *from, int length, int transhdrlen,
1257			     unsigned int flags, struct ipcm6_cookie *ipc6,
1258			     const struct sockcm_cookie *sockc)
1259{
1260	struct sk_buff *skb, *skb_prev = NULL;
1261	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1262	int exthdrlen = 0;
1263	int dst_exthdrlen = 0;
1264	int hh_len;
1265	int copy;
1266	int err;
1267	int offset = 0;
1268	__u8 tx_flags = 0;
1269	u32 tskey = 0;
1270	struct rt6_info *rt = (struct rt6_info *)cork->dst;
1271	struct ipv6_txoptions *opt = v6_cork->opt;
1272	int csummode = CHECKSUM_NONE;
1273	unsigned int maxnonfragsize, headersize;
	/* truesize accumulated locally, added to sk_wmem_alloc on exit */
1274	unsigned int wmem_alloc_delta = 0;
1275
	/* Extension header room is only accounted for on an empty queue. */
1276	skb = skb_peek_tail(queue);
1277	if (!skb) {
1278		exthdrlen = opt ? opt->opt_flen : 0;
1279		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1280	}
1281
1282	mtu = cork->fragsize;
1283	orig_mtu = mtu;
1284
1285	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1286
1287	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1288			(opt ? opt->opt_nflen : 0);
1289	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1290		     sizeof(struct frag_hdr);
1291
1292	headersize = sizeof(struct ipv6hdr) +
1293		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1294		     (dst_allfrag(&rt->dst) ?
1295		      sizeof(struct frag_hdr) : 0) +
1296		     rt->rt6i_nfheader_len;
1297
1298	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1299	 * the first fragment
1300	 */
1301	if (headersize + transhdrlen > mtu)
1302		goto emsgsize;
1303
	/* dontfrag on UDP/RAW: report the usable size and refuse to send */
1304	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1305	    (sk->sk_protocol == IPPROTO_UDP ||
1306	     sk->sk_protocol == IPPROTO_RAW)) {
1307		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1308				sizeof(struct ipv6hdr));
1309		goto emsgsize;
1310	}
1311
1312	if (ip6_sk_ignore_df(sk))
1313		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1314	else
1315		maxnonfragsize = mtu;
1316
1317	if (cork->length + length > maxnonfragsize - headersize) {
1318emsgsize:
1319		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1320		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
 
1321		return -EMSGSIZE;
1322	}
1323
1324	/* CHECKSUM_PARTIAL only with no extension headers and when
1325	 * we are not going to fragment
1326	 */
1327	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1328	    headersize == sizeof(struct ipv6hdr) &&
1329	    length <= mtu - headersize &&
1330	    !(flags & MSG_MORE) &&
1331	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1332		csummode = CHECKSUM_PARTIAL;
1333
1334	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
1335		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
1336		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
1337		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1338			tskey = sk->sk_tskey++;
1339	}
1340
1341	/*
1342	 * Let's try using as much space as possible.
1343	 * Use MTU if total length of the message fits into the MTU.
1344	 * Otherwise, we need to reserve fragment header and
1345	 * fragment alignment (= 8-15 octets, in total).
1346	 *
1347	 * Note that we may need to "move" the data from the tail
1348	 * of the buffer to the new fragment when we split
1349	 * the message.
1350	 *
1351	 * FIXME: It may be fragmented into multiple chunks
1352	 *        at once if non-fragmentable extension headers
1353	 *        are too large.
1354	 * --yoshfuji
1355	 */
1356
1357	cork->length += length;
 
 
 
 
 
 
 
 
 
 
 
 
 
	/* Empty queue: jump straight into the allocation branch of the loop. */
1358	if (!skb)
1359		goto alloc_new_skb;
1360
1361	while (length > 0) {
1362		/* Check if the remaining data fits into current packet. */
1363		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1364		if (copy < length)
1365			copy = maxfraglen - skb->len;
1366
1367		if (copy <= 0) {
1368			char *data;
1369			unsigned int datalen;
1370			unsigned int fraglen;
1371			unsigned int fraggap;
1372			unsigned int alloclen;
1373alloc_new_skb:
1374			/* There's no room in the current skb */
			/* fraggap: bytes of the current skb beyond maxfraglen */
1375			if (skb)
1376				fraggap = skb->len - maxfraglen;
1377			else
1378				fraggap = 0;
1379			/* update mtu and maxfraglen if necessary */
1380			if (!skb || !skb_prev)
1381				ip6_append_data_mtu(&mtu, &maxfraglen,
1382						    fragheaderlen, skb, rt,
1383						    orig_mtu);
1384
1385			skb_prev = skb;
1386
1387			/*
1388			 * If remaining data exceeds the mtu,
1389			 * we know we need more fragment(s).
1390			 */
1391			datalen = length + fraggap;
1392
1393			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1394				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			/* MSG_MORE without scatter-gather: allocate a full MTU up front */
1395			if ((flags & MSG_MORE) &&
1396			    !(rt->dst.dev->features&NETIF_F_SG))
1397				alloclen = mtu;
1398			else
1399				alloclen = datalen + fragheaderlen;
1400
1401			alloclen += dst_exthdrlen;
1402
1403			if (datalen != length + fraggap) {
1404				/*
1405				 * this is not the last fragment, the trailer
1406				 * space is regarded as data space.
1407				 */
1408				datalen += rt->dst.trailer_len;
1409			}
1410
1411			alloclen += rt->dst.trailer_len;
1412			fraglen = datalen + fragheaderlen;
1413
1414			/*
1415			 * We just reserve space for fragment header.
1416			 * Note: this may be overallocation if the message
1417			 * (without MSG_MORE) fits into the MTU.
1418			 */
1419			alloclen += sizeof(struct frag_hdr);
1420
1421			copy = datalen - transhdrlen - fraggap;
1422			if (copy < 0) {
1423				err = -EINVAL;
1424				goto error;
1425			}
			/*
			 * The skb carrying the transport header is allocated via
			 * sock_alloc_send_skb(); later ones use alloc_skb(), and
			 * only while sk_wmem_alloc plus the local delta stays
			 * within 2 * sk_sndbuf.
			 */
1426			if (transhdrlen) {
1427				skb = sock_alloc_send_skb(sk,
1428						alloclen + hh_len,
1429						(flags & MSG_DONTWAIT), &err);
1430			} else {
1431				skb = NULL;
1432				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1433				    2 * sk->sk_sndbuf)
1434					skb = alloc_skb(alloclen + hh_len,
1435							sk->sk_allocation);
 
1436				if (unlikely(!skb))
1437					err = -ENOBUFS;
1438			}
1439			if (!skb)
1440				goto error;
1441			/*
1442			 *	Fill in the control structures
1443			 */
1444			skb->protocol = htons(ETH_P_IPV6);
1445			skb->ip_summed = csummode;
1446			skb->csum = 0;
1447			/* reserve for fragmentation and ipsec header */
1448			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1449				    dst_exthdrlen);
1450
1451			/* Only the initial fragment is time stamped */
1452			skb_shinfo(skb)->tx_flags = tx_flags;
1453			tx_flags = 0;
1454			skb_shinfo(skb)->tskey = tskey;
1455			tskey = 0;
1456
1457			/*
1458			 *	Find where to start putting bytes
1459			 */
1460			data = skb_put(skb, fraglen);
1461			skb_set_network_header(skb, exthdrlen);
1462			data += fragheaderlen;
1463			skb->transport_header = (skb->network_header +
1464						 fragheaderlen);
			/*
			 * Move the overhanging fraggap bytes from the previous
			 * skb into this one, fix up both checksums and trim
			 * the previous skb back to maxfraglen.
			 */
1465			if (fraggap) {
1466				skb->csum = skb_copy_and_csum_bits(
1467					skb_prev, maxfraglen,
1468					data + transhdrlen, fraggap, 0);
1469				skb_prev->csum = csum_sub(skb_prev->csum,
1470							  skb->csum);
1471				data += fraggap;
1472				pskb_trim_unique(skb_prev, maxfraglen);
1473			}
1474			if (copy > 0 &&
1475			    getfrag(from, data + transhdrlen, offset,
1476				    copy, fraggap, skb) < 0) {
 
 
 
 
1477				err = -EFAULT;
1478				kfree_skb(skb);
1479				goto error;
1480			}
1481
1482			offset += copy;
1483			length -= datalen - fraggap;
			/* header reservations apply to the first skb only */
1484			transhdrlen = 0;
1485			exthdrlen = 0;
1486			dst_exthdrlen = 0;
1487
1488			if ((flags & MSG_CONFIRM) && !skb_prev)
1489				skb_set_dst_pending_confirm(skb, 1);
1490
1491			/*
1492			 * Put the packet on the pending queue
1493			 */
			/* skbs without a destructor are charged to @sk here */
1494			if (!skb->destructor) {
1495				skb->destructor = sock_wfree;
1496				skb->sk = sk;
1497				wmem_alloc_delta += skb->truesize;
1498			}
1499			__skb_queue_tail(queue, skb);
1500			continue;
1501		}
1502
1503		if (copy > length)
1504			copy = length;
1505
		/* No scatter-gather and enough tailroom: copy into the linear area. */
1506		if (!(rt->dst.dev->features&NETIF_F_SG) &&
1507		    skb_tailroom(skb) >= copy) {
1508			unsigned int off;
1509
1510			off = skb->len;
1511			if (getfrag(from, skb_put(skb, copy),
1512						offset, copy, off, skb) < 0) {
1513				__skb_trim(skb, off);
1514				err = -EFAULT;
1515				goto error;
1516			}
1517		} else {
			/* Otherwise copy into the page fragment @pfrag. */
1518			int i = skb_shinfo(skb)->nr_frags;
1519
1520			err = -ENOMEM;
1521			if (!sk_page_frag_refill(sk, pfrag))
1522				goto error;
1523
1524			if (!skb_can_coalesce(skb, i, pfrag->page,
1525					      pfrag->offset)) {
1526				err = -EMSGSIZE;
1527				if (i == MAX_SKB_FRAGS)
1528					goto error;
1529
1530				__skb_fill_page_desc(skb, i, pfrag->page,
1531						     pfrag->offset, 0);
1532				skb_shinfo(skb)->nr_frags = ++i;
1533				get_page(pfrag->page);
1534			}
1535			copy = min_t(int, copy, pfrag->size - pfrag->offset);
1536			if (getfrag(from,
1537				    page_address(pfrag->page) + pfrag->offset,
1538				    offset, copy, skb->len, skb) < 0)
1539				goto error_efault;
1540
1541			pfrag->offset += copy;
1542			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1543			skb->len += copy;
1544			skb->data_len += copy;
1545			skb->truesize += copy;
1546			wmem_alloc_delta += copy;
1547		}
1548		offset += copy;
1549		length -= copy;
1550	}
1551
1552	if (wmem_alloc_delta)
1553		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1554	return 0;
1555
1556error_efault:
1557	err = -EFAULT;
1558error:
	/* @length is what was not appended; remove it from the cork total */
1559	cork->length -= length;
1560	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1561	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1562	return err;
1563}
1564
1565int ip6_append_data(struct sock *sk,
1566		    int getfrag(void *from, char *to, int offset, int len,
1567				int odd, struct sk_buff *skb),
1568		    void *from, int length, int transhdrlen,
1569		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1570		    struct rt6_info *rt, unsigned int flags,
1571		    const struct sockcm_cookie *sockc)
1572{
1573	struct inet_sock *inet = inet_sk(sk);
1574	struct ipv6_pinfo *np = inet6_sk(sk);
1575	int exthdrlen;
1576	int err;
1577
1578	if (flags&MSG_PROBE)
1579		return 0;
1580	if (skb_queue_empty(&sk->sk_write_queue)) {
1581		/*
1582		 * setup for corking
1583		 */
1584		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1585				     ipc6, rt, fl6);
1586		if (err)
1587			return err;
1588
1589		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1590		length += exthdrlen;
1591		transhdrlen += exthdrlen;
1592	} else {
1593		fl6 = &inet->cork.fl.u.ip6;
1594		transhdrlen = 0;
1595	}
1596
1597	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1598				 &np->cork, sk_page_frag(sk), getfrag,
1599				 from, length, transhdrlen, flags, ipc6, sockc);
1600}
1601EXPORT_SYMBOL_GPL(ip6_append_data);
1602
1603static void ip6_cork_release(struct inet_cork_full *cork,
1604			     struct inet6_cork *v6_cork)
1605{
1606	if (v6_cork->opt) {
1607		kfree(v6_cork->opt->dst0opt);
1608		kfree(v6_cork->opt->dst1opt);
1609		kfree(v6_cork->opt->hopopt);
1610		kfree(v6_cork->opt->srcrt);
1611		kfree(v6_cork->opt);
1612		v6_cork->opt = NULL;
1613	}
1614
1615	if (cork->base.dst) {
1616		dst_release(cork->base.dst);
1617		cork->base.dst = NULL;
1618		cork->base.flags &= ~IPCORK_ALLFRAG;
1619	}
1620	memset(&cork->fl, 0, sizeof(cork->fl));
1621}
1622
/*
 * __ip6_make_skb - turn the skbs pending on @queue into one IPv6 packet
 *
 * The first skb dequeued becomes the head; every further skb is linked onto
 * its frag_list and detached from the socket (destructor and sk cleared),
 * with its length and truesize folded into the head.  Extension headers
 * from @v6_cork->opt and the IPv6 header are then pushed in front, using
 * the flow stored in @cork, and the cork is released.
 *
 * Returns the assembled skb, or NULL when @queue is empty (in which case
 * the cork is left untouched).
 */
1623struct sk_buff *__ip6_make_skb(struct sock *sk,
1624			       struct sk_buff_head *queue,
1625			       struct inet_cork_full *cork,
1626			       struct inet6_cork *v6_cork)
1627{
1628	struct sk_buff *skb, *tmp_skb;
1629	struct sk_buff **tail_skb;
1630	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1631	struct ipv6_pinfo *np = inet6_sk(sk);
1632	struct net *net = sock_net(sk);
1633	struct ipv6hdr *hdr;
1634	struct ipv6_txoptions *opt = v6_cork->opt;
1635	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1636	struct flowi6 *fl6 = &cork->fl.u.ip6;
1637	unsigned char proto = fl6->flowi6_proto;
1638
1639	skb = __skb_dequeue(queue);
1640	if (!skb)
1641		goto out;
1642	tail_skb = &(skb_shinfo(skb)->frag_list);
1643
1644	/* move skb->data to ip header from ext header */
1645	if (skb->data < skb_network_header(skb))
1646		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining skbs behind the head, stripping their header room. */
1647	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		/* note: the pull length is taken from the head skb, not tmp_skb */
1648		__skb_pull(tmp_skb, skb_network_header_len(skb));
1649		*tail_skb = tmp_skb;
1650		tail_skb = &(tmp_skb->next);
1651		skb->len += tmp_skb->len;
1652		skb->data_len += tmp_skb->len;
1653		skb->truesize += tmp_skb->truesize;
1654		tmp_skb->destructor = NULL;
1655		tmp_skb->sk = NULL;
1656	}
1657
1658	/* Allow local fragmentation. */
1659	skb->ignore_df = ip6_sk_ignore_df(sk);
1660
	/*
	 * final_dst starts as a copy of the flow's daddr; the pointer is
	 * handed to ipv6_push_nfrag_opts(), which may redirect it.
	 */
1661	*final_dst = fl6->daddr;
1662	__skb_pull(skb, skb_network_header_len(skb));
1663	if (opt && opt->opt_flen)
1664		ipv6_push_frag_opts(skb, opt, &proto);
1665	if (opt && opt->opt_nflen)
1666		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1667
1668	skb_push(skb, sizeof(struct ipv6hdr));
1669	skb_reset_network_header(skb);
1670	hdr = ipv6_hdr(skb);
1671
	/* payload_len is not written in this function */
1672	ip6_flow_hdr(hdr, v6_cork->tclass,
1673		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
1674					ip6_autoflowlabel(net, np), fl6));
1675	hdr->hop_limit = v6_cork->hop_limit;
1676	hdr->nexthdr = proto;
1677	hdr->saddr = fl6->saddr;
1678	hdr->daddr = *final_dst;
1679
1680	skb->priority = sk->sk_priority;
1681	skb->mark = sk->sk_mark;
1682
	/* the skb gets its own route reference; the cork's is dropped below */
1683	skb_dst_set(skb, dst_clone(&rt->dst));
1684	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1685	if (proto == IPPROTO_ICMPV6) {
1686		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1687
1688		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1689		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1690	}
1691
1692	ip6_cork_release(cork, v6_cork);
1693out:
1694	return skb;
1695}
1696
1697int ip6_send_skb(struct sk_buff *skb)
1698{
1699	struct net *net = sock_net(skb->sk);
1700	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1701	int err;
1702
1703	err = ip6_local_out(net, skb->sk, skb);
1704	if (err) {
1705		if (err > 0)
1706			err = net_xmit_errno(err);
1707		if (err)
1708			IP6_INC_STATS(net, rt->rt6i_idev,
1709				      IPSTATS_MIB_OUTDISCARDS);
1710	}
1711
1712	return err;
1713}
1714
/*
 * ip6_push_pending_frames - build the pending packet and send it
 *
 * Returns 0 when ip6_finish_skb() produced no skb, otherwise the result of
 * ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *pending = ip6_finish_skb(sk);

	return pending ? ip6_send_skb(pending) : 0;
}
1725EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1726
1727static void __ip6_flush_pending_frames(struct sock *sk,
1728				       struct sk_buff_head *queue,
1729				       struct inet_cork_full *cork,
1730				       struct inet6_cork *v6_cork)
1731{
1732	struct sk_buff *skb;
1733
1734	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1735		if (skb_dst(skb))
1736			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1737				      IPSTATS_MIB_OUTDISCARDS);
1738		kfree_skb(skb);
1739	}
1740
1741	ip6_cork_release(cork, v6_cork);
1742}
1743
1744void ip6_flush_pending_frames(struct sock *sk)
1745{
1746	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1747				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1748}
1749EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1750
1751struct sk_buff *ip6_make_skb(struct sock *sk,
1752			     int getfrag(void *from, char *to, int offset,
1753					 int len, int odd, struct sk_buff *skb),
1754			     void *from, int length, int transhdrlen,
1755			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1756			     struct rt6_info *rt, unsigned int flags,
1757			     const struct sockcm_cookie *sockc)
1758{
1759	struct inet_cork_full cork;
1760	struct inet6_cork v6_cork;
1761	struct sk_buff_head queue;
1762	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1763	int err;
1764
1765	if (flags & MSG_PROBE)
1766		return NULL;
1767
1768	__skb_queue_head_init(&queue);
1769
1770	cork.base.flags = 0;
1771	cork.base.addr = 0;
1772	cork.base.opt = NULL;
1773	cork.base.dst = NULL;
1774	v6_cork.opt = NULL;
1775	err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
1776	if (err) {
1777		ip6_cork_release(&cork, &v6_cork);
1778		return ERR_PTR(err);
1779	}
1780	if (ipc6->dontfrag < 0)
1781		ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1782
1783	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1784				&current->task_frag, getfrag, from,
1785				length + exthdrlen, transhdrlen + exthdrlen,
1786				flags, ipc6, sockc);
1787	if (err) {
1788		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1789		return ERR_PTR(err);
1790	}
1791
1792	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1793}

   1/*
   2 *	IPv6 output functions
   3 *	Linux INET6 implementation
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	Based on linux/net/ipv4/ip_output.c
   9 *
  10 *	This program is free software; you can redistribute it and/or
  11 *      modify it under the terms of the GNU General Public License
  12 *      as published by the Free Software Foundation; either version
  13 *      2 of the License, or (at your option) any later version.
  14 *
  15 *	Changes:
  16 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
  17 *				extension headers are implemented.
  18 *				route changes now work.
  19 *				ip6_forward does not confuse sniffers.
  20 *				etc.
  21 *
  22 *      H. von Brand    :       Added missing #include <linux/string.h>
  23 *	Imran Patel	:	frag id should be in NBO
  24 *      Kazunori MIYAZAWA @USAGI
  25 *			:       add ip6_append_data and related functions
  26 *				for datagram xmit
  27 */
  28
  29#include <linux/errno.h>
  30#include <linux/kernel.h>
  31#include <linux/string.h>
  32#include <linux/socket.h>
  33#include <linux/net.h>
  34#include <linux/netdevice.h>
  35#include <linux/if_arp.h>
  36#include <linux/in6.h>
  37#include <linux/tcp.h>
  38#include <linux/route.h>
  39#include <linux/module.h>
  40#include <linux/slab.h>
  41
  42#include <linux/bpf-cgroup.h>
  43#include <linux/netfilter.h>
  44#include <linux/netfilter_ipv6.h>
  45
  46#include <net/sock.h>
  47#include <net/snmp.h>
  48
  49#include <net/ipv6.h>
  50#include <net/ndisc.h>
  51#include <net/protocol.h>
  52#include <net/ip6_route.h>
  53#include <net/addrconf.h>
  54#include <net/rawv6.h>
  55#include <net/icmp.h>
  56#include <net/xfrm.h>
  57#include <net/checksum.h>
  58#include <linux/mroute6.h>
  59#include <net/l3mdev.h>
  60#include <net/lwtunnel.h>
  61
  /*
 * ip6_finish_output2 - final output step: resolve the neighbour and transmit
 *
 * Sets skb->protocol and skb->dev from the attached dst.  For multicast
 * destinations a clone may be looped back through the POST_ROUTING hook,
 * the OUTMCAST counter is updated, and packets that must not leave the
 * host are dropped (return 0).  If the dst has an lwtunnel xmit redirect,
 * lwtunnel_xmit() gets the skb first.  Otherwise the next-hop neighbour is
 * looked up (or created) under rcu_read_lock_bh() and the skb is handed to
 * dst_neigh_output().
 *
 * Returns the result of dst_neigh_output() or lwtunnel_xmit(), 0 for the
 * silent multicast drops, or -EINVAL (skb freed, OUTNOROUTES counted) when
 * no neighbour entry could be obtained.
 */
  62static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
  63{
  64	struct dst_entry *dst = skb_dst(skb);
  65	struct net_device *dev = dst->dev;
  66	struct neighbour *neigh;
  67	struct in6_addr *nexthop;
  68	int ret;
  69
  70	skb->protocol = htons(ETH_P_IPV6);
  71	skb->dev = dev;
  72
  73	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
  74		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
  75
		/*
		 * Loop a copy back when the device is not loopback, the
		 * socket allows multicast loop, and either a multicast
		 * routing socket exists (and the skb was not forwarded) or
		 * the address check on this device succeeds.
		 */
  76		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
  77		    ((mroute6_socket(net, skb) &&
  78		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
  79		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
  80					 &ipv6_hdr(skb)->saddr))) {
  81			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
  82
  83			/* Do not check for IFF_ALLMULTI; multicast routing
  84			   is not supported in any case.
  85			 */
  86			if (newskb)
  87				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
  88					net, sk, newskb, NULL, newskb->dev,
  89					dev_loopback_xmit);
  90
			/* hop limit 0: the looped-back copy is all that is sent */
  91			if (ipv6_hdr(skb)->hop_limit == 0) {
  92				IP6_INC_STATS(net, idev,
  93					      IPSTATS_MIB_OUTDISCARDS);
  94				kfree_skb(skb);
  95				return 0;
  96			}
  97		}
  98
  99		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
 100
		/* node-local (or narrower) scope is dropped unless on loopback */
 101		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
 102		    IPV6_ADDR_SCOPE_NODELOCAL &&
 103		    !(dev->flags & IFF_LOOPBACK)) {
 104			kfree_skb(skb);
 105			return 0;
 106		}
 107	}
 108
 109	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
 110		int res = lwtunnel_xmit(skb);
 111
		/* any other result falls through to normal neighbour output */
 112		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
 113			return res;
 114	}
 115
 116	rcu_read_lock_bh();
 117	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
 118	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
 119	if (unlikely(!neigh))
 120		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
 121	if (!IS_ERR(neigh)) {
 122		ret = dst_neigh_output(dst, neigh, skb);
 
 123		rcu_read_unlock_bh();
 124		return ret;
 125	}
 126	rcu_read_unlock_bh();
 127
 128	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 129	kfree_skb(skb);
 130	return -EINVAL;
 131}
 132
 133static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 134{
 135	int ret;
 136
 137	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
 138	if (ret) {
 139		kfree_skb(skb);
 140		return ret;
 141	}
 142
 
 
 
 
 
 
 
 
 143	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 144	    dst_allfrag(skb_dst(skb)) ||
 145	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
 146		return ip6_fragment(net, sk, skb, ip6_finish_output2);
 147	else
 148		return ip6_finish_output2(net, sk, skb);
 149}
 150
 151int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 152{
 153	struct net_device *dev = skb_dst(skb)->dev;
 154	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 155
 
 
 
 156	if (unlikely(idev->cnf.disable_ipv6)) {
 157		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 158		kfree_skb(skb);
 159		return 0;
 160	}
 161
 162	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
 163			    net, sk, skb, NULL, dev,
 164			    ip6_finish_output,
 165			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 166}
 167
 
 
 
 
 
 
 
 
 168/*
 169 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 170 * Note : socket lock is not held for SYNACK packets, but might be modified
 171 * by calls to skb_set_owner_w() and ipv6_local_error(),
 172 * which are using proper atomic operations or spinlocks.
 *
 * @sk:     socket the packet is sent for (priority, netns, pinfo)
 * @skb:    packet with a dst already attached; the IPv6 header and any
 *          extension headers from @opt are pushed in front of its data
 * @fl6:    flow supplying addresses, protocol and flow label
 * @mark:   value stored in skb->mark
 * @opt:    optional tx options (may be NULL)
 * @tclass: traffic class for the IPv6 header
 *
 * Returns -ENOBUFS if more headroom was needed and could not be allocated,
 * 0 if l3mdev_ip6_out() consumed the skb, the NF_INET_LOCAL_OUT hook result
 * when the packet is sent, or -EMSGSIZE (after ipv6_local_error() and a
 * FRAGFAILS count) when it exceeds the dst MTU and is neither ignore_df
 * nor GSO.  The skb is freed on the error paths visible here.
 173 */
 174int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 175	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
 176{
 177	struct net *net = sock_net(sk);
 178	const struct ipv6_pinfo *np = inet6_sk(sk);
 179	struct in6_addr *first_hop = &fl6->daddr;
 180	struct dst_entry *dst = skb_dst(skb);
 181	struct ipv6hdr *hdr;
 182	u8  proto = fl6->flowi6_proto;
 183	int seg_len = skb->len;
 184	int hlimit = -1;
 185	u32 mtu;
 186
 187	if (opt) {
 188		unsigned int head_room;
 189
 190		/* First: exthdrs may take lots of space (~8K for now)
 191		   MAX_HEADER is not enough.
 192		 */
 193		head_room = opt->opt_nflen + opt->opt_flen;
 194		seg_len += head_room;
 195		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
 196
 197		if (skb_headroom(skb) < head_room) {
 198			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 199			if (!skb2) {
 200				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 201					      IPSTATS_MIB_OUTDISCARDS);
 202				kfree_skb(skb);
 203				return -ENOBUFS;
 204			}
			/* the original is released; continue with the roomier copy */
 205			consume_skb(skb);
 206			skb = skb2;
 207			/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
 208			 * it is safe to call in our context (socket lock not held)
 209			 */
 210			skb_set_owner_w(skb, (struct sock *)sk);
 211		}
 212		if (opt->opt_flen)
 213			ipv6_push_frag_opts(skb, opt, &proto);
 214		if (opt->opt_nflen)
 215			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
 216					     &fl6->saddr);
 217	}
 218
 219	skb_push(skb, sizeof(struct ipv6hdr));
 220	skb_reset_network_header(skb);
 221	hdr = ipv6_hdr(skb);
 222
 223	/*
 224	 *	Fill in the IPv6 header
 225	 */
	/* socket hop limit if set (>= 0), else the route's default */
 226	if (np)
 227		hlimit = np->hop_limit;
 228	if (hlimit < 0)
 229		hlimit = ip6_dst_hoplimit(dst);
 230
	/*
	 * NOTE(review): np is NULL-checked just above but dereferenced
	 * unconditionally here - confirm whether np can really be NULL for
	 * callers of this function.
	 */
 231	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
 232						     np->autoflowlabel, fl6));
 233
 234	hdr->payload_len = htons(seg_len);
 235	hdr->nexthdr = proto;
 236	hdr->hop_limit = hlimit;
 237
 238	hdr->saddr = fl6->saddr;
 239	hdr->daddr = *first_hop;
 240
 241	skb->protocol = htons(ETH_P_IPV6);
 242	skb->priority = sk->sk_priority;
 243	skb->mark = mark;
 244
 245	mtu = dst_mtu(dst);
 246	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
 247		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
 248			      IPSTATS_MIB_OUT, skb->len);
 249
 250		/* if egress device is enslaved to an L3 master device pass the
 251		 * skb to its handler for processing
 252		 */
 253		skb = l3mdev_ip6_out((struct sock *)sk, skb);
 254		if (unlikely(!skb))
 255			return 0;
 256
 257		/* hooks should never assume socket lock is held.
 258		 * we promote our socket to non const
 259		 */
 260		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 261			       net, (struct sock *)sk, skb, NULL, dst->dev,
 262			       dst_output);
 263	}
 264
	/* too big and may not be fragmented: report the MTU and drop */
 265	skb->dev = dst->dev;
 266	/* ipv6_local_error() does not require socket lock,
 267	 * we promote our socket to non const
 268	 */
 269	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
 270
 271	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
 272	kfree_skb(skb);
 273	return -EMSGSIZE;
 274}
 275EXPORT_SYMBOL(ip6_xmit);
 276
 277static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 278{
 279	struct ip6_ra_chain *ra;
 280	struct sock *last = NULL;
 281
 282	read_lock(&ip6_ra_lock);
 283	for (ra = ip6_ra_chain; ra; ra = ra->next) {
 284		struct sock *sk = ra->sk;
 285		if (sk && ra->sel == sel &&
 286		    (!sk->sk_bound_dev_if ||
 287		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
 288			if (last) {
 289				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 290				if (skb2)
 291					rawv6_rcv(last, skb2);
 292			}
 293			last = sk;
 294		}
 295	}
 296
 297	if (last) {
 298		rawv6_rcv(last, skb);
 299		read_unlock(&ip6_ra_lock);
 300		return 1;
 301	}
 302	read_unlock(&ip6_ra_lock);
 303	return 0;
 304}
 305
 306static int ip6_forward_proxy_check(struct sk_buff *skb)
 307{
 308	struct ipv6hdr *hdr = ipv6_hdr(skb);
 309	u8 nexthdr = hdr->nexthdr;
 310	__be16 frag_off;
 311	int offset;
 312
 313	if (ipv6_ext_hdr(nexthdr)) {
 314		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
 315		if (offset < 0)
 316			return 0;
 317	} else
 318		offset = sizeof(struct ipv6hdr);
 319
 320	if (nexthdr == IPPROTO_ICMPV6) {
 321		struct icmp6hdr *icmp6;
 322
 323		if (!pskb_may_pull(skb, (skb_network_header(skb) +
 324					 offset + 1 - skb->data)))
 325			return 0;
 326
 327		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 328
 329		switch (icmp6->icmp6_type) {
 330		case NDISC_ROUTER_SOLICITATION:
 331		case NDISC_ROUTER_ADVERTISEMENT:
 332		case NDISC_NEIGHBOUR_SOLICITATION:
 333		case NDISC_NEIGHBOUR_ADVERTISEMENT:
 334		case NDISC_REDIRECT:
 335			/* For reaction involving unicast neighbor discovery
 336			 * message destined to the proxied address, pass it to
 337			 * input function.
 338			 */
 339			return 1;
 340		default:
 341			break;
 342		}
 343	}
 344
 345	/*
 346	 * The proxying router can't forward traffic sent to a link-local
 347	 * address, so signal the sender and discard the packet. This
 348	 * behavior is clarified by the MIPv6 specification.
 349	 */
 350	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
 351		dst_link_failure(skb);
 352		return -1;
 353	}
 354
 355	return 0;
 356}
 357
 358static inline int ip6_forward_finish(struct net *net, struct sock *sk,
 359				     struct sk_buff *skb)
 360{
 
 
 
 
 
 361	return dst_output(net, sk, skb);
 362}
 363
 364static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
 365{
 366	unsigned int mtu;
 367	struct inet6_dev *idev;
 368
 369	if (dst_metric_locked(dst, RTAX_MTU)) {
 370		mtu = dst_metric_raw(dst, RTAX_MTU);
 371		if (mtu)
 372			return mtu;
 373	}
 374
 375	mtu = IPV6_MIN_MTU;
 376	rcu_read_lock();
 377	idev = __in6_dev_get(dst->dev);
 378	if (idev)
 379		mtu = idev->cnf.mtu6;
 380	rcu_read_unlock();
 381
 382	return mtu;
 383}
 
 384
 385static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 386{
 387	if (skb->len <= mtu)
 388		return false;
 389
 390	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
 391	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
 392		return true;
 393
 394	if (skb->ignore_df)
 395		return false;
 396
 397	if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
 398		return false;
 399
 400	return true;
 401}
 402
 403int ip6_forward(struct sk_buff *skb)
 404{
 405	struct dst_entry *dst = skb_dst(skb);
 406	struct ipv6hdr *hdr = ipv6_hdr(skb);
 407	struct inet6_skb_parm *opt = IP6CB(skb);
 408	struct net *net = dev_net(dst->dev);
 409	u32 mtu;
 410
 411	if (net->ipv6.devconf_all->forwarding == 0)
 412		goto error;
 413
 414	if (skb->pkt_type != PACKET_HOST)
 415		goto drop;
 416
 417	if (unlikely(skb->sk))
 418		goto drop;
 419
 420	if (skb_warn_if_lro(skb))
 421		goto drop;
 422
 423	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
 424		__IP6_INC_STATS(net, ip6_dst_idev(dst),
 425				IPSTATS_MIB_INDISCARDS);
 426		goto drop;
 427	}
 428
 429	skb_forward_csum(skb);
 430
 431	/*
 432	 *	We DO NOT make any processing on
 433	 *	RA packets, pushing them to user level AS IS
 434	 *	without ane WARRANTY that application will be able
 435	 *	to interpret them. The reason is that we
 436	 *	cannot make anything clever here.
 437	 *
 438	 *	We are not end-node, so that if packet contains
 439	 *	AH/ESP, we cannot make anything.
 440	 *	Defragmentation also would be mistake, RA packets
 441	 *	cannot be fragmented, because there is no warranty
 442	 *	that different fragments will go along one path. --ANK
 443	 */
 444	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
 445		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
 446			return 0;
 447	}
 448
 449	/*
 450	 *	check and decrement ttl
 451	 */
 452	if (hdr->hop_limit <= 1) {
 453		/* Force OUTPUT device used as source address */
 454		skb->dev = dst->dev;
 455		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
 456		__IP6_INC_STATS(net, ip6_dst_idev(dst),
 457				IPSTATS_MIB_INHDRERRORS);
 458
 459		kfree_skb(skb);
 460		return -ETIMEDOUT;
 461	}
 462
 463	/* XXX: idev->cnf.proxy_ndp? */
 464	if (net->ipv6.devconf_all->proxy_ndp &&
 465	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
 466		int proxied = ip6_forward_proxy_check(skb);
 467		if (proxied > 0)
 468			return ip6_input(skb);
 469		else if (proxied < 0) {
 470			__IP6_INC_STATS(net, ip6_dst_idev(dst),
 471					IPSTATS_MIB_INDISCARDS);
 472			goto drop;
 473		}
 474	}
 475
 476	if (!xfrm6_route_forward(skb)) {
 477		__IP6_INC_STATS(net, ip6_dst_idev(dst),
 478				IPSTATS_MIB_INDISCARDS);
 479		goto drop;
 480	}
 481	dst = skb_dst(skb);
 482
 483	/* IPv6 specs say nothing about it, but it is clear that we cannot
 484	   send redirects to source routed frames.
 485	   We don't send redirects to frames decapsulated from IPsec.
 486	 */
 487	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
 488		struct in6_addr *target = NULL;
 489		struct inet_peer *peer;
 490		struct rt6_info *rt;
 491
 492		/*
 493		 *	incoming and outgoing devices are the same
 494		 *	send a redirect.
 495		 */
 496
 497		rt = (struct rt6_info *) dst;
 498		if (rt->rt6i_flags & RTF_GATEWAY)
 499			target = &rt->rt6i_gateway;
 500		else
 501			target = &hdr->daddr;
 502
 503		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
 504
 505		/* Limit redirects both by destination (here)
 506		   and by source (inside ndisc_send_redirect)
 507		 */
 508		if (inet_peer_xrlim_allow(peer, 1*HZ))
 509			ndisc_send_redirect(skb, target);
 510		if (peer)
 511			inet_putpeer(peer);
 512	} else {
 513		int addrtype = ipv6_addr_type(&hdr->saddr);
 514
 515		/* This check is security critical. */
 516		if (addrtype == IPV6_ADDR_ANY ||
 517		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
 518			goto error;
 519		if (addrtype & IPV6_ADDR_LINKLOCAL) {
 520			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
 521				    ICMPV6_NOT_NEIGHBOUR, 0);
 522			goto error;
 523		}
 524	}
 525
 526	mtu = ip6_dst_mtu_forward(dst);
 527	if (mtu < IPV6_MIN_MTU)
 528		mtu = IPV6_MIN_MTU;
 529
 530	if (ip6_pkt_too_big(skb, mtu)) {
 531		/* Again, force OUTPUT device used as source address */
 532		skb->dev = dst->dev;
 533		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 534		__IP6_INC_STATS(net, ip6_dst_idev(dst),
 535				IPSTATS_MIB_INTOOBIGERRORS);
 536		__IP6_INC_STATS(net, ip6_dst_idev(dst),
 537				IPSTATS_MIB_FRAGFAILS);
 538		kfree_skb(skb);
 539		return -EMSGSIZE;
 540	}
 541
 542	if (skb_cow(skb, dst->dev->hard_header_len)) {
 543		__IP6_INC_STATS(net, ip6_dst_idev(dst),
 544				IPSTATS_MIB_OUTDISCARDS);
 545		goto drop;
 546	}
 547
 548	hdr = ipv6_hdr(skb);
 549
 550	/* Mangling hops number delayed to point after skb COW */
 551
 552	hdr->hop_limit--;
 553
 554	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 555	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
 556	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
 557		       net, NULL, skb, skb->dev, dst->dev,
 558		       ip6_forward_finish);
 559
 560error:
 561	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
 562drop:
 563	kfree_skb(skb);
 564	return -EINVAL;
 565}
 566
 567static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 568{
 569	to->pkt_type = from->pkt_type;
 570	to->priority = from->priority;
 571	to->protocol = from->protocol;
 572	skb_dst_drop(to);
 573	skb_dst_set(to, dst_clone(skb_dst(from)));
 574	to->dev = from->dev;
 575	to->mark = from->mark;
 576
 577#ifdef CONFIG_NET_SCHED
 578	to->tc_index = from->tc_index;
 579#endif
 580	nf_copy(to, from);
 581	skb_copy_secmark(to, from);
 582}
 583
 584int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 585		 int (*output)(struct net *, struct sock *, struct sk_buff *))
 586{
 587	struct sk_buff *frag;
 588	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 589	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
 590				inet6_sk(skb->sk) : NULL;
 591	struct ipv6hdr *tmp_hdr;
 592	struct frag_hdr *fh;
 593	unsigned int mtu, hlen, left, len;
 594	int hroom, troom;
 595	__be32 frag_id;
 596	int ptr, offset = 0, err = 0;
 597	u8 *prevhdr, nexthdr = 0;
 598
 599	hlen = ip6_find_1stfragopt(skb, &prevhdr);
 
 
 
 600	nexthdr = *prevhdr;
 601
 602	mtu = ip6_skb_dst_mtu(skb);
 603
 604	/* We must not fragment if the socket is set to force MTU discovery
 605	 * or if the skb it not generated by a local socket.
 606	 */
 607	if (unlikely(!skb->ignore_df && skb->len > mtu))
 608		goto fail_toobig;
 609
 610	if (IP6CB(skb)->frag_max_size) {
 611		if (IP6CB(skb)->frag_max_size > mtu)
 612			goto fail_toobig;
 613
 614		/* don't send fragments larger than what we received */
 615		mtu = IP6CB(skb)->frag_max_size;
 616		if (mtu < IPV6_MIN_MTU)
 617			mtu = IPV6_MIN_MTU;
 618	}
 619
 620	if (np && np->frag_size < mtu) {
 621		if (np->frag_size)
 622			mtu = np->frag_size;
 623	}
 624	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
 625		goto fail_toobig;
 626	mtu -= hlen + sizeof(struct frag_hdr);
 627
 628	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
 629				    &ipv6_hdr(skb)->saddr);
 630
 631	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 632	    (err = skb_checksum_help(skb)))
 633		goto fail;
 634
 635	hroom = LL_RESERVED_SPACE(rt->dst.dev);
 636	if (skb_has_frag_list(skb)) {
 637		unsigned int first_len = skb_pagelen(skb);
 638		struct sk_buff *frag2;
 639
 640		if (first_len - hlen > mtu ||
 641		    ((first_len - hlen) & 7) ||
 642		    skb_cloned(skb) ||
 643		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
 644			goto slow_path;
 645
 646		skb_walk_frags(skb, frag) {
 647			/* Correct geometry. */
 648			if (frag->len > mtu ||
 649			    ((frag->len & 7) && frag->next) ||
 650			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
 651				goto slow_path_clean;
 652
 653			/* Partially cloned skb? */
 654			if (skb_shared(frag))
 655				goto slow_path_clean;
 656
 657			BUG_ON(frag->sk);
 658			if (skb->sk) {
 659				frag->sk = skb->sk;
 660				frag->destructor = sock_wfree;
 661			}
 662			skb->truesize -= frag->truesize;
 663		}
 664
 665		err = 0;
 666		offset = 0;
 667		/* BUILD HEADER */
 668
 669		*prevhdr = NEXTHDR_FRAGMENT;
 670		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 671		if (!tmp_hdr) {
 672			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 673				      IPSTATS_MIB_FRAGFAILS);
 674			err = -ENOMEM;
 675			goto fail;
 676		}
 677		frag = skb_shinfo(skb)->frag_list;
 678		skb_frag_list_init(skb);
 679
 680		__skb_pull(skb, hlen);
 681		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
 682		__skb_push(skb, hlen);
 683		skb_reset_network_header(skb);
 684		memcpy(skb_network_header(skb), tmp_hdr, hlen);
 685
 686		fh->nexthdr = nexthdr;
 687		fh->reserved = 0;
 688		fh->frag_off = htons(IP6_MF);
 689		fh->identification = frag_id;
 690
 691		first_len = skb_pagelen(skb);
 692		skb->data_len = first_len - skb_headlen(skb);
 693		skb->len = first_len;
 694		ipv6_hdr(skb)->payload_len = htons(first_len -
 695						   sizeof(struct ipv6hdr));
 696
 697		dst_hold(&rt->dst);
 698
 699		for (;;) {
 700			/* Prepare header of the next frame,
 701			 * before previous one went down. */
 702			if (frag) {
 703				frag->ip_summed = CHECKSUM_NONE;
 704				skb_reset_transport_header(frag);
 705				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
 706				__skb_push(frag, hlen);
 707				skb_reset_network_header(frag);
 708				memcpy(skb_network_header(frag), tmp_hdr,
 709				       hlen);
 710				offset += skb->len - hlen - sizeof(struct frag_hdr);
 711				fh->nexthdr = nexthdr;
 712				fh->reserved = 0;
 713				fh->frag_off = htons(offset);
 714				if (frag->next)
 715					fh->frag_off |= htons(IP6_MF);
 716				fh->identification = frag_id;
 717				ipv6_hdr(frag)->payload_len =
 718						htons(frag->len -
 719						      sizeof(struct ipv6hdr));
 720				ip6_copy_metadata(frag, skb);
 721			}
 722
 723			err = output(net, sk, skb);
 724			if (!err)
 725				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 726					      IPSTATS_MIB_FRAGCREATES);
 727
 728			if (err || !frag)
 729				break;
 730
 731			skb = frag;
 732			frag = skb->next;
 733			skb->next = NULL;
 734		}
 735
 736		kfree(tmp_hdr);
 737
 738		if (err == 0) {
 739			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 740				      IPSTATS_MIB_FRAGOKS);
 741			ip6_rt_put(rt);
 742			return 0;
 743		}
 744
 745		kfree_skb_list(frag);
 746
 747		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 748			      IPSTATS_MIB_FRAGFAILS);
 749		ip6_rt_put(rt);
 750		return err;
 751
 752slow_path_clean:
 753		skb_walk_frags(skb, frag2) {
 754			if (frag2 == frag)
 755				break;
 756			frag2->sk = NULL;
 757			frag2->destructor = NULL;
 758			skb->truesize += frag2->truesize;
 759		}
 760	}
 761
 762slow_path:
 763	left = skb->len - hlen;		/* Space per frame */
 764	ptr = hlen;			/* Where to start from */
 765
 766	/*
 767	 *	Fragment the datagram.
 768	 */
 769
 770	troom = rt->dst.dev->needed_tailroom;
 771
 772	/*
 773	 *	Keep copying data until we run out.
 774	 */
 775	while (left > 0)	{
 776		u8 *fragnexthdr_offset;
 777
 778		len = left;
 779		/* IF: it doesn't fit, use 'mtu' - the data space left */
 780		if (len > mtu)
 781			len = mtu;
 782		/* IF: we are not sending up to and including the packet end
 783		   then align the next start on an eight byte boundary */
 784		if (len < left)	{
 785			len &= ~7;
 786		}
 787
 788		/* Allocate buffer */
 789		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
 790				 hroom + troom, GFP_ATOMIC);
 791		if (!frag) {
 792			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 793				      IPSTATS_MIB_FRAGFAILS);
 794			err = -ENOMEM;
 795			goto fail;
 796		}
 797
 798		/*
 799		 *	Set up data on packet
 800		 */
 801
 802		ip6_copy_metadata(frag, skb);
 803		skb_reserve(frag, hroom);
 804		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 805		skb_reset_network_header(frag);
 806		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
 807		frag->transport_header = (frag->network_header + hlen +
 808					  sizeof(struct frag_hdr));
 809
 810		/*
 811		 *	Charge the memory for the fragment to any owner
 812		 *	it might possess
 813		 */
 814		if (skb->sk)
 815			skb_set_owner_w(frag, skb->sk);
 816
 817		/*
 818		 *	Copy the packet header into the new buffer.
 819		 */
 820		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 821
 822		fragnexthdr_offset = skb_network_header(frag);
 823		fragnexthdr_offset += prevhdr - skb_network_header(skb);
 824		*fragnexthdr_offset = NEXTHDR_FRAGMENT;
 825
 826		/*
 827		 *	Build fragment header.
 828		 */
 829		fh->nexthdr = nexthdr;
 830		fh->reserved = 0;
 831		fh->identification = frag_id;
 832
 833		/*
 834		 *	Copy a block of the IP datagram.
 835		 */
 836		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
 837				     len));
 838		left -= len;
 839
 840		fh->frag_off = htons(offset);
 841		if (left > 0)
 842			fh->frag_off |= htons(IP6_MF);
 843		ipv6_hdr(frag)->payload_len = htons(frag->len -
 844						    sizeof(struct ipv6hdr));
 845
 846		ptr += len;
 847		offset += len;
 848
 849		/*
 850		 *	Put this fragment into the sending queue.
 851		 */
 852		err = output(net, sk, frag);
 853		if (err)
 854			goto fail;
 855
 856		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 857			      IPSTATS_MIB_FRAGCREATES);
 858	}
 859	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 860		      IPSTATS_MIB_FRAGOKS);
 861	consume_skb(skb);
 862	return err;
 863
 864fail_toobig:
 865	if (skb->sk && dst_allfrag(skb_dst(skb)))
 866		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
 867
 868	skb->dev = skb_dst(skb)->dev;
 869	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 870	err = -EMSGSIZE;
 871
 872fail:
 873	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 874		      IPSTATS_MIB_FRAGFAILS);
 875	kfree_skb(skb);
 876	return err;
 877}
 878
 879static inline int ip6_rt_check(const struct rt6key *rt_key,
 880			       const struct in6_addr *fl_addr,
 881			       const struct in6_addr *addr_cache)
 882{
 883	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 884		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
 885}
 886
 887static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 888					  struct dst_entry *dst,
 889					  const struct flowi6 *fl6)
 890{
 891	struct ipv6_pinfo *np = inet6_sk(sk);
 892	struct rt6_info *rt;
 893
 894	if (!dst)
 895		goto out;
 896
 897	if (dst->ops->family != AF_INET6) {
 898		dst_release(dst);
 899		return NULL;
 900	}
 901
 902	rt = (struct rt6_info *)dst;
 903	/* Yes, checking route validity in not connected
 904	 * case is not very simple. Take into account,
 905	 * that we do not support routing by source, TOS,
 906	 * and MSG_DONTROUTE		--ANK (980726)
 907	 *
 908	 * 1. ip6_rt_check(): If route was host route,
 909	 *    check that cached destination is current.
 910	 *    If it is network route, we still may
 911	 *    check its validity using saved pointer
 912	 *    to the last used address: daddr_cache.
 913	 *    We do not want to save whole address now,
 914	 *    (because main consumer of this service
 915	 *    is tcp, which has not this problem),
 916	 *    so that the last trick works only on connected
 917	 *    sockets.
 918	 * 2. oif also should be the same.
 919	 */
 920	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
 921#ifdef CONFIG_IPV6_SUBTREES
 922	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
 923#endif
 924	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
 925	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
 926		dst_release(dst);
 927		dst = NULL;
 928	}
 929
 930out:
 931	return dst;
 932}
 933
 934static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
 935			       struct dst_entry **dst, struct flowi6 *fl6)
 936{
 937#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 938	struct neighbour *n;
 939	struct rt6_info *rt;
 940#endif
 941	int err;
 942	int flags = 0;
 943
 944	/* The correct way to handle this would be to do
 945	 * ip6_route_get_saddr, and then ip6_route_output; however,
 946	 * the route-specific preferred source forces the
 947	 * ip6_route_output call _before_ ip6_route_get_saddr.
 948	 *
 949	 * In source specific routing (no src=any default route),
 950	 * ip6_route_output will fail given src=any saddr, though, so
 951	 * that's why we try it again later.
 952	 */
 953	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
 954		struct rt6_info *rt;
 955		bool had_dst = *dst != NULL;
 956
 957		if (!had_dst)
 958			*dst = ip6_route_output(net, sk, fl6);
 959		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
 960		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
 961					  sk ? inet6_sk(sk)->srcprefs : 0,
 962					  &fl6->saddr);
 963		if (err)
 964			goto out_err_release;
 965
 966		/* If we had an erroneous initial result, pretend it
 967		 * never existed and let the SA-enabled version take
 968		 * over.
 969		 */
 970		if (!had_dst && (*dst)->error) {
 971			dst_release(*dst);
 972			*dst = NULL;
 973		}
 974
 975		if (fl6->flowi6_oif)
 976			flags |= RT6_LOOKUP_F_IFACE;
 977	}
 978
 979	if (!*dst)
 980		*dst = ip6_route_output_flags(net, sk, fl6, flags);
 981
 982	err = (*dst)->error;
 983	if (err)
 984		goto out_err_release;
 985
 986#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 987	/*
 988	 * Here if the dst entry we've looked up
 989	 * has a neighbour entry that is in the INCOMPLETE
 990	 * state and the src address from the flow is
 991	 * marked as OPTIMISTIC, we release the found
 992	 * dst entry and replace it instead with the
 993	 * dst entry of the nexthop router
 994	 */
 995	rt = (struct rt6_info *) *dst;
 996	rcu_read_lock_bh();
 997	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
 998				      rt6_nexthop(rt, &fl6->daddr));
 999	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1000	rcu_read_unlock_bh();
1001
1002	if (err) {
1003		struct inet6_ifaddr *ifp;
1004		struct flowi6 fl_gw6;
1005		int redirect;
1006
1007		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1008				      (*dst)->dev, 1);
1009
1010		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1011		if (ifp)
1012			in6_ifa_put(ifp);
1013
1014		if (redirect) {
1015			/*
1016			 * We need to get the dst entry for the
1017			 * default router instead
1018			 */
1019			dst_release(*dst);
1020			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1021			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1022			*dst = ip6_route_output(net, sk, &fl_gw6);
1023			err = (*dst)->error;
1024			if (err)
1025				goto out_err_release;
1026		}
1027	}
1028#endif
1029	if (ipv6_addr_v4mapped(&fl6->saddr) &&
1030	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1031		err = -EAFNOSUPPORT;
1032		goto out_err_release;
1033	}
1034
1035	return 0;
1036
1037out_err_release:
1038	dst_release(*dst);
1039	*dst = NULL;
1040
1041	if (err == -ENETUNREACH)
1042		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1043	return err;
1044}
1045
1046/**
1047 *	ip6_dst_lookup - perform route lookup on flow
1048 *	@sk: socket which provides route info
1049 *	@dst: pointer to dst_entry * for result
1050 *	@fl6: flow to lookup
1051 *
1052 *	This function performs a route lookup on the given flow.
1053 *
1054 *	It returns zero on success, or a standard errno code on error.
1055 */
1056int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1057		   struct flowi6 *fl6)
1058{
1059	*dst = NULL;
1060	return ip6_dst_lookup_tail(net, sk, dst, fl6);
1061}
1062EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1063
1064/**
1065 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1066 *	@sk: socket which provides route info
1067 *	@fl6: flow to lookup
1068 *	@final_dst: final destination address for ipsec lookup
1069 *
1070 *	This function performs a route lookup on the given flow.
1071 *
1072 *	It returns a valid dst pointer on success, or a pointer encoded
1073 *	error code.
1074 */
1075struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
1076				      const struct in6_addr *final_dst)
1077{
1078	struct dst_entry *dst = NULL;
1079	int err;
1080
1081	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
1082	if (err)
1083		return ERR_PTR(err);
1084	if (final_dst)
1085		fl6->daddr = *final_dst;
1086
1087	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1088}
1089EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1090
1091/**
1092 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1093 *	@sk: socket which provides the dst cache and route info
1094 *	@fl6: flow to lookup
1095 *	@final_dst: final destination address for ipsec lookup
 
1096 *
1097 *	This function performs a route lookup on the given flow with the
1098 *	possibility of using the cached route in the socket if it is valid.
1099 *	It will take the socket dst lock when operating on the dst cache.
1100 *	As a result, this function can only be used in process context.
1101 *
 
 
 
1102 *	It returns a valid dst pointer on success, or a pointer encoded
1103 *	error code.
1104 */
1105struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1106					 const struct in6_addr *final_dst)
 
1107{
1108	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1109
1110	dst = ip6_sk_dst_check(sk, dst, fl6);
1111	if (!dst)
1112		dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
 
 
 
 
1113
1114	return dst;
1115}
1116EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1117
1118static inline int ip6_ufo_append_data(struct sock *sk,
1119			struct sk_buff_head *queue,
1120			int getfrag(void *from, char *to, int offset, int len,
1121			int odd, struct sk_buff *skb),
1122			void *from, int length, int hh_len, int fragheaderlen,
1123			int exthdrlen, int transhdrlen, int mtu,
1124			unsigned int flags, const struct flowi6 *fl6)
1125
1126{
1127	struct sk_buff *skb;
1128	int err;
1129
1130	/* There is support for UDP large send offload by network
1131	 * device, so create one single skb packet containing complete
1132	 * udp datagram
1133	 */
1134	skb = skb_peek_tail(queue);
1135	if (!skb) {
1136		skb = sock_alloc_send_skb(sk,
1137			hh_len + fragheaderlen + transhdrlen + 20,
1138			(flags & MSG_DONTWAIT), &err);
1139		if (!skb)
1140			return err;
1141
1142		/* reserve space for Hardware header */
1143		skb_reserve(skb, hh_len);
1144
1145		/* create space for UDP/IP header */
1146		skb_put(skb, fragheaderlen + transhdrlen);
1147
1148		/* initialize network header pointer */
1149		skb_set_network_header(skb, exthdrlen);
1150
1151		/* initialize protocol header pointer */
1152		skb->transport_header = skb->network_header + fragheaderlen;
1153
1154		skb->protocol = htons(ETH_P_IPV6);
1155		skb->csum = 0;
1156
1157		__skb_queue_tail(queue, skb);
1158	} else if (skb_is_gso(skb)) {
1159		goto append;
1160	}
1161
1162	skb->ip_summed = CHECKSUM_PARTIAL;
1163	/* Specify the length of each IPv6 datagram fragment.
1164	 * It has to be a multiple of 8.
1165	 */
1166	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1167				     sizeof(struct frag_hdr)) & ~7;
1168	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1169	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
1170							 &fl6->daddr,
1171							 &fl6->saddr);
1172
1173append:
1174	return skb_append_datato_frags(sk, skb, getfrag, from,
1175				       (length - transhdrlen));
1176}
1177
1178static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1179					       gfp_t gfp)
1180{
1181	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1182}
1183
1184static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1185						gfp_t gfp)
1186{
1187	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1188}
1189
1190static void ip6_append_data_mtu(unsigned int *mtu,
1191				int *maxfraglen,
1192				unsigned int fragheaderlen,
1193				struct sk_buff *skb,
1194				struct rt6_info *rt,
1195				unsigned int orig_mtu)
1196{
1197	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1198		if (!skb) {
1199			/* first fragment, reserve header_len */
1200			*mtu = orig_mtu - rt->dst.header_len;
1201
1202		} else {
1203			/*
1204			 * this fragment is not first, the headers
1205			 * space is regarded as data space.
1206			 */
1207			*mtu = orig_mtu;
1208		}
1209		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
1210			      + fragheaderlen - sizeof(struct frag_hdr);
1211	}
1212}
1213
1214static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1215			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1216			  struct rt6_info *rt, struct flowi6 *fl6)
1217{
1218	struct ipv6_pinfo *np = inet6_sk(sk);
1219	unsigned int mtu;
1220	struct ipv6_txoptions *opt = ipc6->opt;
1221
1222	/*
1223	 * setup for corking
1224	 */
1225	if (opt) {
1226		if (WARN_ON(v6_cork->opt))
1227			return -EINVAL;
1228
1229		v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
1230		if (unlikely(!v6_cork->opt))
1231			return -ENOBUFS;
1232
1233		v6_cork->opt->tot_len = opt->tot_len;
1234		v6_cork->opt->opt_flen = opt->opt_flen;
1235		v6_cork->opt->opt_nflen = opt->opt_nflen;
1236
1237		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1238						    sk->sk_allocation);
1239		if (opt->dst0opt && !v6_cork->opt->dst0opt)
1240			return -ENOBUFS;
1241
1242		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1243						    sk->sk_allocation);
1244		if (opt->dst1opt && !v6_cork->opt->dst1opt)
1245			return -ENOBUFS;
1246
1247		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1248						   sk->sk_allocation);
1249		if (opt->hopopt && !v6_cork->opt->hopopt)
1250			return -ENOBUFS;
1251
1252		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1253						    sk->sk_allocation);
1254		if (opt->srcrt && !v6_cork->opt->srcrt)
1255			return -ENOBUFS;
1256
1257		/* need source address above miyazawa*/
1258	}
1259	dst_hold(&rt->dst);
1260	cork->base.dst = &rt->dst;
1261	cork->fl.u.ip6 = *fl6;
1262	v6_cork->hop_limit = ipc6->hlimit;
1263	v6_cork->tclass = ipc6->tclass;
1264	if (rt->dst.flags & DST_XFRM_TUNNEL)
1265		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1266		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
1267	else
1268		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1269		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1270	if (np->frag_size < mtu) {
1271		if (np->frag_size)
1272			mtu = np->frag_size;
1273	}
 
 
1274	cork->base.fragsize = mtu;
1275	if (dst_allfrag(rt->dst.path))
1276		cork->base.flags |= IPCORK_ALLFRAG;
1277	cork->base.length = 0;
1278
1279	return 0;
1280}
1281
1282static int __ip6_append_data(struct sock *sk,
1283			     struct flowi6 *fl6,
1284			     struct sk_buff_head *queue,
1285			     struct inet_cork *cork,
1286			     struct inet6_cork *v6_cork,
1287			     struct page_frag *pfrag,
1288			     int getfrag(void *from, char *to, int offset,
1289					 int len, int odd, struct sk_buff *skb),
1290			     void *from, int length, int transhdrlen,
1291			     unsigned int flags, struct ipcm6_cookie *ipc6,
1292			     const struct sockcm_cookie *sockc)
1293{
1294	struct sk_buff *skb, *skb_prev = NULL;
1295	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
1296	int exthdrlen = 0;
1297	int dst_exthdrlen = 0;
1298	int hh_len;
1299	int copy;
1300	int err;
1301	int offset = 0;
1302	__u8 tx_flags = 0;
1303	u32 tskey = 0;
1304	struct rt6_info *rt = (struct rt6_info *)cork->dst;
1305	struct ipv6_txoptions *opt = v6_cork->opt;
1306	int csummode = CHECKSUM_NONE;
1307	unsigned int maxnonfragsize, headersize;
 
1308
1309	skb = skb_peek_tail(queue);
1310	if (!skb) {
1311		exthdrlen = opt ? opt->opt_flen : 0;
1312		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1313	}
1314
1315	mtu = cork->fragsize;
1316	orig_mtu = mtu;
1317
1318	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1319
1320	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1321			(opt ? opt->opt_nflen : 0);
1322	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1323		     sizeof(struct frag_hdr);
1324
1325	headersize = sizeof(struct ipv6hdr) +
1326		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1327		     (dst_allfrag(&rt->dst) ?
1328		      sizeof(struct frag_hdr) : 0) +
1329		     rt->rt6i_nfheader_len;
1330
 
 
 
 
 
 
1331	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1332	    (sk->sk_protocol == IPPROTO_UDP ||
1333	     sk->sk_protocol == IPPROTO_RAW)) {
1334		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1335				sizeof(struct ipv6hdr));
1336		goto emsgsize;
1337	}
1338
1339	if (ip6_sk_ignore_df(sk))
1340		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1341	else
1342		maxnonfragsize = mtu;
1343
1344	if (cork->length + length > maxnonfragsize - headersize) {
1345emsgsize:
1346		ipv6_local_error(sk, EMSGSIZE, fl6,
1347				 mtu - headersize +
1348				 sizeof(struct ipv6hdr));
1349		return -EMSGSIZE;
1350	}
1351
1352	/* CHECKSUM_PARTIAL only with no extension headers and when
1353	 * we are not going to fragment
1354	 */
1355	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1356	    headersize == sizeof(struct ipv6hdr) &&
1357	    length <= mtu - headersize &&
1358	    !(flags & MSG_MORE) &&
1359	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1360		csummode = CHECKSUM_PARTIAL;
1361
1362	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
1363		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
1364		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
1365		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1366			tskey = sk->sk_tskey++;
1367	}
1368
1369	/*
1370	 * Let's try using as much space as possible.
1371	 * Use MTU if total length of the message fits into the MTU.
1372	 * Otherwise, we need to reserve fragment header and
1373	 * fragment alignment (= 8-15 octects, in total).
1374	 *
1375	 * Note that we may need to "move" the data from the tail of
1376	 * of the buffer to the new fragment when we split
1377	 * the message.
1378	 *
1379	 * FIXME: It may be fragmented into multiple chunks
1380	 *        at once if non-fragmentable extension headers
1381	 *        are too large.
1382	 * --yoshfuji
1383	 */
1384
1385	cork->length += length;
1386	if ((((length + fragheaderlen) > mtu) ||
1387	     (skb && skb_is_gso(skb))) &&
1388	    (sk->sk_protocol == IPPROTO_UDP) &&
1389	    (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
1390	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
1391		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
1392					  hh_len, fragheaderlen, exthdrlen,
1393					  transhdrlen, mtu, flags, fl6);
1394		if (err)
1395			goto error;
1396		return 0;
1397	}
1398
1399	if (!skb)
1400		goto alloc_new_skb;
1401
1402	while (length > 0) {
1403		/* Check if the remaining data fits into current packet. */
1404		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1405		if (copy < length)
1406			copy = maxfraglen - skb->len;
1407
1408		if (copy <= 0) {
1409			char *data;
1410			unsigned int datalen;
1411			unsigned int fraglen;
1412			unsigned int fraggap;
1413			unsigned int alloclen;
1414alloc_new_skb:
1415			/* There's no room in the current skb */
1416			if (skb)
1417				fraggap = skb->len - maxfraglen;
1418			else
1419				fraggap = 0;
1420			/* update mtu and maxfraglen if necessary */
1421			if (!skb || !skb_prev)
1422				ip6_append_data_mtu(&mtu, &maxfraglen,
1423						    fragheaderlen, skb, rt,
1424						    orig_mtu);
1425
1426			skb_prev = skb;
1427
1428			/*
1429			 * If remaining data exceeds the mtu,
1430			 * we know we need more fragment(s).
1431			 */
1432			datalen = length + fraggap;
1433
1434			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1435				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1436			if ((flags & MSG_MORE) &&
1437			    !(rt->dst.dev->features&NETIF_F_SG))
1438				alloclen = mtu;
1439			else
1440				alloclen = datalen + fragheaderlen;
1441
1442			alloclen += dst_exthdrlen;
1443
1444			if (datalen != length + fraggap) {
1445				/*
1446				 * this is not the last fragment, the trailer
1447				 * space is regarded as data space.
1448				 */
1449				datalen += rt->dst.trailer_len;
1450			}
1451
1452			alloclen += rt->dst.trailer_len;
1453			fraglen = datalen + fragheaderlen;
1454
1455			/*
1456			 * We just reserve space for fragment header.
1457			 * Note: this may be overallocation if the message
1458			 * (without MSG_MORE) fits into the MTU.
1459			 */
1460			alloclen += sizeof(struct frag_hdr);
1461
 
 
 
 
 
1462			if (transhdrlen) {
1463				skb = sock_alloc_send_skb(sk,
1464						alloclen + hh_len,
1465						(flags & MSG_DONTWAIT), &err);
1466			} else {
1467				skb = NULL;
1468				if (atomic_read(&sk->sk_wmem_alloc) <=
1469				    2 * sk->sk_sndbuf)
1470					skb = sock_wmalloc(sk,
1471							   alloclen + hh_len, 1,
1472							   sk->sk_allocation);
1473				if (unlikely(!skb))
1474					err = -ENOBUFS;
1475			}
1476			if (!skb)
1477				goto error;
1478			/*
1479			 *	Fill in the control structures
1480			 */
1481			skb->protocol = htons(ETH_P_IPV6);
1482			skb->ip_summed = csummode;
1483			skb->csum = 0;
1484			/* reserve for fragmentation and ipsec header */
1485			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1486				    dst_exthdrlen);
1487
1488			/* Only the initial fragment is time stamped */
1489			skb_shinfo(skb)->tx_flags = tx_flags;
1490			tx_flags = 0;
1491			skb_shinfo(skb)->tskey = tskey;
1492			tskey = 0;
1493
1494			/*
1495			 *	Find where to start putting bytes
1496			 */
1497			data = skb_put(skb, fraglen);
1498			skb_set_network_header(skb, exthdrlen);
1499			data += fragheaderlen;
1500			skb->transport_header = (skb->network_header +
1501						 fragheaderlen);
1502			if (fraggap) {
1503				skb->csum = skb_copy_and_csum_bits(
1504					skb_prev, maxfraglen,
1505					data + transhdrlen, fraggap, 0);
1506				skb_prev->csum = csum_sub(skb_prev->csum,
1507							  skb->csum);
1508				data += fraggap;
1509				pskb_trim_unique(skb_prev, maxfraglen);
1510			}
1511			copy = datalen - transhdrlen - fraggap;
1512
1513			if (copy < 0) {
1514				err = -EINVAL;
1515				kfree_skb(skb);
1516				goto error;
1517			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1518				err = -EFAULT;
1519				kfree_skb(skb);
1520				goto error;
1521			}
1522
1523			offset += copy;
1524			length -= datalen - fraggap;
1525			transhdrlen = 0;
1526			exthdrlen = 0;
1527			dst_exthdrlen = 0;
1528
 
 
 
1529			/*
1530			 * Put the packet on the pending queue
1531			 */
 
 
 
 
 
1532			__skb_queue_tail(queue, skb);
1533			continue;
1534		}
1535
1536		if (copy > length)
1537			copy = length;
1538
1539		if (!(rt->dst.dev->features&NETIF_F_SG)) {
 
1540			unsigned int off;
1541
1542			off = skb->len;
1543			if (getfrag(from, skb_put(skb, copy),
1544						offset, copy, off, skb) < 0) {
1545				__skb_trim(skb, off);
1546				err = -EFAULT;
1547				goto error;
1548			}
1549		} else {
1550			int i = skb_shinfo(skb)->nr_frags;
1551
1552			err = -ENOMEM;
1553			if (!sk_page_frag_refill(sk, pfrag))
1554				goto error;
1555
1556			if (!skb_can_coalesce(skb, i, pfrag->page,
1557					      pfrag->offset)) {
1558				err = -EMSGSIZE;
1559				if (i == MAX_SKB_FRAGS)
1560					goto error;
1561
1562				__skb_fill_page_desc(skb, i, pfrag->page,
1563						     pfrag->offset, 0);
1564				skb_shinfo(skb)->nr_frags = ++i;
1565				get_page(pfrag->page);
1566			}
1567			copy = min_t(int, copy, pfrag->size - pfrag->offset);
1568			if (getfrag(from,
1569				    page_address(pfrag->page) + pfrag->offset,
1570				    offset, copy, skb->len, skb) < 0)
1571				goto error_efault;
1572
1573			pfrag->offset += copy;
1574			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1575			skb->len += copy;
1576			skb->data_len += copy;
1577			skb->truesize += copy;
1578			atomic_add(copy, &sk->sk_wmem_alloc);
1579		}
1580		offset += copy;
1581		length -= copy;
1582	}
1583
 
 
1584	return 0;
1585
1586error_efault:
1587	err = -EFAULT;
1588error:
1589	cork->length -= length;
1590	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
 
1591	return err;
1592}
1593
1594int ip6_append_data(struct sock *sk,
1595		    int getfrag(void *from, char *to, int offset, int len,
1596				int odd, struct sk_buff *skb),
1597		    void *from, int length, int transhdrlen,
1598		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1599		    struct rt6_info *rt, unsigned int flags,
1600		    const struct sockcm_cookie *sockc)
1601{
1602	struct inet_sock *inet = inet_sk(sk);
1603	struct ipv6_pinfo *np = inet6_sk(sk);
1604	int exthdrlen;
1605	int err;
1606
1607	if (flags&MSG_PROBE)
1608		return 0;
1609	if (skb_queue_empty(&sk->sk_write_queue)) {
1610		/*
1611		 * setup for corking
1612		 */
1613		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1614				     ipc6, rt, fl6);
1615		if (err)
1616			return err;
1617
1618		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1619		length += exthdrlen;
1620		transhdrlen += exthdrlen;
1621	} else {
1622		fl6 = &inet->cork.fl.u.ip6;
1623		transhdrlen = 0;
1624	}
1625
1626	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1627				 &np->cork, sk_page_frag(sk), getfrag,
1628				 from, length, transhdrlen, flags, ipc6, sockc);
1629}
1630EXPORT_SYMBOL_GPL(ip6_append_data);
1631
1632static void ip6_cork_release(struct inet_cork_full *cork,
1633			     struct inet6_cork *v6_cork)
1634{
1635	if (v6_cork->opt) {
1636		kfree(v6_cork->opt->dst0opt);
1637		kfree(v6_cork->opt->dst1opt);
1638		kfree(v6_cork->opt->hopopt);
1639		kfree(v6_cork->opt->srcrt);
1640		kfree(v6_cork->opt);
1641		v6_cork->opt = NULL;
1642	}
1643
1644	if (cork->base.dst) {
1645		dst_release(cork->base.dst);
1646		cork->base.dst = NULL;
1647		cork->base.flags &= ~IPCORK_ALLFRAG;
1648	}
1649	memset(&cork->fl, 0, sizeof(cork->fl));
1650}
1651
1652struct sk_buff *__ip6_make_skb(struct sock *sk,
1653			       struct sk_buff_head *queue,
1654			       struct inet_cork_full *cork,
1655			       struct inet6_cork *v6_cork)
1656{
1657	struct sk_buff *skb, *tmp_skb;
1658	struct sk_buff **tail_skb;
1659	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1660	struct ipv6_pinfo *np = inet6_sk(sk);
1661	struct net *net = sock_net(sk);
1662	struct ipv6hdr *hdr;
1663	struct ipv6_txoptions *opt = v6_cork->opt;
1664	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1665	struct flowi6 *fl6 = &cork->fl.u.ip6;
1666	unsigned char proto = fl6->flowi6_proto;
1667
1668	skb = __skb_dequeue(queue);
1669	if (!skb)
1670		goto out;
1671	tail_skb = &(skb_shinfo(skb)->frag_list);
1672
1673	/* move skb->data to ip header from ext header */
1674	if (skb->data < skb_network_header(skb))
1675		__skb_pull(skb, skb_network_offset(skb));
1676	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1677		__skb_pull(tmp_skb, skb_network_header_len(skb));
1678		*tail_skb = tmp_skb;
1679		tail_skb = &(tmp_skb->next);
1680		skb->len += tmp_skb->len;
1681		skb->data_len += tmp_skb->len;
1682		skb->truesize += tmp_skb->truesize;
1683		tmp_skb->destructor = NULL;
1684		tmp_skb->sk = NULL;
1685	}
1686
1687	/* Allow local fragmentation. */
1688	skb->ignore_df = ip6_sk_ignore_df(sk);
1689
1690	*final_dst = fl6->daddr;
1691	__skb_pull(skb, skb_network_header_len(skb));
1692	if (opt && opt->opt_flen)
1693		ipv6_push_frag_opts(skb, opt, &proto);
1694	if (opt && opt->opt_nflen)
1695		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1696
1697	skb_push(skb, sizeof(struct ipv6hdr));
1698	skb_reset_network_header(skb);
1699	hdr = ipv6_hdr(skb);
1700
1701	ip6_flow_hdr(hdr, v6_cork->tclass,
1702		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
1703					np->autoflowlabel, fl6));
1704	hdr->hop_limit = v6_cork->hop_limit;
1705	hdr->nexthdr = proto;
1706	hdr->saddr = fl6->saddr;
1707	hdr->daddr = *final_dst;
1708
1709	skb->priority = sk->sk_priority;
1710	skb->mark = sk->sk_mark;
1711
1712	skb_dst_set(skb, dst_clone(&rt->dst));
1713	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1714	if (proto == IPPROTO_ICMPV6) {
1715		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1716
1717		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1718		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1719	}
1720
1721	ip6_cork_release(cork, v6_cork);
1722out:
1723	return skb;
1724}
1725
1726int ip6_send_skb(struct sk_buff *skb)
1727{
1728	struct net *net = sock_net(skb->sk);
1729	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1730	int err;
1731
1732	err = ip6_local_out(net, skb->sk, skb);
1733	if (err) {
1734		if (err > 0)
1735			err = net_xmit_errno(err);
1736		if (err)
1737			IP6_INC_STATS(net, rt->rt6i_idev,
1738				      IPSTATS_MIB_OUTDISCARDS);
1739	}
1740
1741	return err;
1742}
1743
/*
 * ip6_push_pending_frames - build and transmit whatever is pending on @sk
 *
 * Returns 0 when ip6_finish_skb() yields no skb, otherwise the result of
 * ip6_send_skb() for the skb it produced.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1755
1756static void __ip6_flush_pending_frames(struct sock *sk,
1757				       struct sk_buff_head *queue,
1758				       struct inet_cork_full *cork,
1759				       struct inet6_cork *v6_cork)
1760{
1761	struct sk_buff *skb;
1762
1763	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1764		if (skb_dst(skb))
1765			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1766				      IPSTATS_MIB_OUTDISCARDS);
1767		kfree_skb(skb);
1768	}
1769
1770	ip6_cork_release(cork, v6_cork);
1771}
1772
1773void ip6_flush_pending_frames(struct sock *sk)
1774{
1775	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1776				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1777}
1778EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1779
1780struct sk_buff *ip6_make_skb(struct sock *sk,
1781			     int getfrag(void *from, char *to, int offset,
1782					 int len, int odd, struct sk_buff *skb),
1783			     void *from, int length, int transhdrlen,
1784			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1785			     struct rt6_info *rt, unsigned int flags,
1786			     const struct sockcm_cookie *sockc)
1787{
1788	struct inet_cork_full cork;
1789	struct inet6_cork v6_cork;
1790	struct sk_buff_head queue;
1791	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1792	int err;
1793
1794	if (flags & MSG_PROBE)
1795		return NULL;
1796
1797	__skb_queue_head_init(&queue);
1798
1799	cork.base.flags = 0;
1800	cork.base.addr = 0;
1801	cork.base.opt = NULL;
 
1802	v6_cork.opt = NULL;
1803	err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
1804	if (err)
 
1805		return ERR_PTR(err);
1806
1807	if (ipc6->dontfrag < 0)
1808		ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1809
1810	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1811				&current->task_frag, getfrag, from,
1812				length + exthdrlen, transhdrlen + exthdrlen,
1813				flags, ipc6, sockc);
1814	if (err) {
1815		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1816		return ERR_PTR(err);
1817	}
1818
1819	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1820}