/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>

static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	if (ret) {
		kfree_skb(skb);
		return ret;
	}

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note: the socket lock is not held for SYNACK packets, but the skb might
 * be modified by calls to skb_set_owner_w() and ipv6_local_error(),
 * which use proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now).
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (!skb2) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
			 * so it is safe to call in our context (socket lock not held)
			 */
			skb_set_owner_w(skb, (struct sock *)sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* If the egress device is enslaved to an L3 master device,
		 * pass the skb to its handler for processing.
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* Hooks should never assume the socket lock is held,
		 * so we promote our socket to non-const.
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require the socket lock,
	 * so we promote our socket to non-const.
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
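
/*
 * Usage sketch (illustrative only, not part of the original file): a
 * stream transport such as TCP builds its own header into the skb and
 * then hands the packet to ip6_xmit() together with a flowi6 that
 * describes the route. The names below are hypothetical placeholders:
 *
 *	skb_dst_set(skb, dst);
 *	err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, tclass);
 *	if (err > 0)
 *		err = net_xmit_errno(err);
 */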

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* Pass unicast neighbour discovery messages that
			 * are destined to the proxied address to the input
			 * function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

	return dst_output(net, sk, skb);
}

unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
	unsigned int mtu;
	struct inet6_dev *idev;

	if (dst_metric_locked(dst, RTAX_MTU)) {
		mtu = dst_metric_raw(dst, RTAX_MTU);
		if (mtu)
			return mtu;
	}

	mtu = IPV6_MIN_MTU;
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}
EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We do not do any processing on RA packets; we push them
	 *	to user level AS IS, without any warranty that the
	 *	application will be able to interpret them. The reason is
	 *	that we cannot make anything clever here.
	 *
	 *	We are not an end node, so if the packet contains
	 *	AH/ESP we cannot do anything.
	 *	Defragmentation would also be a mistake: RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement hop limit
	 */
	if (hdr->hop_limit <= 1) {
		/* Force the output device to be used as the
		 * source-address device. */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, ip6_dst_idev(dst),
					IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* The IPv6 specs say nothing about it, but it is clear that we
	   cannot send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same:
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force the output device to be used as the
		 * source-address device. */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling the hop count is delayed to the point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}

int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			err = -ENOMEM;
			goto fail;
		}
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		__skb_pull(skb, hlen);
		fh = __skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		for (;;) {
			/* Prepare the header of the next frame,
			 * before the previous one goes down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = __skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0)	{
		u8 *fragnexthdr_offset;

		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		fragnexthdr_offset = skb_network_header(frag);
		fragnexthdr_offset += prevhdr - skb_network_header(skb);
		*fragnexthdr_offset = NEXTHDR_FRAGMENT;

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the not-connected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route,
	 *    check that the cached destination is current.
	 *    If it is a network route, we still may
	 *    check its validity using the saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save the whole address now
	 *    (because the main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace to perform the lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer-encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
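
/*
 * Usage sketch (illustrative only, not part of the original file):
 * since the error is pointer-encoded, callers unwrap the result with
 * IS_ERR()/PTR_ERR() before using it:
 *
 *	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
 *	if (IS_ERR(dst))
 *		return PTR_ERR(dst);
 *	skb_dst_set(skb, dst);
 */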

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@connected: whether @sk is connected or not
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	In addition, for a connected socket, cache the dst in the socket
 *	if the current cache is not valid.
 *
 *	It returns a valid dst pointer on success, or a pointer-encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (dst)
		return dst;

	dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above --miyazawa */
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}

static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6,
			     const struct sockcm_cookie *sockc)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    !(flags & MSG_MORE) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}

int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags,
		    const struct sockcm_cookie *sockc)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6, sockc);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
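
/*
 * Usage sketch (illustrative only, not part of the original file): a
 * datagram protocol corks payload onto the socket write queue with
 * ip6_append_data() and then either pushes or flushes the queue; names
 * such as "corking" below are hypothetical placeholders:
 *
 *	err = ip6_append_data(sk, getfrag, msg, len, transhdrlen,
 *			      &ipc6, &fl6, rt, flags, &sockc);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!corking)
 *		err = ip6_push_pending_frames(sk);
 */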

static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}

struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     const struct sockcm_cookie *sockc)
{
	struct inet_cork_full cork;
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork.base.flags = 0;
	cork.base.addr = 0;
	cork.base.opt = NULL;
	cork.base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		ip6_cork_release(&cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6, sockc);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
}
v3.15
   1/*
   2 *	IPv6 output functions
   3 *	Linux INET6 implementation
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	Based on linux/net/ipv4/ip_output.c
   9 *
  10 *	This program is free software; you can redistribute it and/or
  11 *      modify it under the terms of the GNU General Public License
  12 *      as published by the Free Software Foundation; either version
  13 *      2 of the License, or (at your option) any later version.
  14 *
  15 *	Changes:
  16 *	A.N.Kuznetsov	:	airthmetics in fragmentation.
  17 *				extension headers are implemented.
  18 *				route changes now work.
  19 *				ip6_forward does not confuse sniffers.
  20 *				etc.
  21 *
  22 *      H. von Brand    :       Added missing #include <linux/string.h>
  23 *	Imran Patel	: 	frag id should be in NBO
  24 *      Kazunori MIYAZAWA @USAGI
  25 *			:       add ip6_append_data and related functions
  26 *				for datagram xmit
  27 */
  28
  29#include <linux/errno.h>
  30#include <linux/kernel.h>
  31#include <linux/string.h>
  32#include <linux/socket.h>
  33#include <linux/net.h>
  34#include <linux/netdevice.h>
  35#include <linux/if_arp.h>
  36#include <linux/in6.h>
  37#include <linux/tcp.h>
  38#include <linux/route.h>
  39#include <linux/module.h>
  40#include <linux/slab.h>
  41
 
  42#include <linux/netfilter.h>
  43#include <linux/netfilter_ipv6.h>
  44
  45#include <net/sock.h>
  46#include <net/snmp.h>
  47
  48#include <net/ipv6.h>
  49#include <net/ndisc.h>
  50#include <net/protocol.h>
  51#include <net/ip6_route.h>
  52#include <net/addrconf.h>
  53#include <net/rawv6.h>
  54#include <net/icmp.h>
  55#include <net/xfrm.h>
  56#include <net/checksum.h>
  57#include <linux/mroute6.h>
 
 
  58
  59static int ip6_finish_output2(struct sk_buff *skb)
  60{
  61	struct dst_entry *dst = skb_dst(skb);
  62	struct net_device *dev = dst->dev;
  63	struct neighbour *neigh;
  64	struct in6_addr *nexthop;
  65	int ret;
  66
  67	skb->protocol = htons(ETH_P_IPV6);
  68	skb->dev = dev;
  69
  70	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
  71		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
  72
  73		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
  74		    ((mroute6_socket(dev_net(dev), skb) &&
  75		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
  76		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
  77					 &ipv6_hdr(skb)->saddr))) {
  78			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
  79
  80			/* Do not check for IFF_ALLMULTI; multicast routing
  81			   is not supported in any case.
  82			 */
  83			if (newskb)
  84				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
  85					newskb, NULL, newskb->dev,
  86					dev_loopback_xmit);
  87
  88			if (ipv6_hdr(skb)->hop_limit == 0) {
  89				IP6_INC_STATS(dev_net(dev), idev,
  90					      IPSTATS_MIB_OUTDISCARDS);
  91				kfree_skb(skb);
  92				return 0;
  93			}
  94		}
  95
  96		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
  97				skb->len);
  98
  99		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
 100		    IPV6_ADDR_SCOPE_NODELOCAL &&
 101		    !(dev->flags & IFF_LOOPBACK)) {
 102			kfree_skb(skb);
 103			return 0;
 104		}
 105	}
 106
 107	rcu_read_lock_bh();
 108	nexthop = rt6_nexthop((struct rt6_info *)dst);
 109	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
 110	if (unlikely(!neigh))
 111		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
 112	if (!IS_ERR(neigh)) {
 113		ret = dst_neigh_output(dst, neigh, skb);
 114		rcu_read_unlock_bh();
 115		return ret;
 116	}
 117	rcu_read_unlock_bh();
 118
 119	IP6_INC_STATS(dev_net(dst->dev),
 120		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 121	kfree_skb(skb);
 122	return -EINVAL;
 123}
 124
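/*
 * Decide between fragmenting and sending as-is: fragment when the
 * packet exceeds the path MTU and is not GSO, when the route demands
 * fragmentation of all packets (dst_allfrag), or when the packet
 * exceeds the largest fragment size recorded by conntrack
 * defragmentation (frag_max_size).
 */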
 125static int ip6_finish_output(struct sk_buff *skb)
 126{
 127	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 128	    dst_allfrag(skb_dst(skb)) ||
 129	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
 130		return ip6_fragment(skb, ip6_finish_output2);
 131	else
 132		return ip6_finish_output2(skb);
 133}
 134
 135int ip6_output(struct sock *sk, struct sk_buff *skb)
 136{
 137	struct net_device *dev = skb_dst(skb)->dev;
 138	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 139	if (unlikely(idev->cnf.disable_ipv6)) {
 140		IP6_INC_STATS(dev_net(dev), idev,
 141			      IPSTATS_MIB_OUTDISCARDS);
 142		kfree_skb(skb);
 143		return 0;
 144	}
 145
 146	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
 147			    ip6_finish_output,
 148			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 149}
 150
 151/*
 152 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
 153 */
 154
 155int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 156	     struct ipv6_txoptions *opt, int tclass)
 157{
 158	struct net *net = sock_net(sk);
 159	struct ipv6_pinfo *np = inet6_sk(sk);
 160	struct in6_addr *first_hop = &fl6->daddr;
 161	struct dst_entry *dst = skb_dst(skb);
 162	struct ipv6hdr *hdr;
 163	u8  proto = fl6->flowi6_proto;
 164	int seg_len = skb->len;
 165	int hlimit = -1;
 166	u32 mtu;
 167
 168	if (opt) {
 169		unsigned int head_room;
 170
  171		/* First: exthdrs may take lots of space (~8K for now);
 172		   MAX_HEADER is not enough.
 173		 */
 174		head_room = opt->opt_nflen + opt->opt_flen;
 175		seg_len += head_room;
 176		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
 177
 178		if (skb_headroom(skb) < head_room) {
 179			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 180			if (skb2 == NULL) {
 181				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 182					      IPSTATS_MIB_OUTDISCARDS);
 183				kfree_skb(skb);
 184				return -ENOBUFS;
 185			}
 186			consume_skb(skb);
 187			skb = skb2;
 188			skb_set_owner_w(skb, sk);
 189		}
 190		if (opt->opt_flen)
 191			ipv6_push_frag_opts(skb, opt, &proto);
 192		if (opt->opt_nflen)
 193			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
 194	}
 195
 196	skb_push(skb, sizeof(struct ipv6hdr));
 197	skb_reset_network_header(skb);
 198	hdr = ipv6_hdr(skb);
 199
 200	/*
 201	 *	Fill in the IPv6 header
 202	 */
 203	if (np)
 204		hlimit = np->hop_limit;
 205	if (hlimit < 0)
 206		hlimit = ip6_dst_hoplimit(dst);
 207
 208	ip6_flow_hdr(hdr, tclass, fl6->flowlabel);
 209
 210	hdr->payload_len = htons(seg_len);
 211	hdr->nexthdr = proto;
 212	hdr->hop_limit = hlimit;
 213
 214	hdr->saddr = fl6->saddr;
 215	hdr->daddr = *first_hop;
 216
 217	skb->protocol = htons(ETH_P_IPV6);
 218	skb->priority = sk->sk_priority;
 219	skb->mark = sk->sk_mark;
 220
 221	mtu = dst_mtu(dst);
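	/*
	 * Hand the packet to the LOCAL_OUT hook when it fits the path
	 * MTU, when local fragmentation is allowed (local_df) or when
	 * GSO will segment it later; otherwise report EMSGSIZE to the
	 * socket instead of fragmenting here.
	 */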
 222	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
 223		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
 224			      IPSTATS_MIB_OUT, skb->len);
 225		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
 226			       dst->dev, dst_output);
 227	}
 228
 229	skb->dev = dst->dev;
 230	ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
 231	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
 232	kfree_skb(skb);
 233	return -EMSGSIZE;
 234}
 235
 236EXPORT_SYMBOL(ip6_xmit);
 237
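/*
 * Deliver a Router Alert packet to every raw socket that registered
 * for this alert value on the receiving interface; every listener but
 * the last gets a clone, the last one consumes the original.
 * Returns 1 if the packet was delivered, 0 otherwise.
 */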
 238static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 239{
 240	struct ip6_ra_chain *ra;
 241	struct sock *last = NULL;
 242
 243	read_lock(&ip6_ra_lock);
 244	for (ra = ip6_ra_chain; ra; ra = ra->next) {
 245		struct sock *sk = ra->sk;
 246		if (sk && ra->sel == sel &&
 247		    (!sk->sk_bound_dev_if ||
 248		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
 249			if (last) {
 250				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 251				if (skb2)
 252					rawv6_rcv(last, skb2);
 253			}
 254			last = sk;
 255		}
 256	}
 257
 258	if (last) {
 259		rawv6_rcv(last, skb);
 260		read_unlock(&ip6_ra_lock);
 261		return 1;
 262	}
 263	read_unlock(&ip6_ra_lock);
 264	return 0;
 265}
 266
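/*
 * Decide what to do with a packet whose destination is subject to
 * proxy NDP: return 1 to hand unicast neighbour discovery messages to
 * local input, -1 to drop packets for link-local destinations (after
 * signalling a link failure), and 0 to keep forwarding.
 */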
 267static int ip6_forward_proxy_check(struct sk_buff *skb)
 268{
 269	struct ipv6hdr *hdr = ipv6_hdr(skb);
 270	u8 nexthdr = hdr->nexthdr;
 271	__be16 frag_off;
 272	int offset;
 273
 274	if (ipv6_ext_hdr(nexthdr)) {
 275		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
 276		if (offset < 0)
 277			return 0;
 278	} else
 279		offset = sizeof(struct ipv6hdr);
 280
 281	if (nexthdr == IPPROTO_ICMPV6) {
 282		struct icmp6hdr *icmp6;
 283
 284		if (!pskb_may_pull(skb, (skb_network_header(skb) +
 285					 offset + 1 - skb->data)))
 286			return 0;
 287
 288		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 289
 290		switch (icmp6->icmp6_type) {
 291		case NDISC_ROUTER_SOLICITATION:
 292		case NDISC_ROUTER_ADVERTISEMENT:
 293		case NDISC_NEIGHBOUR_SOLICITATION:
 294		case NDISC_NEIGHBOUR_ADVERTISEMENT:
 295		case NDISC_REDIRECT:
  296			/* A unicast neighbour discovery message destined
  297			 * to the proxied address must be passed to the
  298			 * input function so it can be answered locally.
  299			 */
 300			return 1;
 301		default:
 302			break;
 303		}
 304	}
 305
 306	/*
 307	 * The proxying router can't forward traffic sent to a link-local
 308	 * address, so signal the sender and discard the packet. This
 309	 * behavior is clarified by the MIPv6 specification.
 310	 */
 311	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
 312		dst_link_failure(skb);
 313		return -1;
 314	}
 315
 316	return 0;
 317}
 318
 319static inline int ip6_forward_finish(struct sk_buff *skb)
 320{
 321	return dst_output(skb);
 322}
 323
 324static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
 325{
 326	unsigned int mtu;
 327	struct inet6_dev *idev;
 328
 329	if (dst_metric_locked(dst, RTAX_MTU)) {
 330		mtu = dst_metric_raw(dst, RTAX_MTU);
 331		if (mtu)
 332			return mtu;
 333	}
 334
 335	mtu = IPV6_MIN_MTU;
 336	rcu_read_lock();
 337	idev = __in6_dev_get(dst->dev);
 338	if (idev)
 339		mtu = idev->cnf.mtu6;
 340	rcu_read_unlock();
 341
 342	return mtu;
 343}
 344
 345static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 346{
 347	if (skb->len <= mtu)
 348		return false;
 349
 350	/* ipv6 conntrack defrag sets max_frag_size + local_df */
 351	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
 352		return true;
 353
 354	if (skb->local_df)
 355		return false;
 356
 357	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
 358		return false;
 359
 360	return true;
 361}
 362
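/*
 * Forwarding path: verify the packet is eligible (forwarding enabled,
 * hop limit above 1, xfrm policy passed), handle router-alert delivery
 * and proxy NDP, emit a redirect when the packet leaves on the device
 * it arrived on, enforce the path MTU, then decrement the hop limit
 * and hand the packet to the NF_INET_FORWARD hook.
 */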
 363int ip6_forward(struct sk_buff *skb)
 364{
 365	struct dst_entry *dst = skb_dst(skb);
 366	struct ipv6hdr *hdr = ipv6_hdr(skb);
 367	struct inet6_skb_parm *opt = IP6CB(skb);
 368	struct net *net = dev_net(dst->dev);
 369	u32 mtu;
 370
 371	if (net->ipv6.devconf_all->forwarding == 0)
 372		goto error;
 373
 374	if (skb->pkt_type != PACKET_HOST)
 375		goto drop;
 376
 377	if (skb_warn_if_lro(skb))
 378		goto drop;
 379
 380	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
 381		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
 382				 IPSTATS_MIB_INDISCARDS);
 383		goto drop;
 384	}
 385
 386	skb_forward_csum(skb);
 387
  388	/*
  389	 *	We do NOT do any processing on
  390	 *	RA packets; we push them to user level AS IS
  391	 *	without any WARRANTY that the application will be able
  392	 *	to interpret them. The reason is that we
  393	 *	cannot do anything clever here.
  394	 *
  395	 *	We are not an end node, so if the packet contains
  396	 *	AH/ESP we cannot do anything.
  397	 *	Defragmentation would also be a mistake; RA packets
  398	 *	cannot be fragmented, because there is no guarantee
  399	 *	that different fragments will go along one path. --ANK
  400	 */
 401	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
 402		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
 403			return 0;
 404	}
 405
 406	/*
  407	 *	check and decrement hop limit
 408	 */
 409	if (hdr->hop_limit <= 1) {
 410		/* Force OUTPUT device used as source address */
 411		skb->dev = dst->dev;
 412		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
 413		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
 414				 IPSTATS_MIB_INHDRERRORS);
 415
 416		kfree_skb(skb);
 417		return -ETIMEDOUT;
 418	}
 419
 420	/* XXX: idev->cnf.proxy_ndp? */
 421	if (net->ipv6.devconf_all->proxy_ndp &&
 422	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
 423		int proxied = ip6_forward_proxy_check(skb);
 424		if (proxied > 0)
 425			return ip6_input(skb);
 426		else if (proxied < 0) {
 427			IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
 428					 IPSTATS_MIB_INDISCARDS);
 429			goto drop;
 430		}
 431	}
 432
 433	if (!xfrm6_route_forward(skb)) {
 434		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
 435				 IPSTATS_MIB_INDISCARDS);
 436		goto drop;
 437	}
 438	dst = skb_dst(skb);
 439
 440	/* IPv6 specs say nothing about it, but it is clear that we cannot
 441	   send redirects to source routed frames.
 442	   We don't send redirects to frames decapsulated from IPsec.
 443	 */
 444	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
 445		struct in6_addr *target = NULL;
 446		struct inet_peer *peer;
 447		struct rt6_info *rt;
 448
 449		/*
 450		 *	incoming and outgoing devices are the same
 451		 *	send a redirect.
 452		 */
 453
 454		rt = (struct rt6_info *) dst;
 455		if (rt->rt6i_flags & RTF_GATEWAY)
 456			target = &rt->rt6i_gateway;
 457		else
 458			target = &hdr->daddr;
 459
 460		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
 461
 462		/* Limit redirects both by destination (here)
 463		   and by source (inside ndisc_send_redirect)
 464		 */
 465		if (inet_peer_xrlim_allow(peer, 1*HZ))
 466			ndisc_send_redirect(skb, target);
 467		if (peer)
 468			inet_putpeer(peer);
 469	} else {
 470		int addrtype = ipv6_addr_type(&hdr->saddr);
 471
 472		/* This check is security critical. */
 473		if (addrtype == IPV6_ADDR_ANY ||
 474		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
 475			goto error;
 476		if (addrtype & IPV6_ADDR_LINKLOCAL) {
 477			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
 478				    ICMPV6_NOT_NEIGHBOUR, 0);
 479			goto error;
 480		}
 481	}
 482
 483	mtu = ip6_dst_mtu_forward(dst);
 484	if (mtu < IPV6_MIN_MTU)
 485		mtu = IPV6_MIN_MTU;
 486
 487	if (ip6_pkt_too_big(skb, mtu)) {
 488		/* Again, force OUTPUT device used as source address */
 489		skb->dev = dst->dev;
 490		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 491		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
 492				 IPSTATS_MIB_INTOOBIGERRORS);
 493		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
 494				 IPSTATS_MIB_FRAGFAILS);
 495		kfree_skb(skb);
 496		return -EMSGSIZE;
 497	}
 498
 499	if (skb_cow(skb, dst->dev->hard_header_len)) {
 500		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
 501				 IPSTATS_MIB_OUTDISCARDS);
 502		goto drop;
 503	}
 504
 505	hdr = ipv6_hdr(skb);
 506
 507	/* Mangling hops number delayed to point after skb COW */
 508
 509	hdr->hop_limit--;
 510
 511	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 512	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
 513	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
 514		       ip6_forward_finish);
 515
 516error:
 517	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
 518drop:
 519	kfree_skb(skb);
 520	return -EINVAL;
 521}
 522
 523static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 524{
 525	to->pkt_type = from->pkt_type;
 526	to->priority = from->priority;
 527	to->protocol = from->protocol;
 528	skb_dst_drop(to);
 529	skb_dst_set(to, dst_clone(skb_dst(from)));
 530	to->dev = from->dev;
 531	to->mark = from->mark;
 532
 533#ifdef CONFIG_NET_SCHED
 534	to->tc_index = from->tc_index;
 535#endif
 536	nf_copy(to, from);
 537	skb_copy_secmark(to, from);
 538}
 539
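/*
 * Fragment an IPv6 packet and feed the pieces to @output.  Two
 * strategies are used: a fast path that reuses an existing, well-formed
 * frag_list by prepending a fragment header to each member, and a slow
 * path that allocates fresh skbs and copies the payload into them in
 * 8-byte-aligned chunks.
 */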
 540int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 541{
 542	struct sk_buff *frag;
 543	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
 544	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 545	struct ipv6hdr *tmp_hdr;
 546	struct frag_hdr *fh;
 547	unsigned int mtu, hlen, left, len;
 548	int hroom, troom;
 549	__be32 frag_id = 0;
  550	int ptr, offset = 0, err = 0;
 551	u8 *prevhdr, nexthdr = 0;
 552	struct net *net = dev_net(skb_dst(skb)->dev);
 553
 554	hlen = ip6_find_1stfragopt(skb, &prevhdr);
 555	nexthdr = *prevhdr;
 556
 557	mtu = ip6_skb_dst_mtu(skb);
 558
 559	/* We must not fragment if the socket is set to force MTU discovery
  560	 * or if the skb was not generated by a local socket.
 561	 */
 562	if (unlikely(!skb->local_df && skb->len > mtu) ||
 563		     (IP6CB(skb)->frag_max_size &&
 564		      IP6CB(skb)->frag_max_size > mtu)) {
 565		if (skb->sk && dst_allfrag(skb_dst(skb)))
 566			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
 567
 568		skb->dev = skb_dst(skb)->dev;
 569		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 570		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 571			      IPSTATS_MIB_FRAGFAILS);
 572		kfree_skb(skb);
 573		return -EMSGSIZE;
 574	}
 575
 576	if (np && np->frag_size < mtu) {
 577		if (np->frag_size)
 578			mtu = np->frag_size;
 579	}
 580	mtu -= hlen + sizeof(struct frag_hdr);
 581
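	/*
	 * Fast path requirements: the head and every fragment fit the
	 * MTU, all but the last fragment are a multiple of 8 bytes,
	 * each fragment has headroom for the headers, and nothing is
	 * cloned or shared.
	 */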
 582	if (skb_has_frag_list(skb)) {
 583		int first_len = skb_pagelen(skb);
 584		struct sk_buff *frag2;
 585
 586		if (first_len - hlen > mtu ||
 587		    ((first_len - hlen) & 7) ||
 588		    skb_cloned(skb))
 589			goto slow_path;
 590
 591		skb_walk_frags(skb, frag) {
 592			/* Correct geometry. */
 593			if (frag->len > mtu ||
 594			    ((frag->len & 7) && frag->next) ||
 595			    skb_headroom(frag) < hlen)
 596				goto slow_path_clean;
 597
 598			/* Partially cloned skb? */
 599			if (skb_shared(frag))
 600				goto slow_path_clean;
 601
 602			BUG_ON(frag->sk);
 603			if (skb->sk) {
 604				frag->sk = skb->sk;
 605				frag->destructor = sock_wfree;
 606			}
 607			skb->truesize -= frag->truesize;
 608		}
 609
 610		err = 0;
 611		offset = 0;
 612		frag = skb_shinfo(skb)->frag_list;
 613		skb_frag_list_init(skb);
 614		/* BUILD HEADER */
 615
 616		*prevhdr = NEXTHDR_FRAGMENT;
 617		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 618		if (!tmp_hdr) {
 619			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 620				      IPSTATS_MIB_FRAGFAILS);
 621			return -ENOMEM;
 622		}
 623
 624		__skb_pull(skb, hlen);
  625		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
 626		__skb_push(skb, hlen);
 627		skb_reset_network_header(skb);
 628		memcpy(skb_network_header(skb), tmp_hdr, hlen);
 629
 630		ipv6_select_ident(fh, rt);
 631		fh->nexthdr = nexthdr;
 632		fh->reserved = 0;
 633		fh->frag_off = htons(IP6_MF);
 634		frag_id = fh->identification;
 635
 636		first_len = skb_pagelen(skb);
 637		skb->data_len = first_len - skb_headlen(skb);
 638		skb->len = first_len;
 639		ipv6_hdr(skb)->payload_len = htons(first_len -
 640						   sizeof(struct ipv6hdr));
 641
 642		dst_hold(&rt->dst);
 643
 644		for (;;) {
  645			/* Prepare the header of the next frame,
  646			 * before the previous one goes down. */
 647			if (frag) {
 648				frag->ip_summed = CHECKSUM_NONE;
 649				skb_reset_transport_header(frag);
  650				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
 651				__skb_push(frag, hlen);
 652				skb_reset_network_header(frag);
 653				memcpy(skb_network_header(frag), tmp_hdr,
 654				       hlen);
 655				offset += skb->len - hlen - sizeof(struct frag_hdr);
 656				fh->nexthdr = nexthdr;
 657				fh->reserved = 0;
 658				fh->frag_off = htons(offset);
 659				if (frag->next != NULL)
 660					fh->frag_off |= htons(IP6_MF);
 661				fh->identification = frag_id;
 662				ipv6_hdr(frag)->payload_len =
 663						htons(frag->len -
 664						      sizeof(struct ipv6hdr));
 665				ip6_copy_metadata(frag, skb);
 666			}
 667
 668			err = output(skb);
  669			if (!err)
 670				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 671					      IPSTATS_MIB_FRAGCREATES);
 672
 673			if (err || !frag)
 674				break;
 675
 676			skb = frag;
 677			frag = skb->next;
 678			skb->next = NULL;
 679		}
 680
 681		kfree(tmp_hdr);
 682
 683		if (err == 0) {
 684			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 685				      IPSTATS_MIB_FRAGOKS);
 686			ip6_rt_put(rt);
 687			return 0;
 688		}
 689
 690		while (frag) {
 691			skb = frag->next;
 692			kfree_skb(frag);
 693			frag = skb;
 694		}
 695
 696		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 697			      IPSTATS_MIB_FRAGFAILS);
 698		ip6_rt_put(rt);
 699		return err;
 700
 701slow_path_clean:
 702		skb_walk_frags(skb, frag2) {
 703			if (frag2 == frag)
 704				break;
 705			frag2->sk = NULL;
 706			frag2->destructor = NULL;
 707			skb->truesize += frag2->truesize;
 708		}
 709	}
 710
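/*
 * Slow path: carve the payload into freshly allocated skbs.  Each
 * fragment receives a copy of the original headers, a fragment header
 * and up to an 8-byte-aligned MTU's worth of data.
 */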
 711slow_path:
 712	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
 713	    skb_checksum_help(skb))
 714		goto fail;
 715
 716	left = skb->len - hlen;		/* Space per frame */
 717	ptr = hlen;			/* Where to start from */
 718
 719	/*
 720	 *	Fragment the datagram.
 721	 */
 722
 723	*prevhdr = NEXTHDR_FRAGMENT;
 724	hroom = LL_RESERVED_SPACE(rt->dst.dev);
 725	troom = rt->dst.dev->needed_tailroom;
 726
 727	/*
 728	 *	Keep copying data until we run out.
 729	 */
  730	while (left > 0) {
 731		len = left;
 732		/* IF: it doesn't fit, use 'mtu' - the data space left */
 733		if (len > mtu)
 734			len = mtu;
 735		/* IF: we are not sending up to and including the packet end
 736		   then align the next start on an eight byte boundary */
 737		if (len < left)	{
 738			len &= ~7;
 739		}
 740		/*
 741		 *	Allocate buffer.
 742		 */
 743
 744		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
 745				      hroom + troom, GFP_ATOMIC)) == NULL) {
 746			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
 747			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 748				      IPSTATS_MIB_FRAGFAILS);
 749			err = -ENOMEM;
 750			goto fail;
 751		}
 752
 753		/*
 754		 *	Set up data on packet
 755		 */
 756
 757		ip6_copy_metadata(frag, skb);
 758		skb_reserve(frag, hroom);
 759		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 760		skb_reset_network_header(frag);
 761		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
 762		frag->transport_header = (frag->network_header + hlen +
 763					  sizeof(struct frag_hdr));
 764
 765		/*
 766		 *	Charge the memory for the fragment to any owner
 767		 *	it might possess
 768		 */
 769		if (skb->sk)
 770			skb_set_owner_w(frag, skb->sk);
 771
 772		/*
 773		 *	Copy the packet header into the new buffer.
 774		 */
 775		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 776
 777		/*
 778		 *	Build fragment header.
 779		 */
 780		fh->nexthdr = nexthdr;
 781		fh->reserved = 0;
 782		if (!frag_id) {
 783			ipv6_select_ident(fh, rt);
 784			frag_id = fh->identification;
 785		} else
 786			fh->identification = frag_id;
 787
 788		/*
 789		 *	Copy a block of the IP datagram.
 790		 */
 791		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
 792			BUG();
 793		left -= len;
 794
 795		fh->frag_off = htons(offset);
 796		if (left > 0)
 797			fh->frag_off |= htons(IP6_MF);
 798		ipv6_hdr(frag)->payload_len = htons(frag->len -
 799						    sizeof(struct ipv6hdr));
 800
 801		ptr += len;
 802		offset += len;
 803
 804		/*
 805		 *	Put this fragment into the sending queue.
 806		 */
 807		err = output(frag);
 808		if (err)
 809			goto fail;
 810
 811		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 812			      IPSTATS_MIB_FRAGCREATES);
 813	}
 814	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 815		      IPSTATS_MIB_FRAGOKS);
 816	consume_skb(skb);
 817	return err;
 818
 819fail:
 820	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 821		      IPSTATS_MIB_FRAGFAILS);
 822	kfree_skb(skb);
 823	return err;
 824}
 825
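/*
 * Nonzero when the cached route key no longer provably matches the
 * flow address: neither an exact /128 match nor equal to the last
 * destination address cached on the socket.
 */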
 826static inline int ip6_rt_check(const struct rt6key *rt_key,
 827			       const struct in6_addr *fl_addr,
 828			       const struct in6_addr *addr_cache)
 829{
 830	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 831		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
 832}
 833
 834static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 835					  struct dst_entry *dst,
 836					  const struct flowi6 *fl6)
 837{
 838	struct ipv6_pinfo *np = inet6_sk(sk);
 839	struct rt6_info *rt;
 840
 841	if (!dst)
 842		goto out;
 843
 844	if (dst->ops->family != AF_INET6) {
 845		dst_release(dst);
 846		return NULL;
 847	}
 848
 849	rt = (struct rt6_info *)dst;
  850	/* Yes, checking route validity in the not connected
  851	 * case is not very simple. Take into account
  852	 * that we do not support routing by source, TOS,
  853	 * and MSG_DONTROUTE		--ANK (980726)
  854	 *
  855	 * 1. ip6_rt_check(): If the route was a host route,
  856	 *    check that the cached destination is current.
  857	 *    If it is a network route, we still may
  858	 *    check its validity using a saved pointer
  859	 *    to the last used address: daddr_cache.
  860	 *    We do not want to save the whole address now
  861	 *    (because the main consumer of this service
  862	 *    is tcp, which does not have this problem),
  863	 *    so the last trick works only on connected
  864	 *    sockets.
  865	 * 2. oif should also be the same.
  866	 */
 867	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
 868#ifdef CONFIG_IPV6_SUBTREES
 869	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
 870#endif
 871	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
 872		dst_release(dst);
 873		dst = NULL;
 874	}
 875
 876out:
 877	return dst;
 878}
 879
 880static int ip6_dst_lookup_tail(struct sock *sk,
 881			       struct dst_entry **dst, struct flowi6 *fl6)
 882{
 883	struct net *net = sock_net(sk);
 884#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 885	struct neighbour *n;
 886	struct rt6_info *rt;
 887#endif
 888	int err;
 889
 890	if (*dst == NULL)
 891		*dst = ip6_route_output(net, sk, fl6);
 892
 893	if ((err = (*dst)->error))
 894		goto out_err_release;
 895
 896	if (ipv6_addr_any(&fl6->saddr)) {
 897		struct rt6_info *rt = (struct rt6_info *) *dst;
 898		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
 899					  sk ? inet6_sk(sk)->srcprefs : 0,
 900					  &fl6->saddr);
 901		if (err)
 902			goto out_err_release;
 903	}
 904
 905#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 906	/*
 907	 * Here if the dst entry we've looked up
 908	 * has a neighbour entry that is in the INCOMPLETE
 909	 * state and the src address from the flow is
 910	 * marked as OPTIMISTIC, we release the found
 911	 * dst entry and replace it instead with the
 912	 * dst entry of the nexthop router
 913	 */
 914	rt = (struct rt6_info *) *dst;
 915	rcu_read_lock_bh();
 916	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
 917	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
 918	rcu_read_unlock_bh();
 919
 920	if (err) {
 921		struct inet6_ifaddr *ifp;
 922		struct flowi6 fl_gw6;
 923		int redirect;
 924
 925		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
 926				      (*dst)->dev, 1);
 927
 928		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
 929		if (ifp)
 930			in6_ifa_put(ifp);
 931
 932		if (redirect) {
 933			/*
 934			 * We need to get the dst entry for the
 935			 * default router instead
 936			 */
 937			dst_release(*dst);
 938			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
 939			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
 940			*dst = ip6_route_output(net, sk, &fl_gw6);
 941			if ((err = (*dst)->error))
 942				goto out_err_release;
 943		}
 944	}
 945#endif
 946
 947	return 0;
 948
 949out_err_release:
 950	if (err == -ENETUNREACH)
 951		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
 952	dst_release(*dst);
 953	*dst = NULL;
 954	return err;
 955}
 956
 957/**
 958 *	ip6_dst_lookup - perform route lookup on flow
 959 *	@sk: socket which provides route info
 960 *	@dst: pointer to dst_entry * for result
 961 *	@fl6: flow to lookup
 962 *
 963 *	This function performs a route lookup on the given flow.
 964 *
 965 *	It returns zero on success, or a standard errno code on error.
 966 */
 967int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
 968{
 969	*dst = NULL;
 970	return ip6_dst_lookup_tail(sk, dst, fl6);
 971}
 972EXPORT_SYMBOL_GPL(ip6_dst_lookup);
 973
 974/**
 975 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 976 *	@sk: socket which provides route info
 977 *	@fl6: flow to lookup
 978 *	@final_dst: final destination address for ipsec lookup
 979 *
 980 *	This function performs a route lookup on the given flow.
 981 *
 982 *	It returns a valid dst pointer on success, or a pointer encoded
 983 *	error code.
 984 */
 985struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 986				      const struct in6_addr *final_dst)
 987{
 988	struct dst_entry *dst = NULL;
 989	int err;
 990
 991	err = ip6_dst_lookup_tail(sk, &dst, fl6);
 992	if (err)
 993		return ERR_PTR(err);
 994	if (final_dst)
 995		fl6->daddr = *final_dst;
 996
 997	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
 998}
 999EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1000
1001/**
1002 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1003 *	@sk: socket which provides the dst cache and route info
1004 *	@fl6: flow to lookup
1005 *	@final_dst: final destination address for ipsec lookup
1006 *
1007 *	This function performs a route lookup on the given flow with the
1008 *	possibility of using the cached route in the socket if it is valid.
1009 *	It will take the socket dst lock when operating on the dst cache.
1010 *	As a result, this function can only be used in process context.
1011 *
1012 *	It returns a valid dst pointer on success, or a pointer encoded
1013 *	error code.
1014 */
1015struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1016					 const struct in6_addr *final_dst)
1017{
1018	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1019	int err;
1020
1021	dst = ip6_sk_dst_check(sk, dst, fl6);
1022
1023	err = ip6_dst_lookup_tail(sk, &dst, fl6);
1024	if (err)
1025		return ERR_PTR(err);
1026	if (final_dst)
1027		fl6->daddr = *final_dst;
1028
1029	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1030}
1031EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1032
1033static inline int ip6_ufo_append_data(struct sock *sk,
1034			int getfrag(void *from, char *to, int offset, int len,
1035			int odd, struct sk_buff *skb),
1036			void *from, int length, int hh_len, int fragheaderlen,
 1037			int transhdrlen, int mtu, unsigned int flags,
1038			struct rt6_info *rt)
1039
1040{
1041	struct sk_buff *skb;
1042	struct frag_hdr fhdr;
1043	int err;
1044
 1045	/* There is support for UDP large send offload by the network
 1046	 * device, so create one single skb packet containing the complete
 1047	 * udp datagram.
1048	 */
1049	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1050		skb = sock_alloc_send_skb(sk,
1051			hh_len + fragheaderlen + transhdrlen + 20,
1052			(flags & MSG_DONTWAIT), &err);
1053		if (skb == NULL)
1054			return err;
1055
1056		/* reserve space for Hardware header */
1057		skb_reserve(skb, hh_len);
1058
1059		/* create space for UDP/IP header */
 1060		skb_put(skb, fragheaderlen + transhdrlen);
1061
1062		/* initialize network header pointer */
1063		skb_reset_network_header(skb);
1064
1065		/* initialize protocol header pointer */
1066		skb->transport_header = skb->network_header + fragheaderlen;
1067
1068		skb->protocol = htons(ETH_P_IPV6);
1069		skb->csum = 0;
1070
1071		__skb_queue_tail(&sk->sk_write_queue, skb);
1072	} else if (skb_is_gso(skb)) {
1073		goto append;
1074	}
1075
1076	skb->ip_summed = CHECKSUM_PARTIAL;
1077	/* Specify the length of each IPv6 datagram fragment.
1078	 * It has to be a multiple of 8.
1079	 */
1080	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1081				     sizeof(struct frag_hdr)) & ~7;
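	/* e.g. assuming a 1500-byte MTU and a bare 40-byte IPv6
	 * header: (1500 - 40 - 8) & ~7 = 1448 bytes of payload in
	 * every fragment but the last.
	 */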
1082	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1083	ipv6_select_ident(&fhdr, rt);
1084	skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1085
1086append:
1087	return skb_append_datato_frags(sk, skb, getfrag, from,
1088				       (length - transhdrlen));
1089}
1090
1091static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1092					       gfp_t gfp)
1093{
1094	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1095}
1096
1097static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1098						gfp_t gfp)
1099{
1100	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1101}
1102
1103static void ip6_append_data_mtu(unsigned int *mtu,
1104				int *maxfraglen,
1105				unsigned int fragheaderlen,
1106				struct sk_buff *skb,
1107				struct rt6_info *rt,
1108				unsigned int orig_mtu)
1109{
1110	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1111		if (skb == NULL) {
1112			/* first fragment, reserve header_len */
1113			*mtu = orig_mtu - rt->dst.header_len;
1114
1115		} else {
1116			/*
 1117			 * this fragment is not the first; the header
 1118			 * space is regarded as data space.
1119			 */
1120			*mtu = orig_mtu;
1121		}
1122		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
1123			      + fragheaderlen - sizeof(struct frag_hdr);
1124	}
1125}
1126
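/*
 * Queue data on sk->sk_write_queue without sending it: the first call
 * sets up the cork state (route, options, MTU), later calls append
 * more data, and ip6_push_pending_frames() finally builds the IPv6
 * header and transmits the queue as one datagram, fragmenting it if
 * necessary.
 */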
1127int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1128	int offset, int len, int odd, struct sk_buff *skb),
1129	void *from, int length, int transhdrlen,
1130	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1131	struct rt6_info *rt, unsigned int flags, int dontfrag)
1132{
1133	struct inet_sock *inet = inet_sk(sk);
1134	struct ipv6_pinfo *np = inet6_sk(sk);
1135	struct inet_cork *cork;
1136	struct sk_buff *skb, *skb_prev = NULL;
1137	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
1138	int exthdrlen;
1139	int dst_exthdrlen;
1140	int hh_len;
1141	int copy;
1142	int err;
1143	int offset = 0;
1144	__u8 tx_flags = 0;
1145
 1146	if (flags & MSG_PROBE)
1147		return 0;
1148	cork = &inet->cork.base;
1149	if (skb_queue_empty(&sk->sk_write_queue)) {
1150		/*
1151		 * setup for corking
1152		 */
1153		if (opt) {
1154			if (WARN_ON(np->cork.opt))
1155				return -EINVAL;
1156
1157			np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
1158			if (unlikely(np->cork.opt == NULL))
1159				return -ENOBUFS;
1160
1161			np->cork.opt->tot_len = opt->tot_len;
1162			np->cork.opt->opt_flen = opt->opt_flen;
1163			np->cork.opt->opt_nflen = opt->opt_nflen;
1164
1165			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1166							    sk->sk_allocation);
1167			if (opt->dst0opt && !np->cork.opt->dst0opt)
1168				return -ENOBUFS;
1169
1170			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1171							    sk->sk_allocation);
1172			if (opt->dst1opt && !np->cork.opt->dst1opt)
1173				return -ENOBUFS;
1174
1175			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1176							   sk->sk_allocation);
1177			if (opt->hopopt && !np->cork.opt->hopopt)
1178				return -ENOBUFS;
1179
1180			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1181							    sk->sk_allocation);
1182			if (opt->srcrt && !np->cork.opt->srcrt)
1183				return -ENOBUFS;
1184
 1185			/* need source address above miyazawa */
1186		}
1187		dst_hold(&rt->dst);
1188		cork->dst = &rt->dst;
1189		inet->cork.fl.u.ip6 = *fl6;
1190		np->cork.hop_limit = hlimit;
1191		np->cork.tclass = tclass;
1192		if (rt->dst.flags & DST_XFRM_TUNNEL)
1193			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1194			      rt->dst.dev->mtu : dst_mtu(&rt->dst);
1195		else
1196			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1197			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1198		if (np->frag_size < mtu) {
1199			if (np->frag_size)
1200				mtu = np->frag_size;
1201		}
1202		cork->fragsize = mtu;
1203		if (dst_allfrag(rt->dst.path))
1204			cork->flags |= IPCORK_ALLFRAG;
1205		cork->length = 0;
1206		exthdrlen = (opt ? opt->opt_flen : 0);
1207		length += exthdrlen;
1208		transhdrlen += exthdrlen;
1209		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1210	} else {
1211		rt = (struct rt6_info *)cork->dst;
1212		fl6 = &inet->cork.fl.u.ip6;
1213		opt = np->cork.opt;
1214		transhdrlen = 0;
1215		exthdrlen = 0;
1216		dst_exthdrlen = 0;
1217		mtu = cork->fragsize;
1218	}
1219	orig_mtu = mtu;
1220
1221	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1222
1223	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1224			(opt ? opt->opt_nflen : 0);
1225	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1226		     sizeof(struct frag_hdr);
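	/* e.g. assuming mtu 1500 and fragheaderlen 40:
	 * ((1500 - 40) & ~7) + 40 - 8 = 1488, the largest fragment
	 * that still fits the MTU once the 8-byte fragment header is
	 * added.
	 */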
1227
1228	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1229		unsigned int maxnonfragsize, headersize;
1230
1231		headersize = sizeof(struct ipv6hdr) +
1232			     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1233			     (dst_allfrag(&rt->dst) ?
1234			      sizeof(struct frag_hdr) : 0) +
1235			     rt->rt6i_nfheader_len;
1236
1237		if (ip6_sk_local_df(sk))
1238			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1239		else
1240			maxnonfragsize = mtu;
1241
1242		/* dontfrag active */
1243		if ((cork->length + length > mtu - headersize) && dontfrag &&
1244		    (sk->sk_protocol == IPPROTO_UDP ||
1245		     sk->sk_protocol == IPPROTO_RAW)) {
1246			ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1247						   sizeof(struct ipv6hdr));
1248			goto emsgsize;
1249		}
1250
1251		if (cork->length + length > maxnonfragsize - headersize) {
1252emsgsize:
1253			ipv6_local_error(sk, EMSGSIZE, fl6,
1254					 mtu - headersize +
1255					 sizeof(struct ipv6hdr));
1256			return -EMSGSIZE;
1257		}
1258	}
1259
1260	/* For UDP, check if TX timestamp is enabled */
1261	if (sk->sk_type == SOCK_DGRAM)
1262		sock_tx_timestamp(sk, &tx_flags);
1263
1264	/*
1265	 * Let's try using as much space as possible.
1266	 * Use MTU if total length of the message fits into the MTU.
1267	 * Otherwise, we need to reserve fragment header and
 1268	 * fragment alignment (= 8-15 octets, in total).
1269	 *
 1270	 * Note that we may need to "move" the data from the tail
 1271	 * of the buffer to the new fragment when we split
1272	 * the message.
1273	 *
1274	 * FIXME: It may be fragmented into multiple chunks
1275	 *        at once if non-fragmentable extension headers
1276	 *        are too large.
1277	 * --yoshfuji
1278	 */
1279
1280	skb = skb_peek_tail(&sk->sk_write_queue);
1281	cork->length += length;
1282	if (((length > mtu) ||
1283	     (skb && skb_is_gso(skb))) &&
1284	    (sk->sk_protocol == IPPROTO_UDP) &&
1285	    (rt->dst.dev->features & NETIF_F_UFO)) {
1286		err = ip6_ufo_append_data(sk, getfrag, from, length,
1287					  hh_len, fragheaderlen,
1288					  transhdrlen, mtu, flags, rt);
1289		if (err)
1290			goto error;
1291		return 0;
1292	}
1293
1294	if (!skb)
1295		goto alloc_new_skb;
1296
1297	while (length > 0) {
1298		/* Check if the remaining data fits into current packet. */
1299		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1300		if (copy < length)
1301			copy = maxfraglen - skb->len;
1302
1303		if (copy <= 0) {
1304			char *data;
1305			unsigned int datalen;
1306			unsigned int fraglen;
1307			unsigned int fraggap;
1308			unsigned int alloclen;
1309alloc_new_skb:
1310			/* There's no room in the current skb */
1311			if (skb)
1312				fraggap = skb->len - maxfraglen;
1313			else
1314				fraggap = 0;
1315			/* update mtu and maxfraglen if necessary */
1316			if (skb == NULL || skb_prev == NULL)
1317				ip6_append_data_mtu(&mtu, &maxfraglen,
1318						    fragheaderlen, skb, rt,
1319						    orig_mtu);
1320
1321			skb_prev = skb;
1322
1323			/*
1324			 * If remaining data exceeds the mtu,
1325			 * we know we need more fragment(s).
1326			 */
1327			datalen = length + fraggap;
1328
1329			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1330				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1331			if ((flags & MSG_MORE) &&
 1332			    !(rt->dst.dev->features & NETIF_F_SG))
1333				alloclen = mtu;
1334			else
1335				alloclen = datalen + fragheaderlen;
1336
1337			alloclen += dst_exthdrlen;
1338
1339			if (datalen != length + fraggap) {
1340				/*
 1341				 * this is not the last fragment; the trailer
1342				 * space is regarded as data space.
1343				 */
1344				datalen += rt->dst.trailer_len;
1345			}
1346
1347			alloclen += rt->dst.trailer_len;
1348			fraglen = datalen + fragheaderlen;
1349
1350			/*
1351			 * We just reserve space for fragment header.
1352			 * Note: this may be overallocation if the message
1353			 * (without MSG_MORE) fits into the MTU.
1354			 */
1355			alloclen += sizeof(struct frag_hdr);
1356
1357			if (transhdrlen) {
1358				skb = sock_alloc_send_skb(sk,
1359						alloclen + hh_len,
1360						(flags & MSG_DONTWAIT), &err);
1361			} else {
1362				skb = NULL;
1363				if (atomic_read(&sk->sk_wmem_alloc) <=
1364				    2 * sk->sk_sndbuf)
1365					skb = sock_wmalloc(sk,
1366							   alloclen + hh_len, 1,
1367							   sk->sk_allocation);
1368				if (unlikely(skb == NULL))
1369					err = -ENOBUFS;
1370				else {
1371					/* Only the initial fragment
1372					 * is time stamped.
1373					 */
1374					tx_flags = 0;
1375				}
1376			}
1377			if (skb == NULL)
1378				goto error;
1379			/*
1380			 *	Fill in the control structures
1381			 */
1382			skb->protocol = htons(ETH_P_IPV6);
1383			skb->ip_summed = CHECKSUM_NONE;
1384			skb->csum = 0;
1385			/* reserve for fragmentation and ipsec header */
1386			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1387				    dst_exthdrlen);
1388
1389			if (sk->sk_type == SOCK_DGRAM)
1390				skb_shinfo(skb)->tx_flags = tx_flags;
1391
1392			/*
1393			 *	Find where to start putting bytes
1394			 */
1395			data = skb_put(skb, fraglen);
1396			skb_set_network_header(skb, exthdrlen);
1397			data += fragheaderlen;
1398			skb->transport_header = (skb->network_header +
1399						 fragheaderlen);
1400			if (fraggap) {
1401				skb->csum = skb_copy_and_csum_bits(
1402					skb_prev, maxfraglen,
1403					data + transhdrlen, fraggap, 0);
1404				skb_prev->csum = csum_sub(skb_prev->csum,
1405							  skb->csum);
1406				data += fraggap;
1407				pskb_trim_unique(skb_prev, maxfraglen);
1408			}
1409			copy = datalen - transhdrlen - fraggap;
1410
1411			if (copy < 0) {
1412				err = -EINVAL;
1413				kfree_skb(skb);
1414				goto error;
1415			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1416				err = -EFAULT;
1417				kfree_skb(skb);
1418				goto error;
1419			}
1420
1421			offset += copy;
1422			length -= datalen - fraggap;
1423			transhdrlen = 0;
1424			exthdrlen = 0;
1425			dst_exthdrlen = 0;
1426
1427			/*
1428			 * Put the packet on the pending queue
1429			 */
1430			__skb_queue_tail(&sk->sk_write_queue, skb);
1431			continue;
1432		}
1433
1434		if (copy > length)
1435			copy = length;
1436
 1437		if (!(rt->dst.dev->features & NETIF_F_SG)) {
1438			unsigned int off;
1439
1440			off = skb->len;
1441			if (getfrag(from, skb_put(skb, copy),
1442						offset, copy, off, skb) < 0) {
1443				__skb_trim(skb, off);
1444				err = -EFAULT;
1445				goto error;
1446			}
1447		} else {
1448			int i = skb_shinfo(skb)->nr_frags;
1449			struct page_frag *pfrag = sk_page_frag(sk);
1450
1451			err = -ENOMEM;
1452			if (!sk_page_frag_refill(sk, pfrag))
1453				goto error;
1454
1455			if (!skb_can_coalesce(skb, i, pfrag->page,
1456					      pfrag->offset)) {
1457				err = -EMSGSIZE;
1458				if (i == MAX_SKB_FRAGS)
1459					goto error;
1460
1461				__skb_fill_page_desc(skb, i, pfrag->page,
1462						     pfrag->offset, 0);
1463				skb_shinfo(skb)->nr_frags = ++i;
1464				get_page(pfrag->page);
1465			}
1466			copy = min_t(int, copy, pfrag->size - pfrag->offset);
1467			if (getfrag(from,
1468				    page_address(pfrag->page) + pfrag->offset,
1469				    offset, copy, skb->len, skb) < 0)
1470				goto error_efault;
1471
1472			pfrag->offset += copy;
1473			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1474			skb->len += copy;
1475			skb->data_len += copy;
1476			skb->truesize += copy;
1477			atomic_add(copy, &sk->sk_wmem_alloc);
1478		}
1479		offset += copy;
1480		length -= copy;
1481	}
1482
1483	return 0;
1484
1485error_efault:
1486	err = -EFAULT;
1487error:
1488	cork->length -= length;
1489	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1490	return err;
1491}
1492EXPORT_SYMBOL_GPL(ip6_append_data);
1493
1494static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1495{
1496	if (np->cork.opt) {
1497		kfree(np->cork.opt->dst0opt);
1498		kfree(np->cork.opt->dst1opt);
1499		kfree(np->cork.opt->hopopt);
1500		kfree(np->cork.opt->srcrt);
1501		kfree(np->cork.opt);
1502		np->cork.opt = NULL;
1503	}
1504
1505	if (inet->cork.base.dst) {
1506		dst_release(inet->cork.base.dst);
1507		inet->cork.base.dst = NULL;
1508		inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1509	}
1510	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1511}
1512
1513int ip6_push_pending_frames(struct sock *sk)
1514{
1515	struct sk_buff *skb, *tmp_skb;
1516	struct sk_buff **tail_skb;
1517	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1518	struct inet_sock *inet = inet_sk(sk);
1519	struct ipv6_pinfo *np = inet6_sk(sk);
1520	struct net *net = sock_net(sk);
1521	struct ipv6hdr *hdr;
1522	struct ipv6_txoptions *opt = np->cork.opt;
1523	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
1524	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1525	unsigned char proto = fl6->flowi6_proto;
1526	int err = 0;
1527
1528	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1529		goto out;
1530	tail_skb = &(skb_shinfo(skb)->frag_list);
1531
1532	/* move skb->data to ip header from ext header */
1533	if (skb->data < skb_network_header(skb))
1534		__skb_pull(skb, skb_network_offset(skb));
1535	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1536		__skb_pull(tmp_skb, skb_network_header_len(skb));
1537		*tail_skb = tmp_skb;
1538		tail_skb = &(tmp_skb->next);
1539		skb->len += tmp_skb->len;
1540		skb->data_len += tmp_skb->len;
1541		skb->truesize += tmp_skb->truesize;
1542		tmp_skb->destructor = NULL;
1543		tmp_skb->sk = NULL;
1544	}
1545
1546	/* Allow local fragmentation. */
1547	skb->local_df = ip6_sk_local_df(sk);
1548
1549	*final_dst = fl6->daddr;
1550	__skb_pull(skb, skb_network_header_len(skb));
1551	if (opt && opt->opt_flen)
1552		ipv6_push_frag_opts(skb, opt, &proto);
1553	if (opt && opt->opt_nflen)
1554		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1555
1556	skb_push(skb, sizeof(struct ipv6hdr));
1557	skb_reset_network_header(skb);
1558	hdr = ipv6_hdr(skb);
1559
1560	ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
1561	hdr->hop_limit = np->cork.hop_limit;
1562	hdr->nexthdr = proto;
1563	hdr->saddr = fl6->saddr;
1564	hdr->daddr = *final_dst;
1565
1566	skb->priority = sk->sk_priority;
1567	skb->mark = sk->sk_mark;
1568
1569	skb_dst_set(skb, dst_clone(&rt->dst));
1570	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1571	if (proto == IPPROTO_ICMPV6) {
1572		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1573
1574		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1575		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1576	}
1577
1578	err = ip6_local_out(skb);
1579	if (err) {
1580		if (err > 0)
1581			err = net_xmit_errno(err);
1582		if (err)
1583			goto error;
1584	}
1585
1586out:
1587	ip6_cork_release(inet, np);
1588	return err;
1589error:
1590	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1591	goto out;
1592}
1593EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1594
1595void ip6_flush_pending_frames(struct sock *sk)
1596{
1597	struct sk_buff *skb;
1598
1599	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1600		if (skb_dst(skb))
1601			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1602				      IPSTATS_MIB_OUTDISCARDS);
1603		kfree_skb(skb);
1604	}
1605
1606	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1607}
1608EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);