ip6_output.c - net/ipv6/ip6_output.c - Linux diff v4.17

   1/*
   2 *	IPv6 output functions
   3 *	Linux INET6 implementation
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	Based on linux/net/ipv4/ip_output.c
   9 *
  10 *	This program is free software; you can redistribute it and/or
  11 *      modify it under the terms of the GNU General Public License
  12 *      as published by the Free Software Foundation; either version
  13 *      2 of the License, or (at your option) any later version.
  14 *
  15 *	Changes:
   16 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
  17 *				extension headers are implemented.
  18 *				route changes now work.
  19 *				ip6_forward does not confuse sniffers.
  20 *				etc.
  21 *
  22 *      H. von Brand    :       Added missing #include <linux/string.h>
  23 *	Imran Patel	:	frag id should be in NBO
  24 *      Kazunori MIYAZAWA @USAGI
  25 *			:       add ip6_append_data and related functions
  26 *				for datagram xmit
  27 */
  28
  29#include <linux/errno.h>
  30#include <linux/kernel.h>
  31#include <linux/string.h>
  32#include <linux/socket.h>
  33#include <linux/net.h>
  34#include <linux/netdevice.h>
  35#include <linux/if_arp.h>
  36#include <linux/in6.h>
  37#include <linux/tcp.h>
  38#include <linux/route.h>
  39#include <linux/module.h>
  40#include <linux/slab.h>
  41
  42#include <linux/bpf-cgroup.h>
  43#include <linux/netfilter.h>
  44#include <linux/netfilter_ipv6.h>
  45
  46#include <net/sock.h>
  47#include <net/snmp.h>
  48
  49#include <net/ipv6.h>
  50#include <net/ndisc.h>
  51#include <net/protocol.h>
  52#include <net/ip6_route.h>
  53#include <net/addrconf.h>
  54#include <net/rawv6.h>
  55#include <net/icmp.h>
  56#include <net/xfrm.h>
  57#include <net/checksum.h>
  58#include <linux/mroute6.h>
  59#include <net/l3mdev.h>
  60#include <net/lwtunnel.h>
  61
/*
 * Last transmit step for an IPv6 skb: handle multicast loopback and scope
 * checks, let a lightweight tunnel take the packet if the route asks for
 * it, then look up (or create) the next-hop neighbour entry and pass the
 * skb to neigh_output().
 *
 * Returns 0 when the skb is dropped on one of the multicast paths, the
 * lwtunnel_xmit() result when that is negative or LWTUNNEL_XMIT_DONE, the
 * neigh_output() result on the normal path, or -EINVAL (skb freed,
 * OUTNOROUTES counted) when no neighbour entry could be obtained.
 */
   62static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
   63{
   64	struct dst_entry *dst = skb_dst(skb);
   65	struct net_device *dev = dst->dev;
   66	struct neighbour *neigh;
   67	struct in6_addr *nexthop;
   68	int ret;
   69
 
 
 
   70	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
   71		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
   72
	/* Loop a clone back locally when the socket allows multicast
	 * loopback and either a multicast-routing socket wants it (and the
	 * packet was not forwarded) or the device has joined the group.
	 */
   73		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
   74		    ((mroute6_is_socket(net, skb) &&
   75		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
   76		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
   77					 &ipv6_hdr(skb)->saddr))) {
   78			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
   79
   80			/* Do not check for IFF_ALLMULTI; multicast routing
   81			   is not supported in any case.
   82			 */
   83			if (newskb)
   84				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
   85					net, sk, newskb, NULL, newskb->dev,
   86					dev_loopback_xmit);
   87
		/* With a hop limit of 0 only the looped-back clone is
		 * delivered; the original is discarded here.
		 */
   88			if (ipv6_hdr(skb)->hop_limit == 0) {
   89				IP6_INC_STATS(net, idev,
   90					      IPSTATS_MIB_OUTDISCARDS);
   91				kfree_skb(skb);
   92				return 0;
   93			}
   94		}
   95
   96		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
   97
	/* Node-local (or narrower) multicast scope is only sent on a
	 * loopback device; anything else is silently dropped.
	 */
   98		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
   99		    IPV6_ADDR_SCOPE_NODELOCAL &&
  100		    !(dev->flags & IFF_LOOPBACK)) {
  101			kfree_skb(skb);
  102			return 0;
  103		}
  104	}
  105
	/* A negative result or LWTUNNEL_XMIT_DONE ends processing here;
	 * any other result falls through to the neighbour output below.
	 */
  106	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
  107		int res = lwtunnel_xmit(skb);
  108
  109		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
  110			return res;
  111	}
  112
	/* The neighbour is used only inside this rcu_read_lock_bh() section
	 * (the lookup helper is the "_noref" variant).
	 */
  113	rcu_read_lock_bh();
  114	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
  115	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
  116	if (unlikely(!neigh))
  117		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
  118	if (!IS_ERR(neigh)) {
  119		sock_confirm_neigh(skb, neigh);
  120		ret = neigh_output(neigh, skb);
  121		rcu_read_unlock_bh();
  122		return ret;
  123	}
  124	rcu_read_unlock_bh();
  125
	/* __neigh_create() returned an error pointer: count and drop. */
  126	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
  127	kfree_skb(skb);
  128	return -EINVAL;
  129}
 130
 131static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 132{
 133	int ret;
 134
 135	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
 136	if (ret) {
 137		kfree_skb(skb);
 138		return ret;
 139	}
 140
 141#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 142	/* Policy lookup after SNAT yielded a new policy */
 143	if (skb_dst(skb)->xfrm) {
 144		IPCB(skb)->flags |= IPSKB_REROUTED;
 145		return dst_output(net, sk, skb);
 146	}
 147#endif
 148
 149	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 150	    dst_allfrag(skb_dst(skb)) ||
 151	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
 152		return ip6_fragment(net, sk, skb, ip6_finish_output2);
 153	else
 154		return ip6_finish_output2(net, sk, skb);
 155}
 156
 157int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 158{
 159	struct net_device *dev = skb_dst(skb)->dev;
 160	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 161
 162	skb->protocol = htons(ETH_P_IPV6);
 163	skb->dev = dev;
 164
 165	if (unlikely(idev->cnf.disable_ipv6)) {
 166		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 167		kfree_skb(skb);
 168		return 0;
 169	}
 170
 171	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
 172			    net, sk, skb, NULL, dev,
 173			    ip6_finish_output,
 174			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 175}
 176
 177bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
 178{
 179	if (!np->autoflowlabel_set)
 180		return ip6_default_np_autolabel(net);
 181	else
 182		return np->autoflowlabel;
 183}
 184
 185/*
 186 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 187 * Note : socket lock is not held for SYNACK packets, but might be modified
 188 * by calls to skb_set_owner_w() and ipv6_local_error(),
 189 * which are using proper atomic operations or spinlocks.
 190 */
 191int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 192	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
 193{
 194	struct net *net = sock_net(sk);
 195	const struct ipv6_pinfo *np = inet6_sk(sk);
 196	struct in6_addr *first_hop = &fl6->daddr;
 197	struct dst_entry *dst = skb_dst(skb);
 198	struct ipv6hdr *hdr;
 199	u8  proto = fl6->flowi6_proto;
 200	int seg_len = skb->len;
 201	int hlimit = -1;
 202	u32 mtu;
 203
 204	if (opt) {
 205		unsigned int head_room;
 206
 207		/* First: exthdrs may take lots of space (~8K for now)
 208		   MAX_HEADER is not enough.
 209		 */
 210		head_room = opt->opt_nflen + opt->opt_flen;
 211		seg_len += head_room;
 212		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
 213
 214		if (skb_headroom(skb) < head_room) {
 215			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 216			if (!skb2) {
 217				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 218					      IPSTATS_MIB_OUTDISCARDS);
 219				kfree_skb(skb);
 220				return -ENOBUFS;
 221			}
 222			consume_skb(skb);
 223			skb = skb2;
 224			/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
 225			 * it is safe to call in our context (socket lock not held)
 226			 */
 227			skb_set_owner_w(skb, (struct sock *)sk);
 228		}
 229		if (opt->opt_flen)
 230			ipv6_push_frag_opts(skb, opt, &proto);
 231		if (opt->opt_nflen)
 232			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
 233					     &fl6->saddr);
 234	}
 235
 236	skb_push(skb, sizeof(struct ipv6hdr));
 237	skb_reset_network_header(skb);
 238	hdr = ipv6_hdr(skb);
 239
 240	/*
 241	 *	Fill in the IPv6 header
 242	 */
 243	if (np)
 244		hlimit = np->hop_limit;
 245	if (hlimit < 0)
 246		hlimit = ip6_dst_hoplimit(dst);
 247
 248	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
 249				ip6_autoflowlabel(net, np), fl6));
 250
 251	hdr->payload_len = htons(seg_len);
 252	hdr->nexthdr = proto;
 253	hdr->hop_limit = hlimit;
 254
 255	hdr->saddr = fl6->saddr;
 256	hdr->daddr = *first_hop;
 257
 258	skb->protocol = htons(ETH_P_IPV6);
 259	skb->priority = sk->sk_priority;
 260	skb->mark = mark;
 261
 262	mtu = dst_mtu(dst);
 263	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
 264		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
 265			      IPSTATS_MIB_OUT, skb->len);
 266
 267		/* if egress device is enslaved to an L3 master device pass the
 268		 * skb to its handler for processing
 269		 */
 270		skb = l3mdev_ip6_out((struct sock *)sk, skb);
 271		if (unlikely(!skb))
 272			return 0;
 273
 274		/* hooks should never assume socket lock is held.
 275		 * we promote our socket to non const
 276		 */
 277		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 278			       net, (struct sock *)sk, skb, NULL, dst->dev,
 279			       dst_output);
 280	}
 281
 282	skb->dev = dst->dev;
 283	/* ipv6_local_error() does not require socket lock,
 284	 * we promote our socket to non const
 285	 */
 286	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
 287
 288	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
 289	kfree_skb(skb);
 290	return -EMSGSIZE;
 291}
 292EXPORT_SYMBOL(ip6_xmit);
 293
 294static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 295{
 296	struct ip6_ra_chain *ra;
 297	struct sock *last = NULL;
 298
 299	read_lock(&ip6_ra_lock);
 300	for (ra = ip6_ra_chain; ra; ra = ra->next) {
 301		struct sock *sk = ra->sk;
 302		if (sk && ra->sel == sel &&
 303		    (!sk->sk_bound_dev_if ||
 304		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
 305			if (last) {
 306				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 307				if (skb2)
 308					rawv6_rcv(last, skb2);
 309			}
 310			last = sk;
 311		}
 312	}
 313
 314	if (last) {
 315		rawv6_rcv(last, skb);
 316		read_unlock(&ip6_ra_lock);
 317		return 1;
 318	}
 319	read_unlock(&ip6_ra_lock);
 320	return 0;
 321}
 322
 323static int ip6_forward_proxy_check(struct sk_buff *skb)
 324{
 325	struct ipv6hdr *hdr = ipv6_hdr(skb);
 326	u8 nexthdr = hdr->nexthdr;
 327	__be16 frag_off;
 328	int offset;
 329
 330	if (ipv6_ext_hdr(nexthdr)) {
 331		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
 332		if (offset < 0)
 333			return 0;
 334	} else
 335		offset = sizeof(struct ipv6hdr);
 336
 337	if (nexthdr == IPPROTO_ICMPV6) {
 338		struct icmp6hdr *icmp6;
 339
 340		if (!pskb_may_pull(skb, (skb_network_header(skb) +
 341					 offset + 1 - skb->data)))
 342			return 0;
 343
 344		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 345
 346		switch (icmp6->icmp6_type) {
 347		case NDISC_ROUTER_SOLICITATION:
 348		case NDISC_ROUTER_ADVERTISEMENT:
 349		case NDISC_NEIGHBOUR_SOLICITATION:
 350		case NDISC_NEIGHBOUR_ADVERTISEMENT:
 351		case NDISC_REDIRECT:
 352			/* For reaction involving unicast neighbor discovery
 353			 * message destined to the proxied address, pass it to
 354			 * input function.
 355			 */
 356			return 1;
 357		default:
 358			break;
 359		}
 360	}
 361
 362	/*
 363	 * The proxying router can't forward traffic sent to a link-local
 364	 * address, so signal the sender and discard the packet. This
 365	 * behavior is clarified by the MIPv6 specification.
 366	 */
 367	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
 368		dst_link_failure(skb);
 369		return -1;
 370	}
 371
 372	return 0;
 373}
 374
 375static inline int ip6_forward_finish(struct net *net, struct sock *sk,
 376				     struct sk_buff *skb)
 377{
 378	struct dst_entry *dst = skb_dst(skb);
 379
 380	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 381	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
 382
 383	return dst_output(net, sk, skb);
 384}
 385
 386unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
 387{
 388	unsigned int mtu;
 389	struct inet6_dev *idev;
 390
 391	if (dst_metric_locked(dst, RTAX_MTU)) {
 392		mtu = dst_metric_raw(dst, RTAX_MTU);
 393		if (mtu)
 394			return mtu;
 395	}
 396
 397	mtu = IPV6_MIN_MTU;
 398	rcu_read_lock();
 399	idev = __in6_dev_get(dst->dev);
 400	if (idev)
 401		mtu = idev->cnf.mtu6;
 402	rcu_read_unlock();
 403
 404	return mtu;
 405}
 406EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
 407
 408static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 409{
 410	if (skb->len <= mtu)
 411		return false;
 412
 413	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
 414	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
 415		return true;
 416
 417	if (skb->ignore_df)
 418		return false;
 419
 420	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
 421		return false;
 422
 423	return true;
 424}
 425
/*
 * Forward a received IPv6 packet along the route attached to the skb.
 *
 * The packet is dropped with -EINVAL when forwarding is disabled, when
 * skb->pkt_type is not PACKET_HOST, when the skb is owned by a socket, or
 * when the LRO, xfrm policy, xfrm route or source-address checks fail.
 * A hop limit of 1 or less produces an ICMPv6 Time Exceeded and
 * -ETIMEDOUT; a packet too big for the forwarding MTU produces an ICMPv6
 * Packet Too Big and -EMSGSIZE.  Router Alert packets taken by
 * ip6_call_ra_chain() return 0, and packets for a proxied neighbour may be
 * handed to ip6_input().  Otherwise the hop limit is decremented and the
 * skb goes through the NF_INET_FORWARD hook to ip6_forward_finish().
 *
 * The skb is consumed on every path.
 */
  426int ip6_forward(struct sk_buff *skb)
  427{
  428	struct dst_entry *dst = skb_dst(skb);
  429	struct ipv6hdr *hdr = ipv6_hdr(skb);
  430	struct inet6_skb_parm *opt = IP6CB(skb);
  431	struct net *net = dev_net(dst->dev);
  432	u32 mtu;
  433
  434	if (net->ipv6.devconf_all->forwarding == 0)
  435		goto error;
  436
  437	if (skb->pkt_type != PACKET_HOST)
  438		goto drop;
  439
  440	if (unlikely(skb->sk))
  441		goto drop;
  442
  443	if (skb_warn_if_lro(skb))
  444		goto drop;
  445
  446	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
  447		__IP6_INC_STATS(net, ip6_dst_idev(dst),
  448				IPSTATS_MIB_INDISCARDS);
  449		goto drop;
  450	}
  451
  452	skb_forward_csum(skb);
  453
  454	/*
  455	 *	We DO NOT make any processing on
  456	 *	RA packets, pushing them to user level AS IS
  457	 *	without any WARRANTY that application will be able
  458	 *	to interpret them. The reason is that we
  459	 *	cannot make anything clever here.
  460	 *
  461	 *	We are not end-node, so that if packet contains
  462	 *	AH/ESP, we cannot make anything.
  463	 *	Defragmentation also would be mistake, RA packets
  464	 *	cannot be fragmented, because there is no warranty
  465	 *	that different fragments will go along one path. --ANK
  466	 */
  467	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
  468		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
  469			return 0;
  470	}
  471
  472	/*
  473	 *	check and decrement ttl
  474	 */
  475	if (hdr->hop_limit <= 1) {
  476		/* Force OUTPUT device used as source address */
  477		skb->dev = dst->dev;
  478		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
  479		__IP6_INC_STATS(net, ip6_dst_idev(dst),
  480				IPSTATS_MIB_INHDRERRORS);
  481
  482		kfree_skb(skb);
  483		return -ETIMEDOUT;
  484	}
  485
  486	/* XXX: idev->cnf.proxy_ndp? */
  487	if (net->ipv6.devconf_all->proxy_ndp &&
  488	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
  489		int proxied = ip6_forward_proxy_check(skb);
		/* > 0: deliver locally, < 0: drop, 0: keep forwarding */
  490		if (proxied > 0)
  491			return ip6_input(skb);
  492		else if (proxied < 0) {
  493			__IP6_INC_STATS(net, ip6_dst_idev(dst),
  494					IPSTATS_MIB_INDISCARDS);
  495			goto drop;
  496		}
  497	}
  498
  499	if (!xfrm6_route_forward(skb)) {
  500		__IP6_INC_STATS(net, ip6_dst_idev(dst),
  501				IPSTATS_MIB_INDISCARDS);
  502		goto drop;
  503	}
	/* Re-read the dst after the xfrm route step above */
  504	dst = skb_dst(skb);
  505
  506	/* IPv6 specs say nothing about it, but it is clear that we cannot
  507	   send redirects to source routed frames.
  508	   We don't send redirects to frames decapsulated from IPsec.
  509	 */
  510	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
  511		struct in6_addr *target = NULL;
  512		struct inet_peer *peer;
  513		struct rt6_info *rt;
  514
  515		/*
  516		 *	incoming and outgoing devices are the same
  517		 *	send a redirect.
  518		 */
  519
  520		rt = (struct rt6_info *) dst;
  521		if (rt->rt6i_flags & RTF_GATEWAY)
  522			target = &rt->rt6i_gateway;
  523		else
  524			target = &hdr->daddr;
  525
  526		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
  527
  528		/* Limit redirects both by destination (here)
  529		   and by source (inside ndisc_send_redirect)
  530		 */
  531		if (inet_peer_xrlim_allow(peer, 1*HZ))
  532			ndisc_send_redirect(skb, target);
  533		if (peer)
  534			inet_putpeer(peer);
  535	} else {
  536		int addrtype = ipv6_addr_type(&hdr->saddr);
  537
  538		/* This check is security critical. */
  539		if (addrtype == IPV6_ADDR_ANY ||
  540		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
  541			goto error;
  542		if (addrtype & IPV6_ADDR_LINKLOCAL) {
  543			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
  544				    ICMPV6_NOT_NEIGHBOUR, 0);
  545			goto error;
  546		}
  547	}
  548
	/* Never use a forwarding MTU below the IPv6 minimum */
  549	mtu = ip6_dst_mtu_forward(dst);
  550	if (mtu < IPV6_MIN_MTU)
  551		mtu = IPV6_MIN_MTU;
  552
  553	if (ip6_pkt_too_big(skb, mtu)) {
  554		/* Again, force OUTPUT device used as source address */
  555		skb->dev = dst->dev;
  556		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
  557		__IP6_INC_STATS(net, ip6_dst_idev(dst),
  558				IPSTATS_MIB_INTOOBIGERRORS);
  559		__IP6_INC_STATS(net, ip6_dst_idev(dst),
  560				IPSTATS_MIB_FRAGFAILS);
  561		kfree_skb(skb);
  562		return -EMSGSIZE;
  563	}
  564
  565	if (skb_cow(skb, dst->dev->hard_header_len)) {
  566		__IP6_INC_STATS(net, ip6_dst_idev(dst),
  567				IPSTATS_MIB_OUTDISCARDS);
  568		goto drop;
  569	}
  570
	/* Header pointer is fetched again after skb_cow() before writing */
  571	hdr = ipv6_hdr(skb);
  572
  573	/* Mangling hops number delayed to point after skb COW */
  574
  575	hdr->hop_limit--;
  576
 
 
  577	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
  578		       net, NULL, skb, skb->dev, dst->dev,
  579		       ip6_forward_finish);
  580
	/* "error" counts an address error and then falls into "drop" */
  581error:
  582	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
  583drop:
  584	kfree_skb(skb);
  585	return -EINVAL;
  586}
 587
 588static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 589{
 590	to->pkt_type = from->pkt_type;
 591	to->priority = from->priority;
 592	to->protocol = from->protocol;
 593	skb_dst_drop(to);
 594	skb_dst_set(to, dst_clone(skb_dst(from)));
 595	to->dev = from->dev;
 596	to->mark = from->mark;
 597
 598#ifdef CONFIG_NET_SCHED
 599	to->tc_index = from->tc_index;
 600#endif
 601	nf_copy(to, from);
 602	skb_copy_secmark(to, from);
 603}
 604
/*
 * Split @skb into IPv6 fragments and pass each of them to @output.
 *
 * Two strategies are used.  If the skb already carries a frag_list whose
 * members have suitable sizes and headroom (checked below), every list
 * member is turned into a fragment in place ("fast path").  Otherwise new
 * skbs are allocated and the payload is copied into them piece by piece
 * ("slow path").
 *
 * Returns 0 on success or a negative errno.  On the fail and fail_toobig
 * paths the skb is freed here and FRAGFAILS is counted; fail_toobig also
 * sends an ICMPv6 Packet Too Big and returns -EMSGSIZE.
 */
  605int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
  606		 int (*output)(struct net *, struct sock *, struct sk_buff *))
  607{
  608	struct sk_buff *frag;
  609	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
  610	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
  611				inet6_sk(skb->sk) : NULL;
  612	struct ipv6hdr *tmp_hdr;
  613	struct frag_hdr *fh;
  614	unsigned int mtu, hlen, left, len;
  615	int hroom, troom;
  616	__be32 frag_id;
  617	int ptr, offset = 0, err = 0;
  618	u8 *prevhdr, nexthdr = 0;
  619
	/* hlen: length of the headers that are repeated in every fragment;
	 * prevhdr: the "next header" byte that must point at the fragment
	 * header; nexthdr: the value that byte held before.
	 */
  620	err = ip6_find_1stfragopt(skb, &prevhdr);
  621	if (err < 0)
  622		goto fail;
  623	hlen = err;
  624	nexthdr = *prevhdr;
  625
  626	mtu = ip6_skb_dst_mtu(skb);
  627
  628	/* We must not fragment if the socket is set to force MTU discovery
  629	 * or if the skb is not generated by a local socket.
  630	 */
  631	if (unlikely(!skb->ignore_df && skb->len > mtu))
  632		goto fail_toobig;
  633
  634	if (IP6CB(skb)->frag_max_size) {
  635		if (IP6CB(skb)->frag_max_size > mtu)
  636			goto fail_toobig;
  637
  638		/* don't send fragments larger than what we received */
  639		mtu = IP6CB(skb)->frag_max_size;
  640		if (mtu < IPV6_MIN_MTU)
  641			mtu = IPV6_MIN_MTU;
  642	}
  643
	/* A non-zero per-socket frag_size smaller than the MTU wins */
  644	if (np && np->frag_size < mtu) {
  645		if (np->frag_size)
  646			mtu = np->frag_size;
  647	}
	/* Need room for the headers, a fragment header and 8 payload bytes */
  648	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
  649		goto fail_toobig;
	/* From here on mtu is the payload room available per fragment */
  650	mtu -= hlen + sizeof(struct frag_hdr);
  651
  652	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
  653				    &ipv6_hdr(skb)->saddr);
  654
  655	if (skb->ip_summed == CHECKSUM_PARTIAL &&
  656	    (err = skb_checksum_help(skb)))
  657		goto fail;
  658
  659	hroom = LL_RESERVED_SPACE(rt->dst.dev);
  660	if (skb_has_frag_list(skb)) {
  661		unsigned int first_len = skb_pagelen(skb);
  662		struct sk_buff *frag2;
  663
		/* Fast path requires: head fits and is 8-byte aligned in
		 * payload length, is not cloned, and has headroom for the
		 * fragment header.
		 */
  664		if (first_len - hlen > mtu ||
  665		    ((first_len - hlen) & 7) ||
  666		    skb_cloned(skb) ||
  667		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
  668			goto slow_path;
  669
  670		skb_walk_frags(skb, frag) {
  671			/* Correct geometry. */
  672			if (frag->len > mtu ||
  673			    ((frag->len & 7) && frag->next) ||
  674			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
  675				goto slow_path_clean;
  676
  677			/* Partially cloned skb? */
  678			if (skb_shared(frag))
  679				goto slow_path_clean;
  680
  681			BUG_ON(frag->sk);
			/* Move the memory accounting from the head skb to
			 * the individual fragment.
			 */
  682			if (skb->sk) {
  683				frag->sk = skb->sk;
  684				frag->destructor = sock_wfree;
  685			}
  686			skb->truesize -= frag->truesize;
  687		}
  688
  689		err = 0;
  690		offset = 0;
  691		/* BUILD HEADER */
  692
  693		*prevhdr = NEXTHDR_FRAGMENT;
		/* Private copy of the headers, replayed into every fragment */
  694		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
  695		if (!tmp_hdr) {
 
 
  696			err = -ENOMEM;
  697			goto fail;
  698		}
  699		frag = skb_shinfo(skb)->frag_list;
  700		skb_frag_list_init(skb);
  701
		/* Open a gap for the fragment header between the copied
		 * headers and the payload of the head skb.
		 */
  702		__skb_pull(skb, hlen);
  703		fh = __skb_push(skb, sizeof(struct frag_hdr));
  704		__skb_push(skb, hlen);
  705		skb_reset_network_header(skb);
  706		memcpy(skb_network_header(skb), tmp_hdr, hlen);
  707
  708		fh->nexthdr = nexthdr;
  709		fh->reserved = 0;
  710		fh->frag_off = htons(IP6_MF);
  711		fh->identification = frag_id;
  712
  713		first_len = skb_pagelen(skb);
  714		skb->data_len = first_len - skb_headlen(skb);
  715		skb->len = first_len;
  716		ipv6_hdr(skb)->payload_len = htons(first_len -
  717						   sizeof(struct ipv6hdr));
  718
 
 
  719		for (;;) {
  720			/* Prepare header of the next frame,
  721			 * before previous one went down. */
  722			if (frag) {
  723				frag->ip_summed = CHECKSUM_NONE;
  724				skb_reset_transport_header(frag);
  725				fh = __skb_push(frag, sizeof(struct frag_hdr));
  726				__skb_push(frag, hlen);
  727				skb_reset_network_header(frag);
  728				memcpy(skb_network_header(frag), tmp_hdr,
  729				       hlen);
				/* Advance by the payload of the fragment
				 * that is about to be sent.
				 */
  730				offset += skb->len - hlen - sizeof(struct frag_hdr);
  731				fh->nexthdr = nexthdr;
  732				fh->reserved = 0;
  733				fh->frag_off = htons(offset);
  734				if (frag->next)
  735					fh->frag_off |= htons(IP6_MF);
  736				fh->identification = frag_id;
  737				ipv6_hdr(frag)->payload_len =
  738						htons(frag->len -
  739						      sizeof(struct ipv6hdr));
  740				ip6_copy_metadata(frag, skb);
  741			}
  742
  743			err = output(net, sk, skb);
  744			if (!err)
  745				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
  746					      IPSTATS_MIB_FRAGCREATES);
  747
  748			if (err || !frag)
  749				break;
  750
  751			skb = frag;
  752			frag = skb->next;
  753			skb->next = NULL;
  754		}
  755
  756		kfree(tmp_hdr);
  757
  758		if (err == 0) {
  759			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
  760				      IPSTATS_MIB_FRAGOKS);
 
  761			return 0;
  762		}
  763
		/* output() failed: free the fragments not yet handed over */
  764		kfree_skb_list(frag);
  765
  766		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
  767			      IPSTATS_MIB_FRAGFAILS);
 
  768		return err;
  769
		/* Undo the sk/destructor/truesize changes made to the
		 * fragments visited before the one that failed the check.
		 */
  770slow_path_clean:
  771		skb_walk_frags(skb, frag2) {
  772			if (frag2 == frag)
  773				break;
  774			frag2->sk = NULL;
  775			frag2->destructor = NULL;
  776			skb->truesize += frag2->truesize;
  777		}
  778	}
  779
  780slow_path:
  781	left = skb->len - hlen;		/* Space per frame */
  782	ptr = hlen;			/* Where to start from */
  783
  784	/*
  785	 *	Fragment the datagram.
  786	 */
  787
 
  788	troom = rt->dst.dev->needed_tailroom;
  789
  790	/*
  791	 *	Keep copying data until we run out.
  792	 */
  793	while (left > 0)	{
  794		u8 *fragnexthdr_offset;
  795
  796		len = left;
  797		/* IF: it doesn't fit, use 'mtu' - the data space left */
  798		if (len > mtu)
  799			len = mtu;
  800		/* IF: we are not sending up to and including the packet end
  801		   then align the next start on an eight byte boundary */
  802		if (len < left)	{
  803			len &= ~7;
  804		}
  805
  806		/* Allocate buffer */
  807		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
  808				 hroom + troom, GFP_ATOMIC);
  809		if (!frag) {
 
 
  810			err = -ENOMEM;
  811			goto fail;
  812		}
  813
  814		/*
  815		 *	Set up data on packet
  816		 */
  817
  818		ip6_copy_metadata(frag, skb);
  819		skb_reserve(frag, hroom);
  820		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
  821		skb_reset_network_header(frag);
  822		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
  823		frag->transport_header = (frag->network_header + hlen +
  824					  sizeof(struct frag_hdr));
  825
  826		/*
  827		 *	Charge the memory for the fragment to any owner
  828		 *	it might possess
  829		 */
  830		if (skb->sk)
  831			skb_set_owner_w(frag, skb->sk);
  832
  833		/*
  834		 *	Copy the packet header into the new buffer.
  835		 */
  836		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
  837
		/* Patch the copied headers (not the original skb) so that
		 * the byte corresponding to prevhdr announces the fragment
		 * header.
		 */
  838		fragnexthdr_offset = skb_network_header(frag);
  839		fragnexthdr_offset += prevhdr - skb_network_header(skb);
  840		*fragnexthdr_offset = NEXTHDR_FRAGMENT;
  841
  842		/*
  843		 *	Build fragment header.
  844		 */
  845		fh->nexthdr = nexthdr;
  846		fh->reserved = 0;
  847		fh->identification = frag_id;
  848
  849		/*
  850		 *	Copy a block of the IP datagram.
  851		 */
  852		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
  853				     len));
  854		left -= len;
  855
  856		fh->frag_off = htons(offset);
  857		if (left > 0)
  858			fh->frag_off |= htons(IP6_MF);
  859		ipv6_hdr(frag)->payload_len = htons(frag->len -
  860						    sizeof(struct ipv6hdr));
  861
  862		ptr += len;
  863		offset += len;
  864
  865		/*
  866		 *	Put this fragment into the sending queue.
  867		 */
  868		err = output(net, sk, frag);
  869		if (err)
  870			goto fail;
  871
  872		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
  873			      IPSTATS_MIB_FRAGCREATES);
  874	}
  875	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
  876		      IPSTATS_MIB_FRAGOKS);
	/* All data was copied out; the original is no longer needed */
  877	consume_skb(skb);
  878	return err;
  879
  880fail_toobig:
  881	if (skb->sk && dst_allfrag(skb_dst(skb)))
  882		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
  883
 
  884	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
  885	err = -EMSGSIZE;
  886
  887fail:
  888	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
  889		      IPSTATS_MIB_FRAGFAILS);
  890	kfree_skb(skb);
  891	return err;
  892}
 893
 894static inline int ip6_rt_check(const struct rt6key *rt_key,
 895			       const struct in6_addr *fl_addr,
 896			       const struct in6_addr *addr_cache)
 897{
 898	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 899		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
 900}
 901
 902static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 903					  struct dst_entry *dst,
 904					  const struct flowi6 *fl6)
 905{
 906	struct ipv6_pinfo *np = inet6_sk(sk);
 907	struct rt6_info *rt;
 908
 909	if (!dst)
 910		goto out;
 911
 912	if (dst->ops->family != AF_INET6) {
 913		dst_release(dst);
 914		return NULL;
 915	}
 916
 917	rt = (struct rt6_info *)dst;
 918	/* Yes, checking route validity in not connected
 919	 * case is not very simple. Take into account,
 920	 * that we do not support routing by source, TOS,
 921	 * and MSG_DONTROUTE		--ANK (980726)
 922	 *
 923	 * 1. ip6_rt_check(): If route was host route,
 924	 *    check that cached destination is current.
 925	 *    If it is network route, we still may
 926	 *    check its validity using saved pointer
 927	 *    to the last used address: daddr_cache.
 928	 *    We do not want to save whole address now,
 929	 *    (because main consumer of this service
 930	 *    is tcp, which has not this problem),
 931	 *    so that the last trick works only on connected
 932	 *    sockets.
 933	 * 2. oif also should be the same.
 934	 */
 935	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
 936#ifdef CONFIG_IPV6_SUBTREES
 937	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
 938#endif
 939	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
 940	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
 941		dst_release(dst);
 942		dst = NULL;
 943	}
 944
 945out:
 946	return dst;
 947}
 948
/*
 * Common tail of the ip6_dst_lookup*() helpers in this file.
 *
 * Resolves a route for @fl6 into *@dst, reusing the entry already stored
 * in *@dst if there is one, and picks a source address first when
 * @fl6->saddr is unspecified.  Returns 0 with *@dst set on success.  On
 * failure the dst is released, *@dst is set to NULL and a negative errno
 * is returned (OUTNOROUTES is counted for -ENETUNREACH).
 */
  949static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
  950			       struct dst_entry **dst, struct flowi6 *fl6)
  951{
  952#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
  953	struct neighbour *n;
  954	struct rt6_info *rt;
  955#endif
  956	int err;
  957	int flags = 0;
  958
  959	/* The correct way to handle this would be to do
  960	 * ip6_route_get_saddr, and then ip6_route_output; however,
  961	 * the route-specific preferred source forces the
  962	 * ip6_route_output call _before_ ip6_route_get_saddr.
  963	 *
  964	 * In source specific routing (no src=any default route),
  965	 * ip6_route_output will fail given src=any saddr, though, so
  966	 * that's why we try it again later.
  967	 */
  968	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
  969		struct rt6_info *rt;
  970		bool had_dst = *dst != NULL;
  971
  972		if (!had_dst)
  973			*dst = ip6_route_output(net, sk, fl6);
		/* A route carrying an error is not used as a hint for the
		 * source address selection.
		 */
  974		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
  975		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
  976					  sk ? inet6_sk(sk)->srcprefs : 0,
  977					  &fl6->saddr);
  978		if (err)
  979			goto out_err_release;
  980
  981		/* If we had an erroneous initial result, pretend it
  982		 * never existed and let the SA-enabled version take
  983		 * over.
  984		 */
  985		if (!had_dst && (*dst)->error) {
  986			dst_release(*dst);
  987			*dst = NULL;
  988		}
  989
  990		if (fl6->flowi6_oif)
  991			flags |= RT6_LOOKUP_F_IFACE;
  992	}
  993
  994	if (!*dst)
  995		*dst = ip6_route_output_flags(net, sk, fl6, flags);
  996
  997	err = (*dst)->error;
  998	if (err)
  999		goto out_err_release;
 1000
 1001#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 1002	/*
 1003	 * Here if the dst entry we've looked up
 1004	 * has a neighbour entry that is in the INCOMPLETE
 1005	 * state and the src address from the flow is
 1006	 * marked as OPTIMISTIC, we release the found
 1007	 * dst entry and replace it instead with the
 1008	 * dst entry of the nexthop router
 1009	 */
 1010	rt = (struct rt6_info *) *dst;
 1011	rcu_read_lock_bh();
 1012	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
 1013				      rt6_nexthop(rt, &fl6->daddr));
	/* err is only a local marker here: set when a neighbour entry
	 * exists but is not in a NUD_VALID state.
	 */
 1014	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
 1015	rcu_read_unlock_bh();
 1016
 1017	if (err) {
 1018		struct inet6_ifaddr *ifp;
 1019		struct flowi6 fl_gw6;
 1020		int redirect;
 1021
 1022		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
 1023				      (*dst)->dev, 1);
 1024
 1025		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
 1026		if (ifp)
 1027			in6_ifa_put(ifp);
 1028
 1029		if (redirect) {
 1030			/*
 1031			 * We need to get the dst entry for the
 1032			 * default router instead
 1033			 */
 1034			dst_release(*dst);
			/* Same flow with the destination cleared to "any" */
 1035			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
 1036			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
 1037			*dst = ip6_route_output(net, sk, &fl_gw6);
 1038			err = (*dst)->error;
 1039			if (err)
 1040				goto out_err_release;
 1041		}
 1042	}
 1043#endif
	/* A v4-mapped source is only accepted together with a v4-mapped
	 * or unspecified destination.
	 */
 1044	if (ipv6_addr_v4mapped(&fl6->saddr) &&
 1045	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
 1046		err = -EAFNOSUPPORT;
 1047		goto out_err_release;
 1048	}
 1049
 1050	return 0;
 1051
 1052out_err_release:
 1053	dst_release(*dst);
 1054	*dst = NULL;
 1055
 1056	if (err == -ENETUNREACH)
 1057		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
 
 
 1058	return err;
 1059}
1060
1061/**
1062 *	ip6_dst_lookup - perform route lookup on flow
1063 *	@sk: socket which provides route info
1064 *	@dst: pointer to dst_entry * for result
1065 *	@fl6: flow to lookup
1066 *
1067 *	This function performs a route lookup on the given flow.
1068 *
1069 *	It returns zero on success, or a standard errno code on error.
1070 */
1071int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1072		   struct flowi6 *fl6)
1073{
1074	*dst = NULL;
1075	return ip6_dst_lookup_tail(net, sk, dst, fl6);
1076}
1077EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1078
1079/**
1080 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1081 *	@sk: socket which provides route info
1082 *	@fl6: flow to lookup
1083 *	@final_dst: final destination address for ipsec lookup
1084 *
1085 *	This function performs a route lookup on the given flow.
1086 *
1087 *	It returns a valid dst pointer on success, or a pointer encoded
1088 *	error code.
1089 */
1090struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
1091				      const struct in6_addr *final_dst)
1092{
1093	struct dst_entry *dst = NULL;
1094	int err;
1095
1096	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
1097	if (err)
1098		return ERR_PTR(err);
1099	if (final_dst)
1100		fl6->daddr = *final_dst;
 
 
1101
1102	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1103}
1104EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1105
1106/**
1107 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1108 *	@sk: socket which provides the dst cache and route info
1109 *	@fl6: flow to lookup
1110 *	@final_dst: final destination address for ipsec lookup
1111 *	@connected: whether @sk is connected or not
1112 *
1113 *	This function performs a route lookup on the given flow with the
1114 *	possibility of using the cached route in the socket if it is valid.
1115 *	It will take the socket dst lock when operating on the dst cache.
1116 *	As a result, this function can only be used in process context.
1117 *
1118 *	In addition, for a connected socket, cache the dst in the socket
1119 *	if the current cache is not valid.
1120 *
1121 *	It returns a valid dst pointer on success, or a pointer encoded
1122 *	error code.
1123 */
1124struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1125					 const struct in6_addr *final_dst,
1126					 bool connected)
1127{
1128	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
 
1129
1130	dst = ip6_sk_dst_check(sk, dst, fl6);
1131	if (dst)
1132		return dst;
1133
1134	dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
1135	if (connected && !IS_ERR(dst))
1136		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
 
 
1137
1138	return dst;
1139}
1140EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1142static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1143					       gfp_t gfp)
1144{
1145	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1146}
1147
1148static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1149						gfp_t gfp)
1150{
1151	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1152}
1153
1154static void ip6_append_data_mtu(unsigned int *mtu,
1155				int *maxfraglen,
1156				unsigned int fragheaderlen,
1157				struct sk_buff *skb,
1158				struct rt6_info *rt,
1159				unsigned int orig_mtu)
1160{
1161	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1162		if (!skb) {
1163			/* first fragment, reserve header_len */
1164			*mtu = orig_mtu - rt->dst.header_len;
1165
1166		} else {
1167			/*
1168			 * this fragment is not first, the headers
1169			 * space is regarded as data space.
1170			 */
1171			*mtu = orig_mtu;
1172		}
1173		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
1174			      + fragheaderlen - sizeof(struct frag_hdr);
1175	}
1176}
1177
1178static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1179			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
 
1180			  struct rt6_info *rt, struct flowi6 *fl6)
1181{
1182	struct ipv6_pinfo *np = inet6_sk(sk);
1183	unsigned int mtu;
1184	struct ipv6_txoptions *opt = ipc6->opt;
1185
1186	/*
1187	 * setup for corking
1188	 */
1189	if (opt) {
1190		if (WARN_ON(v6_cork->opt))
1191			return -EINVAL;
1192
1193		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1194		if (unlikely(!v6_cork->opt))
1195			return -ENOBUFS;
1196
1197		v6_cork->opt->tot_len = sizeof(*opt);
1198		v6_cork->opt->opt_flen = opt->opt_flen;
1199		v6_cork->opt->opt_nflen = opt->opt_nflen;
1200
1201		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1202						    sk->sk_allocation);
1203		if (opt->dst0opt && !v6_cork->opt->dst0opt)
1204			return -ENOBUFS;
1205
1206		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1207						    sk->sk_allocation);
1208		if (opt->dst1opt && !v6_cork->opt->dst1opt)
1209			return -ENOBUFS;
1210
1211		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1212						   sk->sk_allocation);
1213		if (opt->hopopt && !v6_cork->opt->hopopt)
1214			return -ENOBUFS;
1215
1216		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1217						    sk->sk_allocation);
1218		if (opt->srcrt && !v6_cork->opt->srcrt)
1219			return -ENOBUFS;
1220
1221		/* need source address above miyazawa*/
1222	}
1223	dst_hold(&rt->dst);
1224	cork->base.dst = &rt->dst;
1225	cork->fl.u.ip6 = *fl6;
1226	v6_cork->hop_limit = ipc6->hlimit;
1227	v6_cork->tclass = ipc6->tclass;
1228	if (rt->dst.flags & DST_XFRM_TUNNEL)
1229		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1230		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1231	else
1232		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1233			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1234	if (np->frag_size < mtu) {
1235		if (np->frag_size)
1236			mtu = np->frag_size;
1237	}
1238	if (mtu < IPV6_MIN_MTU)
1239		return -EINVAL;
1240	cork->base.fragsize = mtu;
1241	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1242		cork->base.flags |= IPCORK_ALLFRAG;
1243	cork->base.length = 0;
1244
1245	return 0;
1246}
1247
/*
 * __ip6_append_data - append @length bytes, fetched through @getfrag, to
 * the skbs pending on @queue.
 *
 * Data is copied into the tail skb of @queue while that skb still has
 * room; otherwise a new skb is allocated (label alloc_new_skb) and queued.
 * The size limits are derived from cork->fragsize, the header lengths of
 * the corked route and the corked tx options in @v6_cork.
 *
 * Returns 0 on success.  Returns -EMSGSIZE, after reporting through
 * ipv6_local_rxpmtu()/ipv6_local_error(), when the size checks at the top
 * fail.  Any later failure goes through the error label: cork->length is
 * reduced by the bytes not yet consumed, IPSTATS_MIB_OUTDISCARDS is bumped
 * and the negative errno is returned.
 */
1248static int __ip6_append_data(struct sock *sk,
1249			     struct flowi6 *fl6,
1250			     struct sk_buff_head *queue,
1251			     struct inet_cork *cork,
1252			     struct inet6_cork *v6_cork,
1253			     struct page_frag *pfrag,
1254			     int getfrag(void *from, char *to, int offset,
1255					 int len, int odd, struct sk_buff *skb),
1256			     void *from, int length, int transhdrlen,
1257			     unsigned int flags, struct ipcm6_cookie *ipc6,
1258			     const struct sockcm_cookie *sockc)
1259{
1260	struct sk_buff *skb, *skb_prev = NULL;
1261	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1262	int exthdrlen = 0;
1263	int dst_exthdrlen = 0;
1264	int hh_len;
1265	int copy;
1266	int err;
1267	int offset = 0;
1268	__u8 tx_flags = 0;
1269	u32 tskey = 0;
1270	struct rt6_info *rt = (struct rt6_info *)cork->dst;
1271	struct ipv6_txoptions *opt = v6_cork->opt;
1272	int csummode = CHECKSUM_NONE;
1273	unsigned int maxnonfragsize, headersize;
	/* bytes accumulated here are added to sk->sk_wmem_alloc once, on exit */
1274	unsigned int wmem_alloc_delta = 0;
1275
1276	skb = skb_peek_tail(queue);
	/* empty queue: the first skb also carries the extension headers */
1277	if (!skb) {
1278		exthdrlen = opt ? opt->opt_flen : 0;
1279		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1280	}
1281
1282	mtu = cork->fragsize;
1283	orig_mtu = mtu;
1284
1285	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1286
1287	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1288			(opt ? opt->opt_nflen : 0);
1289	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1290		     sizeof(struct frag_hdr);
1291
1292	headersize = sizeof(struct ipv6hdr) +
1293		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1294		     (dst_allfrag(&rt->dst) ?
1295		      sizeof(struct frag_hdr) : 0) +
1296		     rt->rt6i_nfheader_len;
1297
1298	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1299	 * the first fragment
1300	 */
1301	if (headersize + transhdrlen > mtu)
1302		goto emsgsize;
1303
	/* dontfrag on UDP/RAW: report the usable size and refuse the data */
1304	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1305	    (sk->sk_protocol == IPPROTO_UDP ||
1306	     sk->sk_protocol == IPPROTO_RAW)) {
1307		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1308				sizeof(struct ipv6hdr));
1309		goto emsgsize;
1310	}
1311
1312	if (ip6_sk_ignore_df(sk))
1313		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1314	else
1315		maxnonfragsize = mtu;
1316
1317	if (cork->length + length > maxnonfragsize - headersize) {
1318emsgsize:
1319		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1320		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
 
1321		return -EMSGSIZE;
1322	}
1323
1324	/* CHECKSUM_PARTIAL only with no extension headers and when
1325	 * we are not going to fragment
1326	 */
1327	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1328	    headersize == sizeof(struct ipv6hdr) &&
1329	    length <= mtu - headersize &&
1330	    !(flags & MSG_MORE) &&
1331	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1332		csummode = CHECKSUM_PARTIAL;
1333
1334	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
1335		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
1336		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
1337		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1338			tskey = sk->sk_tskey++;
1339	}
1340
1341	/*
1342	 * Let's try using as much space as possible.
1343	 * Use MTU if total length of the message fits into the MTU.
1344	 * Otherwise, we need to reserve fragment header and
1345	 * fragment alignment (= 8-15 octets, in total).
1346	 *
1347	 * Note that we may need to "move" the data from the tail
1348	 * of the buffer to the new fragment when we split
1349	 * the message.
1350	 *
1351	 * FIXME: It may be fragmented into multiple chunks
1352	 *        at once if non-fragmentable extension headers
1353	 *        are too large.
1354	 * --yoshfuji
1355	 */
1356
1357	cork->length += length;
 
 
 
 
 
 
 
 
 
 
 
 
 
	/* nothing queued yet: enter the loop body at the allocation step */
1358	if (!skb)
1359		goto alloc_new_skb;
1360
1361	while (length > 0) {
1362		/* Check if the remaining data fits into current packet. */
1363		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1364		if (copy < length)
1365			copy = maxfraglen - skb->len;
1366
1367		if (copy <= 0) {
1368			char *data;
1369			unsigned int datalen;
1370			unsigned int fraglen;
1371			unsigned int fraggap;
1372			unsigned int alloclen;
1373alloc_new_skb:
1374			/* There's no room in the current skb */
			/*
			 * fraggap: bytes of the current skb beyond maxfraglen;
			 * they are copied into the new skb below and trimmed
			 * from the old one.
			 */
1375			if (skb)
1376				fraggap = skb->len - maxfraglen;
1377			else
1378				fraggap = 0;
1379			/* update mtu and maxfraglen if necessary */
1380			if (!skb || !skb_prev)
1381				ip6_append_data_mtu(&mtu, &maxfraglen,
1382						    fragheaderlen, skb, rt,
1383						    orig_mtu);
1384
1385			skb_prev = skb;
1386
1387			/*
1388			 * If remaining data exceeds the mtu,
1389			 * we know we need more fragment(s).
1390			 */
1391			datalen = length + fraggap;
1392
1393			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1394				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1395			if ((flags & MSG_MORE) &&
1396			    !(rt->dst.dev->features&NETIF_F_SG))
1397				alloclen = mtu;
1398			else
1399				alloclen = datalen + fragheaderlen;
1400
1401			alloclen += dst_exthdrlen;
1402
1403			if (datalen != length + fraggap) {
1404				/*
1405				 * this is not the last fragment, the trailer
1406				 * space is regarded as data space.
1407				 */
1408				datalen += rt->dst.trailer_len;
1409			}
1410
1411			alloclen += rt->dst.trailer_len;
1412			fraglen = datalen + fragheaderlen;
1413
1414			/*
1415			 * We just reserve space for fragment header.
1416			 * Note: this may be overallocation if the message
1417			 * (without MSG_MORE) fits into the MTU.
1418			 */
1419			alloclen += sizeof(struct frag_hdr);
1420
1421			copy = datalen - transhdrlen - fraggap;
1422			if (copy < 0) {
1423				err = -EINVAL;
1424				goto error;
1425			}
			/*
			 * The skb that carries the transport header comes from
			 * sock_alloc_send_skb(); later ones use plain
			 * alloc_skb(), attempted only while sk_wmem_alloc plus
			 * the pending delta is within 2 * sk_sndbuf.
			 */
1426			if (transhdrlen) {
1427				skb = sock_alloc_send_skb(sk,
1428						alloclen + hh_len,
1429						(flags & MSG_DONTWAIT), &err);
1430			} else {
1431				skb = NULL;
1432				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1433				    2 * sk->sk_sndbuf)
1434					skb = alloc_skb(alloclen + hh_len,
1435							sk->sk_allocation);
 
1436				if (unlikely(!skb))
1437					err = -ENOBUFS;
1438			}
1439			if (!skb)
1440				goto error;
1441			/*
1442			 *	Fill in the control structures
1443			 */
1444			skb->protocol = htons(ETH_P_IPV6);
1445			skb->ip_summed = csummode;
1446			skb->csum = 0;
1447			/* reserve for fragmentation and ipsec header */
1448			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1449				    dst_exthdrlen);
1450
1451			/* Only the initial fragment is time stamped */
1452			skb_shinfo(skb)->tx_flags = tx_flags;
1453			tx_flags = 0;
1454			skb_shinfo(skb)->tskey = tskey;
1455			tskey = 0;
1456
1457			/*
1458			 *	Find where to start putting bytes
1459			 */
1460			data = skb_put(skb, fraglen);
1461			skb_set_network_header(skb, exthdrlen);
1462			data += fragheaderlen;
1463			skb->transport_header = (skb->network_header +
1464						 fragheaderlen);
			/* move the overhanging tail of the previous skb over */
1465			if (fraggap) {
1466				skb->csum = skb_copy_and_csum_bits(
1467					skb_prev, maxfraglen,
1468					data + transhdrlen, fraggap, 0);
1469				skb_prev->csum = csum_sub(skb_prev->csum,
1470							  skb->csum);
1471				data += fraggap;
1472				pskb_trim_unique(skb_prev, maxfraglen);
1473			}
1474			if (copy > 0 &&
1475			    getfrag(from, data + transhdrlen, offset,
1476				    copy, fraggap, skb) < 0) {
 
 
 
 
1477				err = -EFAULT;
1478				kfree_skb(skb);
1479				goto error;
1480			}
1481
1482			offset += copy;
1483			length -= datalen - fraggap;
			/* headers are only accounted for in the first skb */
1484			transhdrlen = 0;
1485			exthdrlen = 0;
1486			dst_exthdrlen = 0;
1487
1488			if ((flags & MSG_CONFIRM) && !skb_prev)
1489				skb_set_dst_pending_confirm(skb, 1);
1490
1491			/*
1492			 * Put the packet on the pending queue
1493			 */
1494			if (!skb->destructor) {
1495				skb->destructor = sock_wfree;
1496				skb->sk = sk;
1497				wmem_alloc_delta += skb->truesize;
1498			}
1499			__skb_queue_tail(queue, skb);
1500			continue;
1501		}
1502
1503		if (copy > length)
1504			copy = length;
1505
		/* without scatter-gather, copy into the linear tailroom */
1506		if (!(rt->dst.dev->features&NETIF_F_SG) &&
1507		    skb_tailroom(skb) >= copy) {
1508			unsigned int off;
1509
1510			off = skb->len;
1511			if (getfrag(from, skb_put(skb, copy),
1512						offset, copy, off, skb) < 0) {
1513				__skb_trim(skb, off);
1514				err = -EFAULT;
1515				goto error;
1516			}
1517		} else {
			/* otherwise append the data as a page fragment */
1518			int i = skb_shinfo(skb)->nr_frags;
1519
1520			err = -ENOMEM;
1521			if (!sk_page_frag_refill(sk, pfrag))
1522				goto error;
1523
1524			if (!skb_can_coalesce(skb, i, pfrag->page,
1525					      pfrag->offset)) {
1526				err = -EMSGSIZE;
1527				if (i == MAX_SKB_FRAGS)
1528					goto error;
1529
1530				__skb_fill_page_desc(skb, i, pfrag->page,
1531						     pfrag->offset, 0);
1532				skb_shinfo(skb)->nr_frags = ++i;
1533				get_page(pfrag->page);
1534			}
1535			copy = min_t(int, copy, pfrag->size - pfrag->offset);
1536			if (getfrag(from,
1537				    page_address(pfrag->page) + pfrag->offset,
1538				    offset, copy, skb->len, skb) < 0)
1539				goto error_efault;
1540
1541			pfrag->offset += copy;
1542			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1543			skb->len += copy;
1544			skb->data_len += copy;
1545			skb->truesize += copy;
1546			wmem_alloc_delta += copy;
1547		}
1548		offset += copy;
1549		length -= copy;
1550	}
1551
1552	if (wmem_alloc_delta)
1553		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1554	return 0;
1555
1556error_efault:
1557	err = -EFAULT;
1558error:
	/* undo the accounting for the bytes that were never appended */
1559	cork->length -= length;
1560	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1561	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1562	return err;
1563}
1564
1565int ip6_append_data(struct sock *sk,
1566		    int getfrag(void *from, char *to, int offset, int len,
1567				int odd, struct sk_buff *skb),
1568		    void *from, int length, int transhdrlen,
1569		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1570		    struct rt6_info *rt, unsigned int flags,
1571		    const struct sockcm_cookie *sockc)
1572{
1573	struct inet_sock *inet = inet_sk(sk);
1574	struct ipv6_pinfo *np = inet6_sk(sk);
1575	int exthdrlen;
1576	int err;
1577
1578	if (flags&MSG_PROBE)
1579		return 0;
1580	if (skb_queue_empty(&sk->sk_write_queue)) {
1581		/*
1582		 * setup for corking
1583		 */
1584		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1585				     ipc6, rt, fl6);
1586		if (err)
1587			return err;
1588
1589		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1590		length += exthdrlen;
1591		transhdrlen += exthdrlen;
1592	} else {
1593		fl6 = &inet->cork.fl.u.ip6;
1594		transhdrlen = 0;
1595	}
1596
1597	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1598				 &np->cork, sk_page_frag(sk), getfrag,
1599				 from, length, transhdrlen, flags, ipc6, sockc);
1600}
1601EXPORT_SYMBOL_GPL(ip6_append_data);
1602
1603static void ip6_cork_release(struct inet_cork_full *cork,
1604			     struct inet6_cork *v6_cork)
1605{
1606	if (v6_cork->opt) {
1607		kfree(v6_cork->opt->dst0opt);
1608		kfree(v6_cork->opt->dst1opt);
1609		kfree(v6_cork->opt->hopopt);
1610		kfree(v6_cork->opt->srcrt);
1611		kfree(v6_cork->opt);
1612		v6_cork->opt = NULL;
1613	}
1614
1615	if (cork->base.dst) {
1616		dst_release(cork->base.dst);
1617		cork->base.dst = NULL;
1618		cork->base.flags &= ~IPCORK_ALLFRAG;
1619	}
1620	memset(&cork->fl, 0, sizeof(cork->fl));
1621}
1622
1623struct sk_buff *__ip6_make_skb(struct sock *sk,
1624			       struct sk_buff_head *queue,
1625			       struct inet_cork_full *cork,
1626			       struct inet6_cork *v6_cork)
1627{
1628	struct sk_buff *skb, *tmp_skb;
1629	struct sk_buff **tail_skb;
1630	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1631	struct ipv6_pinfo *np = inet6_sk(sk);
1632	struct net *net = sock_net(sk);
1633	struct ipv6hdr *hdr;
1634	struct ipv6_txoptions *opt = v6_cork->opt;
1635	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1636	struct flowi6 *fl6 = &cork->fl.u.ip6;
1637	unsigned char proto = fl6->flowi6_proto;
1638
1639	skb = __skb_dequeue(queue);
1640	if (!skb)
1641		goto out;
1642	tail_skb = &(skb_shinfo(skb)->frag_list);
1643
1644	/* move skb->data to ip header from ext header */
1645	if (skb->data < skb_network_header(skb))
1646		__skb_pull(skb, skb_network_offset(skb));
1647	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1648		__skb_pull(tmp_skb, skb_network_header_len(skb));
1649		*tail_skb = tmp_skb;
1650		tail_skb = &(tmp_skb->next);
1651		skb->len += tmp_skb->len;
1652		skb->data_len += tmp_skb->len;
1653		skb->truesize += tmp_skb->truesize;
1654		tmp_skb->destructor = NULL;
1655		tmp_skb->sk = NULL;
1656	}
1657
1658	/* Allow local fragmentation. */
1659	skb->ignore_df = ip6_sk_ignore_df(sk);
1660
1661	*final_dst = fl6->daddr;
1662	__skb_pull(skb, skb_network_header_len(skb));
1663	if (opt && opt->opt_flen)
1664		ipv6_push_frag_opts(skb, opt, &proto);
1665	if (opt && opt->opt_nflen)
1666		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1667
1668	skb_push(skb, sizeof(struct ipv6hdr));
1669	skb_reset_network_header(skb);
1670	hdr = ipv6_hdr(skb);
1671
1672	ip6_flow_hdr(hdr, v6_cork->tclass,
1673		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
1674					ip6_autoflowlabel(net, np), fl6));
1675	hdr->hop_limit = v6_cork->hop_limit;
1676	hdr->nexthdr = proto;
1677	hdr->saddr = fl6->saddr;
1678	hdr->daddr = *final_dst;
1679
1680	skb->priority = sk->sk_priority;
1681	skb->mark = sk->sk_mark;
1682
1683	skb_dst_set(skb, dst_clone(&rt->dst));
1684	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1685	if (proto == IPPROTO_ICMPV6) {
1686		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1687
1688		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1689		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1690	}
1691
1692	ip6_cork_release(cork, v6_cork);
1693out:
1694	return skb;
1695}
1696
1697int ip6_send_skb(struct sk_buff *skb)
1698{
1699	struct net *net = sock_net(skb->sk);
1700	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1701	int err;
1702
1703	err = ip6_local_out(net, skb->sk, skb);
1704	if (err) {
1705		if (err > 0)
1706			err = net_xmit_errno(err);
1707		if (err)
1708			IP6_INC_STATS(net, rt->rt6i_idev,
1709				      IPSTATS_MIB_OUTDISCARDS);
1710	}
1711
1712	return err;
1713}
1714
/*
 * Build one skb from the socket's pending frames and send it.
 * Returns 0 when ip6_finish_skb() yields nothing, otherwise the result of
 * ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1726
1727static void __ip6_flush_pending_frames(struct sock *sk,
1728				       struct sk_buff_head *queue,
1729				       struct inet_cork_full *cork,
1730				       struct inet6_cork *v6_cork)
1731{
1732	struct sk_buff *skb;
1733
1734	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1735		if (skb_dst(skb))
1736			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1737				      IPSTATS_MIB_OUTDISCARDS);
1738		kfree_skb(skb);
1739	}
1740
1741	ip6_cork_release(cork, v6_cork);
1742}
1743
1744void ip6_flush_pending_frames(struct sock *sk)
1745{
1746	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1747				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1748}
1749EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1750
1751struct sk_buff *ip6_make_skb(struct sock *sk,
1752			     int getfrag(void *from, char *to, int offset,
1753					 int len, int odd, struct sk_buff *skb),
1754			     void *from, int length, int transhdrlen,
1755			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
 
1756			     struct rt6_info *rt, unsigned int flags,
1757			     const struct sockcm_cookie *sockc)
1758{
1759	struct inet_cork_full cork;
1760	struct inet6_cork v6_cork;
1761	struct sk_buff_head queue;
1762	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1763	int err;
1764
1765	if (flags & MSG_PROBE)
1766		return NULL;
1767
1768	__skb_queue_head_init(&queue);
1769
1770	cork.base.flags = 0;
1771	cork.base.addr = 0;
1772	cork.base.opt = NULL;
1773	cork.base.dst = NULL;
1774	v6_cork.opt = NULL;
1775	err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
1776	if (err) {
1777		ip6_cork_release(&cork, &v6_cork);
1778		return ERR_PTR(err);
1779	}
1780	if (ipc6->dontfrag < 0)
1781		ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1782
1783	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1784				&current->task_frag, getfrag, from,
1785				length + exthdrlen, transhdrlen + exthdrlen,
1786				flags, ipc6, sockc);
1787	if (err) {
1788		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1789		return ERR_PTR(err);
1790	}
1791
1792	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1793}

   1/*
   2 *	IPv6 output functions
   3 *	Linux INET6 implementation
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	Based on linux/net/ipv4/ip_output.c
   9 *
  10 *	This program is free software; you can redistribute it and/or
  11 *      modify it under the terms of the GNU General Public License
  12 *      as published by the Free Software Foundation; either version
  13 *      2 of the License, or (at your option) any later version.
  14 *
  15 *	Changes:
  16 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
  17 *				extension headers are implemented.
  18 *				route changes now work.
  19 *				ip6_forward does not confuse sniffers.
  20 *				etc.
  21 *
  22 *      H. von Brand    :       Added missing #include <linux/string.h>
  23 *	Imran Patel	:	frag id should be in NBO
  24 *      Kazunori MIYAZAWA @USAGI
  25 *			:       add ip6_append_data and related functions
  26 *				for datagram xmit
  27 */
  28
  29#include <linux/errno.h>
  30#include <linux/kernel.h>
  31#include <linux/string.h>
  32#include <linux/socket.h>
  33#include <linux/net.h>
  34#include <linux/netdevice.h>
  35#include <linux/if_arp.h>
  36#include <linux/in6.h>
  37#include <linux/tcp.h>
  38#include <linux/route.h>
  39#include <linux/module.h>
  40#include <linux/slab.h>
  41
 
  42#include <linux/netfilter.h>
  43#include <linux/netfilter_ipv6.h>
  44
  45#include <net/sock.h>
  46#include <net/snmp.h>
  47
  48#include <net/ipv6.h>
  49#include <net/ndisc.h>
  50#include <net/protocol.h>
  51#include <net/ip6_route.h>
  52#include <net/addrconf.h>
  53#include <net/rawv6.h>
  54#include <net/icmp.h>
  55#include <net/xfrm.h>
  56#include <net/checksum.h>
  57#include <linux/mroute6.h>
  58#include <net/l3mdev.h>
 
  59
  60static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
  61{
  62	struct dst_entry *dst = skb_dst(skb);
  63	struct net_device *dev = dst->dev;
  64	struct neighbour *neigh;
  65	struct in6_addr *nexthop;
  66	int ret;
  67
  68	skb->protocol = htons(ETH_P_IPV6);
  69	skb->dev = dev;
  70
  71	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
  72		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
  73
  74		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
  75		    ((mroute6_socket(net, skb) &&
  76		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
  77		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
  78					 &ipv6_hdr(skb)->saddr))) {
  79			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
  80
  81			/* Do not check for IFF_ALLMULTI; multicast routing
  82			   is not supported in any case.
  83			 */
  84			if (newskb)
  85				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
  86					net, sk, newskb, NULL, newskb->dev,
  87					dev_loopback_xmit);
  88
  89			if (ipv6_hdr(skb)->hop_limit == 0) {
  90				IP6_INC_STATS(net, idev,
  91					      IPSTATS_MIB_OUTDISCARDS);
  92				kfree_skb(skb);
  93				return 0;
  94			}
  95		}
  96
  97		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
  98
  99		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
 100		    IPV6_ADDR_SCOPE_NODELOCAL &&
 101		    !(dev->flags & IFF_LOOPBACK)) {
 102			kfree_skb(skb);
 103			return 0;
 104		}
 105	}
 106
 
 
 
 
 
 
 
 107	rcu_read_lock_bh();
 108	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
 109	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
 110	if (unlikely(!neigh))
 111		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
 112	if (!IS_ERR(neigh)) {
 113		ret = dst_neigh_output(dst, neigh, skb);
 
 114		rcu_read_unlock_bh();
 115		return ret;
 116	}
 117	rcu_read_unlock_bh();
 118
 119	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 120	kfree_skb(skb);
 121	return -EINVAL;
 122}
 123
 124static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 125{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 126	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 127	    dst_allfrag(skb_dst(skb)) ||
 128	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
 129		return ip6_fragment(net, sk, skb, ip6_finish_output2);
 130	else
 131		return ip6_finish_output2(net, sk, skb);
 132}
 133
 134int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 135{
 136	struct net_device *dev = skb_dst(skb)->dev;
 137	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 138
 
 
 
 139	if (unlikely(idev->cnf.disable_ipv6)) {
 140		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 141		kfree_skb(skb);
 142		return 0;
 143	}
 144
 145	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
 146			    net, sk, skb, NULL, dev,
 147			    ip6_finish_output,
 148			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 149}
 150
 
 
 
 
 
 
 
 
 151/*
 152 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 153 * Note : socket lock is not held for SYNACK packets, but might be modified
 154 * by calls to skb_set_owner_w() and ipv6_local_error(),
 155 * which are using proper atomic operations or spinlocks.
 156 */
 157int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 158	     struct ipv6_txoptions *opt, int tclass)
 159{
 160	struct net *net = sock_net(sk);
 161	const struct ipv6_pinfo *np = inet6_sk(sk);
 162	struct in6_addr *first_hop = &fl6->daddr;
 163	struct dst_entry *dst = skb_dst(skb);
 164	struct ipv6hdr *hdr;
 165	u8  proto = fl6->flowi6_proto;
 166	int seg_len = skb->len;
 167	int hlimit = -1;
 168	u32 mtu;
 169
 170	if (opt) {
 171		unsigned int head_room;
 172
 173		/* First: exthdrs may take lots of space (~8K for now)
 174		   MAX_HEADER is not enough.
 175		 */
 176		head_room = opt->opt_nflen + opt->opt_flen;
 177		seg_len += head_room;
 178		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
 179
 180		if (skb_headroom(skb) < head_room) {
 181			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 182			if (!skb2) {
 183				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 184					      IPSTATS_MIB_OUTDISCARDS);
 185				kfree_skb(skb);
 186				return -ENOBUFS;
 187			}
 188			consume_skb(skb);
 189			skb = skb2;
 190			/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
 191			 * it is safe to call in our context (socket lock not held)
 192			 */
 193			skb_set_owner_w(skb, (struct sock *)sk);
 194		}
 195		if (opt->opt_flen)
 196			ipv6_push_frag_opts(skb, opt, &proto);
 197		if (opt->opt_nflen)
 198			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
 
 199	}
 200
 201	skb_push(skb, sizeof(struct ipv6hdr));
 202	skb_reset_network_header(skb);
 203	hdr = ipv6_hdr(skb);
 204
 205	/*
 206	 *	Fill in the IPv6 header
 207	 */
 208	if (np)
 209		hlimit = np->hop_limit;
 210	if (hlimit < 0)
 211		hlimit = ip6_dst_hoplimit(dst);
 212
 213	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
 214						     np->autoflowlabel, fl6));
 215
 216	hdr->payload_len = htons(seg_len);
 217	hdr->nexthdr = proto;
 218	hdr->hop_limit = hlimit;
 219
 220	hdr->saddr = fl6->saddr;
 221	hdr->daddr = *first_hop;
 222
 223	skb->protocol = htons(ETH_P_IPV6);
 224	skb->priority = sk->sk_priority;
 225	skb->mark = sk->sk_mark;
 226
 227	mtu = dst_mtu(dst);
 228	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
 229		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
 230			      IPSTATS_MIB_OUT, skb->len);
 
 
 
 
 
 
 
 
 231		/* hooks should never assume socket lock is held.
 232		 * we promote our socket to non const
 233		 */
 234		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 235			       net, (struct sock *)sk, skb, NULL, dst->dev,
 236			       dst_output);
 237	}
 238
 239	skb->dev = dst->dev;
 240	/* ipv6_local_error() does not require socket lock,
 241	 * we promote our socket to non const
 242	 */
 243	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
 244
 245	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
 246	kfree_skb(skb);
 247	return -EMSGSIZE;
 248}
 249EXPORT_SYMBOL(ip6_xmit);
 250
 251static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 252{
 253	struct ip6_ra_chain *ra;
 254	struct sock *last = NULL;
 255
 256	read_lock(&ip6_ra_lock);
 257	for (ra = ip6_ra_chain; ra; ra = ra->next) {
 258		struct sock *sk = ra->sk;
 259		if (sk && ra->sel == sel &&
 260		    (!sk->sk_bound_dev_if ||
 261		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
 262			if (last) {
 263				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 264				if (skb2)
 265					rawv6_rcv(last, skb2);
 266			}
 267			last = sk;
 268		}
 269	}
 270
 271	if (last) {
 272		rawv6_rcv(last, skb);
 273		read_unlock(&ip6_ra_lock);
 274		return 1;
 275	}
 276	read_unlock(&ip6_ra_lock);
 277	return 0;
 278}
 279
 280static int ip6_forward_proxy_check(struct sk_buff *skb)
 281{
 282	struct ipv6hdr *hdr = ipv6_hdr(skb);
 283	u8 nexthdr = hdr->nexthdr;
 284	__be16 frag_off;
 285	int offset;
 286
 287	if (ipv6_ext_hdr(nexthdr)) {
 288		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
 289		if (offset < 0)
 290			return 0;
 291	} else
 292		offset = sizeof(struct ipv6hdr);
 293
 294	if (nexthdr == IPPROTO_ICMPV6) {
 295		struct icmp6hdr *icmp6;
 296
 297		if (!pskb_may_pull(skb, (skb_network_header(skb) +
 298					 offset + 1 - skb->data)))
 299			return 0;
 300
 301		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 302
 303		switch (icmp6->icmp6_type) {
 304		case NDISC_ROUTER_SOLICITATION:
 305		case NDISC_ROUTER_ADVERTISEMENT:
 306		case NDISC_NEIGHBOUR_SOLICITATION:
 307		case NDISC_NEIGHBOUR_ADVERTISEMENT:
 308		case NDISC_REDIRECT:
 309			/* For reaction involving unicast neighbor discovery
 310			 * message destined to the proxied address, pass it to
 311			 * input function.
 312			 */
 313			return 1;
 314		default:
 315			break;
 316		}
 317	}
 318
 319	/*
 320	 * The proxying router can't forward traffic sent to a link-local
 321	 * address, so signal the sender and discard the packet. This
 322	 * behavior is clarified by the MIPv6 specification.
 323	 */
 324	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
 325		dst_link_failure(skb);
 326		return -1;
 327	}
 328
 329	return 0;
 330}
 331
 332static inline int ip6_forward_finish(struct net *net, struct sock *sk,
 333				     struct sk_buff *skb)
 334{
 
 
 
 
 
 335	return dst_output(net, sk, skb);
 336}
 337
 338static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
 339{
 340	unsigned int mtu;
 341	struct inet6_dev *idev;
 342
 343	if (dst_metric_locked(dst, RTAX_MTU)) {
 344		mtu = dst_metric_raw(dst, RTAX_MTU);
 345		if (mtu)
 346			return mtu;
 347	}
 348
 349	mtu = IPV6_MIN_MTU;
 350	rcu_read_lock();
 351	idev = __in6_dev_get(dst->dev);
 352	if (idev)
 353		mtu = idev->cnf.mtu6;
 354	rcu_read_unlock();
 355
 356	return mtu;
 357}
 
 358
 359static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 360{
 361	if (skb->len <= mtu)
 362		return false;
 363
 364	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
 365	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
 366		return true;
 367
 368	if (skb->ignore_df)
 369		return false;
 370
 371	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
 372		return false;
 373
 374	return true;
 375}
 376
 377int ip6_forward(struct sk_buff *skb)
 378{
 379	struct dst_entry *dst = skb_dst(skb);
 380	struct ipv6hdr *hdr = ipv6_hdr(skb);
 381	struct inet6_skb_parm *opt = IP6CB(skb);
 382	struct net *net = dev_net(dst->dev);
 383	u32 mtu;
 384
 385	if (net->ipv6.devconf_all->forwarding == 0)
 386		goto error;
 387
 388	if (skb->pkt_type != PACKET_HOST)
 389		goto drop;
 390
 391	if (unlikely(skb->sk))
 392		goto drop;
 393
 394	if (skb_warn_if_lro(skb))
 395		goto drop;
 396
 397	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
 398		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
 399				 IPSTATS_MIB_INDISCARDS);
 400		goto drop;
 401	}
 402
 403	skb_forward_csum(skb);
 404
 405	/*
 406	 *	We DO NOT make any processing on
 407	 *	RA packets, pushing them to user level AS IS
 408	 *	without ane WARRANTY that application will be able
 409	 *	to interpret them. The reason is that we
 410	 *	cannot make anything clever here.
 411	 *
 412	 *	We are not end-node, so that if packet contains
 413	 *	AH/ESP, we cannot make anything.
 414	 *	Defragmentation also would be mistake, RA packets
 415	 *	cannot be fragmented, because there is no warranty
 416	 *	that different fragments will go along one path. --ANK
 417	 */
 418	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
 419		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
 420			return 0;
 421	}
 422
 423	/*
 424	 *	check and decrement ttl
 425	 */
 426	if (hdr->hop_limit <= 1) {
 427		/* Force OUTPUT device used as source address */
 428		skb->dev = dst->dev;
 429		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
 430		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
 431				 IPSTATS_MIB_INHDRERRORS);
 432
 433		kfree_skb(skb);
 434		return -ETIMEDOUT;
 435	}
 436
 437	/* XXX: idev->cnf.proxy_ndp? */
 438	if (net->ipv6.devconf_all->proxy_ndp &&
 439	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
 440		int proxied = ip6_forward_proxy_check(skb);
 441		if (proxied > 0)
 442			return ip6_input(skb);
 443		else if (proxied < 0) {
 444			IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
 445					 IPSTATS_MIB_INDISCARDS);
 446			goto drop;
 447		}
 448	}
 449
 450	if (!xfrm6_route_forward(skb)) {
 451		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
 452				 IPSTATS_MIB_INDISCARDS);
 453		goto drop;
 454	}
 455	dst = skb_dst(skb);
 456
 457	/* IPv6 specs say nothing about it, but it is clear that we cannot
 458	   send redirects to source routed frames.
 459	   We don't send redirects to frames decapsulated from IPsec.
 460	 */
 461	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
 462		struct in6_addr *target = NULL;
 463		struct inet_peer *peer;
 464		struct rt6_info *rt;
 465
 466		/*
 467		 *	incoming and outgoing devices are the same
 468		 *	send a redirect.
 469		 */
 470
 471		rt = (struct rt6_info *) dst;
 472		if (rt->rt6i_flags & RTF_GATEWAY)
 473			target = &rt->rt6i_gateway;
 474		else
 475			target = &hdr->daddr;
 476
 477		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
 478
 479		/* Limit redirects both by destination (here)
 480		   and by source (inside ndisc_send_redirect)
 481		 */
 482		if (inet_peer_xrlim_allow(peer, 1*HZ))
 483			ndisc_send_redirect(skb, target);
 484		if (peer)
 485			inet_putpeer(peer);
 486	} else {
 487		int addrtype = ipv6_addr_type(&hdr->saddr);
 488
 489		/* This check is security critical. */
 490		if (addrtype == IPV6_ADDR_ANY ||
 491		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
 492			goto error;
 493		if (addrtype & IPV6_ADDR_LINKLOCAL) {
 494			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
 495				    ICMPV6_NOT_NEIGHBOUR, 0);
 496			goto error;
 497		}
 498	}
 499
 500	mtu = ip6_dst_mtu_forward(dst);
 501	if (mtu < IPV6_MIN_MTU)
 502		mtu = IPV6_MIN_MTU;
 503
 504	if (ip6_pkt_too_big(skb, mtu)) {
 505		/* Again, force OUTPUT device used as source address */
 506		skb->dev = dst->dev;
 507		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 508		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
 509				 IPSTATS_MIB_INTOOBIGERRORS);
 510		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
 511				 IPSTATS_MIB_FRAGFAILS);
 512		kfree_skb(skb);
 513		return -EMSGSIZE;
 514	}
 515
 516	if (skb_cow(skb, dst->dev->hard_header_len)) {
 517		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
 518				 IPSTATS_MIB_OUTDISCARDS);
 519		goto drop;
 520	}
 521
 522	hdr = ipv6_hdr(skb);
 523
 524	/* Mangling hops number delayed to point after skb COW */
 525
 526	hdr->hop_limit--;
 527
 528	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 529	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
 530	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
 531		       net, NULL, skb, skb->dev, dst->dev,
 532		       ip6_forward_finish);
 533
 534error:
 535	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
 536drop:
 537	kfree_skb(skb);
 538	return -EINVAL;
 539}
 540
 541static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 542{
 543	to->pkt_type = from->pkt_type;
 544	to->priority = from->priority;
 545	to->protocol = from->protocol;
 546	skb_dst_drop(to);
 547	skb_dst_set(to, dst_clone(skb_dst(from)));
 548	to->dev = from->dev;
 549	to->mark = from->mark;
 550
 551#ifdef CONFIG_NET_SCHED
 552	to->tc_index = from->tc_index;
 553#endif
 554	nf_copy(to, from);
 555	skb_copy_secmark(to, from);
 556}
 557
 558int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 559		 int (*output)(struct net *, struct sock *, struct sk_buff *))
 560{
 561	struct sk_buff *frag;
 562	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 563	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
 564				inet6_sk(skb->sk) : NULL;
 565	struct ipv6hdr *tmp_hdr;
 566	struct frag_hdr *fh;
 567	unsigned int mtu, hlen, left, len;
 568	int hroom, troom;
 569	__be32 frag_id;
 570	int ptr, offset = 0, err = 0;
 571	u8 *prevhdr, nexthdr = 0;
 572
 573	hlen = ip6_find_1stfragopt(skb, &prevhdr);
 
 
 
 574	nexthdr = *prevhdr;
 575
 576	mtu = ip6_skb_dst_mtu(skb);
 577
 578	/* We must not fragment if the socket is set to force MTU discovery
 579	 * or if the skb it not generated by a local socket.
 580	 */
 581	if (unlikely(!skb->ignore_df && skb->len > mtu))
 582		goto fail_toobig;
 583
 584	if (IP6CB(skb)->frag_max_size) {
 585		if (IP6CB(skb)->frag_max_size > mtu)
 586			goto fail_toobig;
 587
 588		/* don't send fragments larger than what we received */
 589		mtu = IP6CB(skb)->frag_max_size;
 590		if (mtu < IPV6_MIN_MTU)
 591			mtu = IPV6_MIN_MTU;
 592	}
 593
 594	if (np && np->frag_size < mtu) {
 595		if (np->frag_size)
 596			mtu = np->frag_size;
 597	}
 598	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
 599		goto fail_toobig;
 600	mtu -= hlen + sizeof(struct frag_hdr);
 601
 602	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
 603				    &ipv6_hdr(skb)->saddr);
 604
 605	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 606	    (err = skb_checksum_help(skb)))
 607		goto fail;
 608
 609	hroom = LL_RESERVED_SPACE(rt->dst.dev);
 610	if (skb_has_frag_list(skb)) {
 611		int first_len = skb_pagelen(skb);
 612		struct sk_buff *frag2;
 613
 614		if (first_len - hlen > mtu ||
 615		    ((first_len - hlen) & 7) ||
 616		    skb_cloned(skb) ||
 617		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
 618			goto slow_path;
 619
 620		skb_walk_frags(skb, frag) {
 621			/* Correct geometry. */
 622			if (frag->len > mtu ||
 623			    ((frag->len & 7) && frag->next) ||
 624			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
 625				goto slow_path_clean;
 626
 627			/* Partially cloned skb? */
 628			if (skb_shared(frag))
 629				goto slow_path_clean;
 630
 631			BUG_ON(frag->sk);
 632			if (skb->sk) {
 633				frag->sk = skb->sk;
 634				frag->destructor = sock_wfree;
 635			}
 636			skb->truesize -= frag->truesize;
 637		}
 638
 639		err = 0;
 640		offset = 0;
 641		/* BUILD HEADER */
 642
 643		*prevhdr = NEXTHDR_FRAGMENT;
 644		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 645		if (!tmp_hdr) {
 646			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 647				      IPSTATS_MIB_FRAGFAILS);
 648			err = -ENOMEM;
 649			goto fail;
 650		}
 651		frag = skb_shinfo(skb)->frag_list;
 652		skb_frag_list_init(skb);
 653
 654		__skb_pull(skb, hlen);
 655		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
 656		__skb_push(skb, hlen);
 657		skb_reset_network_header(skb);
 658		memcpy(skb_network_header(skb), tmp_hdr, hlen);
 659
 660		fh->nexthdr = nexthdr;
 661		fh->reserved = 0;
 662		fh->frag_off = htons(IP6_MF);
 663		fh->identification = frag_id;
 664
 665		first_len = skb_pagelen(skb);
 666		skb->data_len = first_len - skb_headlen(skb);
 667		skb->len = first_len;
 668		ipv6_hdr(skb)->payload_len = htons(first_len -
 669						   sizeof(struct ipv6hdr));
 670
 671		dst_hold(&rt->dst);
 672
 673		for (;;) {
 674			/* Prepare header of the next frame,
 675			 * before previous one went down. */
 676			if (frag) {
 677				frag->ip_summed = CHECKSUM_NONE;
 678				skb_reset_transport_header(frag);
 679				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
 680				__skb_push(frag, hlen);
 681				skb_reset_network_header(frag);
 682				memcpy(skb_network_header(frag), tmp_hdr,
 683				       hlen);
 684				offset += skb->len - hlen - sizeof(struct frag_hdr);
 685				fh->nexthdr = nexthdr;
 686				fh->reserved = 0;
 687				fh->frag_off = htons(offset);
 688				if (frag->next)
 689					fh->frag_off |= htons(IP6_MF);
 690				fh->identification = frag_id;
 691				ipv6_hdr(frag)->payload_len =
 692						htons(frag->len -
 693						      sizeof(struct ipv6hdr));
 694				ip6_copy_metadata(frag, skb);
 695			}
 696
 697			err = output(net, sk, skb);
 698			if (!err)
 699				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 700					      IPSTATS_MIB_FRAGCREATES);
 701
 702			if (err || !frag)
 703				break;
 704
 705			skb = frag;
 706			frag = skb->next;
 707			skb->next = NULL;
 708		}
 709
 710		kfree(tmp_hdr);
 711
 712		if (err == 0) {
 713			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 714				      IPSTATS_MIB_FRAGOKS);
 715			ip6_rt_put(rt);
 716			return 0;
 717		}
 718
 719		kfree_skb_list(frag);
 720
 721		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 722			      IPSTATS_MIB_FRAGFAILS);
 723		ip6_rt_put(rt);
 724		return err;
 725
 726slow_path_clean:
 727		skb_walk_frags(skb, frag2) {
 728			if (frag2 == frag)
 729				break;
 730			frag2->sk = NULL;
 731			frag2->destructor = NULL;
 732			skb->truesize += frag2->truesize;
 733		}
 734	}
 735
 736slow_path:
 737	left = skb->len - hlen;		/* Space per frame */
 738	ptr = hlen;			/* Where to start from */
 739
 740	/*
 741	 *	Fragment the datagram.
 742	 */
 743
 744	*prevhdr = NEXTHDR_FRAGMENT;
 745	troom = rt->dst.dev->needed_tailroom;
 746
 747	/*
 748	 *	Keep copying data until we run out.
 749	 */
 750	while (left > 0)	{
 
 
 751		len = left;
 752		/* IF: it doesn't fit, use 'mtu' - the data space left */
 753		if (len > mtu)
 754			len = mtu;
 755		/* IF: we are not sending up to and including the packet end
 756		   then align the next start on an eight byte boundary */
 757		if (len < left)	{
 758			len &= ~7;
 759		}
 760
 761		/* Allocate buffer */
 762		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
 763				 hroom + troom, GFP_ATOMIC);
 764		if (!frag) {
 765			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 766				      IPSTATS_MIB_FRAGFAILS);
 767			err = -ENOMEM;
 768			goto fail;
 769		}
 770
 771		/*
 772		 *	Set up data on packet
 773		 */
 774
 775		ip6_copy_metadata(frag, skb);
 776		skb_reserve(frag, hroom);
 777		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 778		skb_reset_network_header(frag);
 779		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
 780		frag->transport_header = (frag->network_header + hlen +
 781					  sizeof(struct frag_hdr));
 782
 783		/*
 784		 *	Charge the memory for the fragment to any owner
 785		 *	it might possess
 786		 */
 787		if (skb->sk)
 788			skb_set_owner_w(frag, skb->sk);
 789
 790		/*
 791		 *	Copy the packet header into the new buffer.
 792		 */
 793		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 794
 
 
 
 
 795		/*
 796		 *	Build fragment header.
 797		 */
 798		fh->nexthdr = nexthdr;
 799		fh->reserved = 0;
 800		fh->identification = frag_id;
 801
 802		/*
 803		 *	Copy a block of the IP datagram.
 804		 */
 805		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
 806				     len));
 807		left -= len;
 808
 809		fh->frag_off = htons(offset);
 810		if (left > 0)
 811			fh->frag_off |= htons(IP6_MF);
 812		ipv6_hdr(frag)->payload_len = htons(frag->len -
 813						    sizeof(struct ipv6hdr));
 814
 815		ptr += len;
 816		offset += len;
 817
 818		/*
 819		 *	Put this fragment into the sending queue.
 820		 */
 821		err = output(net, sk, frag);
 822		if (err)
 823			goto fail;
 824
 825		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 826			      IPSTATS_MIB_FRAGCREATES);
 827	}
 828	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 829		      IPSTATS_MIB_FRAGOKS);
 830	consume_skb(skb);
 831	return err;
 832
 833fail_toobig:
 834	if (skb->sk && dst_allfrag(skb_dst(skb)))
 835		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
 836
 837	skb->dev = skb_dst(skb)->dev;
 838	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 839	err = -EMSGSIZE;
 840
 841fail:
 842	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 843		      IPSTATS_MIB_FRAGFAILS);
 844	kfree_skb(skb);
 845	return err;
 846}
 847
 848static inline int ip6_rt_check(const struct rt6key *rt_key,
 849			       const struct in6_addr *fl_addr,
 850			       const struct in6_addr *addr_cache)
 851{
 852	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 853		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
 854}
 855
 856static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 857					  struct dst_entry *dst,
 858					  const struct flowi6 *fl6)
 859{
 860	struct ipv6_pinfo *np = inet6_sk(sk);
 861	struct rt6_info *rt;
 862
 863	if (!dst)
 864		goto out;
 865
 866	if (dst->ops->family != AF_INET6) {
 867		dst_release(dst);
 868		return NULL;
 869	}
 870
 871	rt = (struct rt6_info *)dst;
 872	/* Yes, checking route validity in not connected
 873	 * case is not very simple. Take into account,
 874	 * that we do not support routing by source, TOS,
 875	 * and MSG_DONTROUTE		--ANK (980726)
 876	 *
 877	 * 1. ip6_rt_check(): If route was host route,
 878	 *    check that cached destination is current.
 879	 *    If it is network route, we still may
 880	 *    check its validity using saved pointer
 881	 *    to the last used address: daddr_cache.
 882	 *    We do not want to save whole address now,
 883	 *    (because main consumer of this service
 884	 *    is tcp, which has not this problem),
 885	 *    so that the last trick works only on connected
 886	 *    sockets.
 887	 * 2. oif also should be the same.
 888	 */
 889	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
 890#ifdef CONFIG_IPV6_SUBTREES
 891	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
 892#endif
 893	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
 894	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
 895		dst_release(dst);
 896		dst = NULL;
 897	}
 898
 899out:
 900	return dst;
 901}
 902
 903static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
 904			       struct dst_entry **dst, struct flowi6 *fl6)
 905{
 906#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 907	struct neighbour *n;
 908	struct rt6_info *rt;
 909#endif
 910	int err;
 911	int flags = 0;
 912
 913	/* The correct way to handle this would be to do
 914	 * ip6_route_get_saddr, and then ip6_route_output; however,
 915	 * the route-specific preferred source forces the
 916	 * ip6_route_output call _before_ ip6_route_get_saddr.
 917	 *
 918	 * In source specific routing (no src=any default route),
 919	 * ip6_route_output will fail given src=any saddr, though, so
 920	 * that's why we try it again later.
 921	 */
 922	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
 923		struct rt6_info *rt;
 924		bool had_dst = *dst != NULL;
 925
 926		if (!had_dst)
 927			*dst = ip6_route_output(net, sk, fl6);
 928		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
 929		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
 930					  sk ? inet6_sk(sk)->srcprefs : 0,
 931					  &fl6->saddr);
 932		if (err)
 933			goto out_err_release;
 934
 935		/* If we had an erroneous initial result, pretend it
 936		 * never existed and let the SA-enabled version take
 937		 * over.
 938		 */
 939		if (!had_dst && (*dst)->error) {
 940			dst_release(*dst);
 941			*dst = NULL;
 942		}
 943
 944		if (fl6->flowi6_oif)
 945			flags |= RT6_LOOKUP_F_IFACE;
 946	}
 947
 948	if (!*dst)
 949		*dst = ip6_route_output_flags(net, sk, fl6, flags);
 950
 951	err = (*dst)->error;
 952	if (err)
 953		goto out_err_release;
 954
 955#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 956	/*
 957	 * Here if the dst entry we've looked up
 958	 * has a neighbour entry that is in the INCOMPLETE
 959	 * state and the src address from the flow is
 960	 * marked as OPTIMISTIC, we release the found
 961	 * dst entry and replace it instead with the
 962	 * dst entry of the nexthop router
 963	 */
 964	rt = (struct rt6_info *) *dst;
 965	rcu_read_lock_bh();
 966	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
 967				      rt6_nexthop(rt, &fl6->daddr));
 968	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
 969	rcu_read_unlock_bh();
 970
 971	if (err) {
 972		struct inet6_ifaddr *ifp;
 973		struct flowi6 fl_gw6;
 974		int redirect;
 975
 976		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
 977				      (*dst)->dev, 1);
 978
 979		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
 980		if (ifp)
 981			in6_ifa_put(ifp);
 982
 983		if (redirect) {
 984			/*
 985			 * We need to get the dst entry for the
 986			 * default router instead
 987			 */
 988			dst_release(*dst);
 989			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
 990			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
 991			*dst = ip6_route_output(net, sk, &fl_gw6);
 992			err = (*dst)->error;
 993			if (err)
 994				goto out_err_release;
 995		}
 996	}
 997#endif
 
 
 
 
 
 998
 999	return 0;
1000
1001out_err_release:
 
 
 
1002	if (err == -ENETUNREACH)
1003		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1004	dst_release(*dst);
1005	*dst = NULL;
1006	return err;
1007}
1008
1009/**
1010 *	ip6_dst_lookup - perform route lookup on flow
1011 *	@sk: socket which provides route info
1012 *	@dst: pointer to dst_entry * for result
1013 *	@fl6: flow to lookup
1014 *
1015 *	This function performs a route lookup on the given flow.
1016 *
1017 *	It returns zero on success, or a standard errno code on error.
1018 */
1019int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1020		   struct flowi6 *fl6)
1021{
1022	*dst = NULL;
1023	return ip6_dst_lookup_tail(net, sk, dst, fl6);
1024}
1025EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1026
1027/**
1028 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1029 *	@sk: socket which provides route info
1030 *	@fl6: flow to lookup
1031 *	@final_dst: final destination address for ipsec lookup
1032 *
1033 *	This function performs a route lookup on the given flow.
1034 *
1035 *	It returns a valid dst pointer on success, or a pointer encoded
1036 *	error code.
1037 */
1038struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
1039				      const struct in6_addr *final_dst)
1040{
1041	struct dst_entry *dst = NULL;
1042	int err;
1043
1044	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
1045	if (err)
1046		return ERR_PTR(err);
1047	if (final_dst)
1048		fl6->daddr = *final_dst;
1049	if (!fl6->flowi6_oif)
1050		fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
1051
1052	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1053}
1054EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1055
1056/**
1057 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1058 *	@sk: socket which provides the dst cache and route info
1059 *	@fl6: flow to lookup
1060 *	@final_dst: final destination address for ipsec lookup
 
1061 *
1062 *	This function performs a route lookup on the given flow with the
1063 *	possibility of using the cached route in the socket if it is valid.
1064 *	It will take the socket dst lock when operating on the dst cache.
1065 *	As a result, this function can only be used in process context.
1066 *
 
 
 
1067 *	It returns a valid dst pointer on success, or a pointer encoded
1068 *	error code.
1069 */
1070struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1071					 const struct in6_addr *final_dst)
 
1072{
1073	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1074	int err;
1075
1076	dst = ip6_sk_dst_check(sk, dst, fl6);
 
 
1077
1078	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
1079	if (err)
1080		return ERR_PTR(err);
1081	if (final_dst)
1082		fl6->daddr = *final_dst;
1083
1084	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1085}
1086EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1087
1088static inline int ip6_ufo_append_data(struct sock *sk,
1089			struct sk_buff_head *queue,
1090			int getfrag(void *from, char *to, int offset, int len,
1091			int odd, struct sk_buff *skb),
1092			void *from, int length, int hh_len, int fragheaderlen,
1093			int exthdrlen, int transhdrlen, int mtu,
1094			unsigned int flags, const struct flowi6 *fl6)
1095
1096{
1097	struct sk_buff *skb;
1098	int err;
1099
1100	/* There is support for UDP large send offload by network
1101	 * device, so create one single skb packet containing complete
1102	 * udp datagram
1103	 */
1104	skb = skb_peek_tail(queue);
1105	if (!skb) {
1106		skb = sock_alloc_send_skb(sk,
1107			hh_len + fragheaderlen + transhdrlen + 20,
1108			(flags & MSG_DONTWAIT), &err);
1109		if (!skb)
1110			return err;
1111
1112		/* reserve space for Hardware header */
1113		skb_reserve(skb, hh_len);
1114
1115		/* create space for UDP/IP header */
1116		skb_put(skb, fragheaderlen + transhdrlen);
1117
1118		/* initialize network header pointer */
1119		skb_set_network_header(skb, exthdrlen);
1120
1121		/* initialize protocol header pointer */
1122		skb->transport_header = skb->network_header + fragheaderlen;
1123
1124		skb->protocol = htons(ETH_P_IPV6);
1125		skb->csum = 0;
1126
1127		__skb_queue_tail(queue, skb);
1128	} else if (skb_is_gso(skb)) {
1129		goto append;
1130	}
1131
1132	skb->ip_summed = CHECKSUM_PARTIAL;
1133	/* Specify the length of each IPv6 datagram fragment.
1134	 * It has to be a multiple of 8.
1135	 */
1136	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1137				     sizeof(struct frag_hdr)) & ~7;
1138	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1139	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
1140							 &fl6->daddr,
1141							 &fl6->saddr);
1142
1143append:
1144	return skb_append_datato_frags(sk, skb, getfrag, from,
1145				       (length - transhdrlen));
1146}
1147
1148static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1149					       gfp_t gfp)
1150{
1151	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1152}
1153
1154static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1155						gfp_t gfp)
1156{
1157	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1158}
1159
1160static void ip6_append_data_mtu(unsigned int *mtu,
1161				int *maxfraglen,
1162				unsigned int fragheaderlen,
1163				struct sk_buff *skb,
1164				struct rt6_info *rt,
1165				unsigned int orig_mtu)
1166{
1167	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1168		if (!skb) {
1169			/* first fragment, reserve header_len */
1170			*mtu = orig_mtu - rt->dst.header_len;
1171
1172		} else {
1173			/*
1174			 * this fragment is not first, the headers
1175			 * space is regarded as data space.
1176			 */
1177			*mtu = orig_mtu;
1178		}
1179		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
1180			      + fragheaderlen - sizeof(struct frag_hdr);
1181	}
1182}
1183
1184static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1185			  struct inet6_cork *v6_cork,
1186			  int hlimit, int tclass, struct ipv6_txoptions *opt,
1187			  struct rt6_info *rt, struct flowi6 *fl6)
1188{
1189	struct ipv6_pinfo *np = inet6_sk(sk);
1190	unsigned int mtu;
 
1191
1192	/*
1193	 * setup for corking
1194	 */
1195	if (opt) {
1196		if (WARN_ON(v6_cork->opt))
1197			return -EINVAL;
1198
1199		v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
1200		if (unlikely(!v6_cork->opt))
1201			return -ENOBUFS;
1202
1203		v6_cork->opt->tot_len = opt->tot_len;
1204		v6_cork->opt->opt_flen = opt->opt_flen;
1205		v6_cork->opt->opt_nflen = opt->opt_nflen;
1206
1207		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1208						    sk->sk_allocation);
1209		if (opt->dst0opt && !v6_cork->opt->dst0opt)
1210			return -ENOBUFS;
1211
1212		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1213						    sk->sk_allocation);
1214		if (opt->dst1opt && !v6_cork->opt->dst1opt)
1215			return -ENOBUFS;
1216
1217		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1218						   sk->sk_allocation);
1219		if (opt->hopopt && !v6_cork->opt->hopopt)
1220			return -ENOBUFS;
1221
1222		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1223						    sk->sk_allocation);
1224		if (opt->srcrt && !v6_cork->opt->srcrt)
1225			return -ENOBUFS;
1226
1227		/* need source address above miyazawa*/
1228	}
1229	dst_hold(&rt->dst);
1230	cork->base.dst = &rt->dst;
1231	cork->fl.u.ip6 = *fl6;
1232	v6_cork->hop_limit = hlimit;
1233	v6_cork->tclass = tclass;
1234	if (rt->dst.flags & DST_XFRM_TUNNEL)
1235		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1236		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
1237	else
1238		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1239		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1240	if (np->frag_size < mtu) {
1241		if (np->frag_size)
1242			mtu = np->frag_size;
1243	}
 
 
1244	cork->base.fragsize = mtu;
1245	if (dst_allfrag(rt->dst.path))
1246		cork->base.flags |= IPCORK_ALLFRAG;
1247	cork->base.length = 0;
1248
1249	return 0;
1250}
1251
1252static int __ip6_append_data(struct sock *sk,
1253			     struct flowi6 *fl6,
1254			     struct sk_buff_head *queue,
1255			     struct inet_cork *cork,
1256			     struct inet6_cork *v6_cork,
1257			     struct page_frag *pfrag,
1258			     int getfrag(void *from, char *to, int offset,
1259					 int len, int odd, struct sk_buff *skb),
1260			     void *from, int length, int transhdrlen,
1261			     unsigned int flags, int dontfrag)
 
1262{
1263	struct sk_buff *skb, *skb_prev = NULL;
1264	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
1265	int exthdrlen = 0;
1266	int dst_exthdrlen = 0;
1267	int hh_len;
1268	int copy;
1269	int err;
1270	int offset = 0;
1271	__u8 tx_flags = 0;
1272	u32 tskey = 0;
1273	struct rt6_info *rt = (struct rt6_info *)cork->dst;
1274	struct ipv6_txoptions *opt = v6_cork->opt;
1275	int csummode = CHECKSUM_NONE;
1276	unsigned int maxnonfragsize, headersize;
 
1277
1278	skb = skb_peek_tail(queue);
1279	if (!skb) {
1280		exthdrlen = opt ? opt->opt_flen : 0;
1281		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1282	}
1283
1284	mtu = cork->fragsize;
1285	orig_mtu = mtu;
1286
1287	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1288
1289	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1290			(opt ? opt->opt_nflen : 0);
1291	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1292		     sizeof(struct frag_hdr);
1293
1294	headersize = sizeof(struct ipv6hdr) +
1295		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1296		     (dst_allfrag(&rt->dst) ?
1297		      sizeof(struct frag_hdr) : 0) +
1298		     rt->rt6i_nfheader_len;
1299
1300	if (cork->length + length > mtu - headersize && dontfrag &&
 
 
 
 
 
 
1301	    (sk->sk_protocol == IPPROTO_UDP ||
1302	     sk->sk_protocol == IPPROTO_RAW)) {
1303		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1304				sizeof(struct ipv6hdr));
1305		goto emsgsize;
1306	}
1307
1308	if (ip6_sk_ignore_df(sk))
1309		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1310	else
1311		maxnonfragsize = mtu;
1312
1313	if (cork->length + length > maxnonfragsize - headersize) {
1314emsgsize:
1315		ipv6_local_error(sk, EMSGSIZE, fl6,
1316				 mtu - headersize +
1317				 sizeof(struct ipv6hdr));
1318		return -EMSGSIZE;
1319	}
1320
1321	/* CHECKSUM_PARTIAL only with no extension headers and when
1322	 * we are not going to fragment
1323	 */
1324	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1325	    headersize == sizeof(struct ipv6hdr) &&
1326	    length < mtu - headersize &&
1327	    !(flags & MSG_MORE) &&
1328	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1329		csummode = CHECKSUM_PARTIAL;
1330
1331	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
1332		sock_tx_timestamp(sk, &tx_flags);
1333		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
1334		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1335			tskey = sk->sk_tskey++;
1336	}
1337
1338	/*
1339	 * Let's try using as much space as possible.
1340	 * Use MTU if total length of the message fits into the MTU.
1341	 * Otherwise, we need to reserve fragment header and
1342	 * fragment alignment (= 8-15 octects, in total).
1343	 *
1344	 * Note that we may need to "move" the data from the tail of
1345	 * of the buffer to the new fragment when we split
1346	 * the message.
1347	 *
1348	 * FIXME: It may be fragmented into multiple chunks
1349	 *        at once if non-fragmentable extension headers
1350	 *        are too large.
1351	 * --yoshfuji
1352	 */
1353
1354	cork->length += length;
1355	if (((length > mtu) ||
1356	     (skb && skb_is_gso(skb))) &&
1357	    (sk->sk_protocol == IPPROTO_UDP) &&
1358	    (rt->dst.dev->features & NETIF_F_UFO) &&
1359	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
1360		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
1361					  hh_len, fragheaderlen, exthdrlen,
1362					  transhdrlen, mtu, flags, fl6);
1363		if (err)
1364			goto error;
1365		return 0;
1366	}
1367
1368	if (!skb)
1369		goto alloc_new_skb;
1370
1371	while (length > 0) {
1372		/* Check if the remaining data fits into current packet. */
1373		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1374		if (copy < length)
1375			copy = maxfraglen - skb->len;
1376
1377		if (copy <= 0) {
1378			char *data;
1379			unsigned int datalen;
1380			unsigned int fraglen;
1381			unsigned int fraggap;
1382			unsigned int alloclen;
1383alloc_new_skb:
1384			/* There's no room in the current skb */
1385			if (skb)
1386				fraggap = skb->len - maxfraglen;
1387			else
1388				fraggap = 0;
1389			/* update mtu and maxfraglen if necessary */
1390			if (!skb || !skb_prev)
1391				ip6_append_data_mtu(&mtu, &maxfraglen,
1392						    fragheaderlen, skb, rt,
1393						    orig_mtu);
1394
1395			skb_prev = skb;
1396
1397			/*
1398			 * If remaining data exceeds the mtu,
1399			 * we know we need more fragment(s).
1400			 */
1401			datalen = length + fraggap;
1402
1403			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1404				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1405			if ((flags & MSG_MORE) &&
1406			    !(rt->dst.dev->features&NETIF_F_SG))
1407				alloclen = mtu;
1408			else
1409				alloclen = datalen + fragheaderlen;
1410
1411			alloclen += dst_exthdrlen;
1412
1413			if (datalen != length + fraggap) {
1414				/*
1415				 * this is not the last fragment, the trailer
1416				 * space is regarded as data space.
1417				 */
1418				datalen += rt->dst.trailer_len;
1419			}
1420
1421			alloclen += rt->dst.trailer_len;
1422			fraglen = datalen + fragheaderlen;
1423
1424			/*
1425			 * We just reserve space for fragment header.
1426			 * Note: this may be overallocation if the message
1427			 * (without MSG_MORE) fits into the MTU.
1428			 */
1429			alloclen += sizeof(struct frag_hdr);
1430
 
 
 
 
 
1431			if (transhdrlen) {
1432				skb = sock_alloc_send_skb(sk,
1433						alloclen + hh_len,
1434						(flags & MSG_DONTWAIT), &err);
1435			} else {
1436				skb = NULL;
1437				if (atomic_read(&sk->sk_wmem_alloc) <=
1438				    2 * sk->sk_sndbuf)
1439					skb = sock_wmalloc(sk,
1440							   alloclen + hh_len, 1,
1441							   sk->sk_allocation);
1442				if (unlikely(!skb))
1443					err = -ENOBUFS;
1444			}
1445			if (!skb)
1446				goto error;
1447			/*
1448			 *	Fill in the control structures
1449			 */
1450			skb->protocol = htons(ETH_P_IPV6);
1451			skb->ip_summed = csummode;
1452			skb->csum = 0;
1453			/* reserve for fragmentation and ipsec header */
1454			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1455				    dst_exthdrlen);
1456
1457			/* Only the initial fragment is time stamped */
1458			skb_shinfo(skb)->tx_flags = tx_flags;
1459			tx_flags = 0;
1460			skb_shinfo(skb)->tskey = tskey;
1461			tskey = 0;
1462
1463			/*
1464			 *	Find where to start putting bytes
1465			 */
1466			data = skb_put(skb, fraglen);
1467			skb_set_network_header(skb, exthdrlen);
1468			data += fragheaderlen;
1469			skb->transport_header = (skb->network_header +
1470						 fragheaderlen);
1471			if (fraggap) {
1472				skb->csum = skb_copy_and_csum_bits(
1473					skb_prev, maxfraglen,
1474					data + transhdrlen, fraggap, 0);
1475				skb_prev->csum = csum_sub(skb_prev->csum,
1476							  skb->csum);
1477				data += fraggap;
1478				pskb_trim_unique(skb_prev, maxfraglen);
1479			}
1480			copy = datalen - transhdrlen - fraggap;
1481
1482			if (copy < 0) {
1483				err = -EINVAL;
1484				kfree_skb(skb);
1485				goto error;
1486			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1487				err = -EFAULT;
1488				kfree_skb(skb);
1489				goto error;
1490			}
1491
1492			offset += copy;
1493			length -= datalen - fraggap;
1494			transhdrlen = 0;
1495			exthdrlen = 0;
1496			dst_exthdrlen = 0;
1497
 
 
 
1498			/*
1499			 * Put the packet on the pending queue
1500			 */
 
 
 
 
 
1501			__skb_queue_tail(queue, skb);
1502			continue;
1503		}
1504
1505		if (copy > length)
1506			copy = length;
1507
1508		if (!(rt->dst.dev->features&NETIF_F_SG)) {
 
1509			unsigned int off;
1510
1511			off = skb->len;
1512			if (getfrag(from, skb_put(skb, copy),
1513						offset, copy, off, skb) < 0) {
1514				__skb_trim(skb, off);
1515				err = -EFAULT;
1516				goto error;
1517			}
1518		} else {
1519			int i = skb_shinfo(skb)->nr_frags;
1520
1521			err = -ENOMEM;
1522			if (!sk_page_frag_refill(sk, pfrag))
1523				goto error;
1524
1525			if (!skb_can_coalesce(skb, i, pfrag->page,
1526					      pfrag->offset)) {
1527				err = -EMSGSIZE;
1528				if (i == MAX_SKB_FRAGS)
1529					goto error;
1530
1531				__skb_fill_page_desc(skb, i, pfrag->page,
1532						     pfrag->offset, 0);
1533				skb_shinfo(skb)->nr_frags = ++i;
1534				get_page(pfrag->page);
1535			}
1536			copy = min_t(int, copy, pfrag->size - pfrag->offset);
1537			if (getfrag(from,
1538				    page_address(pfrag->page) + pfrag->offset,
1539				    offset, copy, skb->len, skb) < 0)
1540				goto error_efault;
1541
1542			pfrag->offset += copy;
1543			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1544			skb->len += copy;
1545			skb->data_len += copy;
1546			skb->truesize += copy;
1547			atomic_add(copy, &sk->sk_wmem_alloc);
1548		}
1549		offset += copy;
1550		length -= copy;
1551	}
1552
 
 
1553	return 0;
1554
1555error_efault:
1556	err = -EFAULT;
1557error:
1558	cork->length -= length;
1559	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
 
1560	return err;
1561}
1562
1563int ip6_append_data(struct sock *sk,
1564		    int getfrag(void *from, char *to, int offset, int len,
1565				int odd, struct sk_buff *skb),
1566		    void *from, int length, int transhdrlen, int hlimit,
1567		    int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1568		    struct rt6_info *rt, unsigned int flags, int dontfrag)
 
1569{
1570	struct inet_sock *inet = inet_sk(sk);
1571	struct ipv6_pinfo *np = inet6_sk(sk);
1572	int exthdrlen;
1573	int err;
1574
1575	if (flags&MSG_PROBE)
1576		return 0;
1577	if (skb_queue_empty(&sk->sk_write_queue)) {
1578		/*
1579		 * setup for corking
1580		 */
1581		err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
1582				     tclass, opt, rt, fl6);
1583		if (err)
1584			return err;
1585
1586		exthdrlen = (opt ? opt->opt_flen : 0);
1587		length += exthdrlen;
1588		transhdrlen += exthdrlen;
1589	} else {
1590		fl6 = &inet->cork.fl.u.ip6;
1591		transhdrlen = 0;
1592	}
1593
1594	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1595				 &np->cork, sk_page_frag(sk), getfrag,
1596				 from, length, transhdrlen, flags, dontfrag);
1597}
1598EXPORT_SYMBOL_GPL(ip6_append_data);
1599
1600static void ip6_cork_release(struct inet_cork_full *cork,
1601			     struct inet6_cork *v6_cork)
1602{
1603	if (v6_cork->opt) {
1604		kfree(v6_cork->opt->dst0opt);
1605		kfree(v6_cork->opt->dst1opt);
1606		kfree(v6_cork->opt->hopopt);
1607		kfree(v6_cork->opt->srcrt);
1608		kfree(v6_cork->opt);
1609		v6_cork->opt = NULL;
1610	}
1611
1612	if (cork->base.dst) {
1613		dst_release(cork->base.dst);
1614		cork->base.dst = NULL;
1615		cork->base.flags &= ~IPCORK_ALLFRAG;
1616	}
1617	memset(&cork->fl, 0, sizeof(cork->fl));
1618}
1619
1620struct sk_buff *__ip6_make_skb(struct sock *sk,
1621			       struct sk_buff_head *queue,
1622			       struct inet_cork_full *cork,
1623			       struct inet6_cork *v6_cork)
1624{
1625	struct sk_buff *skb, *tmp_skb;
1626	struct sk_buff **tail_skb;
1627	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1628	struct ipv6_pinfo *np = inet6_sk(sk);
1629	struct net *net = sock_net(sk);
1630	struct ipv6hdr *hdr;
1631	struct ipv6_txoptions *opt = v6_cork->opt;
1632	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1633	struct flowi6 *fl6 = &cork->fl.u.ip6;
1634	unsigned char proto = fl6->flowi6_proto;
1635
1636	skb = __skb_dequeue(queue);
1637	if (!skb)
1638		goto out;
1639	tail_skb = &(skb_shinfo(skb)->frag_list);
1640
1641	/* move skb->data to ip header from ext header */
1642	if (skb->data < skb_network_header(skb))
1643		__skb_pull(skb, skb_network_offset(skb));
1644	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1645		__skb_pull(tmp_skb, skb_network_header_len(skb));
1646		*tail_skb = tmp_skb;
1647		tail_skb = &(tmp_skb->next);
1648		skb->len += tmp_skb->len;
1649		skb->data_len += tmp_skb->len;
1650		skb->truesize += tmp_skb->truesize;
1651		tmp_skb->destructor = NULL;
1652		tmp_skb->sk = NULL;
1653	}
1654
1655	/* Allow local fragmentation. */
1656	skb->ignore_df = ip6_sk_ignore_df(sk);
1657
1658	*final_dst = fl6->daddr;
1659	__skb_pull(skb, skb_network_header_len(skb));
1660	if (opt && opt->opt_flen)
1661		ipv6_push_frag_opts(skb, opt, &proto);
1662	if (opt && opt->opt_nflen)
1663		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1664
1665	skb_push(skb, sizeof(struct ipv6hdr));
1666	skb_reset_network_header(skb);
1667	hdr = ipv6_hdr(skb);
1668
1669	ip6_flow_hdr(hdr, v6_cork->tclass,
1670		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
1671					np->autoflowlabel, fl6));
1672	hdr->hop_limit = v6_cork->hop_limit;
1673	hdr->nexthdr = proto;
1674	hdr->saddr = fl6->saddr;
1675	hdr->daddr = *final_dst;
1676
1677	skb->priority = sk->sk_priority;
1678	skb->mark = sk->sk_mark;
1679
1680	skb_dst_set(skb, dst_clone(&rt->dst));
1681	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1682	if (proto == IPPROTO_ICMPV6) {
1683		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1684
1685		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1686		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1687	}
1688
1689	ip6_cork_release(cork, v6_cork);
1690out:
1691	return skb;
1692}
1693
1694int ip6_send_skb(struct sk_buff *skb)
1695{
1696	struct net *net = sock_net(skb->sk);
1697	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1698	int err;
1699
1700	err = ip6_local_out(net, skb->sk, skb);
1701	if (err) {
1702		if (err > 0)
1703			err = net_xmit_errno(err);
1704		if (err)
1705			IP6_INC_STATS(net, rt->rt6i_idev,
1706				      IPSTATS_MIB_OUTDISCARDS);
1707	}
1708
1709	return err;
1710}
1711
/*
 * ip6_push_pending_frames - build the pending datagram with
 * ip6_finish_skb() and send it.
 *
 * Returns 0 when ip6_finish_skb() yields no skb, otherwise the result of
 * ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *pending = ip6_finish_skb(sk);

	return pending ? ip6_send_skb(pending) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1723
1724static void __ip6_flush_pending_frames(struct sock *sk,
1725				       struct sk_buff_head *queue,
1726				       struct inet_cork_full *cork,
1727				       struct inet6_cork *v6_cork)
1728{
1729	struct sk_buff *skb;
1730
1731	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1732		if (skb_dst(skb))
1733			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1734				      IPSTATS_MIB_OUTDISCARDS);
1735		kfree_skb(skb);
1736	}
1737
1738	ip6_cork_release(cork, v6_cork);
1739}
1740
1741void ip6_flush_pending_frames(struct sock *sk)
1742{
1743	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1744				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1745}
1746EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1747
1748struct sk_buff *ip6_make_skb(struct sock *sk,
1749			     int getfrag(void *from, char *to, int offset,
1750					 int len, int odd, struct sk_buff *skb),
1751			     void *from, int length, int transhdrlen,
1752			     int hlimit, int tclass,
1753			     struct ipv6_txoptions *opt, struct flowi6 *fl6,
1754			     struct rt6_info *rt, unsigned int flags,
1755			     int dontfrag)
1756{
1757	struct inet_cork_full cork;
1758	struct inet6_cork v6_cork;
1759	struct sk_buff_head queue;
1760	int exthdrlen = (opt ? opt->opt_flen : 0);
1761	int err;
1762
1763	if (flags & MSG_PROBE)
1764		return NULL;
1765
1766	__skb_queue_head_init(&queue);
1767
1768	cork.base.flags = 0;
1769	cork.base.addr = 0;
1770	cork.base.opt = NULL;
 
1771	v6_cork.opt = NULL;
1772	err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
1773	if (err)
 
1774		return ERR_PTR(err);
1775
1776	if (dontfrag < 0)
1777		dontfrag = inet6_sk(sk)->dontfrag;
1778
1779	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1780				&current->task_frag, getfrag, from,
1781				length + exthdrlen, transhdrlen + exthdrlen,
1782				flags, dontfrag);
1783	if (err) {
1784		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1785		return ERR_PTR(err);
1786	}
1787
1788	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1789}